src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 2001, 2002, 2003, 2004, 2005,
   3                  2006, 2007, 2008, 2009 Free Software Foundation, Inc.
   4    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   5      2005, 2006, 2007, 2008, 2009
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H14PRO021
   8    Copyright (C) 2003
   9      National Institute of Advanced Industrial Science and Technology (AIST)
  10      Registration Number H13PRO009
  11
  12 This file is part of GNU Emacs.
  13
  14 GNU Emacs is free software: you can redistribute it and/or modify
  15 it under the terms of the GNU General Public License as published by
  16 the Free Software Foundation, either version 3 of the License, or
  17 (at your option) any later version.
  18
  19 GNU Emacs is distributed in the hope that it will be useful,
  20 but WITHOUT ANY WARRANTY; without even the implied warranty of
  21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22 GNU General Public License for more details.
  23
  24 You should have received a copy of the GNU General Public License
  25 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  26
  27 /*** TABLE OF CONTENTS ***
  28
  29   0. General comments
  30   1. Preamble
  31   2. Emacs' internal format (emacs-utf-8) handlers
  32   3. UTF-8 handlers
  33   4. UTF-16 handlers
  34   5. Charset-base coding systems handlers
  35   6. emacs-mule (old Emacs' internal format) handlers
  36   7. ISO2022 handlers
  37   8. Shift-JIS and BIG5 handlers
  38   9. CCL handlers
  39   10. C library functions
  40   11. Emacs Lisp library functions
  41   12. Postamble
  42
  43 */
  44
  45 /*** 0. General comments ***
  46
  47
  48 CODING SYSTEM
  49
  50   A coding system is an object for an encoding mechanism that contains
  51   information about how to convert byte sequences to character
  52   sequences and vice versa.  When we say "decode", it means converting
  53   a byte sequence of a specific coding system into a character
  54   sequence that is represented by Emacs' internal coding system
  55   `emacs-utf-8', and when we say "encode", it means converting a
  56   character sequence of emacs-utf-8 to a byte sequence of a specific
  57   coding system.
  58
  59   In Emacs Lisp, a coding system is represented by a Lisp symbol.  In
  60   C level, a coding system is represented by a vector of attributes
  61   stored in the hash table Vcharset_hash_table.  The conversion from
  62   coding system symbol to attributes vector is done by looking up
  63   Vcharset_hash_table by the symbol.
  64
  65   Coding systems are classified into the following types depending on
  66   the encoding mechanism.  Here's a brief description of the types.
  67
  68   o UTF-8
  69
  70   o UTF-16
  71
  72   o Charset-base coding system
  73
  74   A coding system defined by one or more (coded) character sets.
  75   Decoding and encoding are done by a code converter defined for each
  76   character set.
  77
  78   o Old Emacs internal format (emacs-mule)
  79
  80   The coding system adopted by old versions of Emacs (20 and 21).
  81
  82   o ISO2022-base coding system
  83
  84   The most famous coding system for multiple character sets.  X's
  85   Compound Text, various EUCs (Extended Unix Code), and coding systems
  86   used in the Internet communication such as ISO-2022-JP are all
  87   variants of ISO2022.
  88
  89   o SJIS (or Shift-JIS or MS-Kanji-Code)
  90
  91   A coding system to encode character sets: ASCII, JISX0201, and
  92   JISX0208.  Widely used for PC's in Japan.  Details are described in
  93   section 8.
  94
  95   o BIG5
  96
  97   A coding system to encode character sets: ASCII and Big5.  Widely
  98   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  99   described in section 8.  In this file, when we write "big5" (all
 100   lowercase), we mean the coding system, and when we write "Big5"
 101   (capitalized), we mean the character set.
 102
 103   o CCL
 104
 105   If a user wants to decode/encode text encoded in a coding system
 106   not listed above, he can supply a decoder and an encoder for it in
 107   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 108   program while decoding/encoding.
 109
 110   o Raw-text
 111
 112   A coding system for text containing raw eight-bit data.  Emacs
 113   treats each byte of source text as a character (except for
 114   end-of-line conversion).
 115
 116   o No-conversion
 117
 118   Like raw text, but don't do end-of-line conversion.
 119
 120
 121 END-OF-LINE FORMAT
 122
 123   How text end-of-line is encoded depends on operating system.  For
 124   instance, Unix's format is just one byte of LF (line-feed) code,
 125   whereas DOS's format is two-byte sequence of `carriage-return' and
 126   `line-feed' codes.  MacOS's format is usually one byte of
 127   `carriage-return'.
 128
 129   Since text character encoding and end-of-line encoding are
 130   independent, any coding system described above can take any format
 131   of end-of-line (except for no-conversion).
 132
 133 STRUCT CODING_SYSTEM
 134
 135   Before using a coding system for code conversion (i.e. decoding and
 136   encoding), we setup a structure of type `struct coding_system'.
 137   This structure keeps various information about a specific code
 138   conversion (e.g. the location of source and destination data).
 139
 140 */
 141
 142 /* COMMON MACROS */
 143
 144
 145 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 146
 147   These functions check if a byte sequence specified as a source in
 148   CODING conforms to the format of XXX, and update the members of
 149   DETECT_INFO.
 150
 151   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 152
 153   Below is the template of these functions.  */
 154
 155 #if 0
 156 static int
 157 detect_coding_XXX (coding, detect_info)
 158      struct coding_system *coding;
 159      struct coding_detection_info *detect_info;
 160 {
 161   const unsigned char *src = coding->source;
 162   const unsigned char *src_end = coding->source + coding->src_bytes;
 163   int multibytep = coding->src_multibyte;
 164   int consumed_chars = 0;
 165   int found = 0;
 166   ...;
 167
 168   while (1)
 169     {
 170       /* Get one byte from the source.  If the souce is exausted, jump
 171          to no_more_source:.  */
 172       ONE_MORE_BYTE (c);
 173
 174       if (! __C_conforms_to_XXX___ (c))
 175         break;
 176       if (! __C_strongly_suggests_XXX__ (c))
 177         found = CATEGORY_MASK_XXX;
 178     }
 179   /* The byte sequence is invalid for XXX.  */
 180   detect_info->rejected |= CATEGORY_MASK_XXX;
 181   return 0;
 182
 183  no_more_source:
 184   /* The source exausted successfully.  */
 185   detect_info->found |= found;
 186   return 1;
 187 }
 188 #endif
 189
 190 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 191
 192   These functions decode a byte sequence specified as a source by
 193   CODING.  The resulting multibyte text goes to a place pointed to by
 194   CODING->charbuf, the length of which should not exceed
 195   CODING->charbuf_size;
 196
 197   These functions set the information of original and decoded texts in
 198   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 199   They also set CODING->result to one of CODING_RESULT_XXX indicating
 200   how the decoding is finished.
 201
 202   Below is the template of these functions.  */
 203
 204 #if 0
 205 static void
 206 decode_coding_XXXX (coding)
 207      struct coding_system *coding;
 208 {
 209   const unsigned char *src = coding->source + coding->consumed;
 210   const unsigned char *src_end = coding->source + coding->src_bytes;
 211   /* SRC_BASE remembers the start position in source in each loop.
 212      The loop will be exited when there's not enough source code, or
 213      when there's no room in CHARBUF for a decoded character.  */
 214   const unsigned char *src_base;
 215   /* A buffer to produce decoded characters.  */
 216   int *charbuf = coding->charbuf + coding->charbuf_used;
 217   int *charbuf_end = coding->charbuf + coding->charbuf_size;
 218   int multibytep = coding->src_multibyte;
 219
 220   while (1)
 221     {
 222       src_base = src;
 223       if (charbuf < charbuf_end)
 224         /* No more room to produce a decoded character.  */
 225         break;
 226       ONE_MORE_BYTE (c);
 227       /* Decode it. */
 228     }
 229
 230  no_more_source:
 231   if (src_base < src_end
 232       && coding->mode & CODING_MODE_LAST_BLOCK)
 233     /* If the source ends by partial bytes to construct a character,
 234        treat them as eight-bit raw data.  */
 235     while (src_base < src_end && charbuf < charbuf_end)
 236       *charbuf++ = *src_base++;
 237   /* Remember how many bytes and characters we consumed.  If the
 238      source is multibyte, the bytes and chars are not identical.  */
 239   coding->consumed = coding->consumed_char = src_base - coding->source;
 240   /* Remember how many characters we produced.  */
 241   coding->charbuf_used = charbuf - coding->charbuf;
 242 }
 243 #endif
 244
 245 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 246
 247   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 248   internal multibyte format by CODING.  The resulting byte sequence
 249   goes to a place pointed to by DESTINATION, the length of which
 250   should not exceed DST_BYTES.
 251
 252   These functions set the information of original and encoded texts in
 253   the members produced, produced_char, consumed, and consumed_char of
 254   the structure *CODING.  They also set the member result to one of
 255   CODING_RESULT_XXX indicating how the encoding finished.
 256
 257   DST_BYTES zero means that source area and destination area are
 258   overlapped, which means that we can produce a encoded text until it
 259   reaches at the head of not-yet-encoded source text.
 260
 261   Below is a template of these functions.  */
 262 #if 0
 263 static void
 264 encode_coding_XXX (coding)
 265      struct coding_system *coding;
 266 {
 267   int multibytep = coding->dst_multibyte;
 268   int *charbuf = coding->charbuf;
 269   int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
 270   unsigned char *dst = coding->destination + coding->produced;
 271   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 272   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 273   int produced_chars = 0;
 274
 275   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 276     {
 277       int c = *charbuf;
 278       /* Encode C into DST, and increment DST.  */
 279     }
 280  label_no_more_destination:
 281   /* How many chars and bytes we produced.  */
 282   coding->produced_char += produced_chars;
 283   coding->produced = dst - coding->destination;
 284 }
 285 #endif
 286
 287 \f
 288 /*** 1. Preamble ***/
 289
 290 #include <config.h>
 291 #include <stdio.h>
 292
 293 #include "lisp.h"
 294 #include "buffer.h"
 295 #include "character.h"
 296 #include "charset.h"
 297 #include "ccl.h"
 298 #include "composite.h"
 299 #include "coding.h"
 300 #include "window.h"
 301 #include "frame.h"
 302 #include "termhooks.h"
 303
 304 Lisp_Object Vcoding_system_hash_table;
 305
 306 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 307 Lisp_Object Qunix, Qdos;
 308 extern Lisp_Object Qmac;        /* frame.c */
 309 Lisp_Object Qbuffer_file_coding_system;
 310 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 311 Lisp_Object Qdefault_char;
 312 Lisp_Object Qno_conversion, Qundecided;
 313 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 314 Lisp_Object Qbig, Qlittle;
 315 Lisp_Object Qcoding_system_history;
 316 Lisp_Object Qvalid_codes;
 317 Lisp_Object QCcategory, QCmnemonic, QCdefault_char;
 318 Lisp_Object QCdecode_translation_table, QCencode_translation_table;
 319 Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
 320 Lisp_Object QCascii_compatible_p;
 321
 322 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 323 Lisp_Object Qcall_process, Qcall_process_region;
 324 Lisp_Object Qstart_process, Qopen_network_stream;
 325 Lisp_Object Qtarget_idx;
 326
 327 Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
 328 Lisp_Object Qinterrupted, Qinsufficient_memory;
 329
 330 extern Lisp_Object Qcompletion_ignore_case;
 331
 332 /* If a symbol has this property, evaluate the value to define the
 333    symbol as a coding system.  */
 334 static Lisp_Object Qcoding_system_define_form;
 335
 336 int coding_system_require_warning;
 337
 338 Lisp_Object Vselect_safe_coding_system_function;
 339
 340 /* Mnemonic string for each format of end-of-line.  */
 341 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 342 /* Mnemonic string to indicate format of end-of-line is not yet
 343    decided.  */
 344 Lisp_Object eol_mnemonic_undecided;
 345
 346 /* Format of end-of-line decided by system.  This is Qunix on
 347    Unix and Mac, Qdos on DOS/Windows.
 348    This has an effect only for external encoding (i.e. for output to
 349    file and process), not for in-buffer or Lisp string encoding.  */
 350 static Lisp_Object system_eol_type;
 351
 352 #ifdef emacs
 353
 354 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 355
 356 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 357
 358 /* Coding system emacs-mule and raw-text are for converting only
 359    end-of-line format.  */
 360 Lisp_Object Qemacs_mule, Qraw_text;
 361 Lisp_Object Qutf_8_emacs;
 362
 363 /* Coding-systems are handed between Emacs Lisp programs and C internal
 364    routines by the following three variables.  */
 365 /* Coding-system for reading files and receiving data from process.  */
 366 Lisp_Object Vcoding_system_for_read;
 367 /* Coding-system for writing files and sending data to process.  */
 368 Lisp_Object Vcoding_system_for_write;
 369 /* Coding-system actually used in the latest I/O.  */
 370 Lisp_Object Vlast_coding_system_used;
 371 /* Set to non-nil when an error is detected while code conversion.  */
 372 Lisp_Object Vlast_code_conversion_error;
 373 /* A vector of length 256 which contains information about special
 374    Latin codes (especially for dealing with Microsoft codes).  */
 375 Lisp_Object Vlatin_extra_code_table;
 376
 377 /* Flag to inhibit code conversion of end-of-line format.  */
 378 int inhibit_eol_conversion;
 379
 380 /* Flag to inhibit ISO2022 escape sequence detection.  */
 381 int inhibit_iso_escape_detection;
 382
 383 /* Flag to inhibit detection of binary files through null bytes.  */
 384 int inhibit_null_byte_detection;
 385
 386 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 387 int inherit_process_coding_system;
 388
 389 /* Coding system to be used to encode text for terminal display when
 390    terminal coding system is nil.  */
 391 struct coding_system safe_terminal_coding;
 392
 393 Lisp_Object Vfile_coding_system_alist;
 394 Lisp_Object Vprocess_coding_system_alist;
 395 Lisp_Object Vnetwork_coding_system_alist;
 396
 397 Lisp_Object Vlocale_coding_system;
 398
 399 #endif /* emacs */
 400
 401 /* Flag to tell if we look up translation table on character code
 402    conversion.  */
 403 Lisp_Object Venable_character_translation;
 404 /* Standard translation table to look up on decoding (reading).  */
 405 Lisp_Object Vstandard_translation_table_for_decode;
 406 /* Standard translation table to look up on encoding (writing).  */
 407 Lisp_Object Vstandard_translation_table_for_encode;
 408
 409 Lisp_Object Qtranslation_table;
 410 Lisp_Object Qtranslation_table_id;
 411 Lisp_Object Qtranslation_table_for_decode;
 412 Lisp_Object Qtranslation_table_for_encode;
 413
 414 /* Alist of charsets vs revision number.  */
 415 static Lisp_Object Vcharset_revision_table;
 416
 417 /* Default coding systems used for process I/O.  */
 418 Lisp_Object Vdefault_process_coding_system;
 419
 420 /* Char table for translating Quail and self-inserting input.  */
 421 Lisp_Object Vtranslation_table_for_input;
 422
 423 /* Two special coding systems.  */
 424 Lisp_Object Vsjis_coding_system;
 425 Lisp_Object Vbig5_coding_system;
 426
 427 /* ISO2022 section */
 428
 429 #define CODING_ISO_INITIAL(coding, reg)                 \
 430   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 431                      coding_attr_iso_initial),          \
 432                reg)))
 433
 434
 435 #define CODING_ISO_REQUEST(coding, charset_id)          \
 436   (((charset_id) <= (coding)->max_charset_id            \
 437     ? ((coding)->safe_charsets[charset_id] != 255       \
 438        ? (coding)->safe_charsets[charset_id]            \
 439        : -1)                                            \
 440     : -1))
 441
 442
 443 #define CODING_ISO_FLAGS(coding)        \
 444   ((coding)->spec.iso_2022.flags)
 445 #define CODING_ISO_DESIGNATION(coding, reg)     \
 446   ((coding)->spec.iso_2022.current_designation[reg])
 447 #define CODING_ISO_INVOCATION(coding, plane)    \
 448   ((coding)->spec.iso_2022.current_invocation[plane])
 449 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 450   ((coding)->spec.iso_2022.single_shifting)
 451 #define CODING_ISO_BOL(coding)  \
 452   ((coding)->spec.iso_2022.bol)
 453 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 454   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 455 #define CODING_ISO_CMP_STATUS(coding)   \
 456   (&(coding)->spec.iso_2022.cmp_status)
 457 #define CODING_ISO_EXTSEGMENT_LEN(coding)       \
 458   ((coding)->spec.iso_2022.ctext_extended_segment_len)
 459 #define CODING_ISO_EMBEDDED_UTF_8(coding)       \
 460   ((coding)->spec.iso_2022.embedded_utf_8)
 461
 462 /* Control characters of ISO2022.  */
 463                         /* code */      /* function */
 464 #define ISO_CODE_LF     0x0A            /* line-feed */
 465 #define ISO_CODE_CR     0x0D            /* carriage-return */
 466 #define ISO_CODE_SO     0x0E            /* shift-out */
 467 #define ISO_CODE_SI     0x0F            /* shift-in */
 468 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 469 #define ISO_CODE_ESC    0x1B            /* escape */
 470 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 471 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 472 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 473
 474 /* All code (1-byte) of ISO2022 is classified into one of the
 475    followings.  */
 476 enum iso_code_class_type
 477   {
 478     ISO_control_0,              /* Control codes in the range
 479                                    0x00..0x1F and 0x7F, except for the
 480                                    following 5 codes.  */
 481     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 482     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 483     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 484     ISO_escape,                 /* ISO_CODE_SO (0x1B) */
 485     ISO_control_1,              /* Control codes in the range
 486                                    0x80..0x9F, except for the
 487                                    following 3 codes.  */
 488     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 489     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 490     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 491     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 492     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 493     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 494     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 495   };
 496
 497 /** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
 498     `iso-flags' attribute of an iso2022 coding system.  */
 499
 500 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 501    instead of the correct short-form sequence (e.g. ESC $ A).  */
 502 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 503
 504 /* If set, reset graphic planes and registers at end-of-line to the
 505    initial state.  */
 506 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 507
 508 /* If set, reset graphic planes and registers before any control
 509    characters to the initial state.  */
 510 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 511
 512 /* If set, encode by 7-bit environment.  */
 513 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 514
 515 /* If set, use locking-shift function.  */
 516 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 517
 518 /* If set, use single-shift function.  Overwrite
 519    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 520 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 521
 522 /* If set, use designation escape sequence.  */
 523 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 524
 525 /* If set, produce revision number sequence.  */
 526 #define CODING_ISO_FLAG_REVISION        0x0080
 527
 528 /* If set, produce ISO6429's direction specifying sequence.  */
 529 #define CODING_ISO_FLAG_DIRECTION       0x0100
 530
 531 /* If set, assume designation states are reset at beginning of line on
 532    output.  */
 533 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 534
 535 /* If set, designation sequence should be placed at beginning of line
 536    on output.  */
 537 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 538
 539 /* If set, do not encode unsafe charactes on output.  */
 540 #define CODING_ISO_FLAG_SAFE            0x0800
 541
 542 /* If set, extra latin codes (128..159) are accepted as a valid code
 543    on input.  */
 544 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 545
 546 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 547
 548 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 549
 550 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 551
 552 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 553
 554 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 555
 556 /* A character to be produced on output if encoding of the original
 557    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 558 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 559
 560 /* UTF-8 section */
 561 #define CODING_UTF_8_BOM(coding)        \
 562   ((coding)->spec.utf_8_bom)
 563
 564 /* UTF-16 section */
 565 #define CODING_UTF_16_BOM(coding)       \
 566   ((coding)->spec.utf_16.bom)
 567
 568 #define CODING_UTF_16_ENDIAN(coding)    \
 569   ((coding)->spec.utf_16.endian)
 570
 571 #define CODING_UTF_16_SURROGATE(coding) \
 572   ((coding)->spec.utf_16.surrogate)
 573
 574
 575 /* CCL section */
 576 #define CODING_CCL_DECODER(coding)      \
 577   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 578 #define CODING_CCL_ENCODER(coding)      \
 579   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 580 #define CODING_CCL_VALIDS(coding)                                          \
 581   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 582
 583 /* Index for each coding category in `coding_categories' */
 584
 585 enum coding_category
 586   {
 587     coding_category_iso_7,
 588     coding_category_iso_7_tight,
 589     coding_category_iso_8_1,
 590     coding_category_iso_8_2,
 591     coding_category_iso_7_else,
 592     coding_category_iso_8_else,
 593     coding_category_utf_8_auto,
 594     coding_category_utf_8_nosig,
 595     coding_category_utf_8_sig,
 596     coding_category_utf_16_auto,
 597     coding_category_utf_16_be,
 598     coding_category_utf_16_le,
 599     coding_category_utf_16_be_nosig,
 600     coding_category_utf_16_le_nosig,
 601     coding_category_charset,
 602     coding_category_sjis,
 603     coding_category_big5,
 604     coding_category_ccl,
 605     coding_category_emacs_mule,
 606     /* All above are targets of code detection.  */
 607     coding_category_raw_text,
 608     coding_category_undecided,
 609     coding_category_max
 610   };
 611
 612 /* Definitions of flag bits used in detect_coding_XXXX.  */
 613 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 614 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 615 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 616 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 617 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 618 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 619 #define CATEGORY_MASK_UTF_8_AUTO        (1 << coding_category_utf_8_auto)
 620 #define CATEGORY_MASK_UTF_8_NOSIG       (1 << coding_category_utf_8_nosig)
 621 #define CATEGORY_MASK_UTF_8_SIG         (1 << coding_category_utf_8_sig)
 622 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 623 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 624 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 625 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 626 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 627 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 628 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 629 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 630 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 631 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 632 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 633
 634 /* This value is returned if detect_coding_mask () find nothing other
 635    than ASCII characters.  */
 636 #define CATEGORY_MASK_ANY               \
 637   (CATEGORY_MASK_ISO_7                  \
 638    | CATEGORY_MASK_ISO_7_TIGHT          \
 639    | CATEGORY_MASK_ISO_8_1              \
 640    | CATEGORY_MASK_ISO_8_2              \
 641    | CATEGORY_MASK_ISO_7_ELSE           \
 642    | CATEGORY_MASK_ISO_8_ELSE           \
 643    | CATEGORY_MASK_UTF_8_AUTO           \
 644    | CATEGORY_MASK_UTF_8_NOSIG          \
 645    | CATEGORY_MASK_UTF_8_SIG            \
 646    | CATEGORY_MASK_UTF_16_AUTO          \
 647    | CATEGORY_MASK_UTF_16_BE            \
 648    | CATEGORY_MASK_UTF_16_LE            \
 649    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 650    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 651    | CATEGORY_MASK_CHARSET              \
 652    | CATEGORY_MASK_SJIS                 \
 653    | CATEGORY_MASK_BIG5                 \
 654    | CATEGORY_MASK_CCL                  \
 655    | CATEGORY_MASK_EMACS_MULE)
 656
 657
 658 #define CATEGORY_MASK_ISO_7BIT \
 659   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 660
 661 #define CATEGORY_MASK_ISO_8BIT \
 662   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 663
 664 #define CATEGORY_MASK_ISO_ELSE \
 665   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 666
 667 #define CATEGORY_MASK_ISO_ESCAPE        \
 668   (CATEGORY_MASK_ISO_7                  \
 669    | CATEGORY_MASK_ISO_7_TIGHT          \
 670    | CATEGORY_MASK_ISO_7_ELSE           \
 671    | CATEGORY_MASK_ISO_8_ELSE)
 672
 673 #define CATEGORY_MASK_ISO       \
 674   (  CATEGORY_MASK_ISO_7BIT     \
 675      | CATEGORY_MASK_ISO_8BIT   \
 676      | CATEGORY_MASK_ISO_ELSE)
 677
 678 #define CATEGORY_MASK_UTF_16            \
 679   (CATEGORY_MASK_UTF_16_AUTO            \
 680    | CATEGORY_MASK_UTF_16_BE            \
 681    | CATEGORY_MASK_UTF_16_LE            \
 682    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 683    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 684
 685 #define CATEGORY_MASK_UTF_8     \
 686   (CATEGORY_MASK_UTF_8_AUTO     \
 687    | CATEGORY_MASK_UTF_8_NOSIG  \
 688    | CATEGORY_MASK_UTF_8_SIG)
 689
 690 /* List of symbols `coding-category-xxx' ordered by priority.  This
 691    variable is exposed to Emacs Lisp.  */
 692 static Lisp_Object Vcoding_category_list;
 693
 694 /* Table of coding categories (Lisp symbols).  This variable is for
 695    internal use oly.  */
 696 static Lisp_Object Vcoding_category_table;
 697
 698 /* Table of coding-categories ordered by priority.  */
 699 static enum coding_category coding_priorities[coding_category_max];
 700
 701 /* Nth element is a coding context for the coding system bound to the
 702    Nth coding category.  */
 703 static struct coding_system coding_categories[coding_category_max];
 704
 705 /*** Commonly used macros and functions ***/
 706
 707 #ifndef min
 708 #define min(a, b) ((a) < (b) ? (a) : (b))
 709 #endif
 710 #ifndef max
 711 #define max(a, b) ((a) > (b) ? (a) : (b))
 712 #endif
 713
 714 #define CODING_GET_INFO(coding, attrs, charset_list)    \
 715   do {                                                  \
 716     (attrs) = CODING_ID_ATTRS ((coding)->id);           \
 717     (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
 718   } while (0)
 719
 720
 721 /* Safely get one byte from the source text pointed by SRC which ends
 722    at SRC_END, and set C to that byte.  If there are not enough bytes
 723    in the source, it jumps to `no_more_source'.  If multibytep is
 724    nonzero, and a multibyte character is found at SRC, set C to the
 725    negative value of the character code.  The caller should declare
 726    and set these variables appropriately in advance:
 727         src, src_end, multibytep */
 728
 729 #define ONE_MORE_BYTE(c)                                \
 730   do {                                                  \
 731     if (src == src_end)                                 \
 732       {                                                 \
 733         if (src_base < src)                             \
 734           record_conversion_result                      \
 735             (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
 736         goto no_more_source;                            \
 737       }                                                 \
 738     c = *src++;                                         \
 739     if (multibytep && (c & 0x80))                       \
 740       {                                                 \
 741         if ((c & 0xFE) == 0xC0)                         \
 742           c = ((c & 1) << 6) | *src++;                  \
 743         else                                            \
 744           {                                             \
 745             src--;                                      \
 746             c = - string_char (src, &src, NULL);        \
 747             record_conversion_result                    \
 748               (coding, CODING_RESULT_INVALID_SRC);      \
 749           }                                             \
 750       }                                                 \
 751     consumed_chars++;                                   \
 752   } while (0)
 753
 754 /* Safely get two bytes from the source text pointed by SRC which ends
 755    at SRC_END, and set C1 and C2 to those bytes while skipping the
 756    heading multibyte characters.  If there are not enough bytes in the
 757    source, it jumps to `no_more_source'.  If multibytep is nonzero and
 758    a multibyte character is found for C2, set C2 to the negative value
 759    of the character code.  The caller should declare and set these
 760    variables appropriately in advance:
 761         src, src_end, multibytep
 762    It is intended that this macro is used in detect_coding_utf_16.  */
 763
 764 #define TWO_MORE_BYTES(c1, c2)                          \
 765   do {                                                  \
 766     do {                                                \
 767       if (src == src_end)                               \
 768         goto no_more_source;                            \
 769       c1 = *src++;                                      \
 770       if (multibytep && (c1 & 0x80))                    \
 771         {                                               \
 772           if ((c1 & 0xFE) == 0xC0)                      \
 773             c1 = ((c1 & 1) << 6) | *src++;              \
 774           else                                          \
 775             {                                           \
 776               src += BYTES_BY_CHAR_HEAD (c1) - 1;       \
 777               c1 = -1;                                  \
 778             }                                           \
 779         }                                               \
 780     } while (c1 < 0);                                   \
 781     if (src == src_end)                                 \
 782       goto no_more_source;                              \
 783     c2 = *src++;                                        \
 784     if (multibytep && (c2 & 0x80))                      \
 785       {                                                 \
 786         if ((c2 & 0xFE) == 0xC0)                        \
 787           c2 = ((c2 & 1) << 6) | *src++;                \
 788         else                                            \
 789           c2 = -1;                                      \
 790       }                                                 \
 791   } while (0)
 792
 793
 794 #define ONE_MORE_BYTE_NO_CHECK(c)                       \
 795   do {                                                  \
 796     c = *src++;                                         \
 797     if (multibytep && (c & 0x80))                       \
 798       {                                                 \
 799         if ((c & 0xFE) == 0xC0)                         \
 800           c = ((c & 1) << 6) | *src++;                  \
 801         else                                            \
 802           {                                             \
 803             src--;                                      \
 804             c = - string_char (src, &src, NULL);        \
 805             record_conversion_result                    \
 806               (coding, CODING_RESULT_INVALID_SRC);      \
 807           }                                             \
 808       }                                                 \
 809     consumed_chars++;                                   \
 810   } while (0)
 811
 812
 813 /* Store a byte C in the place pointed by DST and increment DST to the
 814    next free point, and increment PRODUCED_CHARS.  The caller should
 815    assure that C is 0..127, and declare and set the variable `dst'
 816    appropriately in advance.
 817 */
 818
 819
 820 #define EMIT_ONE_ASCII_BYTE(c)  \
 821   do {                          \
 822     produced_chars++;           \
 823     *dst++ = (c);               \
 824   } while (0)
 825
 826
 827 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2.  */
 828
 829 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 830   do {                                  \
 831     produced_chars += 2;                \
 832     *dst++ = (c1), *dst++ = (c2);       \
 833   } while (0)
 834
 835
 836 /* Store a byte C in the place pointed by DST and increment DST to the
 837    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 838    nonzero, store in an appropriate multibyte from.  The caller should
 839    declare and set the variables `dst' and `multibytep' appropriately
 840    in advance.  */
 841
 842 #define EMIT_ONE_BYTE(c)                \
 843   do {                                  \
 844     produced_chars++;                   \
 845     if (multibytep)                     \
 846       {                                 \
 847         int ch = (c);                   \
 848         if (ch >= 0x80)                 \
 849           ch = BYTE8_TO_CHAR (ch);      \
 850         CHAR_STRING_ADVANCE (ch, dst);  \
 851       }                                 \
 852     else                                \
 853       *dst++ = (c);                     \
 854   } while (0)
 855
 856
 857 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */
 858
 859 #define EMIT_TWO_BYTES(c1, c2)          \
 860   do {                                  \
 861     produced_chars += 2;                \
 862     if (multibytep)                     \
 863       {                                 \
 864         int ch;                         \
 865                                         \
 866         ch = (c1);                      \
 867         if (ch >= 0x80)                 \
 868           ch = BYTE8_TO_CHAR (ch);      \
 869         CHAR_STRING_ADVANCE (ch, dst);  \
 870         ch = (c2);                      \
 871         if (ch >= 0x80)                 \
 872           ch = BYTE8_TO_CHAR (ch);      \
 873         CHAR_STRING_ADVANCE (ch, dst);  \
 874       }                                 \
 875     else                                \
 876       {                                 \
 877         *dst++ = (c1);                  \
 878         *dst++ = (c2);                  \
 879       }                                 \
 880   } while (0)
 881
 882
 883 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 884   do {                                  \
 885     EMIT_ONE_BYTE (c1);                 \
 886     EMIT_TWO_BYTES (c2, c3);            \
 887   } while (0)
 888
 889
 890 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 891   do {                                          \
 892     EMIT_TWO_BYTES (c1, c2);                    \
 893     EMIT_TWO_BYTES (c3, c4);                    \
 894   } while (0)
 895
 896
 897 /* Prototypes for static functions.  */
 898 static void record_conversion_result P_ ((struct coding_system *coding,
 899                                           enum coding_result_code result));
 900 static int detect_coding_utf_8 P_ ((struct coding_system *,
 901                                     struct coding_detection_info *info));
 902 static void decode_coding_utf_8 P_ ((struct coding_system *));
 903 static int encode_coding_utf_8 P_ ((struct coding_system *));
 904
 905 static int detect_coding_utf_16 P_ ((struct coding_system *,
 906                                      struct coding_detection_info *info));
 907 static void decode_coding_utf_16 P_ ((struct coding_system *));
 908 static int encode_coding_utf_16 P_ ((struct coding_system *));
 909
 910 static int detect_coding_iso_2022 P_ ((struct coding_system *,
 911                                        struct coding_detection_info *info));
 912 static void decode_coding_iso_2022 P_ ((struct coding_system *));
 913 static int encode_coding_iso_2022 P_ ((struct coding_system *));
 914
 915 static int detect_coding_emacs_mule P_ ((struct coding_system *,
 916                                          struct coding_detection_info *info));
 917 static void decode_coding_emacs_mule P_ ((struct coding_system *));
 918 static int encode_coding_emacs_mule P_ ((struct coding_system *));
 919
 920 static int detect_coding_sjis P_ ((struct coding_system *,
 921                                    struct coding_detection_info *info));
 922 static void decode_coding_sjis P_ ((struct coding_system *));
 923 static int encode_coding_sjis P_ ((struct coding_system *));
 924
 925 static int detect_coding_big5 P_ ((struct coding_system *,
 926                                    struct coding_detection_info *info));
 927 static void decode_coding_big5 P_ ((struct coding_system *));
 928 static int encode_coding_big5 P_ ((struct coding_system *));
 929
 930 static int detect_coding_ccl P_ ((struct coding_system *,
 931                                   struct coding_detection_info *info));
 932 static void decode_coding_ccl P_ ((struct coding_system *));
 933 static int encode_coding_ccl P_ ((struct coding_system *));
 934
 935 static void decode_coding_raw_text P_ ((struct coding_system *));
 936 static int encode_coding_raw_text P_ ((struct coding_system *));
 937
 938 static void coding_set_source P_ ((struct coding_system *));
 939 static void coding_set_destination P_ ((struct coding_system *));
 940 static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
 941 static void coding_alloc_by_making_gap P_ ((struct coding_system *,
 942                                             EMACS_INT, EMACS_INT));
 943 static unsigned char *alloc_destination P_ ((struct coding_system *,
 944                                              EMACS_INT, unsigned char *));
 945 static void setup_iso_safe_charsets P_ ((Lisp_Object));
 946 static unsigned char *encode_designation_at_bol P_ ((struct coding_system *,
 947                                                      int *, int *,
 948                                                      unsigned char *));
 949 static int detect_eol P_ ((const unsigned char *,
 950                            EMACS_INT, enum coding_category));
 951 static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
 952 static void decode_eol P_ ((struct coding_system *));
 953 static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
 954 static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *));
 955 static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
 956 static INLINE void produce_charset P_ ((struct coding_system *, int *,
 957                                         EMACS_INT));
 958 static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
 959 static int decode_coding P_ ((struct coding_system *));
 960 static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
 961                                                       struct coding_system *,
 962                                                       int *, EMACS_INT *));
 963 static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
 964                                                   struct coding_system *,
 965                                                   int *, EMACS_INT *));
 966 static void consume_chars P_ ((struct coding_system *, Lisp_Object, int));
 967 static int encode_coding P_ ((struct coding_system *));
 968 static Lisp_Object make_conversion_work_buffer P_ ((int));
 969 static Lisp_Object code_conversion_restore P_ ((Lisp_Object));
 970 static INLINE int char_encodable_p P_ ((int, Lisp_Object));
 971 static Lisp_Object make_subsidiaries P_ ((Lisp_Object));
 972
 973 static void
 974 record_conversion_result (struct coding_system *coding,
 975                           enum coding_result_code result)
 976 {
 977   coding->result = result;
 978   switch (result)
 979     {
 980     case CODING_RESULT_INSUFFICIENT_SRC:
 981       Vlast_code_conversion_error = Qinsufficient_source;
 982       break;
 983     case CODING_RESULT_INCONSISTENT_EOL:
 984       Vlast_code_conversion_error = Qinconsistent_eol;
 985       break;
 986     case CODING_RESULT_INVALID_SRC:
 987       Vlast_code_conversion_error = Qinvalid_source;
 988       break;
 989     case CODING_RESULT_INTERRUPT:
 990       Vlast_code_conversion_error = Qinterrupted;
 991       break;
 992     case CODING_RESULT_INSUFFICIENT_MEM:
 993       Vlast_code_conversion_error = Qinsufficient_memory;
 994       break;
 995     default:
 996       Vlast_code_conversion_error = intern ("Unknown error");
 997     }
 998 }
 999
1000 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
1001   do {                                                                       \
1002     charset_map_loaded = 0;                                                  \
1003     c = DECODE_CHAR (charset, code);                                         \
1004     if (charset_map_loaded)                                                  \
1005       {                                                                      \
1006         const unsigned char *orig = coding->source;                          \
1007         EMACS_INT offset;                                                    \
1008                                                                              \
1009         coding_set_source (coding);                                          \
1010         offset = coding->source - orig;                                      \
1011         src += offset;                                                       \
1012         src_base += offset;                                                  \
1013         src_end += offset;                                                   \
1014       }                                                                      \
1015   } while (0)
1016
1017
1018 /* If there are at least BYTES length of room at dst, allocate memory
1019    for coding->destination and update dst and dst_end.  We don't have
1020    to take care of coding->source which will be relocated.  It is
1021    handled by calling coding_set_source in encode_coding.  */
1022
1023 #define ASSURE_DESTINATION(bytes)                               \
1024   do {                                                          \
1025     if (dst + (bytes) >= dst_end)                               \
1026       {                                                         \
1027         int more_bytes = charbuf_end - charbuf + (bytes);       \
1028                                                                 \
1029         dst = alloc_destination (coding, more_bytes, dst);      \
1030         dst_end = coding->destination + coding->dst_bytes;      \
1031       }                                                         \
1032   } while (0)
1033
1034
1035 /* Store multibyte form of the character C in P, and advance P to the
1036    end of the multibyte form.  This is like CHAR_STRING_ADVANCE but it
1037    never calls MAYBE_UNIFY_CHAR.  */
1038
1039 #define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)      \
1040   do {                                          \
1041     if ((c) <= MAX_1_BYTE_CHAR)                 \
1042       *(p)++ = (c);                             \
1043     else if ((c) <= MAX_2_BYTE_CHAR)            \
1044       *(p)++ = (0xC0 | ((c) >> 6)),             \
1045         *(p)++ = (0x80 | ((c) & 0x3F));         \
1046     else if ((c) <= MAX_3_BYTE_CHAR)            \
1047       *(p)++ = (0xE0 | ((c) >> 12)),            \
1048         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
1049         *(p)++ = (0x80 | ((c) & 0x3F));         \
1050     else if ((c) <= MAX_4_BYTE_CHAR)            \
1051       *(p)++ = (0xF0 | (c >> 18)),              \
1052         *(p)++ = (0x80 | ((c >> 12) & 0x3F)),   \
1053         *(p)++ = (0x80 | ((c >> 6) & 0x3F)),    \
1054         *(p)++ = (0x80 | (c & 0x3F));           \
1055     else if ((c) <= MAX_5_BYTE_CHAR)            \
1056       *(p)++ = 0xF8,                            \
1057         *(p)++ = (0x80 | ((c >> 18) & 0x0F)),   \
1058         *(p)++ = (0x80 | ((c >> 12) & 0x3F)),   \
1059         *(p)++ = (0x80 | ((c >> 6) & 0x3F)),    \
1060         *(p)++ = (0x80 | (c & 0x3F));           \
1061     else                                        \
1062       (p) += BYTE8_STRING ((c) - 0x3FFF80, p);  \
1063   } while (0)
1064
1065
1066 /* Return the character code of character whose multibyte form is at
1067    P, and advance P to the end of the multibyte form.  This is like
1068    STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR.  */
1069
1070 #define STRING_CHAR_ADVANCE_NO_UNIFY(p)                         \
1071   (!((p)[0] & 0x80)                                             \
1072    ? *(p)++                                                     \
1073    : ! ((p)[0] & 0x20)                                          \
1074    ? ((p) += 2,                                                 \
1075       ((((p)[-2] & 0x1F) << 6)                                  \
1076        | ((p)[-1] & 0x3F)                                       \
1077        | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))    \
1078    : ! ((p)[0] & 0x10)                                          \
1079    ? ((p) += 3,                                                 \
1080       ((((p)[-3] & 0x0F) << 12)                                 \
1081        | (((p)[-2] & 0x3F) << 6)                                \
1082        | ((p)[-1] & 0x3F)))                                     \
1083    : ! ((p)[0] & 0x08)                                          \
1084    ? ((p) += 4,                                                 \
1085       ((((p)[-4] & 0xF) << 18)                                  \
1086        | (((p)[-3] & 0x3F) << 12)                               \
1087        | (((p)[-2] & 0x3F) << 6)                                \
1088        | ((p)[-1] & 0x3F)))                                     \
1089    : ((p) += 5,                                                 \
1090       ((((p)[-4] & 0x3F) << 18)                                 \
1091        | (((p)[-3] & 0x3F) << 12)                               \
1092        | (((p)[-2] & 0x3F) << 6)                                \
1093        | ((p)[-1] & 0x3F))))
1094
1095
1096 static void
1097 coding_set_source (coding)
1098      struct coding_system *coding;
1099 {
1100   if (BUFFERP (coding->src_object))
1101     {
1102       struct buffer *buf = XBUFFER (coding->src_object);
1103
1104       if (coding->src_pos < 0)
1105         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
1106       else
1107         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
1108     }
1109   else if (STRINGP (coding->src_object))
1110     {
1111       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
1112     }
1113   else
1114     /* Otherwise, the source is C string and is never relocated
1115        automatically.  Thus we don't have to update anything.  */
1116     ;
1117 }
1118
1119 static void
1120 coding_set_destination (coding)
1121      struct coding_system *coding;
1122 {
1123   if (BUFFERP (coding->dst_object))
1124     {
1125       if (coding->src_pos < 0)
1126         {
1127           coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
1128           coding->dst_bytes = (GAP_END_ADDR
1129                                - (coding->src_bytes - coding->consumed)
1130                                - coding->destination);
1131         }
1132       else
1133         {
1134           /* We are sure that coding->dst_pos_byte is before the gap
1135              of the buffer. */
1136           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
1137                                  + coding->dst_pos_byte - BEG_BYTE);
1138           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1139                                - coding->destination);
1140         }
1141     }
1142   else
1143     /* Otherwise, the destination is C string and is never relocated
1144        automatically.  Thus we don't have to update anything.  */
1145     ;
1146 }
1147
1148
1149 static void
1150 coding_alloc_by_realloc (coding, bytes)
1151      struct coding_system *coding;
1152      EMACS_INT bytes;
1153 {
1154   coding->destination = (unsigned char *) xrealloc (coding->destination,
1155                                                     coding->dst_bytes + bytes);
1156   coding->dst_bytes += bytes;
1157 }
1158
1159 static void
1160 coding_alloc_by_making_gap (coding, gap_head_used, bytes)
1161      struct coding_system *coding;
1162      EMACS_INT gap_head_used, bytes;
1163 {
1164   if (EQ (coding->src_object, coding->dst_object))
1165     {
1166       /* The gap may contain the produced data at the head and not-yet
1167          consumed data at the tail.  To preserve those data, we at
1168          first make the gap size to zero, then increase the gap
1169          size.  */
1170       EMACS_INT add = GAP_SIZE;
1171
1172       GPT += gap_head_used, GPT_BYTE += gap_head_used;
1173       GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1174       make_gap (bytes);
1175       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
1176       GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
1177     }
1178   else
1179     {
1180       Lisp_Object this_buffer;
1181
1182       this_buffer = Fcurrent_buffer ();
1183       set_buffer_internal (XBUFFER (coding->dst_object));
1184       make_gap (bytes);
1185       set_buffer_internal (XBUFFER (this_buffer));
1186     }
1187 }
1188
1189
1190 static unsigned char *
1191 alloc_destination (coding, nbytes, dst)
1192      struct coding_system *coding;
1193      EMACS_INT nbytes;
1194      unsigned char *dst;
1195 {
1196   EMACS_INT offset = dst - coding->destination;
1197
1198   if (BUFFERP (coding->dst_object))
1199     {
1200       struct buffer *buf = XBUFFER (coding->dst_object);
1201
1202       coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1203     }
1204   else
1205     coding_alloc_by_realloc (coding, nbytes);
1206   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1207   coding_set_destination (coding);
1208   dst = coding->destination + offset;
1209   return dst;
1210 }
1211
1212 /** Macros for annotations.  */
1213
1214 /* An annotation data is stored in the array coding->charbuf in this
1215    format:
1216      [ -LENGTH ANNOTATION_MASK NCHARS ... ]
1217    LENGTH is the number of elements in the annotation.
1218    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1219    NCHARS is the number of characters in the text annotated.
1220
1221    The format of the following elements depend on ANNOTATION_MASK.
1222
1223    In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1224    follows:
1225      ... NBYTES METHOD [ COMPOSITION-COMPONENTS ... ]
1226
1227    NBYTES is the number of bytes specified in the header part of
1228    old-style emacs-mule encoding, or 0 for the other kind of
1229    composition.
1230
1231    METHOD is one of enum composition_method.
1232
1233    Optionnal COMPOSITION-COMPONENTS are characters and composition
1234    rules.
1235
1236    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1237    follows.
1238
1239    If ANNOTATION_MASK is 0, this annotation is just a space holder to
1240    recover from an invalid annotation, and should be skipped by
1241    produce_annotation.  */
1242
1243 /* Maximum length of the header of annotation data.  */
1244 #define MAX_ANNOTATION_LENGTH 5
1245
1246 #define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
1247   do {                                                  \
1248     *(buf)++ = -(len);                                  \
1249     *(buf)++ = (mask);                                  \
1250     *(buf)++ = (nchars);                                \
1251     coding->annotated = 1;                              \
1252   } while (0);
1253
1254 #define ADD_COMPOSITION_DATA(buf, nchars, nbytes, method)                   \
1255   do {                                                                      \
1256     ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
1257     *buf++ = nbytes;                                                        \
1258     *buf++ = method;                                                        \
1259   } while (0)
1260
1261
1262 #define ADD_CHARSET_DATA(buf, nchars, id)                               \
1263   do {                                                                  \
1264     ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
1265     *buf++ = id;                                                        \
1266   } while (0)
1267
1268 \f
1269 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1270
1271
1272
1273 \f
1274 /*** 3. UTF-8 ***/
1275
1276 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1277    Check if a text is encoded in UTF-8.  If it is, return 1, else
1278    return 0.  */
1279
1280 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
1281 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
1282 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1283 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1284 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1285 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1286
1287 #define UTF_BOM 0xFEFF
1288 #define UTF_8_BOM_1 0xEF
1289 #define UTF_8_BOM_2 0xBB
1290 #define UTF_8_BOM_3 0xBF
1291
1292 static int
1293 detect_coding_utf_8 (coding, detect_info)
1294      struct coding_system *coding;
1295      struct coding_detection_info *detect_info;
1296 {
1297   const unsigned char *src = coding->source, *src_base;
1298   const unsigned char *src_end = coding->source + coding->src_bytes;
1299   int multibytep = coding->src_multibyte;
1300   int consumed_chars = 0;
1301   int bom_found = 0;
1302   int found = 0;
1303
1304   detect_info->checked |= CATEGORY_MASK_UTF_8;
1305   /* A coding system of this category is always ASCII compatible.  */
1306   src += coding->head_ascii;
1307
1308   while (1)
1309     {
1310       int c, c1, c2, c3, c4;
1311
1312       src_base = src;
1313       ONE_MORE_BYTE (c);
1314       if (c < 0 || UTF_8_1_OCTET_P (c))
1315         continue;
1316       ONE_MORE_BYTE (c1);
1317       if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
1318         break;
1319       if (UTF_8_2_OCTET_LEADING_P (c))
1320         {
1321           found = 1;
1322           continue;
1323         }
1324       ONE_MORE_BYTE (c2);
1325       if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1326         break;
1327       if (UTF_8_3_OCTET_LEADING_P (c))
1328         {
1329           found = 1;
1330           if (src_base == coding->source
1331               && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3)
1332             bom_found = 1;
1333           continue;
1334         }
1335       ONE_MORE_BYTE (c3);
1336       if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1337         break;
1338       if (UTF_8_4_OCTET_LEADING_P (c))
1339         {
1340           found = 1;
1341           continue;
1342         }
1343       ONE_MORE_BYTE (c4);
1344       if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1345         break;
1346       if (UTF_8_5_OCTET_LEADING_P (c))
1347         {
1348           found = 1;
1349           continue;
1350         }
1351       break;
1352     }
1353   detect_info->rejected |= CATEGORY_MASK_UTF_8;
1354   return 0;
1355
1356  no_more_source:
1357   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1358     {
1359       detect_info->rejected |= CATEGORY_MASK_UTF_8;
1360       return 0;
1361     }
1362   if (bom_found)
1363     {
1364       /* The first character 0xFFFE doesn't necessarily mean a BOM.  */
1365       detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
1366     }
1367   else
1368     {
1369       detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
1370       if (found)
1371         detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG;
1372     }
1373   return 1;
1374 }
1375
1376
1377 static void
1378 decode_coding_utf_8 (coding)
1379      struct coding_system *coding;
1380 {
1381   const unsigned char *src = coding->source + coding->consumed;
1382   const unsigned char *src_end = coding->source + coding->src_bytes;
1383   const unsigned char *src_base;
1384   int *charbuf = coding->charbuf + coding->charbuf_used;
1385   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1386   int consumed_chars = 0, consumed_chars_base = 0;
1387   int multibytep = coding->src_multibyte;
1388   enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
1389   Lisp_Object attr, charset_list;
1390   int eol_crlf =
1391     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1392   int byte_after_cr = -1;
1393
1394   CODING_GET_INFO (coding, attr, charset_list);
1395
1396   if (bom != utf_without_bom)
1397     {
1398       int c1, c2, c3;
1399
1400       src_base = src;
1401       ONE_MORE_BYTE (c1);
1402       if (! UTF_8_3_OCTET_LEADING_P (c1))
1403         src = src_base;
1404       else
1405         {
1406           ONE_MORE_BYTE (c2);
1407           if (! UTF_8_EXTRA_OCTET_P (c2))
1408             src = src_base;
1409           else
1410             {
1411               ONE_MORE_BYTE (c3);
1412               if (! UTF_8_EXTRA_OCTET_P (c3))
1413                 src = src_base;
1414               else
1415                 {
1416                   if ((c1 != UTF_8_BOM_1)
1417                       || (c2 != UTF_8_BOM_2) || (c3 != UTF_8_BOM_3))
1418                     src = src_base;
1419                   else
1420                     CODING_UTF_8_BOM (coding) = utf_without_bom;
1421                 }
1422             }
1423         }
1424     }
1425   CODING_UTF_8_BOM (coding) = utf_without_bom;
1426
1427
1428
1429   while (1)
1430     {
1431       int c, c1, c2, c3, c4, c5;
1432
1433       src_base = src;
1434       consumed_chars_base = consumed_chars;
1435
1436       if (charbuf >= charbuf_end)
1437         {
1438           if (byte_after_cr >= 0)
1439             src_base--;
1440           break;
1441         }
1442
1443       if (byte_after_cr >= 0)
1444         c1 = byte_after_cr, byte_after_cr = -1;
1445       else
1446         ONE_MORE_BYTE (c1);
1447       if (c1 < 0)
1448         {
1449           c = - c1;
1450         }
1451       else if (UTF_8_1_OCTET_P(c1))
1452         {
1453           if (eol_crlf && c1 == '\r')
1454             ONE_MORE_BYTE (byte_after_cr);
1455           c = c1;
1456         }
1457       else
1458         {
1459           ONE_MORE_BYTE (c2);
1460           if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1461             goto invalid_code;
1462           if (UTF_8_2_OCTET_LEADING_P (c1))
1463             {
1464               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1465               /* Reject overlong sequences here and below.  Encoders
1466                  producing them are incorrect, they can be misleading,
1467                  and they mess up read/write invariance.  */
1468               if (c < 128)
1469                 goto invalid_code;
1470             }
1471           else
1472             {
1473               ONE_MORE_BYTE (c3);
1474               if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1475                 goto invalid_code;
1476               if (UTF_8_3_OCTET_LEADING_P (c1))
1477                 {
1478                   c = (((c1 & 0xF) << 12)
1479                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1480                   if (c < 0x800
1481                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1482                     goto invalid_code;
1483                 }
1484               else
1485                 {
1486                   ONE_MORE_BYTE (c4);
1487                   if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1488                     goto invalid_code;
1489                   if (UTF_8_4_OCTET_LEADING_P (c1))
1490                     {
1491                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1492                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1493                     if (c < 0x10000)
1494                       goto invalid_code;
1495                     }
1496                   else
1497                     {
1498                       ONE_MORE_BYTE (c5);
1499                       if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
1500                         goto invalid_code;
1501                       if (UTF_8_5_OCTET_LEADING_P (c1))
1502                         {
1503                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1504                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1505                                | (c5 & 0x3F));
1506                           if ((c > MAX_CHAR) || (c < 0x200000))
1507                             goto invalid_code;
1508                         }
1509                       else
1510                         goto invalid_code;
1511                     }
1512                 }
1513             }
1514         }
1515
1516       *charbuf++ = c;
1517       continue;
1518
1519     invalid_code:
1520       src = src_base;
1521       consumed_chars = consumed_chars_base;
1522       ONE_MORE_BYTE (c);
1523       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1524       coding->errors++;
1525     }
1526
1527  no_more_source:
1528   coding->consumed_char += consumed_chars_base;
1529   coding->consumed = src_base - coding->source;
1530   coding->charbuf_used = charbuf - coding->charbuf;
1531 }
1532
1533
1534 static int
1535 encode_coding_utf_8 (coding)
1536      struct coding_system *coding;
1537 {
1538   int multibytep = coding->dst_multibyte;
1539   int *charbuf = coding->charbuf;
1540   int *charbuf_end = charbuf + coding->charbuf_used;
1541   unsigned char *dst = coding->destination + coding->produced;
1542   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1543   int produced_chars = 0;
1544   int c;
1545
1546   if (CODING_UTF_8_BOM (coding) == utf_with_bom)
1547     {
1548       ASSURE_DESTINATION (3);
1549       EMIT_THREE_BYTES (UTF_8_BOM_1, UTF_8_BOM_2, UTF_8_BOM_3);
1550       CODING_UTF_8_BOM (coding) = utf_without_bom;
1551     }
1552
1553   if (multibytep)
1554     {
1555       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1556
1557       while (charbuf < charbuf_end)
1558         {
1559           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1560
1561           ASSURE_DESTINATION (safe_room);
1562           c = *charbuf++;
1563           if (CHAR_BYTE8_P (c))
1564             {
1565               c = CHAR_TO_BYTE8 (c);
1566               EMIT_ONE_BYTE (c);
1567             }
1568           else
1569             {
1570               CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
1571               for (p = str; p < pend; p++)
1572                 EMIT_ONE_BYTE (*p);
1573             }
1574         }
1575     }
1576   else
1577     {
1578       int safe_room = MAX_MULTIBYTE_LENGTH;
1579
1580       while (charbuf < charbuf_end)
1581         {
1582           ASSURE_DESTINATION (safe_room);
1583           c = *charbuf++;
1584           if (CHAR_BYTE8_P (c))
1585             *dst++ = CHAR_TO_BYTE8 (c);
1586           else
1587             CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
1588           produced_chars++;
1589         }
1590     }
1591   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1592   coding->produced_char += produced_chars;
1593   coding->produced = dst - coding->destination;
1594   return 0;
1595 }
1596
1597
1598 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1599    Check if a text is encoded in one of UTF-16 based coding systems.
1600    If it is, return 1, else return 0.  */
1601
1602 #define UTF_16_HIGH_SURROGATE_P(val) \
1603   (((val) & 0xFC00) == 0xD800)
1604
1605 #define UTF_16_LOW_SURROGATE_P(val) \
1606   (((val) & 0xFC00) == 0xDC00)
1607
1608 #define UTF_16_INVALID_P(val)   \
1609   (((val) == 0xFFFE)            \
1610    || ((val) == 0xFFFF)         \
1611    || UTF_16_LOW_SURROGATE_P (val))
1612
1613
1614 static int
1615 detect_coding_utf_16 (coding, detect_info)
1616      struct coding_system *coding;
1617      struct coding_detection_info *detect_info;
1618 {
1619   const unsigned char *src = coding->source, *src_base = src;
1620   const unsigned char *src_end = coding->source + coding->src_bytes;
1621   int multibytep = coding->src_multibyte;
1622   int consumed_chars = 0;
1623   int c1, c2;
1624
1625   detect_info->checked |= CATEGORY_MASK_UTF_16;
1626   if (coding->mode & CODING_MODE_LAST_BLOCK
1627       && (coding->src_chars & 1))
1628     {
1629       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1630       return 0;
1631     }
1632
1633   TWO_MORE_BYTES (c1, c2);
1634   if ((c1 == 0xFF) && (c2 == 0xFE))
1635     {
1636       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1637                              | CATEGORY_MASK_UTF_16_AUTO);
1638       detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1639                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1640                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1641     }
1642   else if ((c1 == 0xFE) && (c2 == 0xFF))
1643     {
1644       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1645                              | CATEGORY_MASK_UTF_16_AUTO);
1646       detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1647                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1648                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1649     }
1650   else if (c2 < 0)
1651     {
1652       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1653       return 0;
1654     }
1655   else
1656     {
1657       /* We check the dispersion of Eth and Oth bytes where E is even and
1658          O is odd.  If both are high, we assume binary data.*/
1659       unsigned char e[256], o[256];
1660       unsigned e_num = 1, o_num = 1;
1661
1662       memset (e, 0, 256);
1663       memset (o, 0, 256);
1664       e[c1] = 1;
1665       o[c2] = 1;
1666
1667       detect_info->rejected
1668         |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
1669
1670       while (1)
1671         {
1672           TWO_MORE_BYTES (c1, c2);
1673           if (c2 < 0)
1674             break;
1675           if (! e[c1])
1676             {
1677               e[c1] = 1;
1678               e_num++;
1679               if (e_num >= 128)
1680                 break;
1681             }
1682           if (! o[c2])
1683             {
1684               o[c1] = 1;
1685               o_num++;
1686               if (o_num >= 128)
1687                 break;
1688             }
1689         }
1690       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1691       return 0;
1692     }
1693
1694  no_more_source:
1695   return 1;
1696 }
1697
1698 static void
1699 decode_coding_utf_16 (coding)
1700      struct coding_system *coding;
1701 {
1702   const unsigned char *src = coding->source + coding->consumed;
1703   const unsigned char *src_end = coding->source + coding->src_bytes;
1704   const unsigned char *src_base;
1705   int *charbuf = coding->charbuf + coding->charbuf_used;
1706   /* We may produces at most 3 chars in one loop.  */
1707   int *charbuf_end = coding->charbuf + coding->charbuf_size - 2;
1708   int consumed_chars = 0, consumed_chars_base = 0;
1709   int multibytep = coding->src_multibyte;
1710   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1711   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1712   int surrogate = CODING_UTF_16_SURROGATE (coding);
1713   Lisp_Object attr, charset_list;
1714   int eol_crlf =
1715     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1716   int byte_after_cr1 = -1, byte_after_cr2 = -1;
1717
1718   CODING_GET_INFO (coding, attr, charset_list);
1719
1720   if (bom == utf_with_bom)
1721     {
1722       int c, c1, c2;
1723
1724       src_base = src;
1725       ONE_MORE_BYTE (c1);
1726       ONE_MORE_BYTE (c2);
1727       c = (c1 << 8) | c2;
1728
1729       if (endian == utf_16_big_endian
1730           ? c != 0xFEFF : c != 0xFFFE)
1731         {
1732           /* The first two bytes are not BOM.  Treat them as bytes
1733              for a normal character.  */
1734           src = src_base;
1735           coding->errors++;
1736         }
1737       CODING_UTF_16_BOM (coding) = utf_without_bom;
1738     }
1739   else if (bom == utf_detect_bom)
1740     {
1741       /* We have already tried to detect BOM and failed in
1742          detect_coding.  */
1743       CODING_UTF_16_BOM (coding) = utf_without_bom;
1744     }
1745
1746   while (1)
1747     {
1748       int c, c1, c2;
1749
1750       src_base = src;
1751       consumed_chars_base = consumed_chars;
1752
1753       if (charbuf >= charbuf_end)
1754         {
1755           if (byte_after_cr1 >= 0)
1756             src_base -= 2;
1757           break;
1758         }
1759
1760       if (byte_after_cr1 >= 0)
1761         c1 = byte_after_cr1, byte_after_cr1 = -1;
1762       else
1763         ONE_MORE_BYTE (c1);
1764       if (c1 < 0)
1765         {
1766           *charbuf++ = -c1;
1767           continue;
1768         }
1769       if (byte_after_cr2 >= 0)
1770         c2 = byte_after_cr2, byte_after_cr2 = -1;
1771       else
1772         ONE_MORE_BYTE (c2);
1773       if (c2 < 0)
1774         {
1775           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1776           *charbuf++ = -c2;
1777           continue;
1778         }
1779       c = (endian == utf_16_big_endian
1780            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1781
1782       if (surrogate)
1783         {
1784           if (! UTF_16_LOW_SURROGATE_P (c))
1785             {
1786               if (endian == utf_16_big_endian)
1787                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1788               else
1789                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1790               *charbuf++ = c1;
1791               *charbuf++ = c2;
1792               coding->errors++;
1793               if (UTF_16_HIGH_SURROGATE_P (c))
1794                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1795               else
1796                 *charbuf++ = c;
1797             }
1798           else
1799             {
1800               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1801               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1802               *charbuf++ = 0x10000 + c;
1803             }
1804         }
1805       else
1806         {
1807           if (UTF_16_HIGH_SURROGATE_P (c))
1808             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1809           else
1810             {
1811               if (eol_crlf && c == '\r')
1812                 {
1813                   ONE_MORE_BYTE (byte_after_cr1);
1814                   ONE_MORE_BYTE (byte_after_cr2);
1815                 }
1816               *charbuf++ = c;
1817             }
1818         }
1819     }
1820
1821  no_more_source:
1822   coding->consumed_char += consumed_chars_base;
1823   coding->consumed = src_base - coding->source;
1824   coding->charbuf_used = charbuf - coding->charbuf;
1825 }
1826
1827 static int
1828 encode_coding_utf_16 (coding)
1829      struct coding_system *coding;
1830 {
1831   int multibytep = coding->dst_multibyte;
1832   int *charbuf = coding->charbuf;
1833   int *charbuf_end = charbuf + coding->charbuf_used;
1834   unsigned char *dst = coding->destination + coding->produced;
1835   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1836   int safe_room = 8;
1837   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1838   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1839   int produced_chars = 0;
1840   Lisp_Object attrs, charset_list;
1841   int c;
1842
1843   CODING_GET_INFO (coding, attrs, charset_list);
1844
1845   if (bom != utf_without_bom)
1846     {
1847       ASSURE_DESTINATION (safe_room);
1848       if (big_endian)
1849         EMIT_TWO_BYTES (0xFE, 0xFF);
1850       else
1851         EMIT_TWO_BYTES (0xFF, 0xFE);
1852       CODING_UTF_16_BOM (coding) = utf_without_bom;
1853     }
1854
1855   while (charbuf < charbuf_end)
1856     {
1857       ASSURE_DESTINATION (safe_room);
1858       c = *charbuf++;
1859       if (c >= MAX_UNICODE_CHAR)
1860         c = coding->default_char;
1861
1862       if (c < 0x10000)
1863         {
1864           if (big_endian)
1865             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1866           else
1867             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1868         }
1869       else
1870         {
1871           int c1, c2;
1872
1873           c -= 0x10000;
1874           c1 = (c >> 10) + 0xD800;
1875           c2 = (c & 0x3FF) + 0xDC00;
1876           if (big_endian)
1877             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1878           else
1879             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1880         }
1881     }
1882   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1883   coding->produced = dst - coding->destination;
1884   coding->produced_char += produced_chars;
1885   return 0;
1886 }
1887
1888 \f
1889 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1890
1891 /* Emacs' internal format for representation of multiple character
1892    sets is a kind of multi-byte encoding, i.e. characters are
1893    represented by variable-length sequences of one-byte codes.
1894
1895    ASCII characters and control characters (e.g. `tab', `newline') are
1896    represented by one-byte sequences which are their ASCII codes, in
1897    the range 0x00 through 0x7F.
1898
1899    8-bit characters of the range 0x80..0x9F are represented by
1900    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1901    code + 0x20).
1902
1903    8-bit characters of the range 0xA0..0xFF are represented by
1904    one-byte sequences which are their 8-bit code.
1905
1906    The other characters are represented by a sequence of `base
1907    leading-code', optional `extended leading-code', and one or two
1908    `position-code's.  The length of the sequence is determined by the
1909    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1910    whereas extended leading-code and position-code take the range 0xA0
1911    through 0xFF.  See `charset.h' for more details about leading-code
1912    and position-code.
1913
1914    --- CODE RANGE of Emacs' internal format ---
1915    character set        range
1916    -------------        -----
1917    ascii                0x00..0x7F
1918    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1919    eight-bit-graphic    0xA0..0xBF
1920    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1921    ---------------------------------------------
1922
1923    As this is the internal character representation, the format is
1924    usually not used externally (i.e. in a file or in a data sent to a
1925    process).  But, it is possible to have a text externally in this
1926    format (i.e. by encoding by the coding system `emacs-mule').
1927
1928    In that case, a sequence of one-byte codes has a slightly different
1929    form.
1930
1931    At first, all characters in eight-bit-control are represented by
1932    one-byte sequences which are their 8-bit code.
1933
1934    Next, character composition data are represented by the byte
1935    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1936    where,
1937         METHOD is 0xF2 plus one of composition method (enum
1938         composition_method),
1939
1940         BYTES is 0xA0 plus a byte length of this composition data,
1941
1942         CHARS is 0xA0 plus a number of characters composed by this
1943         data,
1944
1945         COMPONENTs are characters of multibye form or composition
1946         rules encoded by two-byte of ASCII codes.
1947
1948    In addition, for backward compatibility, the following formats are
1949    also recognized as composition data on decoding.
1950
1951    0x80 MSEQ ...
1952    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1953
1954    Here,
1955         MSEQ is a multibyte form but in these special format:
1956           ASCII: 0xA0 ASCII_CODE+0x80,
1957           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1958         RULE is a one byte code of the range 0xA0..0xF0 that
1959         represents a composition rule.
1960   */
1961
1962 char emacs_mule_bytes[256];
1963
1964
1965 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1966    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
1967    else return 0.  */
1968
1969 static int
1970 detect_coding_emacs_mule (coding, detect_info)
1971      struct coding_system *coding;
1972      struct coding_detection_info *detect_info;
1973 {
1974   const unsigned char *src = coding->source, *src_base;
1975   const unsigned char *src_end = coding->source + coding->src_bytes;
1976   int multibytep = coding->src_multibyte;
1977   int consumed_chars = 0;
1978   int c;
1979   int found = 0;
1980
1981   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1982   /* A coding system of this category is always ASCII compatible.  */
1983   src += coding->head_ascii;
1984
1985   while (1)
1986     {
1987       src_base = src;
1988       ONE_MORE_BYTE (c);
1989       if (c < 0)
1990         continue;
1991       if (c == 0x80)
1992         {
1993           /* Perhaps the start of composite character.  We simply skip
1994              it because analyzing it is too heavy for detecting.  But,
1995              at least, we check that the composite character
1996              constitutes of more than 4 bytes.  */
1997           const unsigned char *src_base;
1998
1999         repeat:
2000           src_base = src;
2001           do
2002             {
2003               ONE_MORE_BYTE (c);
2004             }
2005           while (c >= 0xA0);
2006
2007           if (src - src_base <= 4)
2008             break;
2009           found = CATEGORY_MASK_EMACS_MULE;
2010           if (c == 0x80)
2011             goto repeat;
2012         }
2013
2014       if (c < 0x80)
2015         {
2016           if (c < 0x20
2017               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
2018             break;
2019         }
2020       else
2021         {
2022           int more_bytes = emacs_mule_bytes[*src_base] - 1;
2023
2024           while (more_bytes > 0)
2025             {
2026               ONE_MORE_BYTE (c);
2027               if (c < 0xA0)
2028                 {
2029                   src--;        /* Unread the last byte.  */
2030                   break;
2031                 }
2032               more_bytes--;
2033             }
2034           if (more_bytes != 0)
2035             break;
2036           found = CATEGORY_MASK_EMACS_MULE;
2037         }
2038     }
2039   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2040   return 0;
2041
2042  no_more_source:
2043   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
2044     {
2045       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2046       return 0;
2047     }
2048   detect_info->found |= found;
2049   return 1;
2050 }
2051
2052
2053 /* Parse emacs-mule multibyte sequence at SRC and return the decoded
2054    character.  If CMP_STATUS indicates that we must expect MSEQ or
2055    RULE described above, decode it and return the negative value of
2056    the deocded character or rule.  If an invalid byte is found, return
2057    -1.  If SRC is too short, return -2.  */
2058
2059 int
2060 emacs_mule_char (coding, src, nbytes, nchars, id, cmp_status)
2061      struct coding_system *coding;
2062      const unsigned char *src;
2063      int *nbytes, *nchars, *id;
2064      struct composition_status *cmp_status;
2065 {
2066   const unsigned char *src_end = coding->source + coding->src_bytes;
2067   const unsigned char *src_base = src;
2068   int multibytep = coding->src_multibyte;
2069   struct charset *charset;
2070   unsigned code;
2071   int c;
2072   int consumed_chars = 0;
2073   int mseq_found = 0;
2074
2075   ONE_MORE_BYTE (c);
2076   if (c < 0)
2077     {
2078       c = -c;
2079       charset = emacs_mule_charset[0];
2080     }
2081   else
2082     {
2083       if (c >= 0xA0)
2084         {
2085           if (cmp_status->state != COMPOSING_NO
2086               && cmp_status->old_form)
2087             {
2088               if (cmp_status->state == COMPOSING_CHAR)
2089                 {
2090                   if (c == 0xA0)
2091                     {
2092                       ONE_MORE_BYTE (c);
2093                       c -= 0x80;
2094                       if (c < 0)
2095                         goto invalid_code;
2096                     }
2097                   else
2098                     c -= 0x20;
2099                   mseq_found = 1;
2100                 }
2101               else
2102                 {
2103                   *nbytes = src - src_base;
2104                   *nchars = consumed_chars;
2105                   return -c;
2106                 }
2107             }
2108           else
2109             goto invalid_code;
2110         }
2111
2112       switch (emacs_mule_bytes[c])
2113         {
2114         case 2:
2115           if (! (charset = emacs_mule_charset[c]))
2116             goto invalid_code;
2117           ONE_MORE_BYTE (c);
2118           if (c < 0xA0)
2119             goto invalid_code;
2120           code = c & 0x7F;
2121           break;
2122
2123         case 3:
2124           if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
2125               || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
2126             {
2127               ONE_MORE_BYTE (c);
2128               if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
2129                 goto invalid_code;
2130               ONE_MORE_BYTE (c);
2131               if (c < 0xA0)
2132                 goto invalid_code;
2133               code = c & 0x7F;
2134             }
2135           else
2136             {
2137               if (! (charset = emacs_mule_charset[c]))
2138                 goto invalid_code;
2139               ONE_MORE_BYTE (c);
2140               if (c < 0xA0)
2141                 goto invalid_code;
2142               code = (c & 0x7F) << 8;
2143               ONE_MORE_BYTE (c);
2144               if (c < 0xA0)
2145                 goto invalid_code;
2146               code |= c & 0x7F;
2147             }
2148           break;
2149
2150         case 4:
2151           ONE_MORE_BYTE (c);
2152           if (c < 0 || ! (charset = emacs_mule_charset[c]))
2153             goto invalid_code;
2154           ONE_MORE_BYTE (c);
2155           if (c < 0xA0)
2156             goto invalid_code;
2157           code = (c & 0x7F) << 8;
2158           ONE_MORE_BYTE (c);
2159           if (c < 0xA0)
2160             goto invalid_code;
2161           code |= c & 0x7F;
2162           break;
2163
2164         case 1:
2165           code = c;
2166           charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
2167                                      ? charset_ascii : charset_eight_bit);
2168           break;
2169
2170         default:
2171           abort ();
2172         }
2173       c = DECODE_CHAR (charset, code);
2174       if (c < 0)
2175         goto invalid_code;
2176     }
2177   *nbytes = src - src_base;
2178   *nchars = consumed_chars;
2179   if (id)
2180     *id = charset->id;
2181   return (mseq_found ? -c : c);
2182
2183  no_more_source:
2184   return -2;
2185
2186  invalid_code:
2187   return -1;
2188 }
2189
2190
2191 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2192
2193 /* Handle these composition sequence ('|': the end of header elements,
2194    BYTES and CHARS >= 0xA0):
2195
2196    (1) relative composition: 0x80 0xF2 BYTES CHARS | CHAR ...
2197    (2) altchar composition:  0x80 0xF4 BYTES CHARS | ALT ... ALT CHAR ...
2198    (3) alt&rule composition: 0x80 0xF5 BYTES CHARS | ALT RULE ... ALT CHAR ...
2199
2200    and these old form:
2201
2202    (4) relative composition: 0x80 | MSEQ ... MSEQ
2203    (5) rulebase composition: 0x80 0xFF | MSEQ MRULE ... MSEQ
2204
2205    When the starter 0x80 and the following header elements are found,
2206    this annotation header is produced.
2207
2208         [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS NBYTES METHOD ]
2209
2210    NCHARS is CHARS - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2211    NBYTES is BYTES - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2212
2213    Then, upon reading the following elements, these codes are produced
2214    until the composition end is found:
2215
2216    (1) CHAR ... CHAR
2217    (2) ALT ... ALT CHAR ... CHAR
2218    (3) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT CHAR ... CHAR
2219    (4) CHAR ... CHAR
2220    (5) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
2221
2222    When the composition end is found, LENGTH and NCHARS in the
2223    annotation header is updated as below:
2224
2225    (1) LENGTH: unchanged, NCHARS: unchanged
2226    (2) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2227    (3) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2228    (4) LENGTH: unchanged,  NCHARS: number of CHARs
2229    (5) LENGTH: unchanged,  NCHARS: number of CHARs
2230
2231    If an error is found while composing, the annotation header is
2232    changed to the original composition header (plus filler -1s) as
2233    below:
2234
2235    (1),(2),(3)  [ 0x80 0xF2+METHOD BYTES CHARS -1 ]
2236    (5)          [ 0x80 0xFF -1 -1- -1 ]
2237
2238    and the sequence [ -2 DECODED-RULE ] is changed to the original
2239    byte sequence as below:
2240         o the original byte sequence is B: [ B -1 ]
2241         o the original byte sequence is B1 B2: [ B1 B2 ]
2242
2243    Most of the routines are implemented by macros because many
2244    variables and labels in the caller decode_coding_emacs_mule must be
2245    accessible, and they are usually called just once (thus doesn't
2246    increase the size of compiled object).  */
2247
2248 /* Decode a composition rule represented by C as a component of
2249    composition sequence of Emacs 20 style.  Set RULE to the decoded
2250    rule. */
2251
2252 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(c, rule)  \
2253   do {                                                  \
2254     int gref, nref;                                     \
2255                                                         \
2256     c -= 0xA0;                                          \
2257     if (c < 0 || c >= 81)                               \
2258       goto invalid_code;                                \
2259     gref = c / 9, nref = c % 9;                         \
2260     if (gref == 4) gref = 10;                           \
2261     if (nref == 4) nref = 10;                           \
2262     rule = COMPOSITION_ENCODE_RULE (gref, nref);        \
2263   } while (0)
2264
2265
2266 /* Decode a composition rule represented by C and the following byte
2267    at SRC as a component of composition sequence of Emacs 21 style.
2268    Set RULE to the decoded rule.  */
2269
2270 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(c, rule)  \
2271   do {                                                  \
2272     int gref, nref;                                     \
2273                                                         \
2274     gref = c - 0x20;                                    \
2275     if (gref < 0 || gref >= 81)                         \
2276       goto invalid_code;                                \
2277     ONE_MORE_BYTE (c);                                  \
2278     nref = c - 0x20;                                    \
2279     if (nref < 0 || nref >= 81)                         \
2280       goto invalid_code;                                \
2281     rule = COMPOSITION_ENCODE_RULE (gref, nref);        \
2282   } while (0)
2283
2284
2285 /* Start of Emacs 21 style format.  The first three bytes at SRC are
2286    (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is the
2287    byte length of this composition information, CHARS is the number of
2288    characters composed by this composition.  */
2289
2290 #define DECODE_EMACS_MULE_21_COMPOSITION()                              \
2291   do {                                                                  \
2292     enum composition_method method = c - 0xF2;                          \
2293     int *charbuf_base = charbuf;                                        \
2294     int nbytes, nchars;                                                 \
2295                                                                         \
2296     ONE_MORE_BYTE (c);                                                  \
2297     if (c < 0)                                                          \
2298       goto invalid_code;                                                \
2299     nbytes = c - 0xA0;                                                  \
2300     if (nbytes < 3 || (method == COMPOSITION_RELATIVE && nbytes != 4))  \
2301       goto invalid_code;                                                \
2302     ONE_MORE_BYTE (c);                                                  \
2303     nchars = c - 0xA0;                                                  \
2304     if (nchars <= 0 || nchars >= MAX_COMPOSITION_COMPONENTS)            \
2305       goto invalid_code;                                                \
2306     cmp_status->old_form = 0;                                           \
2307     cmp_status->method = method;                                        \
2308     if (method == COMPOSITION_RELATIVE)                                 \
2309       cmp_status->state = COMPOSING_CHAR;                               \
2310     else                                                                \
2311       cmp_status->state = COMPOSING_COMPONENT_CHAR;                     \
2312     cmp_status->length = MAX_ANNOTATION_LENGTH;                         \
2313     cmp_status->nchars = nchars;                                        \
2314     cmp_status->ncomps = nbytes - 4;                                    \
2315     ADD_COMPOSITION_DATA (charbuf, nchars, nbytes, method);             \
2316   } while (0)
2317
2318
2319 /* Start of Emacs 20 style format for relative composition.  */
2320
2321 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION()             \
2322   do {                                                          \
2323     cmp_status->old_form = 1;                                   \
2324     cmp_status->method = COMPOSITION_RELATIVE;                  \
2325     cmp_status->state = COMPOSING_CHAR;                         \
2326     cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
2327     cmp_status->nchars = cmp_status->ncomps = 0;                \
2328     ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
2329   } while (0)
2330
2331
2332 /* Start of Emacs 20 style format for rule-base composition.  */
2333
2334 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION()             \
2335   do {                                                          \
2336     cmp_status->old_form = 1;                                   \
2337     cmp_status->method = COMPOSITION_WITH_RULE;                 \
2338     cmp_status->state = COMPOSING_CHAR;                         \
2339     cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
2340     cmp_status->nchars = cmp_status->ncomps = 0;                \
2341     ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
2342   } while (0)
2343
2344
2345 #define DECODE_EMACS_MULE_COMPOSITION_START()           \
2346   do {                                                  \
2347     const unsigned char *current_src = src;             \
2348                                                         \
2349     ONE_MORE_BYTE (c);                                  \
2350     if (c < 0)                                          \
2351       goto invalid_code;                                \
2352     if (c - 0xF2 >= COMPOSITION_RELATIVE                \
2353         && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)  \
2354       DECODE_EMACS_MULE_21_COMPOSITION ();              \
2355     else if (c < 0xA0)                                  \
2356       goto invalid_code;                                \
2357     else if (c < 0xC0)                                  \
2358       {                                                 \
2359         DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION ();   \
2360         /* Re-read C as a composition component.  */    \
2361         src = current_src;                              \
2362       }                                                 \
2363     else if (c == 0xFF)                                 \
2364       DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION ();     \
2365     else                                                \
2366       goto invalid_code;                                \
2367   } while (0)
2368
2369 #define EMACS_MULE_COMPOSITION_END()                            \
2370   do {                                                          \
2371     int idx = - cmp_status->length;                             \
2372                                                                 \
2373     if (cmp_status->old_form)                                   \
2374       charbuf[idx + 2] = cmp_status->nchars;                    \
2375     else if (cmp_status->method > COMPOSITION_RELATIVE)         \
2376       charbuf[idx] = charbuf[idx + 2] - cmp_status->length;     \
2377     cmp_status->state = COMPOSING_NO;                           \
2378   } while (0)
2379
2380
2381 static int
2382 emacs_mule_finish_composition (charbuf, cmp_status)
2383      int *charbuf;
2384      struct composition_status *cmp_status;
2385 {
2386   int idx = - cmp_status->length;
2387   int new_chars;
2388
2389   if (cmp_status->old_form && cmp_status->nchars > 0)
2390     {
2391       charbuf[idx + 2] = cmp_status->nchars;
2392       new_chars = 0;
2393       if (cmp_status->method == COMPOSITION_WITH_RULE
2394           && cmp_status->state == COMPOSING_CHAR)
2395         {
2396           /* The last rule was invalid.  */
2397           int rule = charbuf[-1] + 0xA0;
2398
2399           charbuf[-2] = BYTE8_TO_CHAR (rule);
2400           charbuf[-1] = -1;
2401           new_chars = 1;
2402         }
2403     }
2404   else
2405     {
2406       charbuf[idx++] = BYTE8_TO_CHAR (0x80);
2407
2408       if (cmp_status->method == COMPOSITION_WITH_RULE)
2409         {
2410           charbuf[idx++] = BYTE8_TO_CHAR (0xFF);
2411           charbuf[idx++] = -3;
2412           charbuf[idx++] = 0;
2413           new_chars = 1;
2414         }
2415       else
2416         {
2417           int nchars = charbuf[idx + 1] + 0xA0;
2418           int nbytes = charbuf[idx + 2] + 0xA0;
2419
2420           charbuf[idx++] = BYTE8_TO_CHAR (0xF2 + cmp_status->method);
2421           charbuf[idx++] = BYTE8_TO_CHAR (nbytes);
2422           charbuf[idx++] = BYTE8_TO_CHAR (nchars);
2423           charbuf[idx++] = -1;
2424           new_chars = 4;
2425         }
2426     }
2427   cmp_status->state = COMPOSING_NO;
2428   return new_chars;
2429 }
2430
2431 #define EMACS_MULE_MAYBE_FINISH_COMPOSITION()                             \
2432   do {                                                                    \
2433     if (cmp_status->state != COMPOSING_NO)                                \
2434       char_offset += emacs_mule_finish_composition (charbuf, cmp_status); \
2435   } while (0)
2436
2437
2438 static void
2439 decode_coding_emacs_mule (coding)
2440      struct coding_system *coding;
2441 {
2442   const unsigned char *src = coding->source + coding->consumed;
2443   const unsigned char *src_end = coding->source + coding->src_bytes;
2444   const unsigned char *src_base;
2445   int *charbuf = coding->charbuf + coding->charbuf_used;
2446   /* We may produce two annocations (charset and composition) in one
2447      loop and one more charset annocation at the end.  */
2448   int *charbuf_end
2449     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
2450   int consumed_chars = 0, consumed_chars_base;
2451   int multibytep = coding->src_multibyte;
2452   Lisp_Object attrs, charset_list;
2453   int char_offset = coding->produced_char;
2454   int last_offset = char_offset;
2455   int last_id = charset_ascii;
2456   int eol_crlf =
2457     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
2458   int byte_after_cr = -1;
2459   struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status;
2460
2461   CODING_GET_INFO (coding, attrs, charset_list);
2462
2463   if (cmp_status->state != COMPOSING_NO)
2464     {
2465       int i;
2466
2467       for (i = 0; i < cmp_status->length; i++)
2468         *charbuf++ = cmp_status->carryover[i];
2469       coding->annotated = 1;
2470     }
2471
2472   while (1)
2473     {
2474       int c, id;
2475
2476       src_base = src;
2477       consumed_chars_base = consumed_chars;
2478
2479       if (charbuf >= charbuf_end)
2480         {
2481           if (byte_after_cr >= 0)
2482             src_base--;
2483           break;
2484         }
2485
2486       if (byte_after_cr >= 0)
2487         c = byte_after_cr, byte_after_cr = -1;
2488       else
2489         ONE_MORE_BYTE (c);
2490
2491       if (c < 0 || c == 0x80)
2492         {
2493           EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2494           if (c < 0)
2495             {
2496               *charbuf++ = -c;
2497               char_offset++;
2498             }
2499           else
2500             DECODE_EMACS_MULE_COMPOSITION_START ();
2501           continue;
2502         }
2503
2504       if (c < 0x80)
2505         {
2506           if (eol_crlf && c == '\r')
2507             ONE_MORE_BYTE (byte_after_cr);
2508           id = charset_ascii;
2509           if (cmp_status->state != COMPOSING_NO)
2510             {
2511               if (cmp_status->old_form)
2512                 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2513               else if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2514                 cmp_status->ncomps--;
2515             }
2516         }
2517       else
2518         {
2519           int nchars, nbytes;
2520
2521           c = emacs_mule_char (coding, src_base, &nbytes, &nchars, &id,
2522                                cmp_status);
2523           if (c < 0)
2524             {
2525               if (c == -1)
2526                 goto invalid_code;
2527               if (c == -2)
2528                 break;
2529             }
2530           src = src_base + nbytes;
2531           consumed_chars = consumed_chars_base + nchars;
2532           if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2533             cmp_status->ncomps -= nchars;
2534         }
2535
2536       /* Now if C >= 0, we found a normally encoded characer, if C <
2537          0, we found an old-style composition component character or
2538          rule.  */
2539
2540       if (cmp_status->state == COMPOSING_NO)
2541         {
2542           if (last_id != id)
2543             {
2544               if (last_id != charset_ascii)
2545                 ADD_CHARSET_DATA (charbuf, char_offset - last_offset,
2546                                   last_id);
2547               last_id = id;
2548               last_offset = char_offset;
2549             }
2550           *charbuf++ = c;
2551           char_offset++;
2552         }
2553       else if (cmp_status->state == COMPOSING_CHAR)
2554         {
2555           if (cmp_status->old_form)
2556             {
2557               if (c >= 0)
2558                 {
2559                   EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2560                   *charbuf++ = c;
2561                   char_offset++;
2562                 }
2563               else
2564                 {
2565                   *charbuf++ = -c;
2566                   cmp_status->nchars++;
2567                   cmp_status->length++;
2568                   if (cmp_status->nchars == MAX_COMPOSITION_COMPONENTS)
2569                     EMACS_MULE_COMPOSITION_END ();
2570                   else if (cmp_status->method == COMPOSITION_WITH_RULE)
2571                     cmp_status->state = COMPOSING_RULE;
2572                 }
2573             }
2574           else
2575             {
2576               *charbuf++ = c;
2577               cmp_status->length++;
2578               cmp_status->nchars--;
2579               if (cmp_status->nchars == 0)
2580                 EMACS_MULE_COMPOSITION_END ();
2581             }
2582         }
2583       else if (cmp_status->state == COMPOSING_RULE)
2584         {
2585           int rule;
2586
2587           if (c >= 0)
2588             {
2589               EMACS_MULE_COMPOSITION_END ();
2590               *charbuf++ = c;
2591               char_offset++;
2592             }
2593           else
2594             {
2595               c = -c;
2596               DECODE_EMACS_MULE_COMPOSITION_RULE_20 (c, rule);
2597               if (rule < 0)
2598                 goto invalid_code;
2599               *charbuf++ = -2;
2600               *charbuf++ = rule;
2601               cmp_status->length += 2;
2602               cmp_status->state = COMPOSING_CHAR;
2603             }
2604         }
2605       else if (cmp_status->state == COMPOSING_COMPONENT_CHAR)
2606         {
2607           *charbuf++ = c;
2608           cmp_status->length++;
2609           if (cmp_status->ncomps == 0)
2610             cmp_status->state = COMPOSING_CHAR;
2611           else if (cmp_status->ncomps > 0)
2612             {
2613               if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)
2614                 cmp_status->state = COMPOSING_COMPONENT_RULE;
2615             }
2616           else
2617             EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2618         }
2619       else                      /* COMPOSING_COMPONENT_RULE */
2620         {
2621           int rule;
2622
2623           DECODE_EMACS_MULE_COMPOSITION_RULE_21 (c, rule);
2624           if (rule < 0)
2625             goto invalid_code;
2626           *charbuf++ = -2;
2627           *charbuf++ = rule;
2628           cmp_status->length += 2;
2629           cmp_status->ncomps--;
2630           if (cmp_status->ncomps > 0)
2631             cmp_status->state = COMPOSING_COMPONENT_CHAR;
2632           else
2633             EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2634         }
2635       continue;
2636
2637     retry:
2638       src = src_base;
2639       consumed_chars = consumed_chars_base;
2640       continue;
2641
2642     invalid_code:
2643       EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2644       src = src_base;
2645       consumed_chars = consumed_chars_base;
2646       ONE_MORE_BYTE (c);
2647       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2648       char_offset++;
2649       coding->errors++;
2650     }
2651
2652  no_more_source:
2653   if (cmp_status->state != COMPOSING_NO)
2654     {
2655       if (coding->mode & CODING_MODE_LAST_BLOCK)
2656         EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2657       else
2658         {
2659           int i;
2660
2661           charbuf -= cmp_status->length;
2662           for (i = 0; i < cmp_status->length; i++)
2663             cmp_status->carryover[i] = charbuf[i];
2664         }
2665     }
2666   if (last_id != charset_ascii)
2667     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2668   coding->consumed_char += consumed_chars_base;
2669   coding->consumed = src_base - coding->source;
2670   coding->charbuf_used = charbuf - coding->charbuf;
2671 }
2672
2673
2674 #define EMACS_MULE_LEADING_CODES(id, codes)     \
2675   do {                                          \
2676     if (id < 0xA0)                              \
2677       codes[0] = id, codes[1] = 0;              \
2678     else if (id < 0xE0)                         \
2679       codes[0] = 0x9A, codes[1] = id;           \
2680     else if (id < 0xF0)                         \
2681       codes[0] = 0x9B, codes[1] = id;           \
2682     else if (id < 0xF5)                         \
2683       codes[0] = 0x9C, codes[1] = id;           \
2684     else                                        \
2685       codes[0] = 0x9D, codes[1] = id;           \
2686   } while (0);
2687
2688
2689 static int
2690 encode_coding_emacs_mule (coding)
2691      struct coding_system *coding;
2692 {
2693   int multibytep = coding->dst_multibyte;
2694   int *charbuf = coding->charbuf;
2695   int *charbuf_end = charbuf + coding->charbuf_used;
2696   unsigned char *dst = coding->destination + coding->produced;
2697   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2698   int safe_room = 8;
2699   int produced_chars = 0;
2700   Lisp_Object attrs, charset_list;
2701   int c;
2702   int preferred_charset_id = -1;
2703
2704   CODING_GET_INFO (coding, attrs, charset_list);
2705   if (! EQ (charset_list, Vemacs_mule_charset_list))
2706     {
2707       CODING_ATTR_CHARSET_LIST (attrs)
2708         = charset_list = Vemacs_mule_charset_list;
2709     }
2710
2711   while (charbuf < charbuf_end)
2712     {
2713       ASSURE_DESTINATION (safe_room);
2714       c = *charbuf++;
2715
2716       if (c < 0)
2717         {
2718           /* Handle an annotation.  */
2719           switch (*charbuf)
2720             {
2721             case CODING_ANNOTATE_COMPOSITION_MASK:
2722               /* Not yet implemented.  */
2723               break;
2724             case CODING_ANNOTATE_CHARSET_MASK:
2725               preferred_charset_id = charbuf[3];
2726               if (preferred_charset_id >= 0
2727                   && NILP (Fmemq (make_number (preferred_charset_id),
2728                                   charset_list)))
2729                 preferred_charset_id = -1;
2730               break;
2731             default:
2732               abort ();
2733             }
2734           charbuf += -c - 1;
2735           continue;
2736         }
2737
2738       if (ASCII_CHAR_P (c))
2739         EMIT_ONE_ASCII_BYTE (c);
2740       else if (CHAR_BYTE8_P (c))
2741         {
2742           c = CHAR_TO_BYTE8 (c);
2743           EMIT_ONE_BYTE (c);
2744         }
2745       else
2746         {
2747           struct charset *charset;
2748           unsigned code;
2749           int dimension;
2750           int emacs_mule_id;
2751           unsigned char leading_codes[2];
2752
2753           if (preferred_charset_id >= 0)
2754             {
2755               charset = CHARSET_FROM_ID (preferred_charset_id);
2756               if (CHAR_CHARSET_P (c, charset))
2757                 code = ENCODE_CHAR (charset, c);
2758               else
2759                 charset = char_charset (c, charset_list, &code);
2760             }
2761           else
2762             charset = char_charset (c, charset_list, &code);
2763           if (! charset)
2764             {
2765               c = coding->default_char;
2766               if (ASCII_CHAR_P (c))
2767                 {
2768                   EMIT_ONE_ASCII_BYTE (c);
2769                   continue;
2770                 }
2771               charset = char_charset (c, charset_list, &code);
2772             }
2773           dimension = CHARSET_DIMENSION (charset);
2774           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2775           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2776           EMIT_ONE_BYTE (leading_codes[0]);
2777           if (leading_codes[1])
2778             EMIT_ONE_BYTE (leading_codes[1]);
2779           if (dimension == 1)
2780             EMIT_ONE_BYTE (code | 0x80);
2781           else
2782             {
2783               code |= 0x8080;
2784               EMIT_ONE_BYTE (code >> 8);
2785               EMIT_ONE_BYTE (code & 0xFF);
2786             }
2787         }
2788     }
2789   record_conversion_result (coding, CODING_RESULT_SUCCESS);
2790   coding->produced_char += produced_chars;
2791   coding->produced = dst - coding->destination;
2792   return 0;
2793 }
2794
2795 \f
2796 /*** 7. ISO2022 handlers ***/
2797
2798 /* The following note describes the coding system ISO2022 briefly.
2799    Since the intention of this note is to help understand the
2800    functions in this file, some parts are NOT ACCURATE or are OVERLY
2801    SIMPLIFIED.  For thorough understanding, please refer to the
2802    original document of ISO2022.  This is equivalent to the standard
2803    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2804
2805    ISO2022 provides many mechanisms to encode several character sets
2806    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2807    is encoded using bytes less than 128.  This may make the encoded
2808    text a little bit longer, but the text passes more easily through
2809    several types of gateway, some of which strip off the MSB (Most
2810    Significant Bit).
2811
2812    There are two kinds of character sets: control character sets and
2813    graphic character sets.  The former contain control characters such
2814    as `newline' and `escape' to provide control functions (control
2815    functions are also provided by escape sequences).  The latter
2816    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2817    two control character sets and many graphic character sets.
2818
2819    Graphic character sets are classified into one of the following
2820    four classes, according to the number of bytes (DIMENSION) and
2821    number of characters in one dimension (CHARS) of the set:
2822    - DIMENSION1_CHARS94
2823    - DIMENSION1_CHARS96
2824    - DIMENSION2_CHARS94
2825    - DIMENSION2_CHARS96
2826
2827    In addition, each character set is assigned an identification tag,
2828    unique for each set, called the "final character" (denoted as <F>
2829    hereafter).  The <F> of each character set is decided by ECMA(*)
2830    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2831    (0x30..0x3F are for private use only).
2832
2833    Note (*): ECMA = European Computer Manufacturers Association
2834
2835    Here are examples of graphic character sets [NAME(<F>)]:
2836         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2837         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2838         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2839         o DIMENSION2_CHARS96 -- none for the moment
2840
2841    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2842         C0 [0x00..0x1F] -- control character plane 0
2843         GL [0x20..0x7F] -- graphic character plane 0
2844         C1 [0x80..0x9F] -- control character plane 1
2845         GR [0xA0..0xFF] -- graphic character plane 1
2846
2847    A control character set is directly designated and invoked to C0 or
2848    C1 by an escape sequence.  The most common case is that:
2849    - ISO646's  control character set is designated/invoked to C0, and
2850    - ISO6429's control character set is designated/invoked to C1,
2851    and usually these designations/invocations are omitted in encoded
2852    text.  In a 7-bit environment, only C0 can be used, and a control
2853    character for C1 is encoded by an appropriate escape sequence to
2854    fit into the environment.  All control characters for C1 are
2855    defined to have corresponding escape sequences.
2856
2857    A graphic character set is at first designated to one of four
2858    graphic registers (G0 through G3), then these graphic registers are
2859    invoked to GL or GR.  These designations and invocations can be
2860    done independently.  The most common case is that G0 is invoked to
2861    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2862    these invocations and designations are omitted in encoded text.
2863    In a 7-bit environment, only GL can be used.
2864
2865    When a graphic character set of CHARS94 is invoked to GL, codes
2866    0x20 and 0x7F of the GL area work as control characters SPACE and
2867    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2868    be used.
2869
2870    There are two ways of invocation: locking-shift and single-shift.
2871    With locking-shift, the invocation lasts until the next different
2872    invocation, whereas with single-shift, the invocation affects the
2873    following character only and doesn't affect the locking-shift
2874    state.  Invocations are done by the following control characters or
2875    escape sequences:
2876
2877    ----------------------------------------------------------------------
2878    abbrev  function                  cntrl escape seq   description
2879    ----------------------------------------------------------------------
2880    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2881    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2882    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2883    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2884    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2885    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2886    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
2887    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2888    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2889    ----------------------------------------------------------------------
2890    (*) These are not used by any known coding system.
2891
2892    Control characters for these functions are defined by macros
2893    ISO_CODE_XXX in `coding.h'.
2894
2895    Designations are done by the following escape sequences:
2896    ----------------------------------------------------------------------
2897    escape sequence      description
2898    ----------------------------------------------------------------------
2899    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2900    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2901    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2902    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2903    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2904    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2905    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2906    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2907    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2908    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2909    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2910    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2911    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2912    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2913    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2914    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2915    ----------------------------------------------------------------------
2916
2917    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2918    of dimension 1, chars 94, and final character <F>, etc...
2919
2920    Note (*): Although these designations are not allowed in ISO2022,
2921    Emacs accepts them on decoding, and produces them on encoding
2922    CHARS96 character sets in a coding system which is characterized as
2923    7-bit environment, non-locking-shift, and non-single-shift.
2924
2925    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2926    '(' must be omitted.  We refer to this as "short-form" hereafter.
2927
2928    Now you may notice that there are a lot of ways of encoding the
2929    same multilingual text in ISO2022.  Actually, there exist many
2930    coding systems such as Compound Text (used in X11's inter client
2931    communication, ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2932    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2933    localized platforms), and all of these are variants of ISO2022.
2934
2935    In addition to the above, Emacs handles two more kinds of escape
2936    sequences: ISO6429's direction specification and Emacs' private
2937    sequence for specifying character composition.
2938
2939    ISO6429's direction specification takes the following form:
2940         o CSI ']'      -- end of the current direction
2941         o CSI '0' ']'  -- end of the current direction
2942         o CSI '1' ']'  -- start of left-to-right text
2943         o CSI '2' ']'  -- start of right-to-left text
2944    The control character CSI (0x9B: control sequence introducer) is
2945    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2946
2947    Character composition specification takes the following form:
2948         o ESC '0' -- start relative composition
2949         o ESC '1' -- end composition
2950         o ESC '2' -- start rule-base composition (*)
2951         o ESC '3' -- start relative composition with alternate chars  (**)
2952         o ESC '4' -- start rule-base composition with alternate chars  (**)
2953   Since these are not standard escape sequences of any ISO standard,
2954   the use of them with these meanings is restricted to Emacs only.
2955
2956   (*) This form is used only in Emacs 20.7 and older versions,
2957   but newer versions can safely decode it.
2958   (**) This form is used only in Emacs 21.1 and newer versions,
2959   and older versions can't decode it.
2960
2961   Here's a list of example usages of these composition escape
2962   sequences (categorized by `enum composition_method').
2963
2964   COMPOSITION_RELATIVE:
2965         ESC 0 CHAR [ CHAR ] ESC 1
2966   COMPOSITION_WITH_RULE:
2967         ESC 2 CHAR [ RULE CHAR ] ESC 1
2968   COMPOSITION_WITH_ALTCHARS:
2969         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2970   COMPOSITION_WITH_RULE_ALTCHARS:
2971         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2972
2973 enum iso_code_class_type iso_code_class[256];
2974
2975 #define SAFE_CHARSET_P(coding, id)      \
2976   ((id) <= (coding)->max_charset_id     \
2977    && (coding)->safe_charsets[id] != 255)
2978
2979
2980 #define SHIFT_OUT_OK(category)  \
2981   (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2982
2983 static void
2984 setup_iso_safe_charsets (attrs)
2985      Lisp_Object attrs;
2986 {
2987   Lisp_Object charset_list, safe_charsets;
2988   Lisp_Object request;
2989   Lisp_Object reg_usage;
2990   Lisp_Object tail;
2991   int reg94, reg96;
2992   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2993   int max_charset_id;
2994
2995   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2996   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2997       && ! EQ (charset_list, Viso_2022_charset_list))
2998     {
2999       CODING_ATTR_CHARSET_LIST (attrs)
3000         = charset_list = Viso_2022_charset_list;
3001       ASET (attrs, coding_attr_safe_charsets, Qnil);
3002     }
3003
3004   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
3005     return;
3006
3007   max_charset_id = 0;
3008   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
3009     {
3010       int id = XINT (XCAR (tail));
3011       if (max_charset_id < id)
3012         max_charset_id = id;
3013     }
3014
3015   safe_charsets = make_uninit_string (max_charset_id + 1);
3016   memset (SDATA (safe_charsets), 255, max_charset_id + 1);
3017   request = AREF (attrs, coding_attr_iso_request);
3018   reg_usage = AREF (attrs, coding_attr_iso_usage);
3019   reg94 = XINT (XCAR (reg_usage));
3020   reg96 = XINT (XCDR (reg_usage));
3021
3022   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
3023     {
3024       Lisp_Object id;
3025       Lisp_Object reg;
3026       struct charset *charset;
3027
3028       id = XCAR (tail);
3029       charset = CHARSET_FROM_ID (XINT (id));
3030       reg = Fcdr (Fassq (id, request));
3031       if (! NILP (reg))
3032         SSET (safe_charsets, XINT (id), XINT (reg));
3033       else if (charset->iso_chars_96)
3034         {
3035           if (reg96 < 4)
3036             SSET (safe_charsets, XINT (id), reg96);
3037         }
3038       else
3039         {
3040           if (reg94 < 4)
3041             SSET (safe_charsets, XINT (id), reg94);
3042         }
3043     }
3044   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
3045 }
3046
3047
3048 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3049    Check if a text is encoded in one of ISO-2022 based codig systems.
3050    If it is, return 1, else return 0.  */
3051
3052 static int
3053 detect_coding_iso_2022 (coding, detect_info)
3054      struct coding_system *coding;
3055      struct coding_detection_info *detect_info;
3056 {
3057   const unsigned char *src = coding->source, *src_base = src;
3058   const unsigned char *src_end = coding->source + coding->src_bytes;
3059   int multibytep = coding->src_multibyte;
3060   int single_shifting = 0;
3061   int id;
3062   int c, c1;
3063   int consumed_chars = 0;
3064   int i;
3065   int rejected = 0;
3066   int found = 0;
3067   int composition_count = -1;
3068
3069   detect_info->checked |= CATEGORY_MASK_ISO;
3070
3071   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
3072     {
3073       struct coding_system *this = &(coding_categories[i]);
3074       Lisp_Object attrs, val;
3075
3076       if (this->id < 0)
3077         continue;
3078       attrs = CODING_ID_ATTRS (this->id);
3079       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
3080           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Viso_2022_charset_list))
3081         setup_iso_safe_charsets (attrs);
3082       val = CODING_ATTR_SAFE_CHARSETS (attrs);
3083       this->max_charset_id = SCHARS (val) - 1;
3084       this->safe_charsets = SDATA (val);
3085     }
3086
3087   /* A coding system of this category is always ASCII compatible.  */
3088   src += coding->head_ascii;
3089
3090   while (rejected != CATEGORY_MASK_ISO)
3091     {
3092       src_base = src;
3093       ONE_MORE_BYTE (c);
3094       switch (c)
3095         {
3096         case ISO_CODE_ESC:
3097           if (inhibit_iso_escape_detection)
3098             break;
3099           single_shifting = 0;
3100           ONE_MORE_BYTE (c);
3101           if (c >= '(' && c <= '/')
3102             {
3103               /* Designation sequence for a charset of dimension 1.  */
3104               ONE_MORE_BYTE (c1);
3105               if (c1 < ' ' || c1 >= 0x80
3106                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
3107                 /* Invalid designation sequence.  Just ignore.  */
3108                 break;
3109             }
3110           else if (c == '$')
3111             {
3112               /* Designation sequence for a charset of dimension 2.  */
3113               ONE_MORE_BYTE (c);
3114               if (c >= '@' && c <= 'B')
3115                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
3116                 id = iso_charset_table[1][0][c];
3117               else if (c >= '(' && c <= '/')
3118                 {
3119                   ONE_MORE_BYTE (c1);
3120                   if (c1 < ' ' || c1 >= 0x80
3121                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
3122                     /* Invalid designation sequence.  Just ignore.  */
3123                     break;
3124                 }
3125               else
3126                 /* Invalid designation sequence.  Just ignore it.  */
3127                 break;
3128             }
3129           else if (c == 'N' || c == 'O')
3130             {
3131               /* ESC <Fe> for SS2 or SS3.  */
3132               single_shifting = 1;
3133               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3134               break;
3135             }
3136           else if (c == '1')
3137             {
3138               /* End of composition.  */
3139               if (composition_count < 0
3140                   || composition_count > MAX_COMPOSITION_COMPONENTS)
3141                 /* Invalid */
3142                 break;
3143               composition_count = -1;
3144               found |= CATEGORY_MASK_ISO;
3145             }
3146           else if (c >= '0' && c <= '4')
3147             {
3148               /* ESC <Fp> for start/end composition.  */
3149               composition_count = 0;
3150               break;
3151             }
3152           else
3153             {
3154               /* Invalid escape sequence.  Just ignore it.  */
3155               break;
3156             }
3157
3158           /* We found a valid designation sequence for CHARSET.  */
3159           rejected |= CATEGORY_MASK_ISO_8BIT;
3160           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
3161                               id))
3162             found |= CATEGORY_MASK_ISO_7;
3163           else
3164             rejected |= CATEGORY_MASK_ISO_7;
3165           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
3166                               id))
3167             found |= CATEGORY_MASK_ISO_7_TIGHT;
3168           else
3169             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
3170           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
3171                               id))
3172             found |= CATEGORY_MASK_ISO_7_ELSE;
3173           else
3174             rejected |= CATEGORY_MASK_ISO_7_ELSE;
3175           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
3176                               id))
3177             found |= CATEGORY_MASK_ISO_8_ELSE;
3178           else
3179             rejected |= CATEGORY_MASK_ISO_8_ELSE;
3180           break;
3181
3182         case ISO_CODE_SO:
3183         case ISO_CODE_SI:
3184           /* Locking shift out/in.  */
3185           if (inhibit_iso_escape_detection)
3186             break;
3187           single_shifting = 0;
3188           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3189           break;
3190
3191         case ISO_CODE_CSI:
3192           /* Control sequence introducer.  */
3193           single_shifting = 0;
3194           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3195           found |= CATEGORY_MASK_ISO_8_ELSE;
3196           goto check_extra_latin;
3197
3198         case ISO_CODE_SS2:
3199         case ISO_CODE_SS3:
3200           /* Single shift.   */
3201           if (inhibit_iso_escape_detection)
3202             break;
3203           single_shifting = 0;
3204           rejected |= CATEGORY_MASK_ISO_7BIT;
3205           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3206               & CODING_ISO_FLAG_SINGLE_SHIFT)
3207             found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
3208           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
3209               & CODING_ISO_FLAG_SINGLE_SHIFT)
3210             found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
3211           if (single_shifting)
3212             break;
3213           goto check_extra_latin;
3214
3215         default:
3216           if (c < 0)
3217             continue;
3218           if (c < 0x80)
3219             {
3220               if (composition_count >= 0)
3221                 composition_count++;
3222               single_shifting = 0;
3223               break;
3224             }
3225           if (c >= 0xA0)
3226             {
3227               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3228               found |= CATEGORY_MASK_ISO_8_1;
3229               /* Check the length of succeeding codes of the range
3230                  0xA0..0FF.  If the byte length is even, we include
3231                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
3232                  only when we are not single shifting.  */
3233               if (! single_shifting
3234                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
3235                 {
3236                   int i = 1;
3237                   while (src < src_end)
3238                     {
3239                       ONE_MORE_BYTE (c);
3240                       if (c < 0xA0)
3241                         break;
3242                       i++;
3243                     }
3244
3245                   if (i & 1 && src < src_end)
3246                     {
3247                       rejected |= CATEGORY_MASK_ISO_8_2;
3248                       if (composition_count >= 0)
3249                         composition_count += i;
3250                     }
3251                   else
3252                     {
3253                       found |= CATEGORY_MASK_ISO_8_2;
3254                       if (composition_count >= 0)
3255                         composition_count += i / 2;
3256                     }
3257                 }
3258               break;
3259             }
3260         check_extra_latin:
3261           single_shifting = 0;
3262           if (! VECTORP (Vlatin_extra_code_table)
3263               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3264             {
3265               rejected = CATEGORY_MASK_ISO;
3266               break;
3267             }
3268           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3269               & CODING_ISO_FLAG_LATIN_EXTRA)
3270             found |= CATEGORY_MASK_ISO_8_1;
3271           else
3272             rejected |= CATEGORY_MASK_ISO_8_1;
3273           rejected |= CATEGORY_MASK_ISO_8_2;
3274         }
3275     }
3276   detect_info->rejected |= CATEGORY_MASK_ISO;
3277   return 0;
3278
3279  no_more_source:
3280   detect_info->rejected |= rejected;
3281   detect_info->found |= (found & ~rejected);
3282   return 1;
3283 }
3284
3285
3286 /* Set designation state into CODING.  Set CHARS_96 to -1 if the
3287    escape sequence should be kept.  */
3288 #define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
3289   do {                                                                  \
3290     int id, prev;                                                       \
3291                                                                         \
3292     if (final < '0' || final >= 128                                     \
3293         || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0)        \
3294         || !SAFE_CHARSET_P (coding, id))                                \
3295       {                                                                 \
3296         CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
3297         chars_96 = -1;                                                  \
3298         break;                                                          \
3299       }                                                                 \
3300     prev = CODING_ISO_DESIGNATION (coding, reg);                        \
3301     if (id == charset_jisx0201_roman)                                   \
3302       {                                                                 \
3303         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
3304           id = charset_ascii;                                           \
3305       }                                                                 \
3306     else if (id == charset_jisx0208_1978)                               \
3307       {                                                                 \
3308         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
3309           id = charset_jisx0208;                                        \
3310       }                                                                 \
3311     CODING_ISO_DESIGNATION (coding, reg) = id;                          \
3312     /* If there was an invalid designation to REG previously, and this  \
3313        designation is ASCII to REG, we should keep this designation     \
3314        sequence.  */                                                    \
3315     if (prev == -2 && id == charset_ascii)                              \
3316       chars_96 = -1;                                                    \
3317   } while (0)
3318
3319
3320 /* Handle these composition sequence (ALT: alternate char):
3321
3322    (1) relative composition: ESC 0 CHAR ... ESC 1
3323    (2) rulebase composition: ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3324    (3) altchar composition:  ESC 3 ALT ... ALT ESC 0 CHAR ... ESC 1
3325    (4) alt&rule composition: ESC 4 ALT RULE ... ALT ESC 0 CHAR ... ESC 1
3326
3327    When the start sequence (ESC 0/2/3/4) is found, this annotation
3328    header is produced.
3329
3330         [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]
3331
3332    Then, upon reading CHAR or RULE (one or two bytes), these codes are
3333    produced until the end sequence (ESC 1) is found:
3334
3335    (1) CHAR ... CHAR
3336    (2) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
3337    (3) ALT ... ALT -1 -1 CHAR ... CHAR
3338    (4) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT -1 -1 CHAR ... CHAR
3339
3340    When the end sequence (ESC 1) is found, LENGTH and NCHARS in the
3341    annotation header is updated as below:
3342
3343    (1) LENGTH: unchanged,  NCHARS: number of CHARs
3344    (2) LENGTH: unchanged,  NCHARS: number of CHARs
3345    (3) LENGTH: += number of ALTs + 2,  NCHARS: number of CHARs
3346    (4) LENGTH: += number of ALTs * 3,  NCHARS: number of CHARs
3347
3348    If an error is found while composing, the annotation header is
3349    changed to:
3350
3351         [ ESC '0'/'2'/'3'/'4' -2 0 ]
3352
3353    and the sequence [ -2 DECODED-RULE ] is changed to the original
3354    byte sequence as below:
3355         o the original byte sequence is B: [ B -1 ]
3356         o the original byte sequence is B1 B2: [ B1 B2 ]
3357    and the sequence [ -1 -1 ] is changed to the original byte
3358    sequence:
3359         [ ESC '0' ]
3360 */
3361
3362 /* Decode a composition rule C1 and maybe one more byte from the
3363    source, and set RULE to the encoded composition rule, NBYTES to the
3364    length of the composition rule.  If the rule is invalid, set RULE
3365    to some negative value.  */
3366
3367 #define DECODE_COMPOSITION_RULE(rule, nbytes)                           \
3368   do {                                                                  \
3369     rule = c1 - 32;                                                     \
3370     if (rule < 0)                                                       \
3371       break;                                                            \
3372     if (rule < 81)              /* old format (before ver.21) */        \
3373       {                                                                 \
3374         int gref = (rule) / 9;                                          \
3375         int nref = (rule) % 9;                                          \
3376         if (gref == 4) gref = 10;                                       \
3377         if (nref == 4) nref = 10;                                       \
3378         rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
3379         nbytes = 1;                                                     \
3380       }                                                                 \
3381     else                        /* new format (after ver.21) */         \
3382       {                                                                 \
3383         int c;                                                          \
3384                                                                         \
3385         ONE_MORE_BYTE (c);                                              \
3386         rule = COMPOSITION_ENCODE_RULE (rule - 81, c - 32);             \
3387         if (rule >= 0)                                                  \
3388           rule += 0x100;   /* to destinguish it from the old format */  \
3389         nbytes = 2;                                                     \
3390       }                                                                 \
3391   } while (0)
3392
3393 #define ENCODE_COMPOSITION_RULE(rule)                           \
3394   do {                                                          \
3395     int gref = (rule % 0x100) / 12, nref = (rule % 0x100) % 12; \
3396                                                                 \
3397     if (rule < 0x100)           /* old format */                \
3398       {                                                         \
3399         if (gref == 10) gref = 4;                               \
3400         if (nref == 10) nref = 4;                               \
3401         charbuf[idx] = 32 + gref * 9 + nref;                    \
3402         charbuf[idx + 1] = -1;                                  \
3403         new_chars++;                                            \
3404       }                                                         \
3405     else                                /* new format */        \
3406       {                                                         \
3407         charbuf[idx] = 32 + 81 + gref;                          \
3408         charbuf[idx + 1] = 32 + nref;                           \
3409         new_chars += 2;                                         \
3410       }                                                         \
3411   } while (0)
3412
3413 /* Finish the current composition as invalid.  */
3414
3415 static int finish_composition P_ ((int *, struct composition_status *));
3416
3417 static int
3418 finish_composition (charbuf, cmp_status)
3419      int *charbuf;
3420      struct composition_status *cmp_status;
3421 {
3422   int idx = - cmp_status->length;
3423   int new_chars;
3424
3425   /* Recover the original ESC sequence */
3426   charbuf[idx++] = ISO_CODE_ESC;
3427   charbuf[idx++] = (cmp_status->method == COMPOSITION_RELATIVE ? '0'
3428                     : cmp_status->method == COMPOSITION_WITH_RULE ? '2'
3429                     : cmp_status->method == COMPOSITION_WITH_ALTCHARS ? '3'
3430                     /* cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS */
3431                     : '4');
3432   charbuf[idx++] = -2;
3433   charbuf[idx++] = 0;
3434   charbuf[idx++] = -1;
3435   new_chars = cmp_status->nchars;
3436   if (cmp_status->method >= COMPOSITION_WITH_RULE)
3437     for (; idx < 0; idx++)
3438       {
3439         int elt = charbuf[idx];
3440
3441         if (elt == -2)
3442           {
3443             ENCODE_COMPOSITION_RULE (charbuf[idx + 1]);
3444             idx++;
3445           }
3446         else if (elt == -1)
3447           {
3448             charbuf[idx++] = ISO_CODE_ESC;
3449             charbuf[idx] = '0';
3450             new_chars += 2;
3451           }
3452       }
3453   cmp_status->state = COMPOSING_NO;
3454   return new_chars;
3455 }
3456
3457 /* If characers are under composition, finish the composition.  */
3458 #define MAYBE_FINISH_COMPOSITION()                              \
3459   do {                                                          \
3460     if (cmp_status->state != COMPOSING_NO)                      \
3461       char_offset += finish_composition (charbuf, cmp_status);  \
3462   } while (0)
3463
3464 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
3465
3466    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
3467    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3468    ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
3469    ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
3470
3471    Produce this annotation sequence now:
3472
3473    [ -LENGTH(==-4) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) METHOD ]
3474 */
3475
3476 #define DECODE_COMPOSITION_START(c1)                                       \
3477   do {                                                                     \
3478     if (c1 == '0'                                                          \
3479         && ((cmp_status->state == COMPOSING_COMPONENT_CHAR                 \
3480              && cmp_status->method == COMPOSITION_WITH_ALTCHARS)           \
3481             || (cmp_status->state == COMPOSING_COMPONENT_RULE              \
3482                 && cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS))) \
3483       {                                                                    \
3484         *charbuf++ = -1;                                                   \
3485         *charbuf++= -1;                                                    \
3486         cmp_status->state = COMPOSING_CHAR;                                \
3487         cmp_status->length += 2;                                           \
3488       }                                                                    \
3489     else                                                                   \
3490       {                                                                    \
3491         MAYBE_FINISH_COMPOSITION ();                                       \
3492         cmp_status->method = (c1 == '0' ? COMPOSITION_RELATIVE             \
3493                               : c1 == '2' ? COMPOSITION_WITH_RULE          \
3494                               : c1 == '3' ? COMPOSITION_WITH_ALTCHARS      \
3495                               : COMPOSITION_WITH_RULE_ALTCHARS);           \
3496         cmp_status->state                                                  \
3497           = (c1 <= '2' ? COMPOSING_CHAR : COMPOSING_COMPONENT_CHAR);       \
3498         ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);          \
3499         cmp_status->length = MAX_ANNOTATION_LENGTH;                        \
3500         cmp_status->nchars = cmp_status->ncomps = 0;                       \
3501         coding->annotated = 1;                                             \
3502       }                                                                    \
3503   } while (0)
3504
3505
3506 /* Handle composition end sequence ESC 1.  */
3507
3508 #define DECODE_COMPOSITION_END()                                        \
3509   do {                                                                  \
3510     if (cmp_status->nchars == 0                                         \
3511         || ((cmp_status->state == COMPOSING_CHAR)                       \
3512             == (cmp_status->method == COMPOSITION_WITH_RULE)))          \
3513       {                                                                 \
3514         MAYBE_FINISH_COMPOSITION ();                                    \
3515         goto invalid_code;                                              \
3516       }                                                                 \
3517     if (cmp_status->method == COMPOSITION_WITH_ALTCHARS)                \
3518       charbuf[- cmp_status->length] -= cmp_status->ncomps + 2;          \
3519     else if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)      \
3520       charbuf[- cmp_status->length] -= cmp_status->ncomps * 3;          \
3521     charbuf[- cmp_status->length + 2] = cmp_status->nchars;             \
3522     char_offset += cmp_status->nchars;                                  \
3523     cmp_status->state = COMPOSING_NO;                                   \
3524   } while (0)
3525
3526 /* Store a composition rule RULE in charbuf, and update cmp_status.  */
3527
3528 #define STORE_COMPOSITION_RULE(rule)    \
3529   do {                                  \
3530     *charbuf++ = -2;                    \
3531     *charbuf++ = rule;                  \
3532     cmp_status->length += 2;            \
3533     cmp_status->state--;                \
3534   } while (0)
3535
3536 /* Store a composed char or a component char C in charbuf, and update
3537    cmp_status.  */
3538
3539 #define STORE_COMPOSITION_CHAR(c)                                       \
3540   do {                                                                  \
3541     *charbuf++ = (c);                                                   \
3542     cmp_status->length++;                                               \
3543     if (cmp_status->state == COMPOSING_CHAR)                            \
3544       cmp_status->nchars++;                                             \
3545     else                                                                \
3546       cmp_status->ncomps++;                                             \
3547     if (cmp_status->method == COMPOSITION_WITH_RULE                     \
3548         || (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS        \
3549             && cmp_status->state == COMPOSING_COMPONENT_CHAR))          \
3550       cmp_status->state++;                                              \
3551   } while (0)
3552
3553
3554 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3555
3556 static void
3557 decode_coding_iso_2022 (coding)
3558      struct coding_system *coding;
3559 {
3560   const unsigned char *src = coding->source + coding->consumed;
3561   const unsigned char *src_end = coding->source + coding->src_bytes;
3562   const unsigned char *src_base;
3563   int *charbuf = coding->charbuf + coding->charbuf_used;
3564   /* We may produce two annocations (charset and composition) in one
3565      loop and one more charset annocation at the end.  */
3566   int *charbuf_end
3567     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
3568   int consumed_chars = 0, consumed_chars_base;
3569   int multibytep = coding->src_multibyte;
3570   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
3571   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3572   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3573   int charset_id_2, charset_id_3;
3574   struct charset *charset;
3575   int c;
3576   struct composition_status *cmp_status = CODING_ISO_CMP_STATUS (coding);
3577   Lisp_Object attrs, charset_list;
3578   int char_offset = coding->produced_char;
3579   int last_offset = char_offset;
3580   int last_id = charset_ascii;
3581   int eol_crlf =
3582     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3583   int byte_after_cr = -1;
3584   int i;
3585
3586   CODING_GET_INFO (coding, attrs, charset_list);
3587   setup_iso_safe_charsets (attrs);
3588   /* Charset list may have been changed.  */
3589   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3590   coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
3591
3592   if (cmp_status->state != COMPOSING_NO)
3593     {
3594       for (i = 0; i < cmp_status->length; i++)
3595         *charbuf++ = cmp_status->carryover[i];
3596       coding->annotated = 1;
3597     }
3598
3599   while (1)
3600     {
3601       int c1, c2;
3602
3603       src_base = src;
3604       consumed_chars_base = consumed_chars;
3605
3606       if (charbuf >= charbuf_end)
3607         {
3608           if (byte_after_cr >= 0)
3609             src_base--;
3610           break;
3611         }
3612
3613       if (byte_after_cr >= 0)
3614         c1 = byte_after_cr, byte_after_cr = -1;
3615       else
3616         ONE_MORE_BYTE (c1);
3617       if (c1 < 0)
3618         goto invalid_code;
3619
3620       if (CODING_ISO_EXTSEGMENT_LEN (coding) > 0)
3621         {
3622           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3623           char_offset++;
3624           CODING_ISO_EXTSEGMENT_LEN (coding)--;
3625           continue;
3626         }
3627
3628       if (CODING_ISO_EMBEDDED_UTF_8 (coding))
3629         {
3630           if (c1 == ISO_CODE_ESC)
3631             {
3632               if (src + 1 >= src_end)
3633                 goto no_more_source;
3634               *charbuf++ = ISO_CODE_ESC;
3635               char_offset++;
3636               if (src[0] == '%' && src[1] == '@')
3637                 {
3638                   src += 2;
3639                   consumed_chars += 2;
3640                   char_offset += 2;
3641                   /* We are sure charbuf can contain two more chars. */
3642                   *charbuf++ = '%';
3643                   *charbuf++ = '@';
3644                   CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
3645                 }
3646             }
3647           else
3648             {
3649               *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3650               char_offset++;
3651             }
3652           continue;
3653         }
3654
3655       if ((cmp_status->state == COMPOSING_RULE
3656            || cmp_status->state == COMPOSING_COMPONENT_RULE)
3657           && c1 != ISO_CODE_ESC)
3658         {
3659           int rule, nbytes;
3660
3661           DECODE_COMPOSITION_RULE (rule, nbytes);
3662           if (rule < 0)
3663             goto invalid_code;
3664           STORE_COMPOSITION_RULE (rule);
3665           continue;
3666         }
3667
3668       /* We produce at most one character.  */
3669       switch (iso_code_class [c1])
3670         {
3671         case ISO_0x20_or_0x7F:
3672           if (charset_id_0 < 0
3673               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
3674             /* This is SPACE or DEL.  */
3675             charset = CHARSET_FROM_ID (charset_ascii);
3676           else
3677             charset = CHARSET_FROM_ID (charset_id_0);
3678           break;
3679
3680         case ISO_graphic_plane_0:
3681           if (charset_id_0 < 0)
3682             charset = CHARSET_FROM_ID (charset_ascii);
3683           else
3684             charset = CHARSET_FROM_ID (charset_id_0);
3685           break;
3686
3687         case ISO_0xA0_or_0xFF:
3688           if (charset_id_1 < 0
3689               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3690               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3691             goto invalid_code;
3692           /* This is a graphic character, we fall down ... */
3693
3694         case ISO_graphic_plane_1:
3695           if (charset_id_1 < 0)
3696             goto invalid_code;
3697           charset = CHARSET_FROM_ID (charset_id_1);
3698           break;
3699
3700         case ISO_control_0:
3701           if (eol_crlf && c1 == '\r')
3702             ONE_MORE_BYTE (byte_after_cr);
3703           MAYBE_FINISH_COMPOSITION ();
3704           charset = CHARSET_FROM_ID (charset_ascii);
3705           break;
3706
3707         case ISO_control_1:
3708           goto invalid_code;
3709
3710         case ISO_shift_out:
3711           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3712               || CODING_ISO_DESIGNATION (coding, 1) < 0)
3713             goto invalid_code;
3714           CODING_ISO_INVOCATION (coding, 0) = 1;
3715           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3716           continue;
3717
3718         case ISO_shift_in:
3719           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3720             goto invalid_code;
3721           CODING_ISO_INVOCATION (coding, 0) = 0;
3722           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3723           continue;
3724
3725         case ISO_single_shift_2_7:
3726         case ISO_single_shift_2:
3727           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3728             goto invalid_code;
3729           /* SS2 is handled as an escape sequence of ESC 'N' */
3730           c1 = 'N';
3731           goto label_escape_sequence;
3732
3733         case ISO_single_shift_3:
3734           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3735             goto invalid_code;
3736           /* SS2 is handled as an escape sequence of ESC 'O' */
3737           c1 = 'O';
3738           goto label_escape_sequence;
3739
3740         case ISO_control_sequence_introducer:
3741           /* CSI is handled as an escape sequence of ESC '[' ...  */
3742           c1 = '[';
3743           goto label_escape_sequence;
3744
3745         case ISO_escape:
3746           ONE_MORE_BYTE (c1);
3747         label_escape_sequence:
3748           /* Escape sequences handled here are invocation,
3749              designation, direction specification, and character
3750              composition specification.  */
3751           switch (c1)
3752             {
3753             case '&':           /* revision of following character set */
3754               ONE_MORE_BYTE (c1);
3755               if (!(c1 >= '@' && c1 <= '~'))
3756                 goto invalid_code;
3757               ONE_MORE_BYTE (c1);
3758               if (c1 != ISO_CODE_ESC)
3759                 goto invalid_code;
3760               ONE_MORE_BYTE (c1);
3761               goto label_escape_sequence;
3762
3763             case '$':           /* designation of 2-byte character set */
3764               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3765                 goto invalid_code;
3766               {
3767                 int reg, chars96;
3768
3769                 ONE_MORE_BYTE (c1);
3770                 if (c1 >= '@' && c1 <= 'B')
3771                   {     /* designation of JISX0208.1978, GB2312.1980,
3772                            or JISX0208.1980 */
3773                     reg = 0, chars96 = 0;
3774                   }
3775                 else if (c1 >= 0x28 && c1 <= 0x2B)
3776                   { /* designation of DIMENSION2_CHARS94 character set */
3777                     reg = c1 - 0x28, chars96 = 0;
3778                     ONE_MORE_BYTE (c1);
3779                   }
3780                 else if (c1 >= 0x2C && c1 <= 0x2F)
3781                   { /* designation of DIMENSION2_CHARS96 character set */
3782                     reg = c1 - 0x2C, chars96 = 1;
3783                     ONE_MORE_BYTE (c1);
3784                   }
3785                 else
3786                   goto invalid_code;
3787                 DECODE_DESIGNATION (reg, 2, chars96, c1);
3788                 /* We must update these variables now.  */
3789                 if (reg == 0)
3790                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3791                 else if (reg == 1)
3792                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3793                 if (chars96 < 0)
3794                   goto invalid_code;
3795               }
3796               continue;
3797
3798             case 'n':           /* invocation of locking-shift-2 */
3799               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3800                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3801                 goto invalid_code;
3802               CODING_ISO_INVOCATION (coding, 0) = 2;
3803               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3804               continue;
3805
3806             case 'o':           /* invocation of locking-shift-3 */
3807               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3808                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3809                 goto invalid_code;
3810               CODING_ISO_INVOCATION (coding, 0) = 3;
3811               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3812               continue;
3813
3814             case 'N':           /* invocation of single-shift-2 */
3815               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3816                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3817                 goto invalid_code;
3818               charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3819               if (charset_id_2 < 0)
3820                 charset = CHARSET_FROM_ID (charset_ascii);
3821               else
3822                 charset = CHARSET_FROM_ID (charset_id_2);
3823               ONE_MORE_BYTE (c1);
3824               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3825                 goto invalid_code;
3826               break;
3827
3828             case 'O':           /* invocation of single-shift-3 */
3829               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3830                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3831                 goto invalid_code;
3832               charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3833               if (charset_id_3 < 0)
3834                 charset = CHARSET_FROM_ID (charset_ascii);
3835               else
3836                 charset = CHARSET_FROM_ID (charset_id_3);
3837               ONE_MORE_BYTE (c1);
3838               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3839                 goto invalid_code;
3840               break;
3841
3842             case '0': case '2': case '3': case '4': /* start composition */
3843               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3844                 goto invalid_code;
3845               if (last_id != charset_ascii)
3846                 {
3847                   ADD_CHARSET_DATA (charbuf, char_offset- last_offset, last_id);
3848                   last_id = charset_ascii;
3849                   last_offset = char_offset;
3850                 }
3851               DECODE_COMPOSITION_START (c1);
3852               continue;
3853
3854             case '1':           /* end composition */
3855               if (cmp_status->state == COMPOSING_NO)
3856                 goto invalid_code;
3857               DECODE_COMPOSITION_END ();
3858               continue;
3859
3860             case '[':           /* specification of direction */
3861               if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3862                 goto invalid_code;
3863               /* For the moment, nested direction is not supported.
3864                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3865                  left-to-right, and nozero means right-to-left.  */
3866               ONE_MORE_BYTE (c1);
3867               switch (c1)
3868                 {
3869                 case ']':       /* end of the current direction */
3870                   coding->mode &= ~CODING_MODE_DIRECTION;
3871
3872                 case '0':       /* end of the current direction */
3873                 case '1':       /* start of left-to-right direction */
3874                   ONE_MORE_BYTE (c1);
3875                   if (c1 == ']')
3876                     coding->mode &= ~CODING_MODE_DIRECTION;
3877                   else
3878                     goto invalid_code;
3879                   break;
3880
3881                 case '2':       /* start of right-to-left direction */
3882                   ONE_MORE_BYTE (c1);
3883                   if (c1 == ']')
3884                     coding->mode |= CODING_MODE_DIRECTION;
3885                   else
3886                     goto invalid_code;
3887                   break;
3888
3889                 default:
3890                   goto invalid_code;
3891                 }
3892               continue;
3893
3894             case '%':
3895               ONE_MORE_BYTE (c1);
3896               if (c1 == '/')
3897                 {
3898                   /* CTEXT extended segment:
3899                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3900                      We keep these bytes as is for the moment.
3901                      They may be decoded by post-read-conversion.  */
3902                   int dim, M, L;
3903                   int size;
3904
3905                   ONE_MORE_BYTE (dim);
3906                   if (dim < 0 || dim > 4)
3907                     goto invalid_code;
3908                   ONE_MORE_BYTE (M);
3909                   if (M < 128)
3910                     goto invalid_code;
3911                   ONE_MORE_BYTE (L);
3912                   if (L < 128)
3913                     goto invalid_code;
3914                   size = ((M - 128) * 128) + (L - 128);
3915                   if (charbuf + 6 > charbuf_end)
3916                     goto break_loop;
3917                   *charbuf++ = ISO_CODE_ESC;
3918                   *charbuf++ = '%';
3919                   *charbuf++ = '/';
3920                   *charbuf++ = dim;
3921                   *charbuf++ = BYTE8_TO_CHAR (M);
3922                   *charbuf++ = BYTE8_TO_CHAR (L);
3923                   CODING_ISO_EXTSEGMENT_LEN (coding) = size;
3924                 }
3925               else if (c1 == 'G')
3926                 {
3927                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3928                      ESC % G --UTF-8-BYTES-- ESC % @
3929                      We keep these bytes as is for the moment.
3930                      They may be decoded by post-read-conversion.  */
3931                   if (charbuf + 3 > charbuf_end)
3932                     goto break_loop;
3933                   *charbuf++ = ISO_CODE_ESC;
3934                   *charbuf++ = '%';
3935                   *charbuf++ = 'G';
3936                   CODING_ISO_EMBEDDED_UTF_8 (coding) = 1;
3937                 }
3938               else
3939                 goto invalid_code;
3940               continue;
3941               break;
3942
3943             default:
3944               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3945                 goto invalid_code;
3946               {
3947                 int reg, chars96;
3948
3949                 if (c1 >= 0x28 && c1 <= 0x2B)
3950                   { /* designation of DIMENSION1_CHARS94 character set */
3951                     reg = c1 - 0x28, chars96 = 0;
3952                     ONE_MORE_BYTE (c1);
3953                   }
3954                 else if (c1 >= 0x2C && c1 <= 0x2F)
3955                   { /* designation of DIMENSION1_CHARS96 character set */
3956                     reg = c1 - 0x2C, chars96 = 1;
3957                     ONE_MORE_BYTE (c1);
3958                   }
3959                 else
3960                   goto invalid_code;
3961                 DECODE_DESIGNATION (reg, 1, chars96, c1);
3962                 /* We must update these variables now.  */
3963                 if (reg == 0)
3964                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3965                 else if (reg == 1)
3966                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3967                 if (chars96 < 0)
3968                   goto invalid_code;
3969               }
3970               continue;
3971             }
3972         }
3973
3974       if (cmp_status->state == COMPOSING_NO
3975           && charset->id != charset_ascii
3976           && last_id != charset->id)
3977         {
3978           if (last_id != charset_ascii)
3979             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3980           last_id = charset->id;
3981           last_offset = char_offset;
3982         }
3983
3984       /* Now we know CHARSET and 1st position code C1 of a character.
3985          Produce a decoded character while getting 2nd position code
3986          C2 if necessary.  */
3987       c1 &= 0x7F;
3988       if (CHARSET_DIMENSION (charset) > 1)
3989         {
3990           ONE_MORE_BYTE (c2);
3991           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3992             /* C2 is not in a valid range.  */
3993             goto invalid_code;
3994           c1 = (c1 << 8) | (c2 & 0x7F);
3995           if (CHARSET_DIMENSION (charset) > 2)
3996             {
3997               ONE_MORE_BYTE (c2);
3998               if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3999                 /* C2 is not in a valid range.  */
4000                 goto invalid_code;
4001               c1 = (c1 << 8) | (c2 & 0x7F);
4002             }
4003         }
4004
4005       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
4006       if (c < 0)
4007         {
4008           MAYBE_FINISH_COMPOSITION ();
4009           for (; src_base < src; src_base++, char_offset++)
4010             {
4011               if (ASCII_BYTE_P (*src_base))
4012                 *charbuf++ = *src_base;
4013               else
4014                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
4015             }
4016         }
4017       else if (cmp_status->state == COMPOSING_NO)
4018         {
4019           *charbuf++ = c;
4020           char_offset++;
4021         }
4022       else if ((cmp_status->state == COMPOSING_CHAR
4023                 ? cmp_status->nchars
4024                 : cmp_status->ncomps)
4025                >= MAX_COMPOSITION_COMPONENTS)
4026         {
4027           /* Too long composition.  */
4028           MAYBE_FINISH_COMPOSITION ();
4029           *charbuf++ = c;
4030           char_offset++;
4031         }
4032       else
4033         STORE_COMPOSITION_CHAR (c);
4034       continue;
4035
4036     invalid_code:
4037       MAYBE_FINISH_COMPOSITION ();
4038       src = src_base;
4039       consumed_chars = consumed_chars_base;
4040       ONE_MORE_BYTE (c);
4041       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4042       char_offset++;
4043       coding->errors++;
4044       continue;
4045
4046     break_loop:
4047       break;
4048     }
4049
4050  no_more_source:
4051   if (cmp_status->state != COMPOSING_NO)
4052     {
4053       if (coding->mode & CODING_MODE_LAST_BLOCK)
4054         MAYBE_FINISH_COMPOSITION ();
4055       else
4056         {
4057           charbuf -= cmp_status->length;
4058           for (i = 0; i < cmp_status->length; i++)
4059             cmp_status->carryover[i] = charbuf[i];
4060         }
4061     }
4062   else if (last_id != charset_ascii)
4063     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4064   coding->consumed_char += consumed_chars_base;
4065   coding->consumed = src_base - coding->source;
4066   coding->charbuf_used = charbuf - coding->charbuf;
4067 }
4068
4069
4070 /* ISO2022 encoding stuff.  */
4071
4072 /*
4073    It is not enough to say just "ISO2022" on encoding, we have to
4074    specify more details.  In Emacs, each coding system of ISO2022
4075    variant has the following specifications:
4076         1. Initial designation to G0 thru G3.
4077         2. Allows short-form designation?
4078         3. ASCII should be designated to G0 before control characters?
4079         4. ASCII should be designated to G0 at end of line?
4080         5. 7-bit environment or 8-bit environment?
4081         6. Use locking-shift?
4082         7. Use Single-shift?
4083    And the following two are only for Japanese:
4084         8. Use ASCII in place of JIS0201-1976-Roman?
4085         9. Use JISX0208-1983 in place of JISX0208-1978?
4086    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
4087    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
4088    details.
4089 */
4090
4091 /* Produce codes (escape sequence) for designating CHARSET to graphic
4092    register REG at DST, and increment DST.  If <final-char> of CHARSET is
4093    '@', 'A', or 'B' and the coding system CODING allows, produce
4094    designation sequence of short-form.  */
4095
4096 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
4097   do {                                                                  \
4098     unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
4099     char *intermediate_char_94 = "()*+";                                \
4100     char *intermediate_char_96 = ",-./";                                \
4101     int revision = -1;                                                  \
4102     int c;                                                              \
4103                                                                         \
4104     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
4105       revision = CHARSET_ISO_REVISION (charset);                        \
4106                                                                         \
4107     if (revision >= 0)                                                  \
4108       {                                                                 \
4109         EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
4110         EMIT_ONE_BYTE ('@' + revision);                                 \
4111       }                                                                 \
4112     EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
4113     if (CHARSET_DIMENSION (charset) == 1)                               \
4114       {                                                                 \
4115         if (! CHARSET_ISO_CHARS_96 (charset))                           \
4116           c = intermediate_char_94[reg];                                \
4117         else                                                            \
4118           c = intermediate_char_96[reg];                                \
4119         EMIT_ONE_ASCII_BYTE (c);                                        \
4120       }                                                                 \
4121     else                                                                \
4122       {                                                                 \
4123         EMIT_ONE_ASCII_BYTE ('$');                                      \
4124         if (! CHARSET_ISO_CHARS_96 (charset))                           \
4125           {                                                             \
4126             if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM   \
4127                 || reg != 0                                             \
4128                 || final_char < '@' || final_char > 'B')                \
4129               EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]);          \
4130           }                                                             \
4131         else                                                            \
4132           EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]);              \
4133       }                                                                 \
4134     EMIT_ONE_ASCII_BYTE (final_char);                                   \
4135                                                                         \
4136     CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
4137   } while (0)
4138
4139
4140 /* The following two macros produce codes (control character or escape
4141    sequence) for ISO2022 single-shift functions (single-shift-2 and
4142    single-shift-3).  */
4143
4144 #define ENCODE_SINGLE_SHIFT_2                                           \
4145   do {                                                                  \
4146     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
4147       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
4148     else                                                                \
4149       EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
4150     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
4151   } while (0)
4152
4153
4154 #define ENCODE_SINGLE_SHIFT_3                                           \
4155   do {                                                                  \
4156     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
4157       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
4158     else                                                                \
4159       EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
4160     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
4161   } while (0)
4162
4163
4164 /* The following four macros produce codes (control character or
4165    escape sequence) for ISO2022 locking-shift functions (shift-in,
4166    shift-out, locking-shift-2, and locking-shift-3).  */
4167
4168 #define ENCODE_SHIFT_IN                                 \
4169   do {                                                  \
4170     EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
4171     CODING_ISO_INVOCATION (coding, 0) = 0;              \
4172   } while (0)
4173
4174
4175 #define ENCODE_SHIFT_OUT                                \
4176   do {                                                  \
4177     EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
4178     CODING_ISO_INVOCATION (coding, 0) = 1;              \
4179   } while (0)
4180
4181
4182 #define ENCODE_LOCKING_SHIFT_2                          \
4183   do {                                                  \
4184     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
4185     CODING_ISO_INVOCATION (coding, 0) = 2;              \
4186   } while (0)
4187
4188
4189 #define ENCODE_LOCKING_SHIFT_3                          \
4190   do {                                                  \
4191     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
4192     CODING_ISO_INVOCATION (coding, 0) = 3;              \
4193   } while (0)
4194
4195
4196 /* Produce codes for a DIMENSION1 character whose character set is
4197    CHARSET and whose position-code is C1.  Designation and invocation
4198    sequences are also produced in advance if necessary.  */
4199
4200 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
4201   do {                                                                  \
4202     int id = CHARSET_ID (charset);                                      \
4203                                                                         \
4204     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
4205         && id == charset_ascii)                                         \
4206       {                                                                 \
4207         id = charset_jisx0201_roman;                                    \
4208         charset = CHARSET_FROM_ID (id);                                 \
4209       }                                                                 \
4210                                                                         \
4211     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
4212       {                                                                 \
4213         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
4214           EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                              \
4215         else                                                            \
4216           EMIT_ONE_BYTE (c1 | 0x80);                                    \
4217         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
4218         break;                                                          \
4219       }                                                                 \
4220     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
4221       {                                                                 \
4222         EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                                \
4223         break;                                                          \
4224       }                                                                 \
4225     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
4226       {                                                                 \
4227         EMIT_ONE_BYTE (c1 | 0x80);                                      \
4228         break;                                                          \
4229       }                                                                 \
4230     else                                                                \
4231       /* Since CHARSET is not yet invoked to any graphic planes, we     \
4232          must invoke it, or, at first, designate it to some graphic     \
4233          register.  Then repeat the loop to actually produce the        \
4234          character.  */                                                 \
4235       dst = encode_invocation_designation (charset, coding, dst,        \
4236                                            &produced_chars);            \
4237   } while (1)
4238
4239
4240 /* Produce codes for a DIMENSION2 character whose character set is
4241    CHARSET and whose position-codes are C1 and C2.  Designation and
4242    invocation codes are also produced in advance if necessary.  */
4243
4244 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
4245   do {                                                                  \
4246     int id = CHARSET_ID (charset);                                      \
4247                                                                         \
4248     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)        \
4249         && id == charset_jisx0208)                                      \
4250       {                                                                 \
4251         id = charset_jisx0208_1978;                                     \
4252         charset = CHARSET_FROM_ID (id);                                 \
4253       }                                                                 \
4254                                                                         \
4255     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
4256       {                                                                 \
4257         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
4258           EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
4259         else                                                            \
4260           EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
4261         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
4262         break;                                                          \
4263       }                                                                 \
4264     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
4265       {                                                                 \
4266         EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
4267         break;                                                          \
4268       }                                                                 \
4269     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
4270       {                                                                 \
4271         EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
4272         break;                                                          \
4273       }                                                                 \
4274     else                                                                \
4275       /* Since CHARSET is not yet invoked to any graphic planes, we     \
4276          must invoke it, or, at first, designate it to some graphic     \
4277          register.  Then repeat the loop to actually produce the        \
4278          character.  */                                                 \
4279       dst = encode_invocation_designation (charset, coding, dst,        \
4280                                            &produced_chars);            \
4281   } while (1)
4282
4283
4284 #define ENCODE_ISO_CHARACTER(charset, c)                                   \
4285   do {                                                                     \
4286     int code = ENCODE_CHAR ((charset),(c));                                \
4287                                                                            \
4288     if (CHARSET_DIMENSION (charset) == 1)                                  \
4289       ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
4290     else                                                                   \
4291       ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
4292   } while (0)
4293
4294
4295 /* Produce designation and invocation codes at a place pointed by DST
4296    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
4297    Return new DST.  */
4298
4299 unsigned char *
4300 encode_invocation_designation (charset, coding, dst, p_nchars)
4301      struct charset *charset;
4302      struct coding_system *coding;
4303      unsigned char *dst;
4304      int *p_nchars;
4305 {
4306   int multibytep = coding->dst_multibyte;
4307   int produced_chars = *p_nchars;
4308   int reg;                      /* graphic register number */
4309   int id = CHARSET_ID (charset);
4310
4311   /* At first, check designations.  */
4312   for (reg = 0; reg < 4; reg++)
4313     if (id == CODING_ISO_DESIGNATION (coding, reg))
4314       break;
4315
4316   if (reg >= 4)
4317     {
4318       /* CHARSET is not yet designated to any graphic registers.  */
4319       /* At first check the requested designation.  */
4320       reg = CODING_ISO_REQUEST (coding, id);
4321       if (reg < 0)
4322         /* Since CHARSET requests no special designation, designate it
4323            to graphic register 0.  */
4324         reg = 0;
4325
4326       ENCODE_DESIGNATION (charset, reg, coding);
4327     }
4328
4329   if (CODING_ISO_INVOCATION (coding, 0) != reg
4330       && CODING_ISO_INVOCATION (coding, 1) != reg)
4331     {
4332       /* Since the graphic register REG is not invoked to any graphic
4333          planes, invoke it to graphic plane 0.  */
4334       switch (reg)
4335         {
4336         case 0:                 /* graphic register 0 */
4337           ENCODE_SHIFT_IN;
4338           break;
4339
4340         case 1:                 /* graphic register 1 */
4341           ENCODE_SHIFT_OUT;
4342           break;
4343
4344         case 2:                 /* graphic register 2 */
4345           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4346             ENCODE_SINGLE_SHIFT_2;
4347           else
4348             ENCODE_LOCKING_SHIFT_2;
4349           break;
4350
4351         case 3:                 /* graphic register 3 */
4352           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4353             ENCODE_SINGLE_SHIFT_3;
4354           else
4355             ENCODE_LOCKING_SHIFT_3;
4356           break;
4357         }
4358     }
4359
4360   *p_nchars = produced_chars;
4361   return dst;
4362 }
4363
4364 /* The following three macros produce codes for indicating direction
4365    of text.  */
4366 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
4367   do {                                                                  \
4368     if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS)        \
4369       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
4370     else                                                                \
4371       EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
4372   } while (0)
4373
4374
4375 #define ENCODE_DIRECTION_R2L()                  \
4376   do {                                          \
4377     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
4378     EMIT_TWO_ASCII_BYTES ('2', ']');            \
4379   } while (0)
4380
4381
4382 #define ENCODE_DIRECTION_L2R()                  \
4383   do {                                          \
4384     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
4385     EMIT_TWO_ASCII_BYTES ('0', ']');            \
4386   } while (0)
4387
4388
4389 /* Produce codes for designation and invocation to reset the graphic
4390    planes and registers to initial state.  */
4391 #define ENCODE_RESET_PLANE_AND_REGISTER()                               \
4392   do {                                                                  \
4393     int reg;                                                            \
4394     struct charset *charset;                                            \
4395                                                                         \
4396     if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
4397       ENCODE_SHIFT_IN;                                                  \
4398     for (reg = 0; reg < 4; reg++)                                       \
4399       if (CODING_ISO_INITIAL (coding, reg) >= 0                         \
4400           && (CODING_ISO_DESIGNATION (coding, reg)                      \
4401               != CODING_ISO_INITIAL (coding, reg)))                     \
4402         {                                                               \
4403           charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
4404           ENCODE_DESIGNATION (charset, reg, coding);                    \
4405         }                                                               \
4406   } while (0)
4407
4408
4409 /* Produce designation sequences of charsets in the line started from
4410    SRC to a place pointed by DST, and return updated DST.
4411
4412    If the current block ends before any end-of-line, we may fail to
4413    find all the necessary designations.  */
4414
4415 static unsigned char *
4416 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
4417      struct coding_system *coding;
4418      int *charbuf, *charbuf_end;
4419      unsigned char *dst;
4420 {
4421   struct charset *charset;
4422   /* Table of charsets to be designated to each graphic register.  */
4423   int r[4];
4424   int c, found = 0, reg;
4425   int produced_chars = 0;
4426   int multibytep = coding->dst_multibyte;
4427   Lisp_Object attrs;
4428   Lisp_Object charset_list;
4429
4430   attrs = CODING_ID_ATTRS (coding->id);
4431   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4432   if (EQ (charset_list, Qiso_2022))
4433     charset_list = Viso_2022_charset_list;
4434
4435   for (reg = 0; reg < 4; reg++)
4436     r[reg] = -1;
4437
4438   while (found < 4)
4439     {
4440       int id;
4441
4442       c = *charbuf++;
4443       if (c == '\n')
4444         break;
4445       charset = char_charset (c, charset_list, NULL);
4446       id = CHARSET_ID (charset);
4447       reg = CODING_ISO_REQUEST (coding, id);
4448       if (reg >= 0 && r[reg] < 0)
4449         {
4450           found++;
4451           r[reg] = id;
4452         }
4453     }
4454
4455   if (found)
4456     {
4457       for (reg = 0; reg < 4; reg++)
4458         if (r[reg] >= 0
4459             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
4460           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
4461     }
4462
4463   return dst;
4464 }
4465
4466 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
4467
4468 static int
4469 encode_coding_iso_2022 (coding)
4470      struct coding_system *coding;
4471 {
4472   int multibytep = coding->dst_multibyte;
4473   int *charbuf = coding->charbuf;
4474   int *charbuf_end = charbuf + coding->charbuf_used;
4475   unsigned char *dst = coding->destination + coding->produced;
4476   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4477   int safe_room = 16;
4478   int bol_designation
4479     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
4480        && CODING_ISO_BOL (coding));
4481   int produced_chars = 0;
4482   Lisp_Object attrs, eol_type, charset_list;
4483   int ascii_compatible;
4484   int c;
4485   int preferred_charset_id = -1;
4486
4487   CODING_GET_INFO (coding, attrs, charset_list);
4488   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
4489   if (VECTORP (eol_type))
4490     eol_type = Qunix;
4491
4492   setup_iso_safe_charsets (attrs);
4493   /* Charset list may have been changed.  */
4494   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4495   coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
4496
4497   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4498
4499   while (charbuf < charbuf_end)
4500     {
4501       ASSURE_DESTINATION (safe_room);
4502
4503       if (bol_designation)
4504         {
4505           unsigned char *dst_prev = dst;
4506
4507           /* We have to produce designation sequences if any now.  */
4508           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
4509           bol_designation = 0;
4510           /* We are sure that designation sequences are all ASCII bytes.  */
4511           produced_chars += dst - dst_prev;
4512         }
4513
4514       c = *charbuf++;
4515
4516       if (c < 0)
4517         {
4518           /* Handle an annotation.  */
4519           switch (*charbuf)
4520             {
4521             case CODING_ANNOTATE_COMPOSITION_MASK:
4522               /* Not yet implemented.  */
4523               break;
4524             case CODING_ANNOTATE_CHARSET_MASK:
4525               preferred_charset_id = charbuf[2];
4526               if (preferred_charset_id >= 0
4527                   && NILP (Fmemq (make_number (preferred_charset_id),
4528                                   charset_list)))
4529                 preferred_charset_id = -1;
4530               break;
4531             default:
4532               abort ();
4533             }
4534           charbuf += -c - 1;
4535           continue;
4536         }
4537
4538       /* Now encode the character C.  */
4539       if (c < 0x20 || c == 0x7F)
4540         {
4541           if (c == '\n'
4542               || (c == '\r' && EQ (eol_type, Qmac)))
4543             {
4544               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4545                 ENCODE_RESET_PLANE_AND_REGISTER ();
4546               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
4547                 {
4548                   int i;
4549
4550                   for (i = 0; i < 4; i++)
4551                     CODING_ISO_DESIGNATION (coding, i)
4552                       = CODING_ISO_INITIAL (coding, i);
4553                 }
4554               bol_designation
4555                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
4556             }
4557           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
4558             ENCODE_RESET_PLANE_AND_REGISTER ();
4559           EMIT_ONE_ASCII_BYTE (c);
4560         }
4561       else if (ASCII_CHAR_P (c))
4562         {
4563           if (ascii_compatible)
4564             EMIT_ONE_ASCII_BYTE (c);
4565           else
4566             {
4567               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
4568               ENCODE_ISO_CHARACTER (charset, c);
4569             }
4570         }
4571       else if (CHAR_BYTE8_P (c))
4572         {
4573           c = CHAR_TO_BYTE8 (c);
4574           EMIT_ONE_BYTE (c);
4575         }
4576       else
4577         {
4578           struct charset *charset;
4579
4580           if (preferred_charset_id >= 0)
4581             {
4582               charset = CHARSET_FROM_ID (preferred_charset_id);
4583               if (! CHAR_CHARSET_P (c, charset))
4584                 charset = char_charset (c, charset_list, NULL);
4585             }
4586           else
4587             charset = char_charset (c, charset_list, NULL);
4588           if (!charset)
4589             {
4590               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4591                 {
4592                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4593                   charset = CHARSET_FROM_ID (charset_ascii);
4594                 }
4595               else
4596                 {
4597                   c = coding->default_char;
4598                   charset = char_charset (c, charset_list, NULL);
4599                 }
4600             }
4601           ENCODE_ISO_CHARACTER (charset, c);
4602         }
4603     }
4604
4605   if (coding->mode & CODING_MODE_LAST_BLOCK
4606       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4607     {
4608       ASSURE_DESTINATION (safe_room);
4609       ENCODE_RESET_PLANE_AND_REGISTER ();
4610     }
4611   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4612   CODING_ISO_BOL (coding) = bol_designation;
4613   coding->produced_char += produced_chars;
4614   coding->produced = dst - coding->destination;
4615   return 0;
4616 }
4617
4618 \f
4619 /*** 8,9. SJIS and BIG5 handlers ***/
4620
4621 /* Although SJIS and BIG5 are not ISO's coding system, they are used
4622    quite widely.  So, for the moment, Emacs supports them in the bare
4623    C code.  But, in the future, they may be supported only by CCL.  */
4624
4625 /* SJIS is a coding system encoding three character sets: ASCII, right
4626    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
4627    as is.  A character of charset katakana-jisx0201 is encoded by
4628    "position-code + 0x80".  A character of charset japanese-jisx0208
4629    is encoded in 2-byte but two position-codes are divided and shifted
4630    so that it fit in the range below.
4631
4632    --- CODE RANGE of SJIS ---
4633    (character set)      (range)
4634    ASCII                0x00 .. 0x7F
4635    KATAKANA-JISX0201    0xA0 .. 0xDF
4636    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
4637             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
4638    -------------------------------
4639
4640 */
4641
4642 /* BIG5 is a coding system encoding two character sets: ASCII and
4643    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
4644    character set and is encoded in two-byte.
4645
4646    --- CODE RANGE of BIG5 ---
4647    (character set)      (range)
4648    ASCII                0x00 .. 0x7F
4649    Big5 (1st byte)      0xA1 .. 0xFE
4650         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
4651    --------------------------
4652
4653   */
4654
4655 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4656    Check if a text is encoded in SJIS.  If it is, return
4657    CATEGORY_MASK_SJIS, else return 0.  */
4658
4659 static int
4660 detect_coding_sjis (coding, detect_info)
4661      struct coding_system *coding;
4662      struct coding_detection_info *detect_info;
4663 {
4664   const unsigned char *src = coding->source, *src_base;
4665   const unsigned char *src_end = coding->source + coding->src_bytes;
4666   int multibytep = coding->src_multibyte;
4667   int consumed_chars = 0;
4668   int found = 0;
4669   int c;
4670
4671   detect_info->checked |= CATEGORY_MASK_SJIS;
4672   /* A coding system of this category is always ASCII compatible.  */
4673   src += coding->head_ascii;
4674
4675   while (1)
4676     {
4677       src_base = src;
4678       ONE_MORE_BYTE (c);
4679       if (c < 0x80)
4680         continue;
4681       if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
4682         {
4683           ONE_MORE_BYTE (c);
4684           if (c < 0x40 || c == 0x7F || c > 0xFC)
4685             break;
4686           found = CATEGORY_MASK_SJIS;
4687         }
4688       else if (c >= 0xA0 && c < 0xE0)
4689         found = CATEGORY_MASK_SJIS;
4690       else
4691         break;
4692     }
4693   detect_info->rejected |= CATEGORY_MASK_SJIS;
4694   return 0;
4695
4696  no_more_source:
4697   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4698     {
4699       detect_info->rejected |= CATEGORY_MASK_SJIS;
4700       return 0;
4701     }
4702   detect_info->found |= found;
4703   return 1;
4704 }
4705
4706 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4707    Check if a text is encoded in BIG5.  If it is, return
4708    CATEGORY_MASK_BIG5, else return 0.  */
4709
4710 static int
4711 detect_coding_big5 (coding, detect_info)
4712      struct coding_system *coding;
4713      struct coding_detection_info *detect_info;
4714 {
4715   const unsigned char *src = coding->source, *src_base;
4716   const unsigned char *src_end = coding->source + coding->src_bytes;
4717   int multibytep = coding->src_multibyte;
4718   int consumed_chars = 0;
4719   int found = 0;
4720   int c;
4721
4722   detect_info->checked |= CATEGORY_MASK_BIG5;
4723   /* A coding system of this category is always ASCII compatible.  */
4724   src += coding->head_ascii;
4725
4726   while (1)
4727     {
4728       src_base = src;
4729       ONE_MORE_BYTE (c);
4730       if (c < 0x80)
4731         continue;
4732       if (c >= 0xA1)
4733         {
4734           ONE_MORE_BYTE (c);
4735           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
4736             return 0;
4737           found = CATEGORY_MASK_BIG5;
4738         }
4739       else
4740         break;
4741     }
4742   detect_info->rejected |= CATEGORY_MASK_BIG5;
4743   return 0;
4744
4745  no_more_source:
4746   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4747     {
4748       detect_info->rejected |= CATEGORY_MASK_BIG5;
4749       return 0;
4750     }
4751   detect_info->found |= found;
4752   return 1;
4753 }
4754
4755 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4756    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
4757
4758 static void
4759 decode_coding_sjis (coding)
4760      struct coding_system *coding;
4761 {
4762   const unsigned char *src = coding->source + coding->consumed;
4763   const unsigned char *src_end = coding->source + coding->src_bytes;
4764   const unsigned char *src_base;
4765   int *charbuf = coding->charbuf + coding->charbuf_used;
4766   /* We may produce one charset annocation in one loop and one more at
4767      the end.  */
4768   int *charbuf_end
4769     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4770   int consumed_chars = 0, consumed_chars_base;
4771   int multibytep = coding->src_multibyte;
4772   struct charset *charset_roman, *charset_kanji, *charset_kana;
4773   struct charset *charset_kanji2;
4774   Lisp_Object attrs, charset_list, val;
4775   int char_offset = coding->produced_char;
4776   int last_offset = char_offset;
4777   int last_id = charset_ascii;
4778   int eol_crlf =
4779     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4780   int byte_after_cr = -1;
4781
4782   CODING_GET_INFO (coding, attrs, charset_list);
4783
4784   val = charset_list;
4785   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4786   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4787   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4788   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4789
4790   while (1)
4791     {
4792       int c, c1;
4793       struct charset *charset;
4794
4795       src_base = src;
4796       consumed_chars_base = consumed_chars;
4797
4798       if (charbuf >= charbuf_end)
4799         {
4800           if (byte_after_cr >= 0)
4801             src_base--;
4802           break;
4803         }
4804
4805       if (byte_after_cr >= 0)
4806         c = byte_after_cr, byte_after_cr = -1;
4807       else
4808         ONE_MORE_BYTE (c);
4809       if (c < 0)
4810         goto invalid_code;
4811       if (c < 0x80)
4812         {
4813           if (eol_crlf && c == '\r')
4814             ONE_MORE_BYTE (byte_after_cr);
4815           charset = charset_roman;
4816         }
4817       else if (c == 0x80 || c == 0xA0)
4818         goto invalid_code;
4819       else if (c >= 0xA1 && c <= 0xDF)
4820         {
4821           /* SJIS -> JISX0201-Kana */
4822           c &= 0x7F;
4823           charset = charset_kana;
4824         }
4825       else if (c <= 0xEF)
4826         {
4827           /* SJIS -> JISX0208 */
4828           ONE_MORE_BYTE (c1);
4829           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4830             goto invalid_code;
4831           c = (c << 8) | c1;
4832           SJIS_TO_JIS (c);
4833           charset = charset_kanji;
4834         }
4835       else if (c <= 0xFC && charset_kanji2)
4836         {
4837           /* SJIS -> JISX0213-2 */
4838           ONE_MORE_BYTE (c1);
4839           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4840             goto invalid_code;
4841           c = (c << 8) | c1;
4842           SJIS_TO_JIS2 (c);
4843           charset = charset_kanji2;
4844         }
4845       else
4846         goto invalid_code;
4847       if (charset->id != charset_ascii
4848           && last_id != charset->id)
4849         {
4850           if (last_id != charset_ascii)
4851             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4852           last_id = charset->id;
4853           last_offset = char_offset;
4854         }
4855       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4856       *charbuf++ = c;
4857       char_offset++;
4858       continue;
4859
4860     invalid_code:
4861       src = src_base;
4862       consumed_chars = consumed_chars_base;
4863       ONE_MORE_BYTE (c);
4864       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4865       char_offset++;
4866       coding->errors++;
4867     }
4868
4869  no_more_source:
4870   if (last_id != charset_ascii)
4871     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4872   coding->consumed_char += consumed_chars_base;
4873   coding->consumed = src_base - coding->source;
4874   coding->charbuf_used = charbuf - coding->charbuf;
4875 }
4876
4877 static void
4878 decode_coding_big5 (coding)
4879      struct coding_system *coding;
4880 {
4881   const unsigned char *src = coding->source + coding->consumed;
4882   const unsigned char *src_end = coding->source + coding->src_bytes;
4883   const unsigned char *src_base;
4884   int *charbuf = coding->charbuf + coding->charbuf_used;
4885   /* We may produce one charset annocation in one loop and one more at
4886      the end.  */
4887   int *charbuf_end
4888     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4889   int consumed_chars = 0, consumed_chars_base;
4890   int multibytep = coding->src_multibyte;
4891   struct charset *charset_roman, *charset_big5;
4892   Lisp_Object attrs, charset_list, val;
4893   int char_offset = coding->produced_char;
4894   int last_offset = char_offset;
4895   int last_id = charset_ascii;
4896   int eol_crlf =
4897     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4898   int byte_after_cr = -1;
4899
4900   CODING_GET_INFO (coding, attrs, charset_list);
4901   val = charset_list;
4902   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4903   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4904
4905   while (1)
4906     {
4907       int c, c1;
4908       struct charset *charset;
4909
4910       src_base = src;
4911       consumed_chars_base = consumed_chars;
4912
4913       if (charbuf >= charbuf_end)
4914         {
4915           if (byte_after_cr >= 0)
4916             src_base--;
4917           break;
4918         }
4919
4920       if (byte_after_cr >= 0)
4921         c = byte_after_cr, byte_after_cr = -1;
4922       else
4923         ONE_MORE_BYTE (c);
4924
4925       if (c < 0)
4926         goto invalid_code;
4927       if (c < 0x80)
4928         {
4929           if (eol_crlf && c == '\r')
4930             ONE_MORE_BYTE (byte_after_cr);
4931           charset = charset_roman;
4932         }
4933       else
4934         {
4935           /* BIG5 -> Big5 */
4936           if (c < 0xA1 || c > 0xFE)
4937             goto invalid_code;
4938           ONE_MORE_BYTE (c1);
4939           if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4940             goto invalid_code;
4941           c = c << 8 | c1;
4942           charset = charset_big5;
4943         }
4944       if (charset->id != charset_ascii
4945           && last_id != charset->id)
4946         {
4947           if (last_id != charset_ascii)
4948             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4949           last_id = charset->id;
4950           last_offset = char_offset;
4951         }
4952       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4953       *charbuf++ = c;
4954       char_offset++;
4955       continue;
4956
4957     invalid_code:
4958       src = src_base;
4959       consumed_chars = consumed_chars_base;
4960       ONE_MORE_BYTE (c);
4961       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4962       char_offset++;
4963       coding->errors++;
4964     }
4965
4966  no_more_source:
4967   if (last_id != charset_ascii)
4968     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4969   coding->consumed_char += consumed_chars_base;
4970   coding->consumed = src_base - coding->source;
4971   coding->charbuf_used = charbuf - coding->charbuf;
4972 }
4973
4974 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4975    This function can encode charsets `ascii', `katakana-jisx0201',
4976    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
4977    are sure that all these charsets are registered as official charset
4978    (i.e. do not have extended leading-codes).  Characters of other
4979    charsets are produced without any encoding.  If SJIS_P is 1, encode
4980    SJIS text, else encode BIG5 text.  */
4981
4982 static int
4983 encode_coding_sjis (coding)
4984      struct coding_system *coding;
4985 {
4986   int multibytep = coding->dst_multibyte;
4987   int *charbuf = coding->charbuf;
4988   int *charbuf_end = charbuf + coding->charbuf_used;
4989   unsigned char *dst = coding->destination + coding->produced;
4990   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4991   int safe_room = 4;
4992   int produced_chars = 0;
4993   Lisp_Object attrs, charset_list, val;
4994   int ascii_compatible;
4995   struct charset *charset_roman, *charset_kanji, *charset_kana;
4996   struct charset *charset_kanji2;
4997   int c;
4998
4999   CODING_GET_INFO (coding, attrs, charset_list);
5000   val = charset_list;
5001   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5002   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5003   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5004   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
5005
5006   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5007
5008   while (charbuf < charbuf_end)
5009     {
5010       ASSURE_DESTINATION (safe_room);
5011       c = *charbuf++;
5012       /* Now encode the character C.  */
5013       if (ASCII_CHAR_P (c) && ascii_compatible)
5014         EMIT_ONE_ASCII_BYTE (c);
5015       else if (CHAR_BYTE8_P (c))
5016         {
5017           c = CHAR_TO_BYTE8 (c);
5018           EMIT_ONE_BYTE (c);
5019         }
5020       else
5021         {
5022           unsigned code;
5023           struct charset *charset = char_charset (c, charset_list, &code);
5024
5025           if (!charset)
5026             {
5027               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5028                 {
5029                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5030                   charset = CHARSET_FROM_ID (charset_ascii);
5031                 }
5032               else
5033                 {
5034                   c = coding->default_char;
5035                   charset = char_charset (c, charset_list, &code);
5036                 }
5037             }
5038           if (code == CHARSET_INVALID_CODE (charset))
5039             abort ();
5040           if (charset == charset_kanji)
5041             {
5042               int c1, c2;
5043               JIS_TO_SJIS (code);
5044               c1 = code >> 8, c2 = code & 0xFF;
5045               EMIT_TWO_BYTES (c1, c2);
5046             }
5047           else if (charset == charset_kana)
5048             EMIT_ONE_BYTE (code | 0x80);
5049           else if (charset_kanji2 && charset == charset_kanji2)
5050             {
5051               int c1, c2;
5052
5053               c1 = code >> 8;
5054               if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25)
5055                   || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
5056                 {
5057                   JIS_TO_SJIS2 (code);
5058                   c1 = code >> 8, c2 = code & 0xFF;
5059                   EMIT_TWO_BYTES (c1, c2);
5060                 }
5061               else
5062                 EMIT_ONE_ASCII_BYTE (code & 0x7F);
5063             }
5064           else
5065             EMIT_ONE_ASCII_BYTE (code & 0x7F);
5066         }
5067     }
5068   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5069   coding->produced_char += produced_chars;
5070   coding->produced = dst - coding->destination;
5071   return 0;
5072 }
5073
5074 static int
5075 encode_coding_big5 (coding)
5076      struct coding_system *coding;
5077 {
5078   int multibytep = coding->dst_multibyte;
5079   int *charbuf = coding->charbuf;
5080   int *charbuf_end = charbuf + coding->charbuf_used;
5081   unsigned char *dst = coding->destination + coding->produced;
5082   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5083   int safe_room = 4;
5084   int produced_chars = 0;
5085   Lisp_Object attrs, charset_list, val;
5086   int ascii_compatible;
5087   struct charset *charset_roman, *charset_big5;
5088   int c;
5089
5090   CODING_GET_INFO (coding, attrs, charset_list);
5091   val = charset_list;
5092   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5093   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
5094   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5095
5096   while (charbuf < charbuf_end)
5097     {
5098       ASSURE_DESTINATION (safe_room);
5099       c = *charbuf++;
5100       /* Now encode the character C.  */
5101       if (ASCII_CHAR_P (c) && ascii_compatible)
5102         EMIT_ONE_ASCII_BYTE (c);
5103       else if (CHAR_BYTE8_P (c))
5104         {
5105           c = CHAR_TO_BYTE8 (c);
5106           EMIT_ONE_BYTE (c);
5107         }
5108       else
5109         {
5110           unsigned code;
5111           struct charset *charset = char_charset (c, charset_list, &code);
5112
5113           if (! charset)
5114             {
5115               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5116                 {
5117                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5118                   charset = CHARSET_FROM_ID (charset_ascii);
5119                 }
5120               else
5121                 {
5122                   c = coding->default_char;
5123                   charset = char_charset (c, charset_list, &code);
5124                 }
5125             }
5126           if (code == CHARSET_INVALID_CODE (charset))
5127             abort ();
5128           if (charset == charset_big5)
5129             {
5130               int c1, c2;
5131
5132               c1 = code >> 8, c2 = code & 0xFF;
5133               EMIT_TWO_BYTES (c1, c2);
5134             }
5135           else
5136             EMIT_ONE_ASCII_BYTE (code & 0x7F);
5137         }
5138     }
5139   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5140   coding->produced_char += produced_chars;
5141   coding->produced = dst - coding->destination;
5142   return 0;
5143 }
5144
5145 \f
5146 /*** 10. CCL handlers ***/
5147
5148 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5149    Check if a text is encoded in a coding system of which
5150    encoder/decoder are written in CCL program.  If it is, return
5151    CATEGORY_MASK_CCL, else return 0.  */
5152
5153 static int
5154 detect_coding_ccl (coding, detect_info)
5155      struct coding_system *coding;
5156      struct coding_detection_info *detect_info;
5157 {
5158   const unsigned char *src = coding->source, *src_base;
5159   const unsigned char *src_end = coding->source + coding->src_bytes;
5160   int multibytep = coding->src_multibyte;
5161   int consumed_chars = 0;
5162   int found = 0;
5163   unsigned char *valids;
5164   int head_ascii = coding->head_ascii;
5165   Lisp_Object attrs;
5166
5167   detect_info->checked |= CATEGORY_MASK_CCL;
5168
5169   coding = &coding_categories[coding_category_ccl];
5170   valids = CODING_CCL_VALIDS (coding);
5171   attrs = CODING_ID_ATTRS (coding->id);
5172   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5173     src += head_ascii;
5174
5175   while (1)
5176     {
5177       int c;
5178
5179       src_base = src;
5180       ONE_MORE_BYTE (c);
5181       if (c < 0 || ! valids[c])
5182         break;
5183       if ((valids[c] > 1))
5184         found = CATEGORY_MASK_CCL;
5185     }
5186   detect_info->rejected |= CATEGORY_MASK_CCL;
5187   return 0;
5188
5189  no_more_source:
5190   detect_info->found |= found;
5191   return 1;
5192 }
5193
5194 static void
5195 decode_coding_ccl (coding)
5196      struct coding_system *coding;
5197 {
5198   const unsigned char *src = coding->source + coding->consumed;
5199   const unsigned char *src_end = coding->source + coding->src_bytes;
5200   int *charbuf = coding->charbuf + coding->charbuf_used;
5201   int *charbuf_end = coding->charbuf + coding->charbuf_size;
5202   int consumed_chars = 0;
5203   int multibytep = coding->src_multibyte;
5204   struct ccl_program ccl;
5205   int source_charbuf[1024];
5206   int source_byteidx[1024];
5207   Lisp_Object attrs, charset_list;
5208
5209   CODING_GET_INFO (coding, attrs, charset_list);
5210   setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
5211
5212   while (src < src_end)
5213     {
5214       const unsigned char *p = src;
5215       int *source, *source_end;
5216       int i = 0;
5217
5218       if (multibytep)
5219         while (i < 1024 && p < src_end)
5220           {
5221             source_byteidx[i] = p - src;
5222             source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
5223           }
5224       else
5225         while (i < 1024 && p < src_end)
5226           source_charbuf[i++] = *p++;
5227
5228       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
5229         ccl.last_block = 1;
5230
5231       source = source_charbuf;
5232       source_end = source + i;
5233       while (source < source_end)
5234         {
5235           ccl_driver (&ccl, source, charbuf,
5236                       source_end - source, charbuf_end - charbuf,
5237                       charset_list);
5238           source += ccl.consumed;
5239           charbuf += ccl.produced;
5240           if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
5241             break;
5242         }
5243       if (source < source_end)
5244         src += source_byteidx[source - source_charbuf];
5245       else
5246         src = p;
5247       consumed_chars += source - source_charbuf;
5248
5249       if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
5250           && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
5251         break;
5252     }
5253
5254   switch (ccl.status)
5255     {
5256     case CCL_STAT_SUSPEND_BY_SRC:
5257       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5258       break;
5259     case CCL_STAT_SUSPEND_BY_DST:
5260       break;
5261     case CCL_STAT_QUIT:
5262     case CCL_STAT_INVALID_CMD:
5263       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
5264       break;
5265     default:
5266       record_conversion_result (coding, CODING_RESULT_SUCCESS);
5267       break;
5268     }
5269   coding->consumed_char += consumed_chars;
5270   coding->consumed = src - coding->source;
5271   coding->charbuf_used = charbuf - coding->charbuf;
5272 }
5273
5274 static int
5275 encode_coding_ccl (coding)
5276      struct coding_system *coding;
5277 {
5278   struct ccl_program ccl;
5279   int multibytep = coding->dst_multibyte;
5280   int *charbuf = coding->charbuf;
5281   int *charbuf_end = charbuf + coding->charbuf_used;
5282   unsigned char *dst = coding->destination + coding->produced;
5283   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5284   int destination_charbuf[1024];
5285   int i, produced_chars = 0;
5286   Lisp_Object attrs, charset_list;
5287
5288   CODING_GET_INFO (coding, attrs, charset_list);
5289   setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
5290
5291   ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
5292   ccl.dst_multibyte = coding->dst_multibyte;
5293
5294   while (charbuf < charbuf_end)
5295     {
5296       ccl_driver (&ccl, charbuf, destination_charbuf,
5297                   charbuf_end - charbuf, 1024, charset_list);
5298       if (multibytep)
5299         {
5300           ASSURE_DESTINATION (ccl.produced * 2);
5301           for (i = 0; i < ccl.produced; i++)
5302             EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
5303         }
5304       else
5305         {
5306           ASSURE_DESTINATION (ccl.produced);
5307           for (i = 0; i < ccl.produced; i++)
5308             *dst++ = destination_charbuf[i] & 0xFF;
5309           produced_chars += ccl.produced;
5310         }
5311       charbuf += ccl.consumed;
5312       if (ccl.status == CCL_STAT_QUIT
5313           || ccl.status == CCL_STAT_INVALID_CMD)
5314         break;
5315     }
5316
5317   switch (ccl.status)
5318     {
5319     case CCL_STAT_SUSPEND_BY_SRC:
5320       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5321       break;
5322     case CCL_STAT_SUSPEND_BY_DST:
5323       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
5324       break;
5325     case CCL_STAT_QUIT:
5326     case CCL_STAT_INVALID_CMD:
5327       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
5328       break;
5329     default:
5330       record_conversion_result (coding, CODING_RESULT_SUCCESS);
5331       break;
5332     }
5333
5334   coding->produced_char += produced_chars;
5335   coding->produced = dst - coding->destination;
5336   return 0;
5337 }
5338
5339
5340 \f
5341 /*** 10, 11. no-conversion handlers ***/
5342
5343 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
5344
5345 static void
5346 decode_coding_raw_text (coding)
5347      struct coding_system *coding;
5348 {
5349   int eol_crlf =
5350     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5351
5352   coding->chars_at_source = 1;
5353   coding->consumed_char = coding->src_chars;
5354   coding->consumed = coding->src_bytes;
5355   if (eol_crlf && coding->source[coding->src_bytes - 1] == '\r')
5356     {
5357       coding->consumed_char--;
5358       coding->consumed--;
5359       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5360     }
5361   else
5362     record_conversion_result (coding, CODING_RESULT_SUCCESS);
5363 }
5364
5365 static int
5366 encode_coding_raw_text (coding)
5367      struct coding_system *coding;
5368 {
5369   int multibytep = coding->dst_multibyte;
5370   int *charbuf = coding->charbuf;
5371   int *charbuf_end = coding->charbuf + coding->charbuf_used;
5372   unsigned char *dst = coding->destination + coding->produced;
5373   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5374   int produced_chars = 0;
5375   int c;
5376
5377   if (multibytep)
5378     {
5379       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
5380
5381       if (coding->src_multibyte)
5382         while (charbuf < charbuf_end)
5383           {
5384             ASSURE_DESTINATION (safe_room);
5385             c = *charbuf++;
5386             if (ASCII_CHAR_P (c))
5387               EMIT_ONE_ASCII_BYTE (c);
5388             else if (CHAR_BYTE8_P (c))
5389               {
5390                 c = CHAR_TO_BYTE8 (c);
5391                 EMIT_ONE_BYTE (c);
5392               }
5393             else
5394               {
5395                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
5396
5397                 CHAR_STRING_ADVANCE (c, p1);
5398                 while (p0 < p1)
5399                   {
5400                     EMIT_ONE_BYTE (*p0);
5401                     p0++;
5402                   }
5403               }
5404           }
5405       else
5406         while (charbuf < charbuf_end)
5407           {
5408             ASSURE_DESTINATION (safe_room);
5409             c = *charbuf++;
5410             EMIT_ONE_BYTE (c);
5411           }
5412     }
5413   else
5414     {
5415       if (coding->src_multibyte)
5416         {
5417           int safe_room = MAX_MULTIBYTE_LENGTH;
5418
5419           while (charbuf < charbuf_end)
5420             {
5421               ASSURE_DESTINATION (safe_room);
5422               c = *charbuf++;
5423               if (ASCII_CHAR_P (c))
5424                 *dst++ = c;
5425               else if (CHAR_BYTE8_P (c))
5426                 *dst++ = CHAR_TO_BYTE8 (c);
5427               else
5428                 CHAR_STRING_ADVANCE (c, dst);
5429             }
5430         }
5431       else
5432         {
5433           ASSURE_DESTINATION (charbuf_end - charbuf);
5434           while (charbuf < charbuf_end && dst < dst_end)
5435             *dst++ = *charbuf++;
5436         }
5437       produced_chars = dst - (coding->destination + coding->produced);
5438     }
5439   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5440   coding->produced_char += produced_chars;
5441   coding->produced = dst - coding->destination;
5442   return 0;
5443 }
5444
5445 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5446    Check if a text is encoded in a charset-based coding system.  If it
5447    is, return 1, else return 0.  */
5448
5449 static int
5450 detect_coding_charset (coding, detect_info)
5451      struct coding_system *coding;
5452      struct coding_detection_info *detect_info;
5453 {
5454   const unsigned char *src = coding->source, *src_base;
5455   const unsigned char *src_end = coding->source + coding->src_bytes;
5456   int multibytep = coding->src_multibyte;
5457   int consumed_chars = 0;
5458   Lisp_Object attrs, valids, name;
5459   int found = 0;
5460   int head_ascii = coding->head_ascii;
5461   int check_latin_extra = 0;
5462
5463   detect_info->checked |= CATEGORY_MASK_CHARSET;
5464
5465   coding = &coding_categories[coding_category_charset];
5466   attrs = CODING_ID_ATTRS (coding->id);
5467   valids = AREF (attrs, coding_attr_charset_valids);
5468   name = CODING_ID_NAME (coding->id);
5469   if (strncmp ((char *) SDATA (SYMBOL_NAME (name)),
5470                "iso-8859-", sizeof ("iso-8859-") - 1) == 0
5471       || strncmp ((char *) SDATA (SYMBOL_NAME (name)),
5472                   "iso-latin-", sizeof ("iso-latin-") - 1) == 0)
5473     check_latin_extra = 1;
5474
5475   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5476     src += head_ascii;
5477
5478   while (1)
5479     {
5480       int c;
5481       Lisp_Object val;
5482       struct charset *charset;
5483       int dim, idx;
5484
5485       src_base = src;
5486       ONE_MORE_BYTE (c);
5487       if (c < 0)
5488         continue;
5489       val = AREF (valids, c);
5490       if (NILP (val))
5491         break;
5492       if (c >= 0x80)
5493         {
5494           if (c < 0xA0
5495               && check_latin_extra
5496               && (!VECTORP (Vlatin_extra_code_table)
5497                   || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])))
5498             break;
5499           found = CATEGORY_MASK_CHARSET;
5500         }
5501       if (INTEGERP (val))
5502         {
5503           charset = CHARSET_FROM_ID (XFASTINT (val));
5504           dim = CHARSET_DIMENSION (charset);
5505           for (idx = 1; idx < dim; idx++)
5506             {
5507               if (src == src_end)
5508                 goto too_short;
5509               ONE_MORE_BYTE (c);
5510               if (c < charset->code_space[(dim - 1 - idx) * 2]
5511                   || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
5512                 break;
5513             }
5514           if (idx < dim)
5515             break;
5516         }
5517       else
5518         {
5519           idx = 1;
5520           for (; CONSP (val); val = XCDR (val))
5521             {
5522               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5523               dim = CHARSET_DIMENSION (charset);
5524               while (idx < dim)
5525                 {
5526                   if (src == src_end)
5527                     goto too_short;
5528                   ONE_MORE_BYTE (c);
5529                   if (c < charset->code_space[(dim - 1 - idx) * 4]
5530                       || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5531                     break;
5532                   idx++;
5533                 }
5534               if (idx == dim)
5535                 {
5536                   val = Qnil;
5537                   break;
5538                 }
5539             }
5540           if (CONSP (val))
5541             break;
5542         }
5543     }
5544  too_short:
5545   detect_info->rejected |= CATEGORY_MASK_CHARSET;
5546   return 0;
5547
5548  no_more_source:
5549   detect_info->found |= found;
5550   return 1;
5551 }
5552
5553 static void
5554 decode_coding_charset (coding)
5555      struct coding_system *coding;
5556 {
5557   const unsigned char *src = coding->source + coding->consumed;
5558   const unsigned char *src_end = coding->source + coding->src_bytes;
5559   const unsigned char *src_base;
5560   int *charbuf = coding->charbuf + coding->charbuf_used;
5561   /* We may produce one charset annocation in one loop and one more at
5562      the end.  */
5563   int *charbuf_end
5564     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
5565   int consumed_chars = 0, consumed_chars_base;
5566   int multibytep = coding->src_multibyte;
5567   Lisp_Object attrs, charset_list, valids;
5568   int char_offset = coding->produced_char;
5569   int last_offset = char_offset;
5570   int last_id = charset_ascii;
5571   int eol_crlf =
5572     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5573   int byte_after_cr = -1;
5574
5575   CODING_GET_INFO (coding, attrs, charset_list);
5576   valids = AREF (attrs, coding_attr_charset_valids);
5577
5578   while (1)
5579     {
5580       int c;
5581       Lisp_Object val;
5582       struct charset *charset;
5583       int dim;
5584       int len = 1;
5585       unsigned code;
5586
5587       src_base = src;
5588       consumed_chars_base = consumed_chars;
5589
5590       if (charbuf >= charbuf_end)
5591         {
5592           if (byte_after_cr >= 0)
5593             src_base--;
5594           break;
5595         }
5596
5597       if (byte_after_cr >= 0)
5598         {
5599           c = byte_after_cr;
5600           byte_after_cr = -1;
5601         }
5602       else
5603         {
5604           ONE_MORE_BYTE (c);
5605           if (eol_crlf && c == '\r')
5606             ONE_MORE_BYTE (byte_after_cr);
5607         }
5608       if (c < 0)
5609         goto invalid_code;
5610       code = c;
5611
5612       val = AREF (valids, c);
5613       if (! INTEGERP (val) && ! CONSP (val))
5614         goto invalid_code;
5615       if (INTEGERP (val))
5616         {
5617           charset = CHARSET_FROM_ID (XFASTINT (val));
5618           dim = CHARSET_DIMENSION (charset);
5619           while (len < dim)
5620             {
5621               ONE_MORE_BYTE (c);
5622               code = (code << 8) | c;
5623               len++;
5624             }
5625           CODING_DECODE_CHAR (coding, src, src_base, src_end,
5626                               charset, code, c);
5627         }
5628       else
5629         {
5630           /* VAL is a list of charset IDs.  It is assured that the
5631              list is sorted by charset dimensions (smaller one
5632              comes first).  */
5633           while (CONSP (val))
5634             {
5635               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5636               dim = CHARSET_DIMENSION (charset);
5637               while (len < dim)
5638                 {
5639                   ONE_MORE_BYTE (c);
5640                   code = (code << 8) | c;
5641                   len++;
5642                 }
5643               CODING_DECODE_CHAR (coding, src, src_base,
5644                                   src_end, charset, code, c);
5645               if (c >= 0)
5646                 break;
5647               val = XCDR (val);
5648             }
5649         }
5650       if (c < 0)
5651         goto invalid_code;
5652       if (charset->id != charset_ascii
5653           && last_id != charset->id)
5654         {
5655           if (last_id != charset_ascii)
5656             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5657           last_id = charset->id;
5658           last_offset = char_offset;
5659         }
5660
5661       *charbuf++ = c;
5662       char_offset++;
5663       continue;
5664
5665     invalid_code:
5666       src = src_base;
5667       consumed_chars = consumed_chars_base;
5668       ONE_MORE_BYTE (c);
5669       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
5670       char_offset++;
5671       coding->errors++;
5672     }
5673
5674  no_more_source:
5675   if (last_id != charset_ascii)
5676     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5677   coding->consumed_char += consumed_chars_base;
5678   coding->consumed = src_base - coding->source;
5679   coding->charbuf_used = charbuf - coding->charbuf;
5680 }
5681
5682 static int
5683 encode_coding_charset (coding)
5684      struct coding_system *coding;
5685 {
5686   int multibytep = coding->dst_multibyte;
5687   int *charbuf = coding->charbuf;
5688   int *charbuf_end = charbuf + coding->charbuf_used;
5689   unsigned char *dst = coding->destination + coding->produced;
5690   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5691   int safe_room = MAX_MULTIBYTE_LENGTH;
5692   int produced_chars = 0;
5693   Lisp_Object attrs, charset_list;
5694   int ascii_compatible;
5695   int c;
5696
5697   CODING_GET_INFO (coding, attrs, charset_list);
5698   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5699
5700   while (charbuf < charbuf_end)
5701     {
5702       struct charset *charset;
5703       unsigned code;
5704
5705       ASSURE_DESTINATION (safe_room);
5706       c = *charbuf++;
5707       if (ascii_compatible && ASCII_CHAR_P (c))
5708         EMIT_ONE_ASCII_BYTE (c);
5709       else if (CHAR_BYTE8_P (c))
5710         {
5711           c = CHAR_TO_BYTE8 (c);
5712           EMIT_ONE_BYTE (c);
5713         }
5714       else
5715         {
5716           charset = char_charset (c, charset_list, &code);
5717           if (charset)
5718             {
5719               if (CHARSET_DIMENSION (charset) == 1)
5720                 EMIT_ONE_BYTE (code);
5721               else if (CHARSET_DIMENSION (charset) == 2)
5722                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5723               else if (CHARSET_DIMENSION (charset) == 3)
5724                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5725               else
5726                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5727                                  (code >> 8) & 0xFF, code & 0xFF);
5728             }
5729           else
5730             {
5731               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5732                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5733               else
5734                 c = coding->default_char;
5735               EMIT_ONE_BYTE (c);
5736             }
5737         }
5738     }
5739
5740   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5741   coding->produced_char += produced_chars;
5742   coding->produced = dst - coding->destination;
5743   return 0;
5744 }
5745
5746 \f
5747 /*** 7. C library functions ***/
5748
5749 /* Setup coding context CODING from information about CODING_SYSTEM.
5750    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
5751    CODING_SYSTEM is invalid, signal an error.  */
5752
5753 void
5754 setup_coding_system (coding_system, coding)
5755      Lisp_Object coding_system;
5756      struct coding_system *coding;
5757 {
5758   Lisp_Object attrs;
5759   Lisp_Object eol_type;
5760   Lisp_Object coding_type;
5761   Lisp_Object val;
5762
5763   if (NILP (coding_system))
5764     coding_system = Qundecided;
5765
5766   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
5767
5768   attrs = CODING_ID_ATTRS (coding->id);
5769   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
5770
5771   coding->mode = 0;
5772   coding->head_ascii = -1;
5773   if (VECTORP (eol_type))
5774     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5775                             | CODING_REQUIRE_DETECTION_MASK);
5776   else if (! EQ (eol_type, Qunix))
5777     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5778                             | CODING_REQUIRE_ENCODING_MASK);
5779   else
5780     coding->common_flags = 0;
5781   if (! NILP (CODING_ATTR_POST_READ (attrs)))
5782     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5783   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5784     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5785   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5786     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
5787
5788   val = CODING_ATTR_SAFE_CHARSETS (attrs);
5789   coding->max_charset_id = SCHARS (val) - 1;
5790   coding->safe_charsets = SDATA (val);
5791   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
5792
5793   coding_type = CODING_ATTR_TYPE (attrs);
5794   if (EQ (coding_type, Qundecided))
5795     {
5796       coding->detector = NULL;
5797       coding->decoder = decode_coding_raw_text;
5798       coding->encoder = encode_coding_raw_text;
5799       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5800     }
5801   else if (EQ (coding_type, Qiso_2022))
5802     {
5803       int i;
5804       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5805
5806       /* Invoke graphic register 0 to plane 0.  */
5807       CODING_ISO_INVOCATION (coding, 0) = 0;
5808       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
5809       CODING_ISO_INVOCATION (coding, 1)
5810         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5811       /* Setup the initial status of designation.  */
5812       for (i = 0; i < 4; i++)
5813         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5814       /* Not single shifting initially.  */
5815       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5816       /* Beginning of buffer should also be regarded as bol. */
5817       CODING_ISO_BOL (coding) = 1;
5818       coding->detector = detect_coding_iso_2022;
5819       coding->decoder = decode_coding_iso_2022;
5820       coding->encoder = encode_coding_iso_2022;
5821       if (flags & CODING_ISO_FLAG_SAFE)
5822         coding->mode |= CODING_MODE_SAFE_ENCODING;
5823       coding->common_flags
5824         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5825             | CODING_REQUIRE_FLUSHING_MASK);
5826       if (flags & CODING_ISO_FLAG_COMPOSITION)
5827         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
5828       if (flags & CODING_ISO_FLAG_DESIGNATION)
5829         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
5830       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5831         {
5832           setup_iso_safe_charsets (attrs);
5833           val = CODING_ATTR_SAFE_CHARSETS (attrs);
5834           coding->max_charset_id = SCHARS (val) - 1;
5835           coding->safe_charsets = SDATA (val);
5836         }
5837       CODING_ISO_FLAGS (coding) = flags;
5838       CODING_ISO_CMP_STATUS (coding)->state = COMPOSING_NO;
5839       CODING_ISO_CMP_STATUS (coding)->method = COMPOSITION_NO;
5840       CODING_ISO_EXTSEGMENT_LEN (coding) = 0;
5841       CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
5842     }
5843   else if (EQ (coding_type, Qcharset))
5844     {
5845       coding->detector = detect_coding_charset;
5846       coding->decoder = decode_coding_charset;
5847       coding->encoder = encode_coding_charset;
5848       coding->common_flags
5849         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5850     }
5851   else if (EQ (coding_type, Qutf_8))
5852     {
5853       val = AREF (attrs, coding_attr_utf_bom);
5854       CODING_UTF_8_BOM (coding) = (CONSP (val) ? utf_detect_bom
5855                                    : EQ (val, Qt) ? utf_with_bom
5856                                    : utf_without_bom);
5857       coding->detector = detect_coding_utf_8;
5858       coding->decoder = decode_coding_utf_8;
5859       coding->encoder = encode_coding_utf_8;
5860       coding->common_flags
5861         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5862       if (CODING_UTF_8_BOM (coding) == utf_detect_bom)
5863         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5864     }
5865   else if (EQ (coding_type, Qutf_16))
5866     {
5867       val = AREF (attrs, coding_attr_utf_bom);
5868       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_detect_bom
5869                                     : EQ (val, Qt) ? utf_with_bom
5870                                     : utf_without_bom);
5871       val = AREF (attrs, coding_attr_utf_16_endian);
5872       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
5873                                        : utf_16_little_endian);
5874       CODING_UTF_16_SURROGATE (coding) = 0;
5875       coding->detector = detect_coding_utf_16;
5876       coding->decoder = decode_coding_utf_16;
5877       coding->encoder = encode_coding_utf_16;
5878       coding->common_flags
5879         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5880       if (CODING_UTF_16_BOM (coding) == utf_detect_bom)
5881         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5882     }
5883   else if (EQ (coding_type, Qccl))
5884     {
5885       coding->detector = detect_coding_ccl;
5886       coding->decoder = decode_coding_ccl;
5887       coding->encoder = encode_coding_ccl;
5888       coding->common_flags
5889         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5890             | CODING_REQUIRE_FLUSHING_MASK);
5891     }
5892   else if (EQ (coding_type, Qemacs_mule))
5893     {
5894       coding->detector = detect_coding_emacs_mule;
5895       coding->decoder = decode_coding_emacs_mule;
5896       coding->encoder = encode_coding_emacs_mule;
5897       coding->common_flags
5898         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5899       coding->spec.emacs_mule.full_support = 1;
5900       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5901           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5902         {
5903           Lisp_Object tail, safe_charsets;
5904           int max_charset_id = 0;
5905
5906           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5907                tail = XCDR (tail))
5908             if (max_charset_id < XFASTINT (XCAR (tail)))
5909               max_charset_id = XFASTINT (XCAR (tail));
5910           safe_charsets = make_uninit_string (max_charset_id + 1);
5911           memset (SDATA (safe_charsets), 255, max_charset_id + 1);
5912           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5913                tail = XCDR (tail))
5914             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
5915           coding->max_charset_id = max_charset_id;
5916           coding->safe_charsets = SDATA (safe_charsets);
5917           coding->spec.emacs_mule.full_support = 1;
5918         }
5919       coding->spec.emacs_mule.cmp_status.state = COMPOSING_NO;
5920       coding->spec.emacs_mule.cmp_status.method = COMPOSITION_NO;
5921     }
5922   else if (EQ (coding_type, Qshift_jis))
5923     {
5924       coding->detector = detect_coding_sjis;
5925       coding->decoder = decode_coding_sjis;
5926       coding->encoder = encode_coding_sjis;
5927       coding->common_flags
5928         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5929     }
5930   else if (EQ (coding_type, Qbig5))
5931     {
5932       coding->detector = detect_coding_big5;
5933       coding->decoder = decode_coding_big5;
5934       coding->encoder = encode_coding_big5;
5935       coding->common_flags
5936         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5937     }
5938   else                          /* EQ (coding_type, Qraw_text) */
5939     {
5940       coding->detector = NULL;
5941       coding->decoder = decode_coding_raw_text;
5942       coding->encoder = encode_coding_raw_text;
5943       if (! EQ (eol_type, Qunix))
5944         {
5945           coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5946           if (! VECTORP (eol_type))
5947             coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5948         }
5949
5950     }
5951
5952   return;
5953 }
5954
5955 /* Return a list of charsets supported by CODING.  */
5956
5957 Lisp_Object
5958 coding_charset_list (coding)
5959      struct coding_system *coding;
5960 {
5961   Lisp_Object attrs, charset_list;
5962
5963   CODING_GET_INFO (coding, attrs, charset_list);
5964   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5965     {
5966       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5967
5968       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5969         charset_list = Viso_2022_charset_list;
5970     }
5971   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5972     {
5973       charset_list = Vemacs_mule_charset_list;
5974     }
5975   return charset_list;
5976 }
5977
5978
5979 /* Return a list of charsets supported by CODING-SYSTEM.  */
5980
5981 Lisp_Object
5982 coding_system_charset_list (coding_system)
5983      Lisp_Object coding_system;
5984 {
5985   int id;
5986   Lisp_Object attrs, charset_list;
5987
5988   CHECK_CODING_SYSTEM_GET_ID (coding_system, id);
5989   attrs = CODING_ID_ATTRS (id);
5990
5991   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5992     {
5993       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5994
5995       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5996         charset_list = Viso_2022_charset_list;
5997       else
5998         charset_list = CODING_ATTR_CHARSET_LIST (attrs);
5999     }
6000   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
6001     {
6002       charset_list = Vemacs_mule_charset_list;
6003     }
6004   else
6005     {
6006       charset_list = CODING_ATTR_CHARSET_LIST (attrs);
6007     }
6008   return charset_list;
6009 }
6010
6011
6012 /* Return raw-text or one of its subsidiaries that has the same
6013    eol_type as CODING-SYSTEM.  */
6014
6015 Lisp_Object
6016 raw_text_coding_system (coding_system)
6017      Lisp_Object coding_system;
6018 {
6019   Lisp_Object spec, attrs;
6020   Lisp_Object eol_type, raw_text_eol_type;
6021
6022   if (NILP (coding_system))
6023     return Qraw_text;
6024   spec = CODING_SYSTEM_SPEC (coding_system);
6025   attrs = AREF (spec, 0);
6026
6027   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
6028     return coding_system;
6029
6030   eol_type = AREF (spec, 2);
6031   if (VECTORP (eol_type))
6032     return Qraw_text;
6033   spec = CODING_SYSTEM_SPEC (Qraw_text);
6034   raw_text_eol_type = AREF (spec, 2);
6035   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
6036           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
6037           : AREF (raw_text_eol_type, 2));
6038 }
6039
6040
6041 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
6042    does, return one of the subsidiary that has the same eol-spec as
6043    PARENT.  Otherwise, return CODING_SYSTEM.  If PARENT is nil,
6044    inherit end-of-line format from the system's setting
6045    (system_eol_type).  */
6046
6047 Lisp_Object
6048 coding_inherit_eol_type (coding_system, parent)
6049      Lisp_Object coding_system, parent;
6050 {
6051   Lisp_Object spec, eol_type;
6052
6053   if (NILP (coding_system))
6054     coding_system = Qraw_text;
6055   spec = CODING_SYSTEM_SPEC (coding_system);
6056   eol_type = AREF (spec, 2);
6057   if (VECTORP (eol_type))
6058     {
6059       Lisp_Object parent_eol_type;
6060
6061       if (! NILP (parent))
6062         {
6063           Lisp_Object parent_spec;
6064
6065           parent_spec = CODING_SYSTEM_SPEC (parent);
6066           parent_eol_type = AREF (parent_spec, 2);
6067         }
6068       else
6069         parent_eol_type = system_eol_type;
6070       if (EQ (parent_eol_type, Qunix))
6071         coding_system = AREF (eol_type, 0);
6072       else if (EQ (parent_eol_type, Qdos))
6073         coding_system = AREF (eol_type, 1);
6074       else if (EQ (parent_eol_type, Qmac))
6075         coding_system = AREF (eol_type, 2);
6076     }
6077   return coding_system;
6078 }
6079
6080 /* Emacs has a mechanism to automatically detect a coding system if it
6081    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
6082    it's impossible to distinguish some coding systems accurately
6083    because they use the same range of codes.  So, at first, coding
6084    systems are categorized into 7, those are:
6085
6086    o coding-category-emacs-mule
6087
6088         The category for a coding system which has the same code range
6089         as Emacs' internal format.  Assigned the coding-system (Lisp
6090         symbol) `emacs-mule' by default.
6091
6092    o coding-category-sjis
6093
6094         The category for a coding system which has the same code range
6095         as SJIS.  Assigned the coding-system (Lisp
6096         symbol) `japanese-shift-jis' by default.
6097
6098    o coding-category-iso-7
6099
6100         The category for a coding system which has the same code range
6101         as ISO2022 of 7-bit environment.  This doesn't use any locking
6102         shift and single shift functions.  This can encode/decode all
6103         charsets.  Assigned the coding-system (Lisp symbol)
6104         `iso-2022-7bit' by default.
6105
6106    o coding-category-iso-7-tight
6107
6108         Same as coding-category-iso-7 except that this can
6109         encode/decode only the specified charsets.
6110
6111    o coding-category-iso-8-1
6112
6113         The category for a coding system which has the same code range
6114         as ISO2022 of 8-bit environment and graphic plane 1 used only
6115         for DIMENSION1 charset.  This doesn't use any locking shift
6116         and single shift functions.  Assigned the coding-system (Lisp
6117         symbol) `iso-latin-1' by default.
6118
6119    o coding-category-iso-8-2
6120
6121         The category for a coding system which has the same code range
6122         as ISO2022 of 8-bit environment and graphic plane 1 used only
6123         for DIMENSION2 charset.  This doesn't use any locking shift
6124         and single shift functions.  Assigned the coding-system (Lisp
6125         symbol) `japanese-iso-8bit' by default.
6126
6127    o coding-category-iso-7-else
6128
6129         The category for a coding system which has the same code range
6130         as ISO2022 of 7-bit environemnt but uses locking shift or
6131         single shift functions.  Assigned the coding-system (Lisp
6132         symbol) `iso-2022-7bit-lock' by default.
6133
6134    o coding-category-iso-8-else
6135
6136         The category for a coding system which has the same code range
6137         as ISO2022 of 8-bit environemnt but uses locking shift or
6138         single shift functions.  Assigned the coding-system (Lisp
6139         symbol) `iso-2022-8bit-ss2' by default.
6140
6141    o coding-category-big5
6142
6143         The category for a coding system which has the same code range
6144         as BIG5.  Assigned the coding-system (Lisp symbol)
6145         `cn-big5' by default.
6146
6147    o coding-category-utf-8
6148
6149         The category for a coding system which has the same code range
6150         as UTF-8 (cf. RFC3629).  Assigned the coding-system (Lisp
6151         symbol) `utf-8' by default.
6152
6153    o coding-category-utf-16-be
6154
6155         The category for a coding system in which a text has an
6156         Unicode signature (cf. Unicode Standard) in the order of BIG
6157         endian at the head.  Assigned the coding-system (Lisp symbol)
6158         `utf-16-be' by default.
6159
6160    o coding-category-utf-16-le
6161
6162         The category for a coding system in which a text has an
6163         Unicode signature (cf. Unicode Standard) in the order of
6164         LITTLE endian at the head.  Assigned the coding-system (Lisp
6165         symbol) `utf-16-le' by default.
6166
6167    o coding-category-ccl
6168
6169         The category for a coding system of which encoder/decoder is
6170         written in CCL programs.  The default value is nil, i.e., no
6171         coding system is assigned.
6172
6173    o coding-category-binary
6174
6175         The category for a coding system not categorized in any of the
6176         above.  Assigned the coding-system (Lisp symbol)
6177         `no-conversion' by default.
6178
6179    Each of them is a Lisp symbol and the value is an actual
6180    `coding-system's (this is also a Lisp symbol) assigned by a user.
6181    What Emacs does actually is to detect a category of coding system.
6182    Then, it uses a `coding-system' assigned to it.  If Emacs can't
6183    decide only one possible category, it selects a category of the
6184    highest priority.  Priorities of categories are also specified by a
6185    user in a Lisp variable `coding-category-list'.
6186
6187 */
6188
6189 #define EOL_SEEN_NONE   0
6190 #define EOL_SEEN_LF     1
6191 #define EOL_SEEN_CR     2
6192 #define EOL_SEEN_CRLF   4
6193
6194 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
6195    SOURCE is encoded.  If CATEGORY is one of
6196    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
6197    two-byte, else they are encoded by one-byte.
6198
6199    Return one of EOL_SEEN_XXX.  */
6200
6201 #define MAX_EOL_CHECK_COUNT 3
6202
6203 static int
6204 detect_eol (source, src_bytes, category)
6205      const unsigned char *source;
6206      EMACS_INT src_bytes;
6207      enum coding_category category;
6208 {
6209   const unsigned char *src = source, *src_end = src + src_bytes;
6210   unsigned char c;
6211   int total  = 0;
6212   int eol_seen = EOL_SEEN_NONE;
6213
6214   if ((1 << category) & CATEGORY_MASK_UTF_16)
6215     {
6216       int msb, lsb;
6217
6218       msb = category == (coding_category_utf_16_le
6219                          | coding_category_utf_16_le_nosig);
6220       lsb = 1 - msb;
6221
6222       while (src + 1 < src_end)
6223         {
6224           c = src[lsb];
6225           if (src[msb] == 0 && (c == '\n' || c == '\r'))
6226             {
6227               int this_eol;
6228
6229               if (c == '\n')
6230                 this_eol = EOL_SEEN_LF;
6231               else if (src + 3 >= src_end
6232                        || src[msb + 2] != 0
6233                        || src[lsb + 2] != '\n')
6234                 this_eol = EOL_SEEN_CR;
6235               else
6236                 {
6237                   this_eol = EOL_SEEN_CRLF;
6238                   src += 2;
6239                 }
6240
6241               if (eol_seen == EOL_SEEN_NONE)
6242                 /* This is the first end-of-line.  */
6243                 eol_seen = this_eol;
6244               else if (eol_seen != this_eol)
6245                 {
6246                   /* The found type is different from what found before.
6247                      Allow for stray ^M characters in DOS EOL files.  */
6248                   if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
6249                       || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
6250                     eol_seen = EOL_SEEN_CRLF;
6251                   else
6252                     {
6253                       eol_seen = EOL_SEEN_LF;
6254                       break;
6255                     }
6256                 }
6257               if (++total == MAX_EOL_CHECK_COUNT)
6258                 break;
6259             }
6260           src += 2;
6261         }
6262     }
6263   else
6264     {
6265       while (src < src_end)
6266         {
6267           c = *src++;
6268           if (c == '\n' || c == '\r')
6269             {
6270               int this_eol;
6271
6272               if (c == '\n')
6273                 this_eol = EOL_SEEN_LF;
6274               else if (src >= src_end || *src != '\n')
6275                 this_eol = EOL_SEEN_CR;
6276               else
6277                 this_eol = EOL_SEEN_CRLF, src++;
6278
6279               if (eol_seen == EOL_SEEN_NONE)
6280                 /* This is the first end-of-line.  */
6281                 eol_seen = this_eol;
6282               else if (eol_seen != this_eol)
6283                 {
6284                   /* The found type is different from what found before.
6285                      Allow for stray ^M characters in DOS EOL files.  */
6286                   if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
6287                       || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
6288                     eol_seen = EOL_SEEN_CRLF;
6289                   else
6290                     {
6291                       eol_seen = EOL_SEEN_LF;
6292                       break;
6293                     }
6294                 }
6295               if (++total == MAX_EOL_CHECK_COUNT)
6296                 break;
6297             }
6298         }
6299     }
6300   return eol_seen;
6301 }
6302
6303
6304 static Lisp_Object
6305 adjust_coding_eol_type (coding, eol_seen)
6306      struct coding_system *coding;
6307      int eol_seen;
6308 {
6309   Lisp_Object eol_type;
6310
6311   eol_type = CODING_ID_EOL_TYPE (coding->id);
6312   if (eol_seen & EOL_SEEN_LF)
6313     {
6314       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
6315       eol_type = Qunix;
6316     }
6317   else if (eol_seen & EOL_SEEN_CRLF)
6318     {
6319       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
6320       eol_type = Qdos;
6321     }
6322   else if (eol_seen & EOL_SEEN_CR)
6323     {
6324       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
6325       eol_type = Qmac;
6326     }
6327   return eol_type;
6328 }
6329
6330 /* Detect how a text specified in CODING is encoded.  If a coding
6331    system is detected, update fields of CODING by the detected coding
6332    system.  */
6333
6334 void
6335 detect_coding (coding)
6336      struct coding_system *coding;
6337 {
6338   const unsigned char *src, *src_end;
6339   int saved_mode = coding->mode;
6340
6341   coding->consumed = coding->consumed_char = 0;
6342   coding->produced = coding->produced_char = 0;
6343   coding_set_source (coding);
6344
6345   src_end = coding->source + coding->src_bytes;
6346   coding->head_ascii = 0;
6347
6348   /* If we have not yet decided the text encoding type, detect it
6349      now.  */
6350   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
6351     {
6352       int c, i;
6353       struct coding_detection_info detect_info;
6354       int null_byte_found = 0, eight_bit_found = 0;
6355
6356       detect_info.checked = detect_info.found = detect_info.rejected = 0;
6357       for (src = coding->source; src < src_end; src++)
6358         {
6359           c = *src;
6360           if (c & 0x80)
6361             {
6362               eight_bit_found = 1;
6363               if (null_byte_found)
6364                 break;
6365             }
6366           else if (c < 0x20)
6367             {
6368               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
6369                   && ! inhibit_iso_escape_detection
6370                   && ! detect_info.checked)
6371                 {
6372                   if (detect_coding_iso_2022 (coding, &detect_info))
6373                     {
6374                       /* We have scanned the whole data.  */
6375                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
6376                         {
6377                           /* We didn't find an 8-bit code.  We may
6378                              have found a null-byte, but it's very
6379                              rare that a binary file confirm to
6380                              ISO-2022.  */
6381                           src = src_end;
6382                           coding->head_ascii = src - coding->source;
6383                         }
6384                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
6385                       break;
6386                     }
6387                 }
6388               else if (! c && !inhibit_null_byte_detection)
6389                 {
6390                   null_byte_found = 1;
6391                   if (eight_bit_found)
6392                     break;
6393                 }
6394               if (! eight_bit_found)
6395                 coding->head_ascii++;
6396             }
6397           else if (! eight_bit_found)
6398             coding->head_ascii++;
6399         }
6400
6401       if (null_byte_found || eight_bit_found
6402           || coding->head_ascii < coding->src_bytes
6403           || detect_info.found)
6404         {
6405           enum coding_category category;
6406           struct coding_system *this;
6407
6408           if (coding->head_ascii == coding->src_bytes)
6409             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
6410             for (i = 0; i < coding_category_raw_text; i++)
6411               {
6412                 category = coding_priorities[i];
6413                 this = coding_categories + category;
6414                 if (detect_info.found & (1 << category))
6415                   break;
6416               }
6417           else
6418             {
6419               if (null_byte_found)
6420                 {
6421                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
6422                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
6423                 }
6424               for (i = 0; i < coding_category_raw_text; i++)
6425                 {
6426                   category = coding_priorities[i];
6427                   this = coding_categories + category;
6428                   if (this->id < 0)
6429                     {
6430                       /* No coding system of this category is defined.  */
6431                       detect_info.rejected |= (1 << category);
6432                     }
6433                   else if (category >= coding_category_raw_text)
6434                     continue;
6435                   else if (detect_info.checked & (1 << category))
6436                     {
6437                       if (detect_info.found & (1 << category))
6438                         break;
6439                     }
6440                   else if ((*(this->detector)) (coding, &detect_info)
6441                            && detect_info.found & (1 << category))
6442                     {
6443                       if (category == coding_category_utf_16_auto)
6444                         {
6445                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6446                             category = coding_category_utf_16_le;
6447                           else
6448                             category = coding_category_utf_16_be;
6449                         }
6450                       break;
6451                     }
6452                 }
6453             }
6454
6455           if (i < coding_category_raw_text)
6456             setup_coding_system (CODING_ID_NAME (this->id), coding);
6457           else if (null_byte_found)
6458             setup_coding_system (Qno_conversion, coding);
6459           else if ((detect_info.rejected & CATEGORY_MASK_ANY)
6460                    == CATEGORY_MASK_ANY)
6461             setup_coding_system (Qraw_text, coding);
6462           else if (detect_info.rejected)
6463             for (i = 0; i < coding_category_raw_text; i++)
6464               if (! (detect_info.rejected & (1 << coding_priorities[i])))
6465                 {
6466                   this = coding_categories + coding_priorities[i];
6467                   setup_coding_system (CODING_ID_NAME (this->id), coding);
6468                   break;
6469                 }
6470         }
6471     }
6472   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6473            == coding_category_utf_8_auto)
6474     {
6475       Lisp_Object coding_systems;
6476       struct coding_detection_info detect_info;
6477
6478       coding_systems
6479         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6480       detect_info.found = detect_info.rejected = 0;
6481       coding->head_ascii = 0;
6482       if (CONSP (coding_systems)
6483           && detect_coding_utf_8 (coding, &detect_info))
6484         {
6485           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
6486             setup_coding_system (XCAR (coding_systems), coding);
6487           else
6488             setup_coding_system (XCDR (coding_systems), coding);
6489         }
6490     }
6491   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6492            == coding_category_utf_16_auto)
6493     {
6494       Lisp_Object coding_systems;
6495       struct coding_detection_info detect_info;
6496
6497       coding_systems
6498         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6499       detect_info.found = detect_info.rejected = 0;
6500       coding->head_ascii = 0;
6501       if (CONSP (coding_systems)
6502           && detect_coding_utf_16 (coding, &detect_info))
6503         {
6504           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6505             setup_coding_system (XCAR (coding_systems), coding);
6506           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
6507             setup_coding_system (XCDR (coding_systems), coding);
6508         }
6509     }
6510   coding->mode = saved_mode;
6511 }
6512
6513
6514 static void
6515 decode_eol (coding)
6516      struct coding_system *coding;
6517 {
6518   Lisp_Object eol_type;
6519   unsigned char *p, *pbeg, *pend;
6520
6521   eol_type = CODING_ID_EOL_TYPE (coding->id);
6522   if (EQ (eol_type, Qunix) || inhibit_eol_conversion)
6523     return;
6524
6525   if (NILP (coding->dst_object))
6526     pbeg = coding->destination;
6527   else
6528     pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
6529   pend = pbeg + coding->produced;
6530
6531   if (VECTORP (eol_type))
6532     {
6533       int eol_seen = EOL_SEEN_NONE;
6534
6535       for (p = pbeg; p < pend; p++)
6536         {
6537           if (*p == '\n')
6538             eol_seen |= EOL_SEEN_LF;
6539           else if (*p == '\r')
6540             {
6541               if (p + 1 < pend && *(p + 1) == '\n')
6542                 {
6543                   eol_seen |= EOL_SEEN_CRLF;
6544                   p++;
6545                 }
6546               else
6547                 eol_seen |= EOL_SEEN_CR;
6548             }
6549         }
6550       /* Handle DOS-style EOLs in a file with stray ^M characters.  */
6551       if ((eol_seen & EOL_SEEN_CRLF) != 0
6552           && (eol_seen & EOL_SEEN_CR) != 0
6553           && (eol_seen & EOL_SEEN_LF) == 0)
6554         eol_seen = EOL_SEEN_CRLF;
6555       else if (eol_seen != EOL_SEEN_NONE
6556           && eol_seen != EOL_SEEN_LF
6557           && eol_seen != EOL_SEEN_CRLF
6558           && eol_seen != EOL_SEEN_CR)
6559         eol_seen = EOL_SEEN_LF;
6560       if (eol_seen != EOL_SEEN_NONE)
6561         eol_type = adjust_coding_eol_type (coding, eol_seen);
6562     }
6563
6564   if (EQ (eol_type, Qmac))
6565     {
6566       for (p = pbeg; p < pend; p++)
6567         if (*p == '\r')
6568           *p = '\n';
6569     }
6570   else if (EQ (eol_type, Qdos))
6571     {
6572       int n = 0;
6573
6574       if (NILP (coding->dst_object))
6575         {
6576           /* Start deleting '\r' from the tail to minimize the memory
6577              movement.  */
6578           for (p = pend - 2; p >= pbeg; p--)
6579             if (*p == '\r')
6580               {
6581                 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
6582                 n++;
6583               }
6584         }
6585       else
6586         {
6587           int pos_byte = coding->dst_pos_byte;
6588           int pos = coding->dst_pos;
6589           int pos_end = pos + coding->produced_char - 1;
6590
6591           while (pos < pos_end)
6592             {
6593               p = BYTE_POS_ADDR (pos_byte);
6594               if (*p == '\r' && p[1] == '\n')
6595                 {
6596                   del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
6597                   n++;
6598                   pos_end--;
6599                 }
6600               pos++;
6601               if (coding->dst_multibyte)
6602                 pos_byte += BYTES_BY_CHAR_HEAD (*p);
6603               else
6604                 pos_byte++;
6605             }
6606         }
6607       coding->produced -= n;
6608       coding->produced_char -= n;
6609     }
6610 }
6611
6612
6613 /* Return a translation table (or list of them) from coding system
6614    attribute vector ATTRS for encoding (ENCODEP is nonzero) or
6615    decoding (ENCODEP is zero). */
6616
6617 static Lisp_Object
6618 get_translation_table (attrs, encodep, max_lookup)
6619      Lisp_Object attrs;
6620      int encodep, *max_lookup;
6621 {
6622   Lisp_Object standard, translation_table;
6623   Lisp_Object val;
6624
6625   if (encodep)
6626     translation_table = CODING_ATTR_ENCODE_TBL (attrs),
6627       standard = Vstandard_translation_table_for_encode;
6628   else
6629     translation_table = CODING_ATTR_DECODE_TBL (attrs),
6630       standard = Vstandard_translation_table_for_decode;
6631   if (NILP (translation_table))
6632     translation_table = standard;
6633   else
6634     {
6635       if (SYMBOLP (translation_table))
6636         translation_table = Fget (translation_table, Qtranslation_table);
6637       else if (CONSP (translation_table))
6638         {
6639           translation_table = Fcopy_sequence (translation_table);
6640           for (val = translation_table; CONSP (val); val = XCDR (val))
6641             if (SYMBOLP (XCAR (val)))
6642               XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
6643         }
6644       if (CHAR_TABLE_P (standard))
6645         {
6646           if (CONSP (translation_table))
6647             translation_table = nconc2 (translation_table,
6648                                         Fcons (standard, Qnil));
6649           else
6650             translation_table = Fcons (translation_table,
6651                                        Fcons (standard, Qnil));
6652         }
6653     }
6654
6655   if (max_lookup)
6656     {
6657       *max_lookup = 1;
6658       if (CHAR_TABLE_P (translation_table)
6659           && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
6660         {
6661           val = XCHAR_TABLE (translation_table)->extras[1];
6662           if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6663             *max_lookup = XFASTINT (val);
6664         }
6665       else if (CONSP (translation_table))
6666         {
6667           Lisp_Object tail, val;
6668
6669           for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
6670             if (CHAR_TABLE_P (XCAR (tail))
6671                 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
6672               {
6673                 val = XCHAR_TABLE (XCAR (tail))->extras[1];
6674                 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6675                   *max_lookup = XFASTINT (val);
6676               }
6677         }
6678     }
6679   return translation_table;
6680 }
6681
6682 #define LOOKUP_TRANSLATION_TABLE(table, c, trans)               \
6683   do {                                                          \
6684     trans = Qnil;                                               \
6685     if (CHAR_TABLE_P (table))                                   \
6686       {                                                         \
6687         trans = CHAR_TABLE_REF (table, c);                      \
6688         if (CHARACTERP (trans))                                 \
6689           c = XFASTINT (trans), trans = Qnil;                   \
6690       }                                                         \
6691     else if (CONSP (table))                                     \
6692       {                                                         \
6693         Lisp_Object tail;                                       \
6694                                                                 \
6695         for (tail = table; CONSP (tail); tail = XCDR (tail))    \
6696           if (CHAR_TABLE_P (XCAR (tail)))                       \
6697             {                                                   \
6698               trans = CHAR_TABLE_REF (XCAR (tail), c);          \
6699               if (CHARACTERP (trans))                           \
6700                 c = XFASTINT (trans), trans = Qnil;             \
6701               else if (! NILP (trans))                          \
6702                 break;                                          \
6703             }                                                   \
6704       }                                                         \
6705   } while (0)
6706
6707
6708 /* Return a translation of character(s) at BUF according to TRANS.
6709    TRANS is TO-CHAR or ((FROM .  TO) ...) where
6710    FROM = [FROM-CHAR ...], TO is TO-CHAR or [TO-CHAR ...].
6711    The return value is TO-CHAR or ([FROM-CHAR ...] . TO) if a
6712    translation is found, and Qnil if not found..
6713    If BUF is too short to lookup characters in FROM, return Qt.  */
6714
6715 static Lisp_Object
6716 get_translation (trans, buf, buf_end)
6717      Lisp_Object trans;
6718      int *buf, *buf_end;
6719 {
6720
6721   if (INTEGERP (trans))
6722     return trans;
6723   for (; CONSP (trans); trans = XCDR (trans))
6724     {
6725       Lisp_Object val = XCAR (trans);
6726       Lisp_Object from = XCAR (val);
6727       int len = ASIZE (from);
6728       int i;
6729
6730       for (i = 0; i < len; i++)
6731         {
6732           if (buf + i == buf_end)
6733             return Qt;
6734           if (XINT (AREF (from, i)) != buf[i])
6735             break;
6736         }
6737       if (i == len)
6738         return val;
6739     }
6740   return Qnil;
6741 }
6742
6743
6744 static int
6745 produce_chars (coding, translation_table, last_block)
6746      struct coding_system *coding;
6747      Lisp_Object translation_table;
6748      int last_block;
6749 {
6750   unsigned char *dst = coding->destination + coding->produced;
6751   unsigned char *dst_end = coding->destination + coding->dst_bytes;
6752   EMACS_INT produced;
6753   EMACS_INT produced_chars = 0;
6754   int carryover = 0;
6755
6756   if (! coding->chars_at_source)
6757     {
6758       /* Source characters are in coding->charbuf.  */
6759       int *buf = coding->charbuf;
6760       int *buf_end = buf + coding->charbuf_used;
6761
6762       if (EQ (coding->src_object, coding->dst_object))
6763         {
6764           coding_set_source (coding);
6765           dst_end = ((unsigned char *) coding->source) + coding->consumed;
6766         }
6767
6768       while (buf < buf_end)
6769         {
6770           int c = *buf, i;
6771
6772           if (c >= 0)
6773             {
6774               int from_nchars = 1, to_nchars = 1;
6775               Lisp_Object trans = Qnil;
6776
6777               LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6778               if (! NILP (trans))
6779                 {
6780                   trans = get_translation (trans, buf, buf_end);
6781                   if (INTEGERP (trans))
6782                     c = XINT (trans);
6783                   else if (CONSP (trans))
6784                     {
6785                       from_nchars = ASIZE (XCAR (trans));
6786                       trans = XCDR (trans);
6787                       if (INTEGERP (trans))
6788                         c = XINT (trans);
6789                       else
6790                         {
6791                           to_nchars = ASIZE (trans);
6792                           c = XINT (AREF (trans, 0));
6793                         }
6794                     }
6795                   else if (EQ (trans, Qt) && ! last_block)
6796                     break;
6797                 }
6798
6799               if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
6800                 {
6801                   dst = alloc_destination (coding,
6802                                            buf_end - buf
6803                                            + MAX_MULTIBYTE_LENGTH * to_nchars,
6804                                            dst);
6805                   if (EQ (coding->src_object, coding->dst_object))
6806                     {
6807                       coding_set_source (coding);
6808                       dst_end = (((unsigned char *) coding->source)
6809                                  + coding->consumed);
6810                     }
6811                   else
6812                     dst_end = coding->destination + coding->dst_bytes;
6813                 }
6814
6815               for (i = 0; i < to_nchars; i++)
6816                 {
6817                   if (i > 0)
6818                     c = XINT (AREF (trans, i));
6819                   if (coding->dst_multibyte
6820                       || ! CHAR_BYTE8_P (c))
6821                     CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
6822                   else
6823                     *dst++ = CHAR_TO_BYTE8 (c);
6824                 }
6825               produced_chars += to_nchars;
6826               buf += from_nchars;
6827             }
6828           else
6829             /* This is an annotation datum.  (-C) is the length.  */
6830             buf += -c;
6831         }
6832       carryover = buf_end - buf;
6833     }
6834   else
6835     {
6836       /* Source characters are at coding->source.  */
6837       const unsigned char *src = coding->source;
6838       const unsigned char *src_end = src + coding->consumed;
6839
6840       if (EQ (coding->dst_object, coding->src_object))
6841         dst_end = (unsigned char *) src;
6842       if (coding->src_multibyte != coding->dst_multibyte)
6843         {
6844           if (coding->src_multibyte)
6845             {
6846               int multibytep = 1;
6847               EMACS_INT consumed_chars = 0;
6848
6849               while (1)
6850                 {
6851                   const unsigned char *src_base = src;
6852                   int c;
6853
6854                   ONE_MORE_BYTE (c);
6855                   if (dst == dst_end)
6856                     {
6857                       if (EQ (coding->src_object, coding->dst_object))
6858                         dst_end = (unsigned char *) src;
6859                       if (dst == dst_end)
6860                         {
6861                           EMACS_INT offset = src - coding->source;
6862
6863                           dst = alloc_destination (coding, src_end - src + 1,
6864                                                    dst);
6865                           dst_end = coding->destination + coding->dst_bytes;
6866                           coding_set_source (coding);
6867                           src = coding->source + offset;
6868                           src_end = coding->source + coding->src_bytes;
6869                           if (EQ (coding->src_object, coding->dst_object))
6870                             dst_end = (unsigned char *) src;
6871                         }
6872                     }
6873                   *dst++ = c;
6874                   produced_chars++;
6875                 }
6876             no_more_source:
6877               ;
6878             }
6879           else
6880             while (src < src_end)
6881               {
6882                 int multibytep = 1;
6883                 int c = *src++;
6884
6885                 if (dst >= dst_end - 1)
6886                   {
6887                     if (EQ (coding->src_object, coding->dst_object))
6888                       dst_end = (unsigned char *) src;
6889                     if (dst >= dst_end - 1)
6890                       {
6891                         EMACS_INT offset = src - coding->source;
6892                         EMACS_INT more_bytes;
6893
6894                         if (EQ (coding->src_object, coding->dst_object))
6895                           more_bytes = ((src_end - src) / 2) + 2;
6896                         else
6897                           more_bytes = src_end - src + 2;
6898                         dst = alloc_destination (coding, more_bytes, dst);
6899                         dst_end = coding->destination + coding->dst_bytes;
6900                         coding_set_source (coding);
6901                         src = coding->source + offset;
6902                         src_end = coding->source + coding->src_bytes;
6903                         if (EQ (coding->src_object, coding->dst_object))
6904                           dst_end = (unsigned char *) src;
6905                       }
6906                   }
6907                 EMIT_ONE_BYTE (c);
6908               }
6909         }
6910       else
6911         {
6912           if (!EQ (coding->src_object, coding->dst_object))
6913             {
6914               EMACS_INT require = coding->src_bytes - coding->dst_bytes;
6915
6916               if (require > 0)
6917                 {
6918                   EMACS_INT offset = src - coding->source;
6919
6920                   dst = alloc_destination (coding, require, dst);
6921                   coding_set_source (coding);
6922                   src = coding->source + offset;
6923                   src_end = coding->source + coding->src_bytes;
6924                 }
6925             }
6926           produced_chars = coding->consumed_char;
6927           while (src < src_end)
6928             *dst++ = *src++;
6929         }
6930     }
6931
6932   produced = dst - (coding->destination + coding->produced);
6933   if (BUFFERP (coding->dst_object) && produced_chars > 0)
6934     insert_from_gap (produced_chars, produced);
6935   coding->produced += produced;
6936   coding->produced_char += produced_chars;
6937   return carryover;
6938 }
6939
6940 /* Compose text in CODING->object according to the annotation data at
6941    CHARBUF.  CHARBUF is an array:
6942      [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ]
6943  */
6944
6945 static INLINE void
6946 produce_composition (coding, charbuf, pos)
6947      struct coding_system *coding;
6948      int *charbuf;
6949      EMACS_INT pos;
6950 {
6951   int len;
6952   EMACS_INT to;
6953   enum composition_method method;
6954   Lisp_Object components;
6955
6956   len = -charbuf[0] - MAX_ANNOTATION_LENGTH;
6957   to = pos + charbuf[2];
6958   method = (enum composition_method) (charbuf[4]);
6959
6960   if (method == COMPOSITION_RELATIVE)
6961     components = Qnil;
6962   else
6963     {
6964       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
6965       int i, j;
6966
6967       if (method == COMPOSITION_WITH_RULE)
6968         len = charbuf[2] * 3 - 2;
6969       charbuf += MAX_ANNOTATION_LENGTH;
6970       /* charbuf = [ CHRA ... CHAR] or [ CHAR -2 RULE ... CHAR ] */
6971       for (i = j = 0; i < len && charbuf[i] != -1; i++, j++)
6972         {
6973           if (charbuf[i] >= 0)
6974             args[j] = make_number (charbuf[i]);
6975           else
6976             {
6977               i++;
6978               args[j] = make_number (charbuf[i] % 0x100);
6979             }
6980         }
6981       components = (i == j ? Fstring (j, args) : Fvector (j, args));
6982     }
6983   compose_text (pos, to, components, Qnil, coding->dst_object);
6984 }
6985
6986
6987 /* Put `charset' property on text in CODING->object according to
6988    the annotation data at CHARBUF.  CHARBUF is an array:
6989      [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
6990  */
6991
6992 static INLINE void
6993 produce_charset (coding, charbuf, pos)
6994      struct coding_system *coding;
6995      int *charbuf;
6996      EMACS_INT pos;
6997 {
6998   EMACS_INT from = pos - charbuf[2];
6999   struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
7000
7001   Fput_text_property (make_number (from), make_number (pos),
7002                       Qcharset, CHARSET_NAME (charset),
7003                       coding->dst_object);
7004 }
7005
7006
7007 #define CHARBUF_SIZE 0x4000
7008
7009 #define ALLOC_CONVERSION_WORK_AREA(coding)                              \
7010   do {                                                                  \
7011     int size = CHARBUF_SIZE;                                            \
7012                                                                         \
7013     coding->charbuf = NULL;                                             \
7014     while (size > 1024)                                                 \
7015       {                                                                 \
7016         coding->charbuf = (int *) alloca (sizeof (int) * size);         \
7017         if (coding->charbuf)                                            \
7018           break;                                                        \
7019         size >>= 1;                                                     \
7020       }                                                                 \
7021     if (! coding->charbuf)                                              \
7022       {                                                                 \
7023         record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
7024         return coding->result;                                          \
7025       }                                                                 \
7026     coding->charbuf_size = size;                                        \
7027   } while (0)
7028
7029
7030 static void
7031 produce_annotation (coding, pos)
7032      struct coding_system *coding;
7033      EMACS_INT pos;
7034 {
7035   int *charbuf = coding->charbuf;
7036   int *charbuf_end = charbuf + coding->charbuf_used;
7037
7038   if (NILP (coding->dst_object))
7039     return;
7040
7041   while (charbuf < charbuf_end)
7042     {
7043       if (*charbuf >= 0)
7044         pos++, charbuf++;
7045       else
7046         {
7047           int len = -*charbuf;
7048
7049           if (len > 2)
7050             switch (charbuf[1])
7051               {
7052               case CODING_ANNOTATE_COMPOSITION_MASK:
7053                 produce_composition (coding, charbuf, pos);
7054                 break;
7055               case CODING_ANNOTATE_CHARSET_MASK:
7056                 produce_charset (coding, charbuf, pos);
7057                 break;
7058               }
7059           charbuf += len;
7060         }
7061     }
7062 }
7063
7064 /* Decode the data at CODING->src_object into CODING->dst_object.
7065    CODING->src_object is a buffer, a string, or nil.
7066    CODING->dst_object is a buffer.
7067
7068    If CODING->src_object is a buffer, it must be the current buffer.
7069    In this case, if CODING->src_pos is positive, it is a position of
7070    the source text in the buffer, otherwise, the source text is in the
7071    gap area of the buffer, and CODING->src_pos specifies the offset of
7072    the text from GPT (which must be the same as PT).  If this is the
7073    same buffer as CODING->dst_object, CODING->src_pos must be
7074    negative.
7075
7076    If CODING->src_object is a string, CODING->src_pos is an index to
7077    that string.
7078
7079    If CODING->src_object is nil, CODING->source must already point to
7080    the non-relocatable memory area.  In this case, CODING->src_pos is
7081    an offset from CODING->source.
7082
7083    The decoded data is inserted at the current point of the buffer
7084    CODING->dst_object.
7085 */
7086
7087 static int
7088 decode_coding (coding)
7089      struct coding_system *coding;
7090 {
7091   Lisp_Object attrs;
7092   Lisp_Object undo_list;
7093   Lisp_Object translation_table;
7094   int carryover;
7095   int i;
7096
7097   if (BUFFERP (coding->src_object)
7098       && coding->src_pos > 0
7099       && coding->src_pos < GPT
7100       && coding->src_pos + coding->src_chars > GPT)
7101     move_gap_both (coding->src_pos, coding->src_pos_byte);
7102
7103   undo_list = Qt;
7104   if (BUFFERP (coding->dst_object))
7105     {
7106       if (current_buffer != XBUFFER (coding->dst_object))
7107         set_buffer_internal (XBUFFER (coding->dst_object));
7108       if (GPT != PT)
7109         move_gap_both (PT, PT_BYTE);
7110       undo_list = current_buffer->undo_list;
7111       current_buffer->undo_list = Qt;
7112     }
7113
7114   coding->consumed = coding->consumed_char = 0;
7115   coding->produced = coding->produced_char = 0;
7116   coding->chars_at_source = 0;
7117   record_conversion_result (coding, CODING_RESULT_SUCCESS);
7118   coding->errors = 0;
7119
7120   ALLOC_CONVERSION_WORK_AREA (coding);
7121
7122   attrs = CODING_ID_ATTRS (coding->id);
7123   translation_table = get_translation_table (attrs, 0, NULL);
7124
7125   carryover = 0;
7126   do
7127     {
7128       EMACS_INT pos = coding->dst_pos + coding->produced_char;
7129
7130       coding_set_source (coding);
7131       coding->annotated = 0;
7132       coding->charbuf_used = carryover;
7133       (*(coding->decoder)) (coding);
7134       coding_set_destination (coding);
7135       carryover = produce_chars (coding, translation_table, 0);
7136       if (coding->annotated)
7137         produce_annotation (coding, pos);
7138       for (i = 0; i < carryover; i++)
7139         coding->charbuf[i]
7140           = coding->charbuf[coding->charbuf_used - carryover + i];
7141     }
7142   while (coding->consumed < coding->src_bytes
7143          && (coding->result == CODING_RESULT_SUCCESS
7144              || coding->result == CODING_RESULT_INVALID_SRC));
7145
7146   if (carryover > 0)
7147     {
7148       coding_set_destination (coding);
7149       coding->charbuf_used = carryover;
7150       produce_chars (coding, translation_table, 1);
7151     }
7152
7153   coding->carryover_bytes = 0;
7154   if (coding->consumed < coding->src_bytes)
7155     {
7156       int nbytes = coding->src_bytes - coding->consumed;
7157       const unsigned char *src;
7158
7159       coding_set_source (coding);
7160       coding_set_destination (coding);
7161       src = coding->source + coding->consumed;
7162
7163       if (coding->mode & CODING_MODE_LAST_BLOCK)
7164         {
7165           /* Flush out unprocessed data as binary chars.  We are sure
7166              that the number of data is less than the size of
7167              coding->charbuf.  */
7168           coding->charbuf_used = 0;
7169           coding->chars_at_source = 0;
7170
7171           while (nbytes-- > 0)
7172             {
7173               int c = *src++;
7174
7175               if (c & 0x80)
7176                 c = BYTE8_TO_CHAR (c);
7177               coding->charbuf[coding->charbuf_used++] = c;
7178             }
7179           produce_chars (coding, Qnil, 1);
7180         }
7181       else
7182         {
7183           /* Record unprocessed bytes in coding->carryover.  We are
7184              sure that the number of data is less than the size of
7185              coding->carryover.  */
7186           unsigned char *p = coding->carryover;
7187
7188           if (nbytes > sizeof coding->carryover)
7189             nbytes = sizeof coding->carryover;
7190           coding->carryover_bytes = nbytes;
7191           while (nbytes-- > 0)
7192             *p++ = *src++;
7193         }
7194       coding->consumed = coding->src_bytes;
7195     }
7196
7197   if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix)
7198       && !inhibit_eol_conversion)
7199     decode_eol (coding);
7200   if (BUFFERP (coding->dst_object))
7201     {
7202       current_buffer->undo_list = undo_list;
7203       record_insert (coding->dst_pos, coding->produced_char);
7204     }
7205   return coding->result;
7206 }
7207
7208
7209 /* Extract an annotation datum from a composition starting at POS and
7210    ending before LIMIT of CODING->src_object (buffer or string), store
7211    the data in BUF, set *STOP to a starting position of the next
7212    composition (if any) or to LIMIT, and return the address of the
7213    next element of BUF.
7214
7215    If such an annotation is not found, set *STOP to a starting
7216    position of a composition after POS (if any) or to LIMIT, and
7217    return BUF.  */
7218
7219 static INLINE int *
7220 handle_composition_annotation (pos, limit, coding, buf, stop)
7221      EMACS_INT pos, limit;
7222      struct coding_system *coding;
7223      int *buf;
7224      EMACS_INT *stop;
7225 {
7226   EMACS_INT start, end;
7227   Lisp_Object prop;
7228
7229   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
7230       || end > limit)
7231     *stop = limit;
7232   else if (start > pos)
7233     *stop = start;
7234   else
7235     {
7236       if (start == pos)
7237         {
7238           /* We found a composition.  Store the corresponding
7239              annotation data in BUF.  */
7240           int *head = buf;
7241           enum composition_method method = COMPOSITION_METHOD (prop);
7242           int nchars = COMPOSITION_LENGTH (prop);
7243
7244           ADD_COMPOSITION_DATA (buf, nchars, 0, method);
7245           if (method != COMPOSITION_RELATIVE)
7246             {
7247               Lisp_Object components;
7248               int len, i, i_byte;
7249
7250               components = COMPOSITION_COMPONENTS (prop);
7251               if (VECTORP (components))
7252                 {
7253                   len = XVECTOR (components)->size;
7254                   for (i = 0; i < len; i++)
7255                     *buf++ = XINT (AREF (components, i));
7256                 }
7257               else if (STRINGP (components))
7258                 {
7259                   len = SCHARS (components);
7260                   i = i_byte = 0;
7261                   while (i < len)
7262                     {
7263                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
7264                       buf++;
7265                     }
7266                 }
7267               else if (INTEGERP (components))
7268                 {
7269                   len = 1;
7270                   *buf++ = XINT (components);
7271                 }
7272               else if (CONSP (components))
7273                 {
7274                   for (len = 0; CONSP (components);
7275                        len++, components = XCDR (components))
7276                     *buf++ = XINT (XCAR (components));
7277                 }
7278               else
7279                 abort ();
7280               *head -= len;
7281             }
7282         }
7283
7284       if (find_composition (end, limit, &start, &end, &prop,
7285                             coding->src_object)
7286           && end <= limit)
7287         *stop = start;
7288       else
7289         *stop = limit;
7290     }
7291   return buf;
7292 }
7293
7294
7295 /* Extract an annotation datum from a text property `charset' at POS of
7296    CODING->src_object (buffer of string), store the data in BUF, set
7297    *STOP to the position where the value of `charset' property changes
7298    (limiting by LIMIT), and return the address of the next element of
7299    BUF.
7300
7301    If the property value is nil, set *STOP to the position where the
7302    property value is non-nil (limiting by LIMIT), and return BUF.  */
7303
7304 static INLINE int *
7305 handle_charset_annotation (pos, limit, coding, buf, stop)
7306      EMACS_INT pos, limit;
7307      struct coding_system *coding;
7308      int *buf;
7309      EMACS_INT *stop;
7310 {
7311   Lisp_Object val, next;
7312   int id;
7313
7314   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
7315   if (! NILP (val) && CHARSETP (val))
7316     id = XINT (CHARSET_SYMBOL_ID (val));
7317   else
7318     id = -1;
7319   ADD_CHARSET_DATA (buf, 0, id);
7320   next = Fnext_single_property_change (make_number (pos), Qcharset,
7321                                        coding->src_object,
7322                                        make_number (limit));
7323   *stop = XINT (next);
7324   return buf;
7325 }
7326
7327
7328 static void
7329 consume_chars (coding, translation_table, max_lookup)
7330      struct coding_system *coding;
7331      Lisp_Object translation_table;
7332      int max_lookup;
7333 {
7334   int *buf = coding->charbuf;
7335   int *buf_end = coding->charbuf + coding->charbuf_size;
7336   const unsigned char *src = coding->source + coding->consumed;
7337   const unsigned char *src_end = coding->source + coding->src_bytes;
7338   EMACS_INT pos = coding->src_pos + coding->consumed_char;
7339   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
7340   int multibytep = coding->src_multibyte;
7341   Lisp_Object eol_type;
7342   int c;
7343   EMACS_INT stop, stop_composition, stop_charset;
7344   int *lookup_buf = NULL;
7345
7346   if (! NILP (translation_table))
7347     lookup_buf = alloca (sizeof (int) * max_lookup);
7348
7349   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
7350   if (VECTORP (eol_type))
7351     eol_type = Qunix;
7352
7353   /* Note: composition handling is not yet implemented.  */
7354   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7355
7356   if (NILP (coding->src_object))
7357     stop = stop_composition = stop_charset = end_pos;
7358   else
7359     {
7360       if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
7361         stop = stop_composition = pos;
7362       else
7363         stop = stop_composition = end_pos;
7364       if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
7365         stop = stop_charset = pos;
7366       else
7367         stop_charset = end_pos;
7368     }
7369
7370   /* Compensate for CRLF and conversion.  */
7371   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
7372   while (buf < buf_end)
7373     {
7374       Lisp_Object trans;
7375
7376       if (pos == stop)
7377         {
7378           if (pos == end_pos)
7379             break;
7380           if (pos == stop_composition)
7381             buf = handle_composition_annotation (pos, end_pos, coding,
7382                                                  buf, &stop_composition);
7383           if (pos == stop_charset)
7384             buf = handle_charset_annotation (pos, end_pos, coding,
7385                                              buf, &stop_charset);
7386           stop = (stop_composition < stop_charset
7387                   ? stop_composition : stop_charset);
7388         }
7389
7390       if (! multibytep)
7391         {
7392           EMACS_INT bytes;
7393
7394           if (coding->encoder == encode_coding_raw_text)
7395             c = *src++, pos++;
7396           else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
7397             c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
7398           else
7399             c = BYTE8_TO_CHAR (*src), src++, pos++;
7400         }
7401       else
7402         c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
7403       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
7404         c = '\n';
7405       if (! EQ (eol_type, Qunix))
7406         {
7407           if (c == '\n')
7408             {
7409               if (EQ (eol_type, Qdos))
7410                 *buf++ = '\r';
7411               else
7412                 c = '\r';
7413             }
7414         }
7415
7416       trans = Qnil;
7417       LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
7418       if (NILP (trans))
7419         *buf++ = c;
7420       else
7421         {
7422           int from_nchars = 1, to_nchars = 1;
7423           int *lookup_buf_end;
7424           const unsigned char *p = src;
7425           int i;
7426
7427           lookup_buf[0] = c;
7428           for (i = 1; i < max_lookup && p < src_end; i++)
7429             lookup_buf[i] = STRING_CHAR_ADVANCE (p);
7430           lookup_buf_end = lookup_buf + i;
7431           trans = get_translation (trans, lookup_buf, lookup_buf_end);
7432           if (INTEGERP (trans))
7433             c = XINT (trans);
7434           else if (CONSP (trans))
7435             {
7436               from_nchars = ASIZE (XCAR (trans));
7437               trans = XCDR (trans);
7438               if (INTEGERP (trans))
7439                 c = XINT (trans);
7440               else
7441                 {
7442                   to_nchars = ASIZE (trans);
7443                   if (buf + to_nchars > buf_end)
7444                     break;
7445                   c = XINT (AREF (trans, 0));
7446                 }
7447             }
7448           else
7449             break;
7450           *buf++ = c;
7451           for (i = 1; i < to_nchars; i++)
7452             *buf++ = XINT (AREF (trans, i));
7453           for (i = 1; i < from_nchars; i++, pos++)
7454             src += MULTIBYTE_LENGTH_NO_CHECK (src);
7455         }
7456     }
7457
7458   coding->consumed = src - coding->source;
7459   coding->consumed_char = pos - coding->src_pos;
7460   coding->charbuf_used = buf - coding->charbuf;
7461   coding->chars_at_source = 0;
7462 }
7463
7464
7465 /* Encode the text at CODING->src_object into CODING->dst_object.
7466    CODING->src_object is a buffer or a string.
7467    CODING->dst_object is a buffer or nil.
7468
7469    If CODING->src_object is a buffer, it must be the current buffer.
7470    In this case, if CODING->src_pos is positive, it is a position of
7471    the source text in the buffer, otherwise. the source text is in the
7472    gap area of the buffer, and coding->src_pos specifies the offset of
7473    the text from GPT (which must be the same as PT).  If this is the
7474    same buffer as CODING->dst_object, CODING->src_pos must be
7475    negative and CODING should not have `pre-write-conversion'.
7476
7477    If CODING->src_object is a string, CODING should not have
7478    `pre-write-conversion'.
7479
7480    If CODING->dst_object is a buffer, the encoded data is inserted at
7481    the current point of that buffer.
7482
7483    If CODING->dst_object is nil, the encoded data is placed at the
7484    memory area specified by CODING->destination.  */
7485
7486 static int
7487 encode_coding (coding)
7488      struct coding_system *coding;
7489 {
7490   Lisp_Object attrs;
7491   Lisp_Object translation_table;
7492   int max_lookup;
7493
7494   attrs = CODING_ID_ATTRS (coding->id);
7495   if (coding->encoder == encode_coding_raw_text)
7496     translation_table = Qnil, max_lookup = 0;
7497   else
7498     translation_table = get_translation_table (attrs, 1, &max_lookup);
7499
7500   if (BUFFERP (coding->dst_object))
7501     {
7502       set_buffer_internal (XBUFFER (coding->dst_object));
7503       coding->dst_multibyte
7504         = ! NILP (current_buffer->enable_multibyte_characters);
7505     }
7506
7507   coding->consumed = coding->consumed_char = 0;
7508   coding->produced = coding->produced_char = 0;
7509   record_conversion_result (coding, CODING_RESULT_SUCCESS);
7510   coding->errors = 0;
7511
7512   ALLOC_CONVERSION_WORK_AREA (coding);
7513
7514   do {
7515     coding_set_source (coding);
7516     consume_chars (coding, translation_table, max_lookup);
7517     coding_set_destination (coding);
7518     (*(coding->encoder)) (coding);
7519   } while (coding->consumed_char < coding->src_chars);
7520
7521   if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
7522     insert_from_gap (coding->produced_char, coding->produced);
7523
7524   return (coding->result);
7525 }
7526
7527
7528 /* Name (or base name) of work buffer for code conversion.  */
7529 static Lisp_Object Vcode_conversion_workbuf_name;
7530
7531 /* A working buffer used by the top level conversion.  Once it is
7532    created, it is never destroyed.  It has the name
7533    Vcode_conversion_workbuf_name.  The other working buffers are
7534    destroyed after the use is finished, and their names are modified
7535    versions of Vcode_conversion_workbuf_name.  */
7536 static Lisp_Object Vcode_conversion_reused_workbuf;
7537
7538 /* 1 iff Vcode_conversion_reused_workbuf is already in use.  */
7539 static int reused_workbuf_in_use;
7540
7541
7542 /* Return a working buffer of code convesion.  MULTIBYTE specifies the
7543    multibyteness of returning buffer.  */
7544
7545 static Lisp_Object
7546 make_conversion_work_buffer (multibyte)
7547      int multibyte;
7548 {
7549   Lisp_Object name, workbuf;
7550   struct buffer *current;
7551
7552   if (reused_workbuf_in_use++)
7553     {
7554       name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
7555       workbuf = Fget_buffer_create (name);
7556     }
7557   else
7558     {
7559       if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
7560         Vcode_conversion_reused_workbuf
7561           = Fget_buffer_create (Vcode_conversion_workbuf_name);
7562       workbuf = Vcode_conversion_reused_workbuf;
7563     }
7564   current = current_buffer;
7565   set_buffer_internal (XBUFFER (workbuf));
7566   /* We can't allow modification hooks to run in the work buffer.  For
7567      instance, directory_files_internal assumes that file decoding
7568      doesn't compile new regexps.  */
7569   Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt);
7570   Ferase_buffer ();
7571   current_buffer->undo_list = Qt;
7572   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
7573   set_buffer_internal (current);
7574   return workbuf;
7575 }
7576
7577
7578 static Lisp_Object
7579 code_conversion_restore (arg)
7580      Lisp_Object arg;
7581 {
7582   Lisp_Object current, workbuf;
7583   struct gcpro gcpro1;
7584
7585   GCPRO1 (arg);
7586   current = XCAR (arg);
7587   workbuf = XCDR (arg);
7588   if (! NILP (workbuf))
7589     {
7590       if (EQ (workbuf, Vcode_conversion_reused_workbuf))
7591         reused_workbuf_in_use = 0;
7592       else if (! NILP (Fbuffer_live_p (workbuf)))
7593         Fkill_buffer (workbuf);
7594     }
7595   set_buffer_internal (XBUFFER (current));
7596   UNGCPRO;
7597   return Qnil;
7598 }
7599
7600 Lisp_Object
7601 code_conversion_save (with_work_buf, multibyte)
7602      int with_work_buf, multibyte;
7603 {
7604   Lisp_Object workbuf = Qnil;
7605
7606   if (with_work_buf)
7607     workbuf = make_conversion_work_buffer (multibyte);
7608   record_unwind_protect (code_conversion_restore,
7609                          Fcons (Fcurrent_buffer (), workbuf));
7610   return workbuf;
7611 }
7612
7613 int
7614 decode_coding_gap (coding, chars, bytes)
7615      struct coding_system *coding;
7616      EMACS_INT chars, bytes;
7617 {
7618   int count = specpdl_ptr - specpdl;
7619   Lisp_Object attrs;
7620
7621   code_conversion_save (0, 0);
7622
7623   coding->src_object = Fcurrent_buffer ();
7624   coding->src_chars = chars;
7625   coding->src_bytes = bytes;
7626   coding->src_pos = -chars;
7627   coding->src_pos_byte = -bytes;
7628   coding->src_multibyte = chars < bytes;
7629   coding->dst_object = coding->src_object;
7630   coding->dst_pos = PT;
7631   coding->dst_pos_byte = PT_BYTE;
7632   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
7633
7634   if (CODING_REQUIRE_DETECTION (coding))
7635     detect_coding (coding);
7636
7637   coding->mode |= CODING_MODE_LAST_BLOCK;
7638   current_buffer->text->inhibit_shrinking = 1;
7639   decode_coding (coding);
7640   current_buffer->text->inhibit_shrinking = 0;
7641
7642   attrs = CODING_ID_ATTRS (coding->id);
7643   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7644     {
7645       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7646       Lisp_Object val;
7647
7648       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7649       val = call1 (CODING_ATTR_POST_READ (attrs),
7650                    make_number (coding->produced_char));
7651       CHECK_NATNUM (val);
7652       coding->produced_char += Z - prev_Z;
7653       coding->produced += Z_BYTE - prev_Z_BYTE;
7654     }
7655
7656   unbind_to (count, Qnil);
7657   return coding->result;
7658 }
7659
7660 int
7661 encode_coding_gap (coding, chars, bytes)
7662      struct coding_system *coding;
7663      EMACS_INT chars, bytes;
7664 {
7665   int count = specpdl_ptr - specpdl;
7666
7667   code_conversion_save (0, 0);
7668
7669   coding->src_object = Fcurrent_buffer ();
7670   coding->src_chars = chars;
7671   coding->src_bytes = bytes;
7672   coding->src_pos = -chars;
7673   coding->src_pos_byte = -bytes;
7674   coding->src_multibyte = chars < bytes;
7675   coding->dst_object = coding->src_object;
7676   coding->dst_pos = PT;
7677   coding->dst_pos_byte = PT_BYTE;
7678
7679   encode_coding (coding);
7680
7681   unbind_to (count, Qnil);
7682   return coding->result;
7683 }
7684
7685
7686 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
7687    SRC_OBJECT into DST_OBJECT by coding context CODING.
7688
7689    SRC_OBJECT is a buffer, a string, or Qnil.
7690
7691    If it is a buffer, the text is at point of the buffer.  FROM and TO
7692    are positions in the buffer.
7693
7694    If it is a string, the text is at the beginning of the string.
7695    FROM and TO are indices to the string.
7696
7697    If it is nil, the text is at coding->source.  FROM and TO are
7698    indices to coding->source.
7699
7700    DST_OBJECT is a buffer, Qt, or Qnil.
7701
7702    If it is a buffer, the decoded text is inserted at point of the
7703    buffer.  If the buffer is the same as SRC_OBJECT, the source text
7704    is deleted.
7705
7706    If it is Qt, a string is made from the decoded text, and
7707    set in CODING->dst_object.
7708
7709    If it is Qnil, the decoded text is stored at CODING->destination.
7710    The caller must allocate CODING->dst_bytes bytes at
7711    CODING->destination by xmalloc.  If the decoded text is longer than
7712    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
7713  */
7714
7715 void
7716 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7717                       dst_object)
7718      struct coding_system *coding;
7719      Lisp_Object src_object;
7720      EMACS_INT from, from_byte, to, to_byte;
7721      Lisp_Object dst_object;
7722 {
7723   int count = specpdl_ptr - specpdl;
7724   unsigned char *destination;
7725   EMACS_INT dst_bytes;
7726   EMACS_INT chars = to - from;
7727   EMACS_INT bytes = to_byte - from_byte;
7728   Lisp_Object attrs;
7729   int saved_pt = -1, saved_pt_byte;
7730   int need_marker_adjustment = 0;
7731   Lisp_Object old_deactivate_mark;
7732
7733   old_deactivate_mark = Vdeactivate_mark;
7734
7735   if (NILP (dst_object))
7736     {
7737       destination = coding->destination;
7738       dst_bytes = coding->dst_bytes;
7739     }
7740
7741   coding->src_object = src_object;
7742   coding->src_chars = chars;
7743   coding->src_bytes = bytes;
7744   coding->src_multibyte = chars < bytes;
7745
7746   if (STRINGP (src_object))
7747     {
7748       coding->src_pos = from;
7749       coding->src_pos_byte = from_byte;
7750     }
7751   else if (BUFFERP (src_object))
7752     {
7753       set_buffer_internal (XBUFFER (src_object));
7754       if (from != GPT)
7755         move_gap_both (from, from_byte);
7756       if (EQ (src_object, dst_object))
7757         {
7758           struct Lisp_Marker *tail;
7759
7760           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7761             {
7762               tail->need_adjustment
7763                 = tail->charpos == (tail->insertion_type ? from : to);
7764               need_marker_adjustment |= tail->need_adjustment;
7765             }
7766           saved_pt = PT, saved_pt_byte = PT_BYTE;
7767           TEMP_SET_PT_BOTH (from, from_byte);
7768           current_buffer->text->inhibit_shrinking = 1;
7769           del_range_both (from, from_byte, to, to_byte, 1);
7770           coding->src_pos = -chars;
7771           coding->src_pos_byte = -bytes;
7772         }
7773       else
7774         {
7775           coding->src_pos = from;
7776           coding->src_pos_byte = from_byte;
7777         }
7778     }
7779
7780   if (CODING_REQUIRE_DETECTION (coding))
7781     detect_coding (coding);
7782   attrs = CODING_ID_ATTRS (coding->id);
7783
7784   if (EQ (dst_object, Qt)
7785       || (! NILP (CODING_ATTR_POST_READ (attrs))
7786           && NILP (dst_object)))
7787     {
7788       coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
7789       coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
7790       coding->dst_pos = BEG;
7791       coding->dst_pos_byte = BEG_BYTE;
7792     }
7793   else if (BUFFERP (dst_object))
7794     {
7795       code_conversion_save (0, 0);
7796       coding->dst_object = dst_object;
7797       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7798       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7799       coding->dst_multibyte
7800         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7801     }
7802   else
7803     {
7804       code_conversion_save (0, 0);
7805       coding->dst_object = Qnil;
7806       /* Most callers presume this will return a multibyte result, and they
7807          won't use `binary' or `raw-text' anyway, so let's not worry about
7808          CODING_FOR_UNIBYTE.  */
7809       coding->dst_multibyte = 1;
7810     }
7811
7812   decode_coding (coding);
7813
7814   if (BUFFERP (coding->dst_object))
7815     set_buffer_internal (XBUFFER (coding->dst_object));
7816
7817   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7818     {
7819       struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7820       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7821       Lisp_Object val;
7822
7823       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7824       GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7825               old_deactivate_mark);
7826       val = safe_call1 (CODING_ATTR_POST_READ (attrs),
7827                         make_number (coding->produced_char));
7828       UNGCPRO;
7829       CHECK_NATNUM (val);
7830       coding->produced_char += Z - prev_Z;
7831       coding->produced += Z_BYTE - prev_Z_BYTE;
7832     }
7833
7834   if (EQ (dst_object, Qt))
7835     {
7836       coding->dst_object = Fbuffer_string ();
7837     }
7838   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
7839     {
7840       set_buffer_internal (XBUFFER (coding->dst_object));
7841       if (dst_bytes < coding->produced)
7842         {
7843           destination = xrealloc (destination, coding->produced);
7844           if (! destination)
7845             {
7846               record_conversion_result (coding,
7847                                         CODING_RESULT_INSUFFICIENT_DST);
7848               unbind_to (count, Qnil);
7849               return;
7850             }
7851           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7852             move_gap_both (BEGV, BEGV_BYTE);
7853           bcopy (BEGV_ADDR, destination, coding->produced);
7854           coding->destination = destination;
7855         }
7856     }
7857
7858   if (saved_pt >= 0)
7859     {
7860       /* This is the case of:
7861          (BUFFERP (src_object) && EQ (src_object, dst_object))
7862          As we have moved PT while replacing the original buffer
7863          contents, we must recover it now.  */
7864       set_buffer_internal (XBUFFER (src_object));
7865       current_buffer->text->inhibit_shrinking = 0;
7866       if (saved_pt < from)
7867         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7868       else if (saved_pt < from + chars)
7869         TEMP_SET_PT_BOTH (from, from_byte);
7870       else if (! NILP (current_buffer->enable_multibyte_characters))
7871         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7872                           saved_pt_byte + (coding->produced - bytes));
7873       else
7874         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7875                           saved_pt_byte + (coding->produced - bytes));
7876
7877       if (need_marker_adjustment)
7878         {
7879           struct Lisp_Marker *tail;
7880
7881           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7882             if (tail->need_adjustment)
7883               {
7884                 tail->need_adjustment = 0;
7885                 if (tail->insertion_type)
7886                   {
7887                     tail->bytepos = from_byte;
7888                     tail->charpos = from;
7889                   }
7890                 else
7891                   {
7892                     tail->bytepos = from_byte + coding->produced;
7893                     tail->charpos
7894                       = (NILP (current_buffer->enable_multibyte_characters)
7895                          ? tail->bytepos : from + coding->produced_char);
7896                   }
7897               }
7898         }
7899     }
7900
7901   Vdeactivate_mark = old_deactivate_mark;
7902   unbind_to (count, coding->dst_object);
7903 }
7904
7905
7906 void
7907 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7908                       dst_object)
7909      struct coding_system *coding;
7910      Lisp_Object src_object;
7911      EMACS_INT from, from_byte, to, to_byte;
7912      Lisp_Object dst_object;
7913 {
7914   int count = specpdl_ptr - specpdl;
7915   EMACS_INT chars = to - from;
7916   EMACS_INT bytes = to_byte - from_byte;
7917   Lisp_Object attrs;
7918   int saved_pt = -1, saved_pt_byte;
7919   int need_marker_adjustment = 0;
7920   int kill_src_buffer = 0;
7921   Lisp_Object old_deactivate_mark;
7922
7923   old_deactivate_mark = Vdeactivate_mark;
7924
7925   coding->src_object = src_object;
7926   coding->src_chars = chars;
7927   coding->src_bytes = bytes;
7928   coding->src_multibyte = chars < bytes;
7929
7930   attrs = CODING_ID_ATTRS (coding->id);
7931
7932   if (EQ (src_object, dst_object))
7933     {
7934       struct Lisp_Marker *tail;
7935
7936       for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7937         {
7938           tail->need_adjustment
7939             = tail->charpos == (tail->insertion_type ? from : to);
7940           need_marker_adjustment |= tail->need_adjustment;
7941         }
7942     }
7943
7944   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
7945     {
7946       coding->src_object = code_conversion_save (1, coding->src_multibyte);
7947       set_buffer_internal (XBUFFER (coding->src_object));
7948       if (STRINGP (src_object))
7949         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
7950       else if (BUFFERP (src_object))
7951         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7952       else
7953         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
7954
7955       if (EQ (src_object, dst_object))
7956         {
7957           set_buffer_internal (XBUFFER (src_object));
7958           saved_pt = PT, saved_pt_byte = PT_BYTE;
7959           del_range_both (from, from_byte, to, to_byte, 1);
7960           set_buffer_internal (XBUFFER (coding->src_object));
7961         }
7962
7963       {
7964         Lisp_Object args[3];
7965         struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7966
7967         GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7968                 old_deactivate_mark);
7969         args[0] = CODING_ATTR_PRE_WRITE (attrs);
7970         args[1] = make_number (BEG);
7971         args[2] = make_number (Z);
7972         safe_call (3, args);
7973         UNGCPRO;
7974       }
7975       if (XBUFFER (coding->src_object) != current_buffer)
7976         kill_src_buffer = 1;
7977       coding->src_object = Fcurrent_buffer ();
7978       if (BEG != GPT)
7979         move_gap_both (BEG, BEG_BYTE);
7980       coding->src_chars = Z - BEG;
7981       coding->src_bytes = Z_BYTE - BEG_BYTE;
7982       coding->src_pos = BEG;
7983       coding->src_pos_byte = BEG_BYTE;
7984       coding->src_multibyte = Z < Z_BYTE;
7985     }
7986   else if (STRINGP (src_object))
7987     {
7988       code_conversion_save (0, 0);
7989       coding->src_pos = from;
7990       coding->src_pos_byte = from_byte;
7991     }
7992   else if (BUFFERP (src_object))
7993     {
7994       code_conversion_save (0, 0);
7995       set_buffer_internal (XBUFFER (src_object));
7996       if (EQ (src_object, dst_object))
7997         {
7998           saved_pt = PT, saved_pt_byte = PT_BYTE;
7999           coding->src_object = del_range_1 (from, to, 1, 1);
8000           coding->src_pos = 0;
8001           coding->src_pos_byte = 0;
8002         }
8003       else
8004         {
8005           if (from < GPT && to >= GPT)
8006             move_gap_both (from, from_byte);
8007           coding->src_pos = from;
8008           coding->src_pos_byte = from_byte;
8009         }
8010     }
8011   else
8012     code_conversion_save (0, 0);
8013
8014   if (BUFFERP (dst_object))
8015     {
8016       coding->dst_object = dst_object;
8017       if (EQ (src_object, dst_object))
8018         {
8019           coding->dst_pos = from;
8020           coding->dst_pos_byte = from_byte;
8021         }
8022       else
8023         {
8024           struct buffer *current = current_buffer;
8025
8026           set_buffer_temp (XBUFFER (dst_object));
8027           coding->dst_pos = PT;
8028           coding->dst_pos_byte = PT_BYTE;
8029           move_gap_both (coding->dst_pos, coding->dst_pos_byte);
8030           set_buffer_temp (current);
8031         }
8032       coding->dst_multibyte
8033         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
8034     }
8035   else if (EQ (dst_object, Qt))
8036     {
8037       coding->dst_object = Qnil;
8038       coding->dst_bytes = coding->src_chars;
8039       if (coding->dst_bytes == 0)
8040         coding->dst_bytes = 1;
8041       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
8042       coding->dst_multibyte = 0;
8043     }
8044   else
8045     {
8046       coding->dst_object = Qnil;
8047       coding->dst_multibyte = 0;
8048     }
8049
8050   encode_coding (coding);
8051
8052   if (EQ (dst_object, Qt))
8053     {
8054       if (BUFFERP (coding->dst_object))
8055         coding->dst_object = Fbuffer_string ();
8056       else
8057         {
8058           coding->dst_object
8059             = make_unibyte_string ((char *) coding->destination,
8060                                    coding->produced);
8061           xfree (coding->destination);
8062         }
8063     }
8064
8065   if (saved_pt >= 0)
8066     {
8067       /* This is the case of:
8068          (BUFFERP (src_object) && EQ (src_object, dst_object))
8069          As we have moved PT while replacing the original buffer
8070          contents, we must recover it now.  */
8071       set_buffer_internal (XBUFFER (src_object));
8072       if (saved_pt < from)
8073         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
8074       else if (saved_pt < from + chars)
8075         TEMP_SET_PT_BOTH (from, from_byte);
8076       else if (! NILP (current_buffer->enable_multibyte_characters))
8077         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
8078                           saved_pt_byte + (coding->produced - bytes));
8079       else
8080         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
8081                           saved_pt_byte + (coding->produced - bytes));
8082
8083       if (need_marker_adjustment)
8084         {
8085           struct Lisp_Marker *tail;
8086
8087           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
8088             if (tail->need_adjustment)
8089               {
8090                 tail->need_adjustment = 0;
8091                 if (tail->insertion_type)
8092                   {
8093                     tail->bytepos = from_byte;
8094                     tail->charpos = from;
8095                   }
8096                 else
8097                   {
8098                     tail->bytepos = from_byte + coding->produced;
8099                     tail->charpos
8100                       = (NILP (current_buffer->enable_multibyte_characters)
8101                          ? tail->bytepos : from + coding->produced_char);
8102                   }
8103               }
8104         }
8105     }
8106
8107   if (kill_src_buffer)
8108     Fkill_buffer (coding->src_object);
8109
8110   Vdeactivate_mark = old_deactivate_mark;
8111   unbind_to (count, Qnil);
8112 }
8113
8114
8115 Lisp_Object
8116 preferred_coding_system ()
8117 {
8118   int id = coding_categories[coding_priorities[0]].id;
8119
8120   return CODING_ID_NAME (id);
8121 }
8122
8123 \f
8124 #ifdef emacs
8125 /*** 8. Emacs Lisp library functions ***/
8126
8127 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
8128        doc: /* Return t if OBJECT is nil or a coding-system.
8129 See the documentation of `define-coding-system' for information
8130 about coding-system objects.  */)
8131      (object)
8132      Lisp_Object object;
8133 {
8134   if (NILP (object)
8135       || CODING_SYSTEM_ID (object) >= 0)
8136     return Qt;
8137   if (! SYMBOLP (object)
8138       || NILP (Fget (object, Qcoding_system_define_form)))
8139     return Qnil;
8140   return Qt;
8141 }
8142
8143 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
8144        Sread_non_nil_coding_system, 1, 1, 0,
8145        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
8146      (prompt)
8147      Lisp_Object prompt;
8148 {
8149   Lisp_Object val;
8150   do
8151     {
8152       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8153                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
8154     }
8155   while (SCHARS (val) == 0);
8156   return (Fintern (val, Qnil));
8157 }
8158
8159 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
8160        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
8161 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
8162 Ignores case when completing coding systems (all Emacs coding systems
8163 are lower-case).  */)
8164      (prompt, default_coding_system)
8165      Lisp_Object prompt, default_coding_system;
8166 {
8167   Lisp_Object val;
8168   int count = SPECPDL_INDEX ();
8169
8170   if (SYMBOLP (default_coding_system))
8171     default_coding_system = SYMBOL_NAME (default_coding_system);
8172   specbind (Qcompletion_ignore_case, Qt);
8173   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8174                           Qt, Qnil, Qcoding_system_history,
8175                           default_coding_system, Qnil);
8176   unbind_to (count, Qnil);
8177   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
8178 }
8179
8180 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
8181        1, 1, 0,
8182        doc: /* Check validity of CODING-SYSTEM.
8183 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
8184 It is valid if it is nil or a symbol defined as a coding system by the
8185 function `define-coding-system'.  */)
8186   (coding_system)
8187      Lisp_Object coding_system;
8188 {
8189   Lisp_Object define_form;
8190
8191   define_form = Fget (coding_system, Qcoding_system_define_form);
8192   if (! NILP (define_form))
8193     {
8194       Fput (coding_system, Qcoding_system_define_form, Qnil);
8195       safe_eval (define_form);
8196     }
8197   if (!NILP (Fcoding_system_p (coding_system)))
8198     return coding_system;
8199   xsignal1 (Qcoding_system_error, coding_system);
8200 }
8201
8202 \f
8203 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
8204    HIGHEST is nonzero, return the coding system of the highest
8205    priority among the detected coding systems.  Otherwize return a
8206    list of detected coding systems sorted by their priorities.  If
8207    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
8208    multibyte form but contains only ASCII and eight-bit chars.
8209    Otherwise, the bytes are raw bytes.
8210
8211    CODING-SYSTEM controls the detection as below:
8212
8213    If it is nil, detect both text-format and eol-format.  If the
8214    text-format part of CODING-SYSTEM is already specified
8215    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
8216    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
8217    detect only text-format.  */
8218
8219 Lisp_Object
8220 detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
8221                       coding_system)
8222      const unsigned char *src;
8223      EMACS_INT src_chars, src_bytes;
8224      int highest;
8225      int multibytep;
8226      Lisp_Object coding_system;
8227 {
8228   const unsigned char *src_end = src + src_bytes;
8229   Lisp_Object attrs, eol_type;
8230   Lisp_Object val = Qnil;
8231   struct coding_system coding;
8232   int id;
8233   struct coding_detection_info detect_info;
8234   enum coding_category base_category;
8235   int null_byte_found = 0, eight_bit_found = 0;
8236
8237   if (NILP (coding_system))
8238     coding_system = Qundecided;
8239   setup_coding_system (coding_system, &coding);
8240   attrs = CODING_ID_ATTRS (coding.id);
8241   eol_type = CODING_ID_EOL_TYPE (coding.id);
8242   coding_system = CODING_ATTR_BASE_NAME (attrs);
8243
8244   coding.source = src;
8245   coding.src_chars = src_chars;
8246   coding.src_bytes = src_bytes;
8247   coding.src_multibyte = multibytep;
8248   coding.consumed = 0;
8249   coding.mode |= CODING_MODE_LAST_BLOCK;
8250   coding.head_ascii = 0;
8251
8252   detect_info.checked = detect_info.found = detect_info.rejected = 0;
8253
8254   /* At first, detect text-format if necessary.  */
8255   base_category = XINT (CODING_ATTR_CATEGORY (attrs));
8256   if (base_category == coding_category_undecided)
8257     {
8258       enum coding_category category;
8259       struct coding_system *this;
8260       int c, i;
8261
8262       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
8263       for (; src < src_end; src++)
8264         {
8265           c = *src;
8266           if (c & 0x80)
8267             {
8268               eight_bit_found = 1;
8269               if (null_byte_found)
8270                 break;
8271             }
8272           else if (c < 0x20)
8273             {
8274               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
8275                   && ! inhibit_iso_escape_detection
8276                   && ! detect_info.checked)
8277                 {
8278                   if (detect_coding_iso_2022 (&coding, &detect_info))
8279                     {
8280                       /* We have scanned the whole data.  */
8281                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
8282                         {
8283                           /* We didn't find an 8-bit code.  We may
8284                              have found a null-byte, but it's very
8285                              rare that a binary file confirm to
8286                              ISO-2022.  */
8287                           src = src_end;
8288                           coding.head_ascii = src - coding.source;
8289                         }
8290                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
8291                       break;
8292                     }
8293                 }
8294               else if (! c && !inhibit_null_byte_detection)
8295                 {
8296                   null_byte_found = 1;
8297                   if (eight_bit_found)
8298                     break;
8299                 }
8300               if (! eight_bit_found)
8301                 coding.head_ascii++;
8302             }
8303           else if (! eight_bit_found)
8304             coding.head_ascii++;
8305         }
8306
8307       if (null_byte_found || eight_bit_found
8308           || coding.head_ascii < coding.src_bytes
8309           || detect_info.found)
8310         {
8311           if (coding.head_ascii == coding.src_bytes)
8312             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
8313             for (i = 0; i < coding_category_raw_text; i++)
8314               {
8315                 category = coding_priorities[i];
8316                 this = coding_categories + category;
8317                 if (detect_info.found & (1 << category))
8318                   break;
8319               }
8320           else
8321             {
8322               if (null_byte_found)
8323                 {
8324                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
8325                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
8326                 }
8327               for (i = 0; i < coding_category_raw_text; i++)
8328                 {
8329                   category = coding_priorities[i];
8330                   this = coding_categories + category;
8331
8332                   if (this->id < 0)
8333                     {
8334                       /* No coding system of this category is defined.  */
8335                       detect_info.rejected |= (1 << category);
8336                     }
8337                   else if (category >= coding_category_raw_text)
8338                     continue;
8339                   else if (detect_info.checked & (1 << category))
8340                     {
8341                       if (highest
8342                           && (detect_info.found & (1 << category)))
8343                         break;
8344                     }
8345                   else if ((*(this->detector)) (&coding, &detect_info)
8346                            && highest
8347                            && (detect_info.found & (1 << category)))
8348                     {
8349                       if (category == coding_category_utf_16_auto)
8350                         {
8351                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8352                             category = coding_category_utf_16_le;
8353                           else
8354                             category = coding_category_utf_16_be;
8355                         }
8356                       break;
8357                     }
8358                 }
8359             }
8360         }
8361
8362       if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY
8363           || null_byte_found)
8364         {
8365           detect_info.found = CATEGORY_MASK_RAW_TEXT;
8366           id = CODING_SYSTEM_ID (Qno_conversion);
8367           val = Fcons (make_number (id), Qnil);
8368         }
8369       else if (! detect_info.rejected && ! detect_info.found)
8370         {
8371           detect_info.found = CATEGORY_MASK_ANY;
8372           id = coding_categories[coding_category_undecided].id;
8373           val = Fcons (make_number (id), Qnil);
8374         }
8375       else if (highest)
8376         {
8377           if (detect_info.found)
8378             {
8379               detect_info.found = 1 << category;
8380               val = Fcons (make_number (this->id), Qnil);
8381             }
8382           else
8383             for (i = 0; i < coding_category_raw_text; i++)
8384               if (! (detect_info.rejected & (1 << coding_priorities[i])))
8385                 {
8386                   detect_info.found = 1 << coding_priorities[i];
8387                   id = coding_categories[coding_priorities[i]].id;
8388                   val = Fcons (make_number (id), Qnil);
8389                   break;
8390                 }
8391         }
8392       else
8393         {
8394           int mask = detect_info.rejected | detect_info.found;
8395           int found = 0;
8396
8397           for (i = coding_category_raw_text - 1; i >= 0; i--)
8398             {
8399               category = coding_priorities[i];
8400               if (! (mask & (1 << category)))
8401                 {
8402                   found |= 1 << category;
8403                   id = coding_categories[category].id;
8404                   if (id >= 0)
8405                     val = Fcons (make_number (id), val);
8406                 }
8407             }
8408           for (i = coding_category_raw_text - 1; i >= 0; i--)
8409             {
8410               category = coding_priorities[i];
8411               if (detect_info.found & (1 << category))
8412                 {
8413                   id = coding_categories[category].id;
8414                   val = Fcons (make_number (id), val);
8415                 }
8416             }
8417           detect_info.found |= found;
8418         }
8419     }
8420   else if (base_category == coding_category_utf_8_auto)
8421     {
8422       if (detect_coding_utf_8 (&coding, &detect_info))
8423         {
8424           struct coding_system *this;
8425
8426           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
8427             this = coding_categories + coding_category_utf_8_sig;
8428           else
8429             this = coding_categories + coding_category_utf_8_nosig;
8430           val = Fcons (make_number (this->id), Qnil);
8431         }
8432     }
8433   else if (base_category == coding_category_utf_16_auto)
8434     {
8435       if (detect_coding_utf_16 (&coding, &detect_info))
8436         {
8437           struct coding_system *this;
8438
8439           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8440             this = coding_categories + coding_category_utf_16_le;
8441           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
8442             this = coding_categories + coding_category_utf_16_be;
8443           else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
8444             this = coding_categories + coding_category_utf_16_be_nosig;
8445           else
8446             this = coding_categories + coding_category_utf_16_le_nosig;
8447           val = Fcons (make_number (this->id), Qnil);
8448         }
8449     }
8450   else
8451     {
8452       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
8453       val = Fcons (make_number (coding.id), Qnil);
8454     }
8455
8456   /* Then, detect eol-format if necessary.  */
8457   {
8458     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol = -1;
8459     Lisp_Object tail;
8460
8461     if (VECTORP (eol_type))
8462       {
8463         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
8464           {
8465             if (null_byte_found)
8466               normal_eol = EOL_SEEN_LF;
8467             else
8468               normal_eol = detect_eol (coding.source, src_bytes,
8469                                        coding_category_raw_text);
8470           }
8471         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
8472                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
8473           utf_16_be_eol = detect_eol (coding.source, src_bytes,
8474                                       coding_category_utf_16_be);
8475         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
8476                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
8477           utf_16_le_eol = detect_eol (coding.source, src_bytes,
8478                                       coding_category_utf_16_le);
8479       }
8480     else
8481       {
8482         if (EQ (eol_type, Qunix))
8483           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
8484         else if (EQ (eol_type, Qdos))
8485           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
8486         else
8487           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
8488       }
8489
8490     for (tail = val; CONSP (tail); tail = XCDR (tail))
8491       {
8492         enum coding_category category;
8493         int this_eol;
8494
8495         id = XINT (XCAR (tail));
8496         attrs = CODING_ID_ATTRS (id);
8497         category = XINT (CODING_ATTR_CATEGORY (attrs));
8498         eol_type = CODING_ID_EOL_TYPE (id);
8499         if (VECTORP (eol_type))
8500           {
8501             if (category == coding_category_utf_16_be
8502                 || category == coding_category_utf_16_be_nosig)
8503               this_eol = utf_16_be_eol;
8504             else if (category == coding_category_utf_16_le
8505                      || category == coding_category_utf_16_le_nosig)
8506               this_eol = utf_16_le_eol;
8507             else
8508               this_eol = normal_eol;
8509
8510             if (this_eol == EOL_SEEN_LF)
8511               XSETCAR (tail, AREF (eol_type, 0));
8512             else if (this_eol == EOL_SEEN_CRLF)
8513               XSETCAR (tail, AREF (eol_type, 1));
8514             else if (this_eol == EOL_SEEN_CR)
8515               XSETCAR (tail, AREF (eol_type, 2));
8516             else
8517               XSETCAR (tail, CODING_ID_NAME (id));
8518           }
8519         else
8520           XSETCAR (tail, CODING_ID_NAME (id));
8521       }
8522   }
8523
8524   return (highest ? (CONSP (val) ? XCAR (val) : Qnil) : val);
8525 }
8526
8527
8528 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
8529        2, 3, 0,
8530        doc: /* Detect coding system of the text in the region between START and END.
8531 Return a list of possible coding systems ordered by priority.
8532 The coding systems to try and their priorities follows what
8533 the function `coding-system-priority-list' (which see) returns.
8534
8535 If only ASCII characters are found (except for such ISO-2022 control
8536 characters as ESC), it returns a list of single element `undecided'
8537 or its subsidiary coding system according to a detected end-of-line
8538 format.
8539
8540 If optional argument HIGHEST is non-nil, return the coding system of
8541 highest priority.  */)
8542      (start, end, highest)
8543      Lisp_Object start, end, highest;
8544 {
8545   int from, to;
8546   int from_byte, to_byte;
8547
8548   CHECK_NUMBER_COERCE_MARKER (start);
8549   CHECK_NUMBER_COERCE_MARKER (end);
8550
8551   validate_region (&start, &end);
8552   from = XINT (start), to = XINT (end);
8553   from_byte = CHAR_TO_BYTE (from);
8554   to_byte = CHAR_TO_BYTE (to);
8555
8556   if (from < GPT && to >= GPT)
8557     move_gap_both (to, to_byte);
8558
8559   return detect_coding_system (BYTE_POS_ADDR (from_byte),
8560                                to - from, to_byte - from_byte,
8561                                !NILP (highest),
8562                                !NILP (current_buffer
8563                                       ->enable_multibyte_characters),
8564                                Qnil);
8565 }
8566
8567 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
8568        1, 2, 0,
8569        doc: /* Detect coding system of the text in STRING.
8570 Return a list of possible coding systems ordered by priority.
8571 The coding systems to try and their priorities follows what
8572 the function `coding-system-priority-list' (which see) returns.
8573
8574 If only ASCII characters are found (except for such ISO-2022 control
8575 characters as ESC), it returns a list of single element `undecided'
8576 or its subsidiary coding system according to a detected end-of-line
8577 format.
8578
8579 If optional argument HIGHEST is non-nil, return the coding system of
8580 highest priority.  */)
8581      (string, highest)
8582      Lisp_Object string, highest;
8583 {
8584   CHECK_STRING (string);
8585
8586   return detect_coding_system (SDATA (string),
8587                                SCHARS (string), SBYTES (string),
8588                                !NILP (highest), STRING_MULTIBYTE (string),
8589                                Qnil);
8590 }
8591
8592
8593 static INLINE int
8594 char_encodable_p (c, attrs)
8595      int c;
8596      Lisp_Object attrs;
8597 {
8598   Lisp_Object tail;
8599   struct charset *charset;
8600   Lisp_Object translation_table;
8601
8602   translation_table = CODING_ATTR_TRANS_TBL (attrs);
8603   if (! NILP (translation_table))
8604     c = translate_char (translation_table, c);
8605   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
8606        CONSP (tail); tail = XCDR (tail))
8607     {
8608       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
8609       if (CHAR_CHARSET_P (c, charset))
8610         break;
8611     }
8612   return (! NILP (tail));
8613 }
8614
8615
8616 /* Return a list of coding systems that safely encode the text between
8617    START and END.  If EXCLUDE is non-nil, it is a list of coding
8618    systems not to check.  The returned list doesn't contain any such
8619    coding systems.  In any case, if the text contains only ASCII or is
8620    unibyte, return t.  */
8621
8622 DEFUN ("find-coding-systems-region-internal",
8623        Ffind_coding_systems_region_internal,
8624        Sfind_coding_systems_region_internal, 2, 3, 0,
8625        doc: /* Internal use only.  */)
8626      (start, end, exclude)
8627      Lisp_Object start, end, exclude;
8628 {
8629   Lisp_Object coding_attrs_list, safe_codings;
8630   EMACS_INT start_byte, end_byte;
8631   const unsigned char *p, *pbeg, *pend;
8632   int c;
8633   Lisp_Object tail, elt;
8634
8635   if (STRINGP (start))
8636     {
8637       if (!STRING_MULTIBYTE (start)
8638           || SCHARS (start) == SBYTES (start))
8639         return Qt;
8640       start_byte = 0;
8641       end_byte = SBYTES (start);
8642     }
8643   else
8644     {
8645       CHECK_NUMBER_COERCE_MARKER (start);
8646       CHECK_NUMBER_COERCE_MARKER (end);
8647       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8648         args_out_of_range (start, end);
8649       if (NILP (current_buffer->enable_multibyte_characters))
8650         return Qt;
8651       start_byte = CHAR_TO_BYTE (XINT (start));
8652       end_byte = CHAR_TO_BYTE (XINT (end));
8653       if (XINT (end) - XINT (start) == end_byte - start_byte)
8654         return Qt;
8655
8656       if (XINT (start) < GPT && XINT (end) > GPT)
8657         {
8658           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8659             move_gap_both (XINT (start), start_byte);
8660           else
8661             move_gap_both (XINT (end), end_byte);
8662         }
8663     }
8664
8665   coding_attrs_list = Qnil;
8666   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
8667     if (NILP (exclude)
8668         || NILP (Fmemq (XCAR (tail), exclude)))
8669       {
8670         Lisp_Object attrs;
8671
8672         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
8673         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
8674             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
8675           {
8676             ASET (attrs, coding_attr_trans_tbl,
8677                   get_translation_table (attrs, 1, NULL));
8678             coding_attrs_list = Fcons (attrs, coding_attrs_list);
8679           }
8680       }
8681
8682   if (STRINGP (start))
8683     p = pbeg = SDATA (start);
8684   else
8685     p = pbeg = BYTE_POS_ADDR (start_byte);
8686   pend = p + (end_byte - start_byte);
8687
8688   while (p < pend && ASCII_BYTE_P (*p)) p++;
8689   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8690
8691   while (p < pend)
8692     {
8693       if (ASCII_BYTE_P (*p))
8694         p++;
8695       else
8696         {
8697           c = STRING_CHAR_ADVANCE (p);
8698
8699           charset_map_loaded = 0;
8700           for (tail = coding_attrs_list; CONSP (tail);)
8701             {
8702               elt = XCAR (tail);
8703               if (NILP (elt))
8704                 tail = XCDR (tail);
8705               else if (char_encodable_p (c, elt))
8706                 tail = XCDR (tail);
8707               else if (CONSP (XCDR (tail)))
8708                 {
8709                   XSETCAR (tail, XCAR (XCDR (tail)));
8710                   XSETCDR (tail, XCDR (XCDR (tail)));
8711                 }
8712               else
8713                 {
8714                   XSETCAR (tail, Qnil);
8715                   tail = XCDR (tail);
8716                 }
8717             }
8718           if (charset_map_loaded)
8719             {
8720               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8721
8722               if (STRINGP (start))
8723                 pbeg = SDATA (start);
8724               else
8725                 pbeg = BYTE_POS_ADDR (start_byte);
8726               p = pbeg + p_offset;
8727               pend = pbeg + pend_offset;
8728             }
8729         }
8730     }
8731
8732   safe_codings = list2 (Qraw_text, Qno_conversion);
8733   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
8734     if (! NILP (XCAR (tail)))
8735       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
8736
8737   return safe_codings;
8738 }
8739
8740
8741 DEFUN ("unencodable-char-position", Funencodable_char_position,
8742        Sunencodable_char_position, 3, 5, 0,
8743        doc: /*
8744 Return position of first un-encodable character in a region.
8745 START and END specify the region and CODING-SYSTEM specifies the
8746 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
8747
8748 If optional 4th argument COUNT is non-nil, it specifies at most how
8749 many un-encodable characters to search.  In this case, the value is a
8750 list of positions.
8751
8752 If optional 5th argument STRING is non-nil, it is a string to search
8753 for un-encodable characters.  In that case, START and END are indexes
8754 to the string.  */)
8755      (start, end, coding_system, count, string)
8756      Lisp_Object start, end, coding_system, count, string;
8757 {
8758   int n;
8759   struct coding_system coding;
8760   Lisp_Object attrs, charset_list, translation_table;
8761   Lisp_Object positions;
8762   int from, to;
8763   const unsigned char *p, *stop, *pend;
8764   int ascii_compatible;
8765
8766   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
8767   attrs = CODING_ID_ATTRS (coding.id);
8768   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
8769     return Qnil;
8770   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
8771   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8772   translation_table = get_translation_table (attrs, 1, NULL);
8773
8774   if (NILP (string))
8775     {
8776       validate_region (&start, &end);
8777       from = XINT (start);
8778       to = XINT (end);
8779       if (NILP (current_buffer->enable_multibyte_characters)
8780           || (ascii_compatible
8781               && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
8782         return Qnil;
8783       p = CHAR_POS_ADDR (from);
8784       pend = CHAR_POS_ADDR (to);
8785       if (from < GPT && to >= GPT)
8786         stop = GPT_ADDR;
8787       else
8788         stop = pend;
8789     }
8790   else
8791     {
8792       CHECK_STRING (string);
8793       CHECK_NATNUM (start);
8794       CHECK_NATNUM (end);
8795       from = XINT (start);
8796       to = XINT (end);
8797       if (from > to
8798           || to > SCHARS (string))
8799         args_out_of_range_3 (string, start, end);
8800       if (! STRING_MULTIBYTE (string))
8801         return Qnil;
8802       p = SDATA (string) + string_char_to_byte (string, from);
8803       stop = pend = SDATA (string) + string_char_to_byte (string, to);
8804       if (ascii_compatible && (to - from) == (pend - p))
8805         return Qnil;
8806     }
8807
8808   if (NILP (count))
8809     n = 1;
8810   else
8811     {
8812       CHECK_NATNUM (count);
8813       n = XINT (count);
8814     }
8815
8816   positions = Qnil;
8817   while (1)
8818     {
8819       int c;
8820
8821       if (ascii_compatible)
8822         while (p < stop && ASCII_BYTE_P (*p))
8823           p++, from++;
8824       if (p >= stop)
8825         {
8826           if (p >= pend)
8827             break;
8828           stop = pend;
8829           p = GAP_END_ADDR;
8830         }
8831
8832       c = STRING_CHAR_ADVANCE (p);
8833       if (! (ASCII_CHAR_P (c) && ascii_compatible)
8834           && ! char_charset (translate_char (translation_table, c),
8835                              charset_list, NULL))
8836         {
8837           positions = Fcons (make_number (from), positions);
8838           n--;
8839           if (n == 0)
8840             break;
8841         }
8842
8843       from++;
8844     }
8845
8846   return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
8847 }
8848
8849
8850 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
8851        Scheck_coding_systems_region, 3, 3, 0,
8852        doc: /* Check if the region is encodable by coding systems.
8853
8854 START and END are buffer positions specifying the region.
8855 CODING-SYSTEM-LIST is a list of coding systems to check.
8856
8857 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
8858 CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
8859 whole region, POS0, POS1, ... are buffer positions where non-encodable
8860 characters are found.
8861
8862 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
8863 value is nil.
8864
8865 START may be a string.  In that case, check if the string is
8866 encodable, and the value contains indices to the string instead of
8867 buffer positions.  END is ignored.
8868
8869 If the current buffer (or START if it is a string) is unibyte, the value
8870 is nil.  */)
8871      (start, end, coding_system_list)
8872      Lisp_Object start, end, coding_system_list;
8873 {
8874   Lisp_Object list;
8875   EMACS_INT start_byte, end_byte;
8876   int pos;
8877   const unsigned char *p, *pbeg, *pend;
8878   int c;
8879   Lisp_Object tail, elt, attrs;
8880
8881   if (STRINGP (start))
8882     {
8883       if (!STRING_MULTIBYTE (start)
8884           || SCHARS (start) == SBYTES (start))
8885         return Qnil;
8886       start_byte = 0;
8887       end_byte = SBYTES (start);
8888       pos = 0;
8889     }
8890   else
8891     {
8892       CHECK_NUMBER_COERCE_MARKER (start);
8893       CHECK_NUMBER_COERCE_MARKER (end);
8894       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8895         args_out_of_range (start, end);
8896       if (NILP (current_buffer->enable_multibyte_characters))
8897         return Qnil;
8898       start_byte = CHAR_TO_BYTE (XINT (start));
8899       end_byte = CHAR_TO_BYTE (XINT (end));
8900       if (XINT (end) - XINT (start) == end_byte - start_byte)
8901         return Qnil;
8902
8903       if (XINT (start) < GPT && XINT (end) > GPT)
8904         {
8905           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8906             move_gap_both (XINT (start), start_byte);
8907           else
8908             move_gap_both (XINT (end), end_byte);
8909         }
8910       pos = XINT (start);
8911     }
8912
8913   list = Qnil;
8914   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
8915     {
8916       elt = XCAR (tail);
8917       attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
8918       ASET (attrs, coding_attr_trans_tbl,
8919             get_translation_table (attrs, 1, NULL));
8920       list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
8921     }
8922
8923   if (STRINGP (start))
8924     p = pbeg = SDATA (start);
8925   else
8926     p = pbeg = BYTE_POS_ADDR (start_byte);
8927   pend = p + (end_byte - start_byte);
8928
8929   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
8930   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8931
8932   while (p < pend)
8933     {
8934       if (ASCII_BYTE_P (*p))
8935         p++;
8936       else
8937         {
8938           c = STRING_CHAR_ADVANCE (p);
8939
8940           charset_map_loaded = 0;
8941           for (tail = list; CONSP (tail); tail = XCDR (tail))
8942             {
8943               elt = XCDR (XCAR (tail));
8944               if (! char_encodable_p (c, XCAR (elt)))
8945                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
8946             }
8947           if (charset_map_loaded)
8948             {
8949               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8950
8951               if (STRINGP (start))
8952                 pbeg = SDATA (start);
8953               else
8954                 pbeg = BYTE_POS_ADDR (start_byte);
8955               p = pbeg + p_offset;
8956               pend = pbeg + pend_offset;
8957             }
8958         }
8959       pos++;
8960     }
8961
8962   tail = list;
8963   list = Qnil;
8964   for (; CONSP (tail); tail = XCDR (tail))
8965     {
8966       elt = XCAR (tail);
8967       if (CONSP (XCDR (XCDR (elt))))
8968         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
8969                       list);
8970     }
8971
8972   return list;
8973 }
8974
8975
8976 Lisp_Object
8977 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
8978      Lisp_Object start, end, coding_system, dst_object;
8979      int encodep, norecord;
8980 {
8981   struct coding_system coding;
8982   EMACS_INT from, from_byte, to, to_byte;
8983   Lisp_Object src_object;
8984
8985   CHECK_NUMBER_COERCE_MARKER (start);
8986   CHECK_NUMBER_COERCE_MARKER (end);
8987   if (NILP (coding_system))
8988     coding_system = Qno_conversion;
8989   else
8990     CHECK_CODING_SYSTEM (coding_system);
8991   src_object = Fcurrent_buffer ();
8992   if (NILP (dst_object))
8993     dst_object = src_object;
8994   else if (! EQ (dst_object, Qt))
8995     CHECK_BUFFER (dst_object);
8996
8997   validate_region (&start, &end);
8998   from = XFASTINT (start);
8999   from_byte = CHAR_TO_BYTE (from);
9000   to = XFASTINT (end);
9001   to_byte = CHAR_TO_BYTE (to);
9002
9003   setup_coding_system (coding_system, &coding);
9004   coding.mode |= CODING_MODE_LAST_BLOCK;
9005
9006   if (encodep)
9007     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
9008                           dst_object);
9009   else
9010     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
9011                           dst_object);
9012   if (! norecord)
9013     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
9014
9015   return (BUFFERP (dst_object)
9016           ? make_number (coding.produced_char)
9017           : coding.dst_object);
9018 }
9019
9020
9021 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
9022        3, 4, "r\nzCoding system: ",
9023        doc: /* Decode the current region from the specified coding system.
9024 When called from a program, takes four arguments:
9025         START, END, CODING-SYSTEM, and DESTINATION.
9026 START and END are buffer positions.
9027
9028 Optional 4th arguments DESTINATION specifies where the decoded text goes.
9029 If nil, the region between START and END is replaced by the decoded text.
9030 If buffer, the decoded text is inserted in that buffer after point (point
9031 does not move).
9032 In those cases, the length of the decoded text is returned.
9033 If DESTINATION is t, the decoded text is returned.
9034
9035 This function sets `last-coding-system-used' to the precise coding system
9036 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9037 not fully specified.)  */)
9038      (start, end, coding_system, destination)
9039      Lisp_Object start, end, coding_system, destination;
9040 {
9041   return code_convert_region (start, end, coding_system, destination, 0, 0);
9042 }
9043
9044 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
9045        3, 4, "r\nzCoding system: ",
9046        doc: /* Encode the current region by specified coding system.
9047 When called from a program, takes four arguments:
9048         START, END, CODING-SYSTEM and DESTINATION.
9049 START and END are buffer positions.
9050
9051 Optional 4th arguments DESTINATION specifies where the encoded text goes.
9052 If nil, the region between START and END is replace by the encoded text.
9053 If buffer, the encoded text is inserted in that buffer after point (point
9054 does not move).
9055 In those cases, the length of the encoded text is returned.
9056 If DESTINATION is t, the encoded text is returned.
9057
9058 This function sets `last-coding-system-used' to the precise coding system
9059 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9060 not fully specified.)  */)
9061   (start, end, coding_system, destination)
9062      Lisp_Object start, end, coding_system, destination;
9063 {
9064   return code_convert_region (start, end, coding_system, destination, 1, 0);
9065 }
9066
9067 Lisp_Object
9068 code_convert_string (string, coding_system, dst_object,
9069                      encodep, nocopy, norecord)
9070      Lisp_Object string, coding_system, dst_object;
9071      int encodep, nocopy, norecord;
9072 {
9073   struct coding_system coding;
9074   EMACS_INT chars, bytes;
9075
9076   CHECK_STRING (string);
9077   if (NILP (coding_system))
9078     {
9079       if (! norecord)
9080         Vlast_coding_system_used = Qno_conversion;
9081       if (NILP (dst_object))
9082         return (nocopy ? Fcopy_sequence (string) : string);
9083     }
9084
9085   if (NILP (coding_system))
9086     coding_system = Qno_conversion;
9087   else
9088     CHECK_CODING_SYSTEM (coding_system);
9089   if (NILP (dst_object))
9090     dst_object = Qt;
9091   else if (! EQ (dst_object, Qt))
9092     CHECK_BUFFER (dst_object);
9093
9094   setup_coding_system (coding_system, &coding);
9095   coding.mode |= CODING_MODE_LAST_BLOCK;
9096   chars = SCHARS (string);
9097   bytes = SBYTES (string);
9098   if (encodep)
9099     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9100   else
9101     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9102   if (! norecord)
9103     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
9104
9105   return (BUFFERP (dst_object)
9106           ? make_number (coding.produced_char)
9107           : coding.dst_object);
9108 }
9109
9110
9111 /* Encode or decode STRING according to CODING_SYSTEM.
9112    Do not set Vlast_coding_system_used.
9113
9114    This function is called only from macros DECODE_FILE and
9115    ENCODE_FILE, thus we ignore character composition.  */
9116
9117 Lisp_Object
9118 code_convert_string_norecord (string, coding_system, encodep)
9119      Lisp_Object string, coding_system;
9120      int encodep;
9121 {
9122   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
9123 }
9124
9125
9126 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
9127        2, 4, 0,
9128        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
9129
9130 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
9131 if the decoding operation is trivial.
9132
9133 Optional fourth arg BUFFER non-nil means that the decoded text is
9134 inserted in that buffer after point (point does not move).  In this
9135 case, the return value is the length of the decoded text.
9136
9137 This function sets `last-coding-system-used' to the precise coding system
9138 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9139 not fully specified.)  */)
9140   (string, coding_system, nocopy, buffer)
9141      Lisp_Object string, coding_system, nocopy, buffer;
9142 {
9143   return code_convert_string (string, coding_system, buffer,
9144                               0, ! NILP (nocopy), 0);
9145 }
9146
9147 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
9148        2, 4, 0,
9149        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
9150
9151 Optional third arg NOCOPY non-nil means it is OK to return STRING
9152 itself if the encoding operation is trivial.
9153
9154 Optional fourth arg BUFFER non-nil means that the encoded text is
9155 inserted in that buffer after point (point does not move).  In this
9156 case, the return value is the length of the encoded text.
9157
9158 This function sets `last-coding-system-used' to the precise coding system
9159 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9160 not fully specified.)  */)
9161      (string, coding_system, nocopy, buffer)
9162      Lisp_Object string, coding_system, nocopy, buffer;
9163 {
9164   return code_convert_string (string, coding_system, buffer,
9165                               1, ! NILP (nocopy), 1);
9166 }
9167
9168 \f
9169 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
9170        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
9171 Return the corresponding character.  */)
9172      (code)
9173      Lisp_Object code;
9174 {
9175   Lisp_Object spec, attrs, val;
9176   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
9177   int c;
9178
9179   CHECK_NATNUM (code);
9180   c = XFASTINT (code);
9181   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9182   attrs = AREF (spec, 0);
9183
9184   if (ASCII_BYTE_P (c)
9185       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9186     return code;
9187
9188   val = CODING_ATTR_CHARSET_LIST (attrs);
9189   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9190   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9191   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
9192
9193   if (c <= 0x7F)
9194     charset = charset_roman;
9195   else if (c >= 0xA0 && c < 0xDF)
9196     {
9197       charset = charset_kana;
9198       c -= 0x80;
9199     }
9200   else
9201     {
9202       int s1 = c >> 8, s2 = c & 0xFF;
9203
9204       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
9205           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
9206         error ("Invalid code: %d", code);
9207       SJIS_TO_JIS (c);
9208       charset = charset_kanji;
9209     }
9210   c = DECODE_CHAR (charset, c);
9211   if (c < 0)
9212     error ("Invalid code: %d", code);
9213   return make_number (c);
9214 }
9215
9216
9217 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
9218        doc: /* Encode a Japanese character CH to shift_jis encoding.
9219 Return the corresponding code in SJIS.  */)
9220      (ch)
9221     Lisp_Object ch;
9222 {
9223   Lisp_Object spec, attrs, charset_list;
9224   int c;
9225   struct charset *charset;
9226   unsigned code;
9227
9228   CHECK_CHARACTER (ch);
9229   c = XFASTINT (ch);
9230   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9231   attrs = AREF (spec, 0);
9232
9233   if (ASCII_CHAR_P (c)
9234       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9235     return ch;
9236
9237   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9238   charset = char_charset (c, charset_list, &code);
9239   if (code == CHARSET_INVALID_CODE (charset))
9240     error ("Can't encode by shift_jis encoding: %d", c);
9241   JIS_TO_SJIS (code);
9242
9243   return make_number (code);
9244 }
9245
9246 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
9247        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
9248 Return the corresponding character.  */)
9249      (code)
9250      Lisp_Object code;
9251 {
9252   Lisp_Object spec, attrs, val;
9253   struct charset *charset_roman, *charset_big5, *charset;
9254   int c;
9255
9256   CHECK_NATNUM (code);
9257   c = XFASTINT (code);
9258   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9259   attrs = AREF (spec, 0);
9260
9261   if (ASCII_BYTE_P (c)
9262       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9263     return code;
9264
9265   val = CODING_ATTR_CHARSET_LIST (attrs);
9266   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9267   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
9268
9269   if (c <= 0x7F)
9270     charset = charset_roman;
9271   else
9272     {
9273       int b1 = c >> 8, b2 = c & 0x7F;
9274       if (b1 < 0xA1 || b1 > 0xFE
9275           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
9276         error ("Invalid code: %d", code);
9277       charset = charset_big5;
9278     }
9279   c = DECODE_CHAR (charset, (unsigned )c);
9280   if (c < 0)
9281     error ("Invalid code: %d", code);
9282   return make_number (c);
9283 }
9284
9285 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
9286        doc: /* Encode the Big5 character CH to BIG5 coding system.
9287 Return the corresponding character code in Big5.  */)
9288      (ch)
9289      Lisp_Object ch;
9290 {
9291   Lisp_Object spec, attrs, charset_list;
9292   struct charset *charset;
9293   int c;
9294   unsigned code;
9295
9296   CHECK_CHARACTER (ch);
9297   c = XFASTINT (ch);
9298   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9299   attrs = AREF (spec, 0);
9300   if (ASCII_CHAR_P (c)
9301       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9302     return ch;
9303
9304   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9305   charset = char_charset (c, charset_list, &code);
9306   if (code == CHARSET_INVALID_CODE (charset))
9307     error ("Can't encode by Big5 encoding: %d", c);
9308
9309   return make_number (code);
9310 }
9311
9312 \f
9313 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
9314        Sset_terminal_coding_system_internal, 1, 2, 0,
9315        doc: /* Internal use only.  */)
9316      (coding_system, terminal)
9317      Lisp_Object coding_system;
9318      Lisp_Object terminal;
9319 {
9320   struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
9321   CHECK_SYMBOL (coding_system);
9322   setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
9323   /* We had better not send unsafe characters to terminal.  */
9324   terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
9325   /* Characer composition should be disabled.  */
9326   terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9327   terminal_coding->src_multibyte = 1;
9328   terminal_coding->dst_multibyte = 0;
9329   return Qnil;
9330 }
9331
9332 DEFUN ("set-safe-terminal-coding-system-internal",
9333        Fset_safe_terminal_coding_system_internal,
9334        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
9335        doc: /* Internal use only.  */)
9336      (coding_system)
9337      Lisp_Object coding_system;
9338 {
9339   CHECK_SYMBOL (coding_system);
9340   setup_coding_system (Fcheck_coding_system (coding_system),
9341                        &safe_terminal_coding);
9342   /* Characer composition should be disabled.  */
9343   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9344   safe_terminal_coding.src_multibyte = 1;
9345   safe_terminal_coding.dst_multibyte = 0;
9346   return Qnil;
9347 }
9348
9349 DEFUN ("terminal-coding-system", Fterminal_coding_system,
9350        Sterminal_coding_system, 0, 1, 0,
9351        doc: /* Return coding system specified for terminal output on the given terminal.
9352 TERMINAL may be a terminal object, a frame, or nil for the selected
9353 frame's terminal device.  */)
9354      (terminal)
9355      Lisp_Object terminal;
9356 {
9357   struct coding_system *terminal_coding
9358     = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
9359   Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
9360
9361   /* For backward compatibility, return nil if it is `undecided'. */
9362   return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
9363 }
9364
9365 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
9366        Sset_keyboard_coding_system_internal, 1, 2, 0,
9367        doc: /* Internal use only.  */)
9368      (coding_system, terminal)
9369      Lisp_Object coding_system;
9370      Lisp_Object terminal;
9371 {
9372   struct terminal *t = get_terminal (terminal, 1);
9373   CHECK_SYMBOL (coding_system);
9374   setup_coding_system (Fcheck_coding_system (coding_system),
9375                        TERMINAL_KEYBOARD_CODING (t));
9376   /* Characer composition should be disabled.  */
9377   TERMINAL_KEYBOARD_CODING (t)->common_flags
9378     &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9379   return Qnil;
9380 }
9381
9382 DEFUN ("keyboard-coding-system",
9383        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
9384        doc: /* Return coding system specified for decoding keyboard input.  */)
9385      (terminal)
9386      Lisp_Object terminal;
9387 {
9388   return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
9389                          (get_terminal (terminal, 1))->id);
9390 }
9391
9392 \f
9393 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
9394        Sfind_operation_coding_system,  1, MANY, 0,
9395        doc: /* Choose a coding system for an operation based on the target name.
9396 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
9397 DECODING-SYSTEM is the coding system to use for decoding
9398 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
9399 for encoding (in case OPERATION does encoding).
9400
9401 The first argument OPERATION specifies an I/O primitive:
9402   For file I/O, `insert-file-contents' or `write-region'.
9403   For process I/O, `call-process', `call-process-region', or `start-process'.
9404   For network I/O, `open-network-stream'.
9405
9406 The remaining arguments should be the same arguments that were passed
9407 to the primitive.  Depending on which primitive, one of those arguments
9408 is selected as the TARGET.  For example, if OPERATION does file I/O,
9409 whichever argument specifies the file name is TARGET.
9410
9411 TARGET has a meaning which depends on OPERATION:
9412   For file I/O, TARGET is a file name (except for the special case below).
9413   For process I/O, TARGET is a process name.
9414   For network I/O, TARGET is a service name or a port number.
9415
9416 This function looks up what is specified for TARGET in
9417 `file-coding-system-alist', `process-coding-system-alist',
9418 or `network-coding-system-alist' depending on OPERATION.
9419 They may specify a coding system, a cons of coding systems,
9420 or a function symbol to call.
9421 In the last case, we call the function with one argument,
9422 which is a list of all the arguments given to this function.
9423 If the function can't decide a coding system, it can return
9424 `undecided' so that the normal code-detection is performed.
9425
9426 If OPERATION is `insert-file-contents', the argument corresponding to
9427 TARGET may be a cons (FILENAME . BUFFER).  In that case, FILENAME is a
9428 file name to look up, and BUFFER is a buffer that contains the file's
9429 contents (not yet decoded).  If `file-coding-system-alist' specifies a
9430 function to call for FILENAME, that function should examine the
9431 contents of BUFFER instead of reading the file.
9432
9433 usage: (find-operation-coding-system OPERATION ARGUMENTS...)  */)
9434      (nargs, args)
9435      int nargs;
9436      Lisp_Object *args;
9437 {
9438   Lisp_Object operation, target_idx, target, val;
9439   register Lisp_Object chain;
9440
9441   if (nargs < 2)
9442     error ("Too few arguments");
9443   operation = args[0];
9444   if (!SYMBOLP (operation)
9445       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
9446     error ("Invalid first argument");
9447   if (nargs < 1 + XINT (target_idx))
9448     error ("Too few arguments for operation: %s",
9449            SDATA (SYMBOL_NAME (operation)));
9450   target = args[XINT (target_idx) + 1];
9451   if (!(STRINGP (target)
9452         || (EQ (operation, Qinsert_file_contents) && CONSP (target)
9453             && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
9454         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
9455     error ("Invalid %dth argument", XINT (target_idx) + 1);
9456   if (CONSP (target))
9457     target = XCAR (target);
9458
9459   chain = ((EQ (operation, Qinsert_file_contents)
9460             || EQ (operation, Qwrite_region))
9461            ? Vfile_coding_system_alist
9462            : (EQ (operation, Qopen_network_stream)
9463               ? Vnetwork_coding_system_alist
9464               : Vprocess_coding_system_alist));
9465   if (NILP (chain))
9466     return Qnil;
9467
9468   for (; CONSP (chain); chain = XCDR (chain))
9469     {
9470       Lisp_Object elt;
9471
9472       elt = XCAR (chain);
9473       if (CONSP (elt)
9474           && ((STRINGP (target)
9475                && STRINGP (XCAR (elt))
9476                && fast_string_match (XCAR (elt), target) >= 0)
9477               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
9478         {
9479           val = XCDR (elt);
9480           /* Here, if VAL is both a valid coding system and a valid
9481              function symbol, we return VAL as a coding system.  */
9482           if (CONSP (val))
9483             return val;
9484           if (! SYMBOLP (val))
9485             return Qnil;
9486           if (! NILP (Fcoding_system_p (val)))
9487             return Fcons (val, val);
9488           if (! NILP (Ffboundp (val)))
9489             {
9490               /* We use call1 rather than safe_call1
9491                  so as to get bug reports about functions called here
9492                  which don't handle the current interface.  */
9493               val = call1 (val, Flist (nargs, args));
9494               if (CONSP (val))
9495                 return val;
9496               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
9497                 return Fcons (val, val);
9498             }
9499           return Qnil;
9500         }
9501     }
9502   return Qnil;
9503 }
9504
9505 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
9506        Sset_coding_system_priority, 0, MANY, 0,
9507        doc: /* Assign higher priority to the coding systems given as arguments.
9508 If multiple coding systems belong to the same category,
9509 all but the first one are ignored.
9510
9511 usage: (set-coding-system-priority &rest coding-systems)  */)
9512      (nargs, args)
9513      int nargs;
9514      Lisp_Object *args;
9515 {
9516   int i, j;
9517   int changed[coding_category_max];
9518   enum coding_category priorities[coding_category_max];
9519
9520   bzero (changed, sizeof changed);
9521
9522   for (i = j = 0; i < nargs; i++)
9523     {
9524       enum coding_category category;
9525       Lisp_Object spec, attrs;
9526
9527       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
9528       attrs = AREF (spec, 0);
9529       category = XINT (CODING_ATTR_CATEGORY (attrs));
9530       if (changed[category])
9531         /* Ignore this coding system because a coding system of the
9532            same category already had a higher priority.  */
9533         continue;
9534       changed[category] = 1;
9535       priorities[j++] = category;
9536       if (coding_categories[category].id >= 0
9537           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
9538         setup_coding_system (args[i], &coding_categories[category]);
9539       Fset (AREF (Vcoding_category_table, category), args[i]);
9540     }
9541
9542   /* Now we have decided top J priorities.  Reflect the order of the
9543      original priorities to the remaining priorities.  */
9544
9545   for (i = j, j = 0; i < coding_category_max; i++, j++)
9546     {
9547       while (j < coding_category_max
9548              && changed[coding_priorities[j]])
9549         j++;
9550       if (j == coding_category_max)
9551         abort ();
9552       priorities[i] = coding_priorities[j];
9553     }
9554
9555   bcopy (priorities, coding_priorities, sizeof priorities);
9556
9557   /* Update `coding-category-list'.  */
9558   Vcoding_category_list = Qnil;
9559   for (i = coding_category_max - 1; i >= 0; i--)
9560     Vcoding_category_list
9561       = Fcons (AREF (Vcoding_category_table, priorities[i]),
9562                Vcoding_category_list);
9563
9564   return Qnil;
9565 }
9566
9567 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
9568        Scoding_system_priority_list, 0, 1, 0,
9569        doc: /* Return a list of coding systems ordered by their priorities.
9570 The list contains a subset of coding systems; i.e. coding systems
9571 assigned to each coding category (see `coding-category-list').
9572
9573 HIGHESTP non-nil means just return the highest priority one.  */)
9574      (highestp)
9575      Lisp_Object highestp;
9576 {
9577   int i;
9578   Lisp_Object val;
9579
9580   for (i = 0, val = Qnil; i < coding_category_max; i++)
9581     {
9582       enum coding_category category = coding_priorities[i];
9583       int id = coding_categories[category].id;
9584       Lisp_Object attrs;
9585
9586       if (id < 0)
9587         continue;
9588       attrs = CODING_ID_ATTRS (id);
9589       if (! NILP (highestp))
9590         return CODING_ATTR_BASE_NAME (attrs);
9591       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
9592     }
9593   return Fnreverse (val);
9594 }
9595
9596 static char *suffixes[] = { "-unix", "-dos", "-mac" };
9597
9598 static Lisp_Object
9599 make_subsidiaries (base)
9600      Lisp_Object base;
9601 {
9602   Lisp_Object subsidiaries;
9603   int base_name_len = SBYTES (SYMBOL_NAME (base));
9604   char *buf = (char *) alloca (base_name_len + 6);
9605   int i;
9606
9607   bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
9608   subsidiaries = Fmake_vector (make_number (3), Qnil);
9609   for (i = 0; i < 3; i++)
9610     {
9611       bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
9612       ASET (subsidiaries, i, intern (buf));
9613     }
9614   return subsidiaries;
9615 }
9616
9617
9618 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
9619        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
9620        doc: /* For internal use only.
9621 usage: (define-coding-system-internal ...)  */)
9622      (nargs, args)
9623      int nargs;
9624      Lisp_Object *args;
9625 {
9626   Lisp_Object name;
9627   Lisp_Object spec_vec;         /* [ ATTRS ALIASE EOL_TYPE ] */
9628   Lisp_Object attrs;            /* Vector of attributes.  */
9629   Lisp_Object eol_type;
9630   Lisp_Object aliases;
9631   Lisp_Object coding_type, charset_list, safe_charsets;
9632   enum coding_category category;
9633   Lisp_Object tail, val;
9634   int max_charset_id = 0;
9635   int i;
9636
9637   if (nargs < coding_arg_max)
9638     goto short_args;
9639
9640   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
9641
9642   name = args[coding_arg_name];
9643   CHECK_SYMBOL (name);
9644   CODING_ATTR_BASE_NAME (attrs) = name;
9645
9646   val = args[coding_arg_mnemonic];
9647   if (! STRINGP (val))
9648     CHECK_CHARACTER (val);
9649   CODING_ATTR_MNEMONIC (attrs) = val;
9650
9651   coding_type = args[coding_arg_coding_type];
9652   CHECK_SYMBOL (coding_type);
9653   CODING_ATTR_TYPE (attrs) = coding_type;
9654
9655   charset_list = args[coding_arg_charset_list];
9656   if (SYMBOLP (charset_list))
9657     {
9658       if (EQ (charset_list, Qiso_2022))
9659         {
9660           if (! EQ (coding_type, Qiso_2022))
9661             error ("Invalid charset-list");
9662           charset_list = Viso_2022_charset_list;
9663         }
9664       else if (EQ (charset_list, Qemacs_mule))
9665         {
9666           if (! EQ (coding_type, Qemacs_mule))
9667             error ("Invalid charset-list");
9668           charset_list = Vemacs_mule_charset_list;
9669         }
9670       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9671         if (max_charset_id < XFASTINT (XCAR (tail)))
9672           max_charset_id = XFASTINT (XCAR (tail));
9673     }
9674   else
9675     {
9676       charset_list = Fcopy_sequence (charset_list);
9677       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9678         {
9679           struct charset *charset;
9680
9681           val = XCAR (tail);
9682           CHECK_CHARSET_GET_CHARSET (val, charset);
9683           if (EQ (coding_type, Qiso_2022)
9684               ? CHARSET_ISO_FINAL (charset) < 0
9685               : EQ (coding_type, Qemacs_mule)
9686               ? CHARSET_EMACS_MULE_ID (charset) < 0
9687               : 0)
9688             error ("Can't handle charset `%s'",
9689                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9690
9691           XSETCAR (tail, make_number (charset->id));
9692           if (max_charset_id < charset->id)
9693             max_charset_id = charset->id;
9694         }
9695     }
9696   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
9697
9698   safe_charsets = make_uninit_string (max_charset_id + 1);
9699   memset (SDATA (safe_charsets), 255, max_charset_id + 1);
9700   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9701     SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
9702   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
9703
9704   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
9705
9706   val = args[coding_arg_decode_translation_table];
9707   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9708     CHECK_SYMBOL (val);
9709   CODING_ATTR_DECODE_TBL (attrs) = val;
9710
9711   val = args[coding_arg_encode_translation_table];
9712   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9713     CHECK_SYMBOL (val);
9714   CODING_ATTR_ENCODE_TBL (attrs) = val;
9715
9716   val = args[coding_arg_post_read_conversion];
9717   CHECK_SYMBOL (val);
9718   CODING_ATTR_POST_READ (attrs) = val;
9719
9720   val = args[coding_arg_pre_write_conversion];
9721   CHECK_SYMBOL (val);
9722   CODING_ATTR_PRE_WRITE (attrs) = val;
9723
9724   val = args[coding_arg_default_char];
9725   if (NILP (val))
9726     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
9727   else
9728     {
9729       CHECK_CHARACTER (val);
9730       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9731     }
9732
9733   val = args[coding_arg_for_unibyte];
9734   CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
9735
9736   val = args[coding_arg_plist];
9737   CHECK_LIST (val);
9738   CODING_ATTR_PLIST (attrs) = val;
9739
9740   if (EQ (coding_type, Qcharset))
9741     {
9742       /* Generate a lisp vector of 256 elements.  Each element is nil,
9743          integer, or a list of charset IDs.
9744
9745          If Nth element is nil, the byte code N is invalid in this
9746          coding system.
9747
9748          If Nth element is a number NUM, N is the first byte of a
9749          charset whose ID is NUM.
9750
9751          If Nth element is a list of charset IDs, N is the first byte
9752          of one of them.  The list is sorted by dimensions of the
9753          charsets.  A charset of smaller dimension comes firtst. */
9754       val = Fmake_vector (make_number (256), Qnil);
9755
9756       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9757         {
9758           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
9759           int dim = CHARSET_DIMENSION (charset);
9760           int idx = (dim - 1) * 4;
9761
9762           if (CHARSET_ASCII_COMPATIBLE_P (charset))
9763             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9764
9765           for (i = charset->code_space[idx];
9766                i <= charset->code_space[idx + 1]; i++)
9767             {
9768               Lisp_Object tmp, tmp2;
9769               int dim2;
9770
9771               tmp = AREF (val, i);
9772               if (NILP (tmp))
9773                 tmp = XCAR (tail);
9774               else if (NUMBERP (tmp))
9775                 {
9776                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
9777                   if (dim < dim2)
9778                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
9779                   else
9780                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
9781                 }
9782               else
9783                 {
9784                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
9785                     {
9786                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
9787                       if (dim < dim2)
9788                         break;
9789                     }
9790                   if (NILP (tmp2))
9791                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
9792                   else
9793                     {
9794                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
9795                       XSETCAR (tmp2, XCAR (tail));
9796                     }
9797                 }
9798               ASET (val, i, tmp);
9799             }
9800         }
9801       ASET (attrs, coding_attr_charset_valids, val);
9802       category = coding_category_charset;
9803     }
9804   else if (EQ (coding_type, Qccl))
9805     {
9806       Lisp_Object valids;
9807
9808       if (nargs < coding_arg_ccl_max)
9809         goto short_args;
9810
9811       val = args[coding_arg_ccl_decoder];
9812       CHECK_CCL_PROGRAM (val);
9813       if (VECTORP (val))
9814         val = Fcopy_sequence (val);
9815       ASET (attrs, coding_attr_ccl_decoder, val);
9816
9817       val = args[coding_arg_ccl_encoder];
9818       CHECK_CCL_PROGRAM (val);
9819       if (VECTORP (val))
9820         val = Fcopy_sequence (val);
9821       ASET (attrs, coding_attr_ccl_encoder, val);
9822
9823       val = args[coding_arg_ccl_valids];
9824       valids = Fmake_string (make_number (256), make_number (0));
9825       for (tail = val; !NILP (tail); tail = Fcdr (tail))
9826         {
9827           int from, to;
9828
9829           val = Fcar (tail);
9830           if (INTEGERP (val))
9831             {
9832               from = to = XINT (val);
9833               if (from < 0 || from > 255)
9834                 args_out_of_range_3 (val, make_number (0), make_number (255));
9835             }
9836           else
9837             {
9838               CHECK_CONS (val);
9839               CHECK_NATNUM_CAR (val);
9840               CHECK_NATNUM_CDR (val);
9841               from = XINT (XCAR (val));
9842               if (from > 255)
9843                 args_out_of_range_3 (XCAR (val),
9844                                      make_number (0), make_number (255));
9845               to = XINT (XCDR (val));
9846               if (to < from || to > 255)
9847                 args_out_of_range_3 (XCDR (val),
9848                                      XCAR (val), make_number (255));
9849             }
9850           for (i = from; i <= to; i++)
9851             SSET (valids, i, 1);
9852         }
9853       ASET (attrs, coding_attr_ccl_valids, valids);
9854
9855       category = coding_category_ccl;
9856     }
9857   else if (EQ (coding_type, Qutf_16))
9858     {
9859       Lisp_Object bom, endian;
9860
9861       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9862
9863       if (nargs < coding_arg_utf16_max)
9864         goto short_args;
9865
9866       bom = args[coding_arg_utf16_bom];
9867       if (! NILP (bom) && ! EQ (bom, Qt))
9868         {
9869           CHECK_CONS (bom);
9870           val = XCAR (bom);
9871           CHECK_CODING_SYSTEM (val);
9872           val = XCDR (bom);
9873           CHECK_CODING_SYSTEM (val);
9874         }
9875       ASET (attrs, coding_attr_utf_bom, bom);
9876
9877       endian = args[coding_arg_utf16_endian];
9878       CHECK_SYMBOL (endian);
9879       if (NILP (endian))
9880         endian = Qbig;
9881       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
9882         error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
9883       ASET (attrs, coding_attr_utf_16_endian, endian);
9884
9885       category = (CONSP (bom)
9886                   ? coding_category_utf_16_auto
9887                   : NILP (bom)
9888                   ? (EQ (endian, Qbig)
9889                      ? coding_category_utf_16_be_nosig
9890                      : coding_category_utf_16_le_nosig)
9891                   : (EQ (endian, Qbig)
9892                      ? coding_category_utf_16_be
9893                      : coding_category_utf_16_le));
9894     }
9895   else if (EQ (coding_type, Qiso_2022))
9896     {
9897       Lisp_Object initial, reg_usage, request, flags;
9898       int i;
9899
9900       if (nargs < coding_arg_iso2022_max)
9901         goto short_args;
9902
9903       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
9904       CHECK_VECTOR (initial);
9905       for (i = 0; i < 4; i++)
9906         {
9907           val = Faref (initial, make_number (i));
9908           if (! NILP (val))
9909             {
9910               struct charset *charset;
9911
9912               CHECK_CHARSET_GET_CHARSET (val, charset);
9913               ASET (initial, i, make_number (CHARSET_ID (charset)));
9914               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
9915                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9916             }
9917           else
9918             ASET (initial, i, make_number (-1));
9919         }
9920
9921       reg_usage = args[coding_arg_iso2022_reg_usage];
9922       CHECK_CONS (reg_usage);
9923       CHECK_NUMBER_CAR (reg_usage);
9924       CHECK_NUMBER_CDR (reg_usage);
9925
9926       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
9927       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
9928         {
9929           int id;
9930           Lisp_Object tmp;
9931
9932           val = Fcar (tail);
9933           CHECK_CONS (val);
9934           tmp = XCAR (val);
9935           CHECK_CHARSET_GET_ID (tmp, id);
9936           CHECK_NATNUM_CDR (val);
9937           if (XINT (XCDR (val)) >= 4)
9938             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
9939           XSETCAR (val, make_number (id));
9940         }
9941
9942       flags = args[coding_arg_iso2022_flags];
9943       CHECK_NATNUM (flags);
9944       i = XINT (flags);
9945       if (EQ (args[coding_arg_charset_list], Qiso_2022))
9946         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
9947
9948       ASET (attrs, coding_attr_iso_initial, initial);
9949       ASET (attrs, coding_attr_iso_usage, reg_usage);
9950       ASET (attrs, coding_attr_iso_request, request);
9951       ASET (attrs, coding_attr_iso_flags, flags);
9952       setup_iso_safe_charsets (attrs);
9953
9954       if (i & CODING_ISO_FLAG_SEVEN_BITS)
9955         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
9956                           | CODING_ISO_FLAG_SINGLE_SHIFT))
9957                     ? coding_category_iso_7_else
9958                     : EQ (args[coding_arg_charset_list], Qiso_2022)
9959                     ? coding_category_iso_7
9960                     : coding_category_iso_7_tight);
9961       else
9962         {
9963           int id = XINT (AREF (initial, 1));
9964
9965           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
9966                        || EQ (args[coding_arg_charset_list], Qiso_2022)
9967                        || id < 0)
9968                       ? coding_category_iso_8_else
9969                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
9970                       ? coding_category_iso_8_1
9971                       : coding_category_iso_8_2);
9972         }
9973       if (category != coding_category_iso_8_1
9974           && category != coding_category_iso_8_2)
9975         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9976     }
9977   else if (EQ (coding_type, Qemacs_mule))
9978     {
9979       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
9980         ASET (attrs, coding_attr_emacs_mule_full, Qt);
9981       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9982       category = coding_category_emacs_mule;
9983     }
9984   else if (EQ (coding_type, Qshift_jis))
9985     {
9986
9987       struct charset *charset;
9988
9989       if (XINT (Flength (charset_list)) != 3
9990           && XINT (Flength (charset_list)) != 4)
9991         error ("There should be three or four charsets");
9992
9993       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9994       if (CHARSET_DIMENSION (charset) != 1)
9995         error ("Dimension of charset %s is not one",
9996                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9997       if (CHARSET_ASCII_COMPATIBLE_P (charset))
9998         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9999
10000       charset_list = XCDR (charset_list);
10001       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10002       if (CHARSET_DIMENSION (charset) != 1)
10003         error ("Dimension of charset %s is not one",
10004                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10005
10006       charset_list = XCDR (charset_list);
10007       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10008       if (CHARSET_DIMENSION (charset) != 2)
10009         error ("Dimension of charset %s is not two",
10010                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10011
10012       charset_list = XCDR (charset_list);
10013       if (! NILP (charset_list))
10014         {
10015           charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10016           if (CHARSET_DIMENSION (charset) != 2)
10017             error ("Dimension of charset %s is not two",
10018                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10019         }
10020
10021       category = coding_category_sjis;
10022       Vsjis_coding_system = name;
10023     }
10024   else if (EQ (coding_type, Qbig5))
10025     {
10026       struct charset *charset;
10027
10028       if (XINT (Flength (charset_list)) != 2)
10029         error ("There should be just two charsets");
10030
10031       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10032       if (CHARSET_DIMENSION (charset) != 1)
10033         error ("Dimension of charset %s is not one",
10034                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10035       if (CHARSET_ASCII_COMPATIBLE_P (charset))
10036         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10037
10038       charset_list = XCDR (charset_list);
10039       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10040       if (CHARSET_DIMENSION (charset) != 2)
10041         error ("Dimension of charset %s is not two",
10042                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10043
10044       category = coding_category_big5;
10045       Vbig5_coding_system = name;
10046     }
10047   else if (EQ (coding_type, Qraw_text))
10048     {
10049       category = coding_category_raw_text;
10050       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10051     }
10052   else if (EQ (coding_type, Qutf_8))
10053     {
10054       Lisp_Object bom;
10055
10056       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10057
10058       if (nargs < coding_arg_utf8_max)
10059         goto short_args;
10060
10061       bom = args[coding_arg_utf8_bom];
10062       if (! NILP (bom) && ! EQ (bom, Qt))
10063         {
10064           CHECK_CONS (bom);
10065           val = XCAR (bom);
10066           CHECK_CODING_SYSTEM (val);
10067           val = XCDR (bom);
10068           CHECK_CODING_SYSTEM (val);
10069         }
10070       ASET (attrs, coding_attr_utf_bom, bom);
10071
10072       category = (CONSP (bom) ? coding_category_utf_8_auto
10073                   : NILP (bom) ? coding_category_utf_8_nosig
10074                   : coding_category_utf_8_sig);
10075     }
10076   else if (EQ (coding_type, Qundecided))
10077     category = coding_category_undecided;
10078   else
10079     error ("Invalid coding system type: %s",
10080            SDATA (SYMBOL_NAME (coding_type)));
10081
10082   CODING_ATTR_CATEGORY (attrs) = make_number (category);
10083   CODING_ATTR_PLIST (attrs)
10084     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
10085                                 CODING_ATTR_PLIST (attrs)));
10086   CODING_ATTR_PLIST (attrs)
10087     = Fcons (QCascii_compatible_p,
10088              Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
10089                     CODING_ATTR_PLIST (attrs)));
10090
10091   eol_type = args[coding_arg_eol_type];
10092   if (! NILP (eol_type)
10093       && ! EQ (eol_type, Qunix)
10094       && ! EQ (eol_type, Qdos)
10095       && ! EQ (eol_type, Qmac))
10096     error ("Invalid eol-type");
10097
10098   aliases = Fcons (name, Qnil);
10099
10100   if (NILP (eol_type))
10101     {
10102       eol_type = make_subsidiaries (name);
10103       for (i = 0; i < 3; i++)
10104         {
10105           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
10106
10107           this_name = AREF (eol_type, i);
10108           this_aliases = Fcons (this_name, Qnil);
10109           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
10110           this_spec = Fmake_vector (make_number (3), attrs);
10111           ASET (this_spec, 1, this_aliases);
10112           ASET (this_spec, 2, this_eol_type);
10113           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
10114           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
10115           val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
10116           if (NILP (val))
10117             Vcoding_system_alist
10118               = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
10119                        Vcoding_system_alist);
10120         }
10121     }
10122
10123   spec_vec = Fmake_vector (make_number (3), attrs);
10124   ASET (spec_vec, 1, aliases);
10125   ASET (spec_vec, 2, eol_type);
10126
10127   Fputhash (name, spec_vec, Vcoding_system_hash_table);
10128   Vcoding_system_list = Fcons (name, Vcoding_system_list);
10129   val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
10130   if (NILP (val))
10131     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
10132                                   Vcoding_system_alist);
10133
10134   {
10135     int id = coding_categories[category].id;
10136
10137     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
10138       setup_coding_system (name, &coding_categories[category]);
10139   }
10140
10141   return Qnil;
10142
10143  short_args:
10144   return Fsignal (Qwrong_number_of_arguments,
10145                   Fcons (intern ("define-coding-system-internal"),
10146                          make_number (nargs)));
10147 }
10148
10149
10150 DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
10151        3, 3, 0,
10152        doc: /* Change value in CODING-SYSTEM's property list PROP to VAL.  */)
10153   (coding_system, prop, val)
10154      Lisp_Object coding_system, prop, val;
10155 {
10156   Lisp_Object spec, attrs;
10157
10158   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10159   attrs = AREF (spec, 0);
10160   if (EQ (prop, QCmnemonic))
10161     {
10162       if (! STRINGP (val))
10163         CHECK_CHARACTER (val);
10164       CODING_ATTR_MNEMONIC (attrs) = val;
10165     }
10166   else if (EQ (prop, QCdefault_char))
10167     {
10168       if (NILP (val))
10169         val = make_number (' ');
10170       else
10171         CHECK_CHARACTER (val);
10172       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
10173     }
10174   else if (EQ (prop, QCdecode_translation_table))
10175     {
10176       if (! CHAR_TABLE_P (val) && ! CONSP (val))
10177         CHECK_SYMBOL (val);
10178       CODING_ATTR_DECODE_TBL (attrs) = val;
10179     }
10180   else if (EQ (prop, QCencode_translation_table))
10181     {
10182       if (! CHAR_TABLE_P (val) && ! CONSP (val))
10183         CHECK_SYMBOL (val);
10184       CODING_ATTR_ENCODE_TBL (attrs) = val;
10185     }
10186   else if (EQ (prop, QCpost_read_conversion))
10187     {
10188       CHECK_SYMBOL (val);
10189       CODING_ATTR_POST_READ (attrs) = val;
10190     }
10191   else if (EQ (prop, QCpre_write_conversion))
10192     {
10193       CHECK_SYMBOL (val);
10194       CODING_ATTR_PRE_WRITE (attrs) = val;
10195     }
10196   else if (EQ (prop, QCascii_compatible_p))
10197     {
10198       CODING_ATTR_ASCII_COMPAT (attrs) = val;
10199     }
10200
10201   CODING_ATTR_PLIST (attrs)
10202     = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
10203   return val;
10204 }
10205
10206
10207 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
10208        Sdefine_coding_system_alias, 2, 2, 0,
10209        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
10210      (alias, coding_system)
10211      Lisp_Object alias, coding_system;
10212 {
10213   Lisp_Object spec, aliases, eol_type, val;
10214
10215   CHECK_SYMBOL (alias);
10216   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10217   aliases = AREF (spec, 1);
10218   /* ALIASES should be a list of length more than zero, and the first
10219      element is a base coding system.  Append ALIAS at the tail of the
10220      list.  */
10221   while (!NILP (XCDR (aliases)))
10222     aliases = XCDR (aliases);
10223   XSETCDR (aliases, Fcons (alias, Qnil));
10224
10225   eol_type = AREF (spec, 2);
10226   if (VECTORP (eol_type))
10227     {
10228       Lisp_Object subsidiaries;
10229       int i;
10230
10231       subsidiaries = make_subsidiaries (alias);
10232       for (i = 0; i < 3; i++)
10233         Fdefine_coding_system_alias (AREF (subsidiaries, i),
10234                                      AREF (eol_type, i));
10235     }
10236
10237   Fputhash (alias, spec, Vcoding_system_hash_table);
10238   Vcoding_system_list = Fcons (alias, Vcoding_system_list);
10239   val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
10240   if (NILP (val))
10241     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
10242                                   Vcoding_system_alist);
10243
10244   return Qnil;
10245 }
10246
10247 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
10248        1, 1, 0,
10249        doc: /* Return the base of CODING-SYSTEM.
10250 Any alias or subsidiary coding system is not a base coding system.  */)
10251   (coding_system)
10252      Lisp_Object coding_system;
10253 {
10254   Lisp_Object spec, attrs;
10255
10256   if (NILP (coding_system))
10257     return (Qno_conversion);
10258   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10259   attrs = AREF (spec, 0);
10260   return CODING_ATTR_BASE_NAME (attrs);
10261 }
10262
10263 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
10264        1, 1, 0,
10265        doc: "Return the property list of CODING-SYSTEM.")
10266      (coding_system)
10267      Lisp_Object coding_system;
10268 {
10269   Lisp_Object spec, attrs;
10270
10271   if (NILP (coding_system))
10272     coding_system = Qno_conversion;
10273   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10274   attrs = AREF (spec, 0);
10275   return CODING_ATTR_PLIST (attrs);
10276 }
10277
10278
10279 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
10280        1, 1, 0,
10281        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
10282      (coding_system)
10283      Lisp_Object coding_system;
10284 {
10285   Lisp_Object spec;
10286
10287   if (NILP (coding_system))
10288     coding_system = Qno_conversion;
10289   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10290   return AREF (spec, 1);
10291 }
10292
10293 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
10294        Scoding_system_eol_type, 1, 1, 0,
10295        doc: /* Return eol-type of CODING-SYSTEM.
10296 An eol-type is an integer 0, 1, 2, or a vector of coding systems.
10297
10298 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
10299 and CR respectively.
10300
10301 A vector value indicates that a format of end-of-line should be
10302 detected automatically.  Nth element of the vector is the subsidiary
10303 coding system whose eol-type is N.  */)
10304      (coding_system)
10305      Lisp_Object coding_system;
10306 {
10307   Lisp_Object spec, eol_type;
10308   int n;
10309
10310   if (NILP (coding_system))
10311     coding_system = Qno_conversion;
10312   if (! CODING_SYSTEM_P (coding_system))
10313     return Qnil;
10314   spec = CODING_SYSTEM_SPEC (coding_system);
10315   eol_type = AREF (spec, 2);
10316   if (VECTORP (eol_type))
10317     return Fcopy_sequence (eol_type);
10318   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
10319   return make_number (n);
10320 }
10321
10322 #endif /* emacs */
10323
10324 \f
10325 /*** 9. Post-amble ***/
10326
10327 void
10328 init_coding_once ()
10329 {
10330   int i;
10331
10332   for (i = 0; i < coding_category_max; i++)
10333     {
10334       coding_categories[i].id = -1;
10335       coding_priorities[i] = i;
10336     }
10337
10338   /* ISO2022 specific initialize routine.  */
10339   for (i = 0; i < 0x20; i++)
10340     iso_code_class[i] = ISO_control_0;
10341   for (i = 0x21; i < 0x7F; i++)
10342     iso_code_class[i] = ISO_graphic_plane_0;
10343   for (i = 0x80; i < 0xA0; i++)
10344     iso_code_class[i] = ISO_control_1;
10345   for (i = 0xA1; i < 0xFF; i++)
10346     iso_code_class[i] = ISO_graphic_plane_1;
10347   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
10348   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
10349   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
10350   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
10351   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
10352   iso_code_class[ISO_CODE_ESC] = ISO_escape;
10353   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
10354   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
10355   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
10356
10357   for (i = 0; i < 256; i++)
10358     {
10359       emacs_mule_bytes[i] = 1;
10360     }
10361   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
10362   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
10363   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
10364   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
10365 }
10366
10367 #ifdef emacs
10368
10369 void
10370 syms_of_coding ()
10371 {
10372   staticpro (&Vcoding_system_hash_table);
10373   {
10374     Lisp_Object args[2];
10375     args[0] = QCtest;
10376     args[1] = Qeq;
10377     Vcoding_system_hash_table = Fmake_hash_table (2, args);
10378   }
10379
10380   staticpro (&Vsjis_coding_system);
10381   Vsjis_coding_system = Qnil;
10382
10383   staticpro (&Vbig5_coding_system);
10384   Vbig5_coding_system = Qnil;
10385
10386   staticpro (&Vcode_conversion_reused_workbuf);
10387   Vcode_conversion_reused_workbuf = Qnil;
10388
10389   staticpro (&Vcode_conversion_workbuf_name);
10390   Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
10391
10392   reused_workbuf_in_use = 0;
10393
10394   DEFSYM (Qcharset, "charset");
10395   DEFSYM (Qtarget_idx, "target-idx");
10396   DEFSYM (Qcoding_system_history, "coding-system-history");
10397   Fset (Qcoding_system_history, Qnil);
10398
10399   /* Target FILENAME is the first argument.  */
10400   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
10401   /* Target FILENAME is the third argument.  */
10402   Fput (Qwrite_region, Qtarget_idx, make_number (2));
10403
10404   DEFSYM (Qcall_process, "call-process");
10405   /* Target PROGRAM is the first argument.  */
10406   Fput (Qcall_process, Qtarget_idx, make_number (0));
10407
10408   DEFSYM (Qcall_process_region, "call-process-region");
10409   /* Target PROGRAM is the third argument.  */
10410   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
10411
10412   DEFSYM (Qstart_process, "start-process");
10413   /* Target PROGRAM is the third argument.  */
10414   Fput (Qstart_process, Qtarget_idx, make_number (2));
10415
10416   DEFSYM (Qopen_network_stream, "open-network-stream");
10417   /* Target SERVICE is the fourth argument.  */
10418   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
10419
10420   DEFSYM (Qcoding_system, "coding-system");
10421   DEFSYM (Qcoding_aliases, "coding-aliases");
10422
10423   DEFSYM (Qeol_type, "eol-type");
10424   DEFSYM (Qunix, "unix");
10425   DEFSYM (Qdos, "dos");
10426
10427   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
10428   DEFSYM (Qpost_read_conversion, "post-read-conversion");
10429   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
10430   DEFSYM (Qdefault_char, "default-char");
10431   DEFSYM (Qundecided, "undecided");
10432   DEFSYM (Qno_conversion, "no-conversion");
10433   DEFSYM (Qraw_text, "raw-text");
10434
10435   DEFSYM (Qiso_2022, "iso-2022");
10436
10437   DEFSYM (Qutf_8, "utf-8");
10438   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
10439
10440   DEFSYM (Qutf_16, "utf-16");
10441   DEFSYM (Qbig, "big");
10442   DEFSYM (Qlittle, "little");
10443
10444   DEFSYM (Qshift_jis, "shift-jis");
10445   DEFSYM (Qbig5, "big5");
10446
10447   DEFSYM (Qcoding_system_p, "coding-system-p");
10448
10449   DEFSYM (Qcoding_system_error, "coding-system-error");
10450   Fput (Qcoding_system_error, Qerror_conditions,
10451         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
10452   Fput (Qcoding_system_error, Qerror_message,
10453         build_string ("Invalid coding system"));
10454
10455   /* Intern this now in case it isn't already done.
10456      Setting this variable twice is harmless.
10457      But don't staticpro it here--that is done in alloc.c.  */
10458   Qchar_table_extra_slots = intern ("char-table-extra-slots");
10459
10460   DEFSYM (Qtranslation_table, "translation-table");
10461   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
10462   DEFSYM (Qtranslation_table_id, "translation-table-id");
10463   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
10464   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
10465
10466   DEFSYM (Qvalid_codes, "valid-codes");
10467
10468   DEFSYM (Qemacs_mule, "emacs-mule");
10469
10470   DEFSYM (QCcategory, ":category");
10471   DEFSYM (QCmnemonic, ":mnemonic");
10472   DEFSYM (QCdefault_char, ":default-char");
10473   DEFSYM (QCdecode_translation_table, ":decode-translation-table");
10474   DEFSYM (QCencode_translation_table, ":encode-translation-table");
10475   DEFSYM (QCpost_read_conversion, ":post-read-conversion");
10476   DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
10477   DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
10478
10479   Vcoding_category_table
10480     = Fmake_vector (make_number (coding_category_max), Qnil);
10481   staticpro (&Vcoding_category_table);
10482   /* Followings are target of code detection.  */
10483   ASET (Vcoding_category_table, coding_category_iso_7,
10484         intern ("coding-category-iso-7"));
10485   ASET (Vcoding_category_table, coding_category_iso_7_tight,
10486         intern ("coding-category-iso-7-tight"));
10487   ASET (Vcoding_category_table, coding_category_iso_8_1,
10488         intern ("coding-category-iso-8-1"));
10489   ASET (Vcoding_category_table, coding_category_iso_8_2,
10490         intern ("coding-category-iso-8-2"));
10491   ASET (Vcoding_category_table, coding_category_iso_7_else,
10492         intern ("coding-category-iso-7-else"));
10493   ASET (Vcoding_category_table, coding_category_iso_8_else,
10494         intern ("coding-category-iso-8-else"));
10495   ASET (Vcoding_category_table, coding_category_utf_8_auto,
10496         intern ("coding-category-utf-8-auto"));
10497   ASET (Vcoding_category_table, coding_category_utf_8_nosig,
10498         intern ("coding-category-utf-8"));
10499   ASET (Vcoding_category_table, coding_category_utf_8_sig,
10500         intern ("coding-category-utf-8-sig"));
10501   ASET (Vcoding_category_table, coding_category_utf_16_be,
10502         intern ("coding-category-utf-16-be"));
10503   ASET (Vcoding_category_table, coding_category_utf_16_auto,
10504         intern ("coding-category-utf-16-auto"));
10505   ASET (Vcoding_category_table, coding_category_utf_16_le,
10506         intern ("coding-category-utf-16-le"));
10507   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
10508         intern ("coding-category-utf-16-be-nosig"));
10509   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
10510         intern ("coding-category-utf-16-le-nosig"));
10511   ASET (Vcoding_category_table, coding_category_charset,
10512         intern ("coding-category-charset"));
10513   ASET (Vcoding_category_table, coding_category_sjis,
10514         intern ("coding-category-sjis"));
10515   ASET (Vcoding_category_table, coding_category_big5,
10516         intern ("coding-category-big5"));
10517   ASET (Vcoding_category_table, coding_category_ccl,
10518         intern ("coding-category-ccl"));
10519   ASET (Vcoding_category_table, coding_category_emacs_mule,
10520         intern ("coding-category-emacs-mule"));
10521   /* Followings are NOT target of code detection.  */
10522   ASET (Vcoding_category_table, coding_category_raw_text,
10523         intern ("coding-category-raw-text"));
10524   ASET (Vcoding_category_table, coding_category_undecided,
10525         intern ("coding-category-undecided"));
10526
10527   DEFSYM (Qinsufficient_source, "insufficient-source");
10528   DEFSYM (Qinconsistent_eol, "inconsistent-eol");
10529   DEFSYM (Qinvalid_source, "invalid-source");
10530   DEFSYM (Qinterrupted, "interrupted");
10531   DEFSYM (Qinsufficient_memory, "insufficient-memory");
10532   DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
10533
10534   defsubr (&Scoding_system_p);
10535   defsubr (&Sread_coding_system);
10536   defsubr (&Sread_non_nil_coding_system);
10537   defsubr (&Scheck_coding_system);
10538   defsubr (&Sdetect_coding_region);
10539   defsubr (&Sdetect_coding_string);
10540   defsubr (&Sfind_coding_systems_region_internal);
10541   defsubr (&Sunencodable_char_position);
10542   defsubr (&Scheck_coding_systems_region);
10543   defsubr (&Sdecode_coding_region);
10544   defsubr (&Sencode_coding_region);
10545   defsubr (&Sdecode_coding_string);
10546   defsubr (&Sencode_coding_string);
10547   defsubr (&Sdecode_sjis_char);
10548   defsubr (&Sencode_sjis_char);
10549   defsubr (&Sdecode_big5_char);
10550   defsubr (&Sencode_big5_char);
10551   defsubr (&Sset_terminal_coding_system_internal);
10552   defsubr (&Sset_safe_terminal_coding_system_internal);
10553   defsubr (&Sterminal_coding_system);
10554   defsubr (&Sset_keyboard_coding_system_internal);
10555   defsubr (&Skeyboard_coding_system);
10556   defsubr (&Sfind_operation_coding_system);
10557   defsubr (&Sset_coding_system_priority);
10558   defsubr (&Sdefine_coding_system_internal);
10559   defsubr (&Sdefine_coding_system_alias);
10560   defsubr (&Scoding_system_put);
10561   defsubr (&Scoding_system_base);
10562   defsubr (&Scoding_system_plist);
10563   defsubr (&Scoding_system_aliases);
10564   defsubr (&Scoding_system_eol_type);
10565   defsubr (&Scoding_system_priority_list);
10566
10567   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
10568                doc: /* List of coding systems.
10569
10570 Do not alter the value of this variable manually.  This variable should be
10571 updated by the functions `define-coding-system' and
10572 `define-coding-system-alias'.  */);
10573   Vcoding_system_list = Qnil;
10574
10575   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
10576                doc: /* Alist of coding system names.
10577 Each element is one element list of coding system name.
10578 This variable is given to `completing-read' as COLLECTION argument.
10579
10580 Do not alter the value of this variable manually.  This variable should be
10581 updated by the functions `make-coding-system' and
10582 `define-coding-system-alias'.  */);
10583   Vcoding_system_alist = Qnil;
10584
10585   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
10586                doc: /* List of coding-categories (symbols) ordered by priority.
10587
10588 On detecting a coding system, Emacs tries code detection algorithms
10589 associated with each coding-category one by one in this order.  When
10590 one algorithm agrees with a byte sequence of source text, the coding
10591 system bound to the corresponding coding-category is selected.
10592
10593 Don't modify this variable directly, but use `set-coding-priority'.  */);
10594   {
10595     int i;
10596
10597     Vcoding_category_list = Qnil;
10598     for (i = coding_category_max - 1; i >= 0; i--)
10599       Vcoding_category_list
10600         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
10601                  Vcoding_category_list);
10602   }
10603
10604   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
10605                doc: /* Specify the coding system for read operations.
10606 It is useful to bind this variable with `let', but do not set it globally.
10607 If the value is a coding system, it is used for decoding on read operation.
10608 If not, an appropriate element is used from one of the coding system alists.
10609 There are three such tables: `file-coding-system-alist',
10610 `process-coding-system-alist', and `network-coding-system-alist'.  */);
10611   Vcoding_system_for_read = Qnil;
10612
10613   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
10614                doc: /* Specify the coding system for write operations.
10615 Programs bind this variable with `let', but you should not set it globally.
10616 If the value is a coding system, it is used for encoding of output,
10617 when writing it to a file and when sending it to a file or subprocess.
10618
10619 If this does not specify a coding system, an appropriate element
10620 is used from one of the coding system alists.
10621 There are three such tables: `file-coding-system-alist',
10622 `process-coding-system-alist', and `network-coding-system-alist'.
10623 For output to files, if the above procedure does not specify a coding system,
10624 the value of `buffer-file-coding-system' is used.  */);
10625   Vcoding_system_for_write = Qnil;
10626
10627   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
10628                doc: /*
10629 Coding system used in the latest file or process I/O.  */);
10630   Vlast_coding_system_used = Qnil;
10631
10632   DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
10633                doc: /*
10634 Error status of the last code conversion.
10635
10636 When an error was detected in the last code conversion, this variable
10637 is set to one of the following symbols.
10638   `insufficient-source'
10639   `inconsistent-eol'
10640   `invalid-source'
10641   `interrupted'
10642   `insufficient-memory'
10643 When no error was detected, the value doesn't change.  So, to check
10644 the error status of a code conversion by this variable, you must
10645 explicitly set this variable to nil before performing code
10646 conversion.  */);
10647   Vlast_code_conversion_error = Qnil;
10648
10649   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
10650                doc: /*
10651 *Non-nil means always inhibit code conversion of end-of-line format.
10652 See info node `Coding Systems' and info node `Text and Binary' concerning
10653 such conversion.  */);
10654   inhibit_eol_conversion = 0;
10655
10656   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
10657                doc: /*
10658 Non-nil means process buffer inherits coding system of process output.
10659 Bind it to t if the process output is to be treated as if it were a file
10660 read from some filesystem.  */);
10661   inherit_process_coding_system = 0;
10662
10663   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
10664                doc: /*
10665 Alist to decide a coding system to use for a file I/O operation.
10666 The format is ((PATTERN . VAL) ...),
10667 where PATTERN is a regular expression matching a file name,
10668 VAL is a coding system, a cons of coding systems, or a function symbol.
10669 If VAL is a coding system, it is used for both decoding and encoding
10670 the file contents.
10671 If VAL is a cons of coding systems, the car part is used for decoding,
10672 and the cdr part is used for encoding.
10673 If VAL is a function symbol, the function must return a coding system
10674 or a cons of coding systems which are used as above.  The function is
10675 called with an argument that is a list of the arguments with which
10676 `find-operation-coding-system' was called.  If the function can't decide
10677 a coding system, it can return `undecided' so that the normal
10678 code-detection is performed.
10679
10680 See also the function `find-operation-coding-system'
10681 and the variable `auto-coding-alist'.  */);
10682   Vfile_coding_system_alist = Qnil;
10683
10684   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
10685                doc: /*
10686 Alist to decide a coding system to use for a process I/O operation.
10687 The format is ((PATTERN . VAL) ...),
10688 where PATTERN is a regular expression matching a program name,
10689 VAL is a coding system, a cons of coding systems, or a function symbol.
10690 If VAL is a coding system, it is used for both decoding what received
10691 from the program and encoding what sent to the program.
10692 If VAL is a cons of coding systems, the car part is used for decoding,
10693 and the cdr part is used for encoding.
10694 If VAL is a function symbol, the function must return a coding system
10695 or a cons of coding systems which are used as above.
10696
10697 See also the function `find-operation-coding-system'.  */);
10698   Vprocess_coding_system_alist = Qnil;
10699
10700   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
10701                doc: /*
10702 Alist to decide a coding system to use for a network I/O operation.
10703 The format is ((PATTERN . VAL) ...),
10704 where PATTERN is a regular expression matching a network service name
10705 or is a port number to connect to,
10706 VAL is a coding system, a cons of coding systems, or a function symbol.
10707 If VAL is a coding system, it is used for both decoding what received
10708 from the network stream and encoding what sent to the network stream.
10709 If VAL is a cons of coding systems, the car part is used for decoding,
10710 and the cdr part is used for encoding.
10711 If VAL is a function symbol, the function must return a coding system
10712 or a cons of coding systems which are used as above.
10713
10714 See also the function `find-operation-coding-system'.  */);
10715   Vnetwork_coding_system_alist = Qnil;
10716
10717   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
10718                doc: /* Coding system to use with system messages.
10719 Also used for decoding keyboard input on X Window system.  */);
10720   Vlocale_coding_system = Qnil;
10721
10722   /* The eol mnemonics are reset in startup.el system-dependently.  */
10723   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
10724                doc: /*
10725 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
10726   eol_mnemonic_unix = build_string (":");
10727
10728   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
10729                doc: /*
10730 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
10731   eol_mnemonic_dos = build_string ("\\");
10732
10733   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
10734                doc: /*
10735 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
10736   eol_mnemonic_mac = build_string ("/");
10737
10738   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
10739                doc: /*
10740 *String displayed in mode line when end-of-line format is not yet determined.  */);
10741   eol_mnemonic_undecided = build_string (":");
10742
10743   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
10744                doc: /*
10745 *Non-nil enables character translation while encoding and decoding.  */);
10746   Venable_character_translation = Qt;
10747
10748   DEFVAR_LISP ("standard-translation-table-for-decode",
10749                &Vstandard_translation_table_for_decode,
10750                doc: /* Table for translating characters while decoding.  */);
10751   Vstandard_translation_table_for_decode = Qnil;
10752
10753   DEFVAR_LISP ("standard-translation-table-for-encode",
10754                &Vstandard_translation_table_for_encode,
10755                doc: /* Table for translating characters while encoding.  */);
10756   Vstandard_translation_table_for_encode = Qnil;
10757
10758   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
10759                doc: /* Alist of charsets vs revision numbers.
10760 While encoding, if a charset (car part of an element) is found,
10761 designate it with the escape sequence identifying revision (cdr part
10762 of the element).  */);
10763   Vcharset_revision_table = Qnil;
10764
10765   DEFVAR_LISP ("default-process-coding-system",
10766                &Vdefault_process_coding_system,
10767                doc: /* Cons of coding systems used for process I/O by default.
10768 The car part is used for decoding a process output,
10769 the cdr part is used for encoding a text to be sent to a process.  */);
10770   Vdefault_process_coding_system = Qnil;
10771
10772   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
10773                doc: /*
10774 Table of extra Latin codes in the range 128..159 (inclusive).
10775 This is a vector of length 256.
10776 If Nth element is non-nil, the existence of code N in a file
10777 \(or output of subprocess) doesn't prevent it to be detected as
10778 a coding system of ISO 2022 variant which has a flag
10779 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
10780 or reading output of a subprocess.
10781 Only 128th through 159th elements have a meaning.  */);
10782   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
10783
10784   DEFVAR_LISP ("select-safe-coding-system-function",
10785                &Vselect_safe_coding_system_function,
10786                doc: /*
10787 Function to call to select safe coding system for encoding a text.
10788
10789 If set, this function is called to force a user to select a proper
10790 coding system which can encode the text in the case that a default
10791 coding system used in each operation can't encode the text.  The
10792 function should take care that the buffer is not modified while
10793 the coding system is being selected.
10794
10795 The default value is `select-safe-coding-system' (which see).  */);
10796   Vselect_safe_coding_system_function = Qnil;
10797
10798   DEFVAR_BOOL ("coding-system-require-warning",
10799                &coding_system_require_warning,
10800                doc: /* Internal use only.
10801 If non-nil, on writing a file, `select-safe-coding-system-function' is
10802 called even if `coding-system-for-write' is non-nil.  The command
10803 `universal-coding-system-argument' binds this variable to t temporarily.  */);
10804   coding_system_require_warning = 0;
10805
10806
10807   DEFVAR_BOOL ("inhibit-iso-escape-detection",
10808                &inhibit_iso_escape_detection,
10809                doc: /*
10810 If non-nil, Emacs ignores ISO-2022 escape sequences during code detection.
10811
10812 When Emacs reads text, it tries to detect how the text is encoded.
10813 This code detection is sensitive to escape sequences.  If Emacs sees
10814 a valid ISO-2022 escape sequence, it assumes the text is encoded in one
10815 of the ISO2022 encodings, and decodes text by the corresponding coding
10816 system (e.g. `iso-2022-7bit').
10817
10818 However, there may be a case that you want to read escape sequences in
10819 a file as is.  In such a case, you can set this variable to non-nil.
10820 Then the code detection will ignore any escape sequences, and no text is
10821 detected as encoded in some ISO-2022 encoding.  The result is that all
10822 escape sequences become visible in a buffer.
10823
10824 The default value is nil, and it is strongly recommended not to change
10825 it.  That is because many Emacs Lisp source files that contain
10826 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
10827 in Emacs's distribution, and they won't be decoded correctly on
10828 reading if you suppress escape sequence detection.
10829
10830 The other way to read escape sequences in a file without decoding is
10831 to explicitly specify some coding system that doesn't use ISO-2022
10832 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
10833   inhibit_iso_escape_detection = 0;
10834
10835   DEFVAR_BOOL ("inhibit-null-byte-detection",
10836                &inhibit_null_byte_detection,
10837                doc: /* If non-nil, Emacs ignores null bytes on code detection.
10838 By default, Emacs treats it as binary data, and does not attempt to
10839 decode it.  The effect is as if you specified `no-conversion' for
10840 reading that text.
10841
10842 Set this to non-nil when a regular text happens to include null bytes.
10843 Examples are Index nodes of Info files and null-byte delimited output
10844 from GNU Find and GNU Grep.  Emacs will then ignore the null bytes and
10845 decode text as usual.  */);
10846   inhibit_null_byte_detection = 0;
10847
10848   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
10849                doc: /* Char table for translating self-inserting characters.
10850 This is applied to the result of input methods, not their input.
10851 See also `keyboard-translate-table'.
10852
10853 Use of this variable for character code unification was rendered
10854 obsolete in Emacs 23.1 and later, since Unicode is now the basis of
10855 internal character representation.  */);
10856     Vtranslation_table_for_input = Qnil;
10857
10858   {
10859     Lisp_Object args[coding_arg_max];
10860     Lisp_Object plist[16];
10861     int i;
10862
10863     for (i = 0; i < coding_arg_max; i++)
10864       args[i] = Qnil;
10865
10866     plist[0] = intern (":name");
10867     plist[1] = args[coding_arg_name] = Qno_conversion;
10868     plist[2] = intern (":mnemonic");
10869     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
10870     plist[4] = intern (":coding-type");
10871     plist[5] = args[coding_arg_coding_type] = Qraw_text;
10872     plist[6] = intern (":ascii-compatible-p");
10873     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
10874     plist[8] = intern (":default-char");
10875     plist[9] = args[coding_arg_default_char] = make_number (0);
10876     plist[10] = intern (":for-unibyte");
10877     plist[11] = args[coding_arg_for_unibyte] = Qt;
10878     plist[12] = intern (":docstring");
10879     plist[13] = build_string ("Do no conversion.\n\
10880 \n\
10881 When you visit a file with this coding, the file is read into a\n\
10882 unibyte buffer as is, thus each byte of a file is treated as a\n\
10883 character.");
10884     plist[14] = intern (":eol-type");
10885     plist[15] = args[coding_arg_eol_type] = Qunix;
10886     args[coding_arg_plist] = Flist (16, plist);
10887     Fdefine_coding_system_internal (coding_arg_max, args);
10888
10889     plist[1] = args[coding_arg_name] = Qundecided;
10890     plist[3] = args[coding_arg_mnemonic] = make_number ('-');
10891     plist[5] = args[coding_arg_coding_type] = Qundecided;
10892     /* This is already set.
10893        plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
10894     plist[8] = intern (":charset-list");
10895     plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
10896     plist[11] = args[coding_arg_for_unibyte] = Qnil;
10897     plist[13] = build_string ("No conversion on encoding, automatic conversion on decoding.");
10898     plist[15] = args[coding_arg_eol_type] = Qnil;
10899     args[coding_arg_plist] = Flist (16, plist);
10900     Fdefine_coding_system_internal (coding_arg_max, args);
10901   }
10902
10903   setup_coding_system (Qno_conversion, &safe_terminal_coding);
10904
10905   {
10906     int i;
10907
10908     for (i = 0; i < coding_category_max; i++)
10909       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
10910   }
10911 #if defined (MSDOS) || defined (WINDOWSNT)
10912   system_eol_type = Qdos;
10913 #else
10914   system_eol_type = Qunix;
10915 #endif
10916   staticpro (&system_eol_type);
10917 }
10918
10919 char *
10920 emacs_strerror (error_number)
10921      int error_number;
10922 {
10923   char *str;
10924
10925   synchronize_system_messages_locale ();
10926   str = strerror (error_number);
10927
10928   if (! NILP (Vlocale_coding_system))
10929     {
10930       Lisp_Object dec = code_convert_string_norecord (build_string (str),
10931                                                       Vlocale_coding_system,
10932                                                       0);
10933       str = (char *) SDATA (dec);
10934     }
10935
10936   return str;
10937 }
10938
10939 #endif /* emacs */
10940
10941 /* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
10942    (do not change this comment) */