src/coding.c

   1 /* Coding system handler (conversion, detection, and etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /*** TABLE OF CONTENTS ***
  23
  24   0. General comments
  25   1. Preamble
  26   2. Emacs' internal format (emacs-mule) handlers
  27   3. ISO2022 handlers
  28   4. Shift-JIS and BIG5 handlers
  29   5. CCL handlers
  30   6. End-of-line handlers
  31   7. C library functions
  32   8. Emacs Lisp library functions
  33   9. Post-amble
  34
  35 */
  36
  37 /*** 0. General comments ***/
  38
  39
  40 /*** GENERAL NOTE on CODING SYSTEMS ***
  41
  42   A coding system is an encoding mechanism for one or more character
  43   sets.  Here's a list of coding systems which Emacs can handle.  When
  44   we say "decode", it means converting some other coding system to
  45   Emacs' internal format (emacs-mule), and when we say "encode",
  46   it means converting the coding system emacs-mule to some other
  47   coding system.
  48
  49   0. Emacs' internal format (emacs-mule)
  50
  51   Emacs itself holds a multi-lingual character in buffers and strings
  52   in a special format.  Details are described in section 2.
  53
  54   1. ISO2022
  55
  56   The most famous coding system for multiple character sets.  X's
  57   Compound Text, various EUCs (Extended Unix Code), and coding
  58   systems used in Internet communication such as ISO-2022-JP are
  59   all variants of ISO2022.  Details are described in section 3.
  60
  61   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  62
  63   A coding system to encode character sets: ASCII, JISX0201, and
  64   JISX0208.  Widely used for PC's in Japan.  Details are described in
  65   section 4.
  66
  67   3. BIG5
  68
  69   A coding system to encode the character sets ASCII and Big5.  Widely
  70   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  71   described in section 4.  In this file, when we write "BIG5"
  72   (all uppercase), we mean the coding system, and when we write
  73   "Big5" (capitalized), we mean the character set.
  74
  75   4. Raw text
  76
  77   A coding system for text containing random 8-bit code.  Emacs does
  78   no code conversion on such text except for end-of-line format.
  79
  80   5. Other
  81
  82   If a user wants to read/write text encoded in a coding system not
  83   listed above, he can supply a decoder and an encoder for it as CCL
  84   (Code Conversion Language) programs.  Emacs executes the CCL program
  85   while reading/writing.
  86
  87   Emacs represents a coding system by a Lisp symbol that has a property
  88   `coding-system'.  But, before actually using the coding system, the
  89   information about it is set in a structure of type `struct
  90   coding_system' for rapid processing.  See section 6 for more details.
  91
  92 */
  93
  94 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  95
  96   How end-of-line of text is encoded depends on the operating system.
  97   For instance, Unix's format is just one byte of `line-feed' code,
  98   whereas DOS's format is two-byte sequence of `carriage-return' and
  99   `line-feed' codes.  MacOS's format is usually one byte of
 100   `carriage-return'.
 101
 102   Since text character encoding and end-of-line encoding are
 103   independent, any coding system described above can have any
 104   end-of-line format.  So Emacs has information about end-of-line
 105   format in each coding-system.  See section 6 for more details.
 106
 107 */
 108
 109 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 110
 111   These functions check if a text between SRC and SRC_END is encoded
 112   in the coding system category XXX.  Each returns an integer value in
 113   which appropriate flag bits for the category XXX are set.  The flag
 114   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 115   template for these functions.  If MULTIBYTEP is nonzero, 8-bit codes
 116   of the range 0x80..0x9F are in multibyte form.  */
 117 #if 0
 118 static int
 119 detect_coding_XXX (src, src_end, multibytep)
 120      unsigned char *src, *src_end;
 121      int multibytep;
 122 {
 123   ...
 124 }
 125 #endif
 126
 127 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 128
 129   These functions decode SRC_BYTES length of unibyte text at SOURCE
 130   encoded in CODING to Emacs' internal format.  The resulting
 131   multibyte text goes to a place pointed to by DESTINATION, the length
 132   of which should not exceed DST_BYTES.
 133
 134   These functions set the information about original and decoded texts
 135   in the members `produced', `produced_char', `consumed', and
 136   `consumed_char' of the structure *CODING.  They also set the member
 137   `result' to one of CODING_FINISH_XXX indicating how the decoding
 138   finished.
 139
 140   DST_BYTES zero means that the source area and destination area are
 141   overlapped, which means that we can produce a decoded text until it
 142   reaches the head of the not-yet-decoded source text.
 143
 144   Below is a template for these functions.  */
 145 #if 0
 146 static void
 147 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 148      struct coding_system *coding;
 149      unsigned char *source, *destination;
 150      int src_bytes, dst_bytes;
 151 {
 152   ...
 153 }
 154 #endif
 155
 156 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 157
 158   These functions encode SRC_BYTES length text at SOURCE from Emacs'
 159   internal multibyte format to CODING.  The resulting unibyte text
 160   goes to a place pointed to by DESTINATION, the length of which
 161   should not exceed DST_BYTES.
 162
 163   These functions set the information about original and encoded texts
 164   in the members `produced', `produced_char', `consumed', and
 165   `consumed_char' of the structure *CODING.  They also set the member
 166   `result' to one of CODING_FINISH_XXX indicating how the encoding
 167   finished.
 168
 169   DST_BYTES zero means that the source area and destination area are
 170   overlapped, which means that we can produce encoded text until it
 171   reaches the head of the not-yet-encoded source text.
 172
 173   Below is a template for these functions.  */
 174 #if 0
 175 static void
 176 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 177      struct coding_system *coding;
 178      unsigned char *source, *destination;
 179      int src_bytes, dst_bytes;
 180 {
 181   ...
 182 }
 183 #endif
 184
 185 /*** COMMONLY USED MACROS ***/
 186
 187 /* The following two macros ONE_MORE_BYTE and TWO_MORE_BYTES safely
 188    get one and two bytes from the source text respectively.  If
 189    there are not enough bytes in the source, they set
 190    coding->result to CODING_FINISH_INSUFFICIENT_SRC and jump to
 191    `label_end_of_loop' without advancing `src'.  The caller should
 192    set variables `coding', `src' and `src_end' to appropriate
 193    pointers in advance.  These macros are called from decoding
        routines `decode_coding_XXX', thus it is assumed that the source
        text is unibyte.  */
 194
 195 #define ONE_MORE_BYTE(c1)                                       \
 196   do {                                                          \
 197     if (src >= src_end)                                         \
 198       {                                                         \
 199         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 200         goto label_end_of_loop;                                 \
 201       }                                                         \
 202     c1 = *src++;                                                \
 203   } while (0)
 204
 205 #define TWO_MORE_BYTES(c1, c2)                                  \
 206   do {                                                          \
 207     if (src + 1 >= src_end)                                     \
 208       {                                                         \
 209         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 210         goto label_end_of_loop;                                 \
 211       }                                                         \
 212     c1 = *src++;                                                \
 213     c2 = *src++;                                                \
 214   } while (0)
 215
 216
 217 /* Like ONE_MORE_BYTE, but 8-bit bytes of data at SRC are in multibyte
 218    form if MULTIBYTEP is nonzero.  In that case a
        LEADING_CODE_8_BIT_CONTROL byte is consumed together with the byte
        following it, and C1 is set to that following byte minus 0x20.

        If the source ends right after such a leading code, treat it as
        insufficient source (set coding->result, jump to
        `label_end_of_loop', consume nothing) rather than reading the
        byte at SRC_END.  The caller should set variables `coding', `src'
        and `src_end' in advance.  */
 219
 220 #define ONE_MORE_BYTE_CHECK_MULTIBYTE(c1, multibytep)           \
 221   do {                                                          \
 222     if (src >= src_end                                          \
             || (multibytep && *src == LEADING_CODE_8_BIT_CONTROL    \
                 && src + 1 >= src_end))                             \
 223       {                                                         \
 224         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 225         goto label_end_of_loop;                                 \
 226       }                                                         \
 227     c1 = *src++;                                                \
 228     if (multibytep && c1 == LEADING_CODE_8_BIT_CONTROL)         \
 229       c1 = *src++ - 0x20;                                       \
 230   } while (0)
 231
 232 /* Set C to the next character at the source text pointed by `src'.
 233    If there are not enough characters in the source, jump to
 234    `label_end_of_loop'.  The caller should set variables `coding',
 235    `src', `src_end', and `translation_table' to appropriate pointers
 236    in advance.  This macro is used in encoding routines
 237    `encode_coding_XXX', thus it assumes that the source text is in
 238    multibyte form except for 8-bit characters.  8-bit characters are
 239    in multibyte form if coding->src_multibyte is nonzero, else they
 240    are represented by a single byte.

        If `translation_table' is non-nil, C is passed through
        translate_char.  `src' is then advanced past the bytes of the
        character that was read.  */
 241
 242 #define ONE_MORE_CHAR(c)                                        \
 243   do {                                                          \
 244     int len = src_end - src;                                    \
 245     int bytes;                                                  \
 246     if (len <= 0)                                               \
 247       {                                                         \
 248         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 249         goto label_end_of_loop;                                 \
 250       }                                                         \
 251     if (coding->src_multibyte                                   \
 252         || UNIBYTE_STR_AS_MULTIBYTE_P (src, len, bytes))        \
 253       c = STRING_CHAR_AND_LENGTH (src, len, bytes);             \
 254     else                                                        \
 255       c = *src, bytes = 1;                                      \
 256     if (!NILP (translation_table))                              \
 257       c = translate_char (translation_table, c, -1, 0, 0);      \
 258     src += bytes;                                               \
 259   } while (0)
 260
 261
 262 /* Produce a multibyte form of character C to `dst'.  Jump to
 263    `label_end_of_loop' (with coding->result set to
 264    CODING_FINISH_INSUFFICIENT_DST) if there's not enough space at
 265    `dst'.  The space ends at `dst_end' if `dst_bytes' is nonzero,
 266    otherwise at `src'.
 267
 268    C is written to `dst' (and coding->produced_char is incremented)
 269    when COMPOSING_P (coding) is false, or when coding->composing is
 270    COMPOSITION_RELATIVE or COMPOSITION_WITH_RULE.

        When COMPOSING_P (coding) is true and coding->composing is not
        COMPOSITION_RELATIVE, C is also recorded in
        coding->cmp_data->data as a component of the current
        composition, and coding->composition_rule_follows is set to
        nonzero unless the method is COMPOSITION_WITH_ALTCHARS.  So for
        the remaining methods (e.g. an ALTCHAR of
        COMPOSITION_WITH_ALTCHARS) the character goes to
        coding->cmp_data->data instead of `dst'.

        C is evaluated more than once.  This macro is used in decoding
        routines.  */
 271
 272 #define EMIT_CHAR(c)                                                    \
 273   do {                                                                  \
 274     if (! COMPOSING_P (coding)                                          \
 275         || coding->composing == COMPOSITION_RELATIVE                    \
 276         || coding->composing == COMPOSITION_WITH_RULE)                  \
 277       {                                                                 \
 278         int bytes = CHAR_BYTES (c);                                     \
 279         if ((dst + bytes) > (dst_bytes ? dst_end : src))                \
 280           {                                                             \
 281             coding->result = CODING_FINISH_INSUFFICIENT_DST;            \
 282             goto label_end_of_loop;                                     \
 283           }                                                             \
 284         dst += CHAR_STRING (c, dst);                                    \
 285         coding->produced_char++;                                        \
 286       }                                                                 \
 287                                                                         \
 288     if (COMPOSING_P (coding)                                            \
 289         && coding->composing != COMPOSITION_RELATIVE)                   \
 290       {                                                                 \
 291         CODING_ADD_COMPOSITION_COMPONENT (coding, c);                   \
 292         coding->composition_rule_follows                                \
 293           = coding->composing != COMPOSITION_WITH_ALTCHARS;             \
 294       }                                                                 \
 295   } while (0)
 296
 297
     /* Store the byte C at `dst' and advance `dst' by one.  If `dst' has
        already reached the end of the destination (`dst_end' when
        `dst_bytes' is nonzero, otherwise `src'), store nothing, set
        coding->result to CODING_FINISH_INSUFFICIENT_DST and jump to
        `label_end_of_loop'.  The caller should set variables `coding',
        `dst', `dst_end', `dst_bytes' and `src' in advance.  */
 298 #define EMIT_ONE_BYTE(c)                                        \
 299   do {                                                          \
 300     if (dst >= (dst_bytes ? dst_end : src))                     \
 301       {                                                         \
 302         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 303         goto label_end_of_loop;                                 \
 304       }                                                         \
 305     *dst++ = c;                                                 \
 306   } while (0)
 307
     /* Store the bytes C1 and C2, in that order, at `dst' and advance
        `dst' by two.  If fewer than two bytes are left before the end of
        the destination (`dst_end' when `dst_bytes' is nonzero, otherwise
        `src'), store nothing, set coding->result to
        CODING_FINISH_INSUFFICIENT_DST and jump to `label_end_of_loop'.  */
 308 #define EMIT_TWO_BYTES(c1, c2)                                  \
 309   do {                                                          \
 310     if (dst + 2 > (dst_bytes ? dst_end : src))                  \
 311       {                                                         \
 312         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 313         goto label_end_of_loop;                                 \
 314       }                                                         \
 315     *dst++ = c1, *dst++ = c2;                                   \
 316   } while (0)
 317
     /* Copy the bytes from FROM up to (but not including) TO to `dst',
        advancing `dst'.  FROM itself is incremented until it equals TO,
        so it must be a modifiable pointer variable.  If the bytes do not
        fit before the end of the destination (`dst_end' when `dst_bytes'
        is nonzero, otherwise `src'), copy nothing, set coding->result to
        CODING_FINISH_INSUFFICIENT_DST and jump to `label_end_of_loop'.  */
 318 #define EMIT_BYTES(from, to)                                    \
 319   do {                                                          \
 320     if (dst + (to - from) > (dst_bytes ? dst_end : src))        \
 321       {                                                         \
 322         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 323         goto label_end_of_loop;                                 \
 324       }                                                         \
 325     while (from < to)                                           \
 326       *dst++ = *from++;                                         \
 327   } while (0)
 328
 329 \f
 330 /*** 1. Preamble ***/
 331
 332 #ifdef emacs
 333 #include <config.h>
 334 #endif
 335
 336 #include <stdio.h>
 337
 338 #ifdef emacs
 339
 340 #include "lisp.h"
 341 #include "buffer.h"
 342 #include "charset.h"
 343 #include "composite.h"
 344 #include "ccl.h"
 345 #include "coding.h"
 346 #include "window.h"
 347
 348 #else  /* not emacs */
 349
 350 #include "mulelib.h"
 351
 352 #endif /* not emacs */
 353
     /* Lisp objects used by the coding system code.  In this part of the
        file, Qcoding_system is the symbol property under which a coding
        system's spec (a vector) is stored, and Qsafe_chars is a key
        looked up in the property list kept in slot 3 of that spec (see
        coding_safe_chars).
        NOTE(review): the remaining objects are presumably symbols named
        after the variables and initialized elsewhere -- confirm.  */
 354 Lisp_Object Qcoding_system, Qeol_type;
 355 Lisp_Object Qbuffer_file_coding_system;
 356 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 357 Lisp_Object Qno_conversion, Qundecided;
 358 Lisp_Object Qcoding_system_history;
 359 Lisp_Object Qsafe_chars;
 360 Lisp_Object Qvalid_codes;
 361
 362 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 363 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 364 Lisp_Object Qstart_process, Qopen_network_stream;
 365 Lisp_Object Qtarget_idx;
 366
 367 Lisp_Object Vselect_safe_coding_system_function;
 368
 369 /* Mnemonic string for each format of end-of-line.  */
 370 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 371 /* Mnemonic string to indicate format of end-of-line is not yet
 372    decided.  */
 373 Lisp_Object eol_mnemonic_undecided;
 374
 375 /* Format of end-of-line decided by system.  This is CODING_EOL_LF on
 376    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
 377 int system_eol_type;
 378
 379 #ifdef emacs
 380
 381 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 382
 383 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 384
 385 /* Coding system emacs-mule and raw-text are for converting only
 386    end-of-line format.  */
 387 Lisp_Object Qemacs_mule, Qraw_text;
 388
 389 /* Coding-systems are handed between Emacs Lisp programs and C internal
 390    routines by the following three variables.  */
 391 /* Coding-system for reading files and receiving data from process.  */
 392 Lisp_Object Vcoding_system_for_read;
 393 /* Coding-system for writing files and sending data to process.  */
 394 Lisp_Object Vcoding_system_for_write;
 395 /* Coding-system actually used in the latest I/O.  */
 396 Lisp_Object Vlast_coding_system_used;
 397
 398 /* A vector of length 256 which contains information about special
 399    Latin codes (especially for dealing with Microsoft codes).  */
 400 Lisp_Object Vlatin_extra_code_table;
 401
 402 /* Flag to inhibit code conversion of end-of-line format.  */
 403 int inhibit_eol_conversion;
 404
 405 /* Flag to inhibit ISO2022 escape sequence detection.  */
 406 int inhibit_iso_escape_detection;
 407
 408 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 409 int inherit_process_coding_system;
 410
 411 /* Coding system to be used to encode text for terminal display.  */
 412 struct coding_system terminal_coding;
 413
 414 /* Coding system to be used to encode text for terminal display when
 415    terminal coding system is nil.  */
 416 struct coding_system safe_terminal_coding;
 417
 418 /* Coding system of what is sent from terminal keyboard.  */
 419 struct coding_system keyboard_coding;
 420
 421 /* Default coding system to be used to write a file.  */
 422 struct coding_system default_buffer_file_coding;
 423
 424 Lisp_Object Vfile_coding_system_alist;
 425 Lisp_Object Vprocess_coding_system_alist;
 426 Lisp_Object Vnetwork_coding_system_alist;
 427
 428 Lisp_Object Vlocale_coding_system;
 429
 430 #endif /* emacs */
 431
 432 Lisp_Object Qcoding_category, Qcoding_category_index;
 433
 434 /* List of symbols `coding-category-xxx' ordered by priority.  */
 435 Lisp_Object Vcoding_category_list;
 436
 437 /* Table of coding categories (Lisp symbols).  */
 438 Lisp_Object Vcoding_category_table;
 439
 440 /* Table of the name of the symbol for each coding category.  The
        table has CODING_CATEGORY_IDX_MAX slots.
        NOTE(review): the order of the names presumably has to match the
        coding category index values defined elsewhere -- confirm before
        adding or reordering entries.  */
 441 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
 442   "coding-category-emacs-mule",
 443   "coding-category-sjis",
 444   "coding-category-iso-7",
 445   "coding-category-iso-7-tight",
 446   "coding-category-iso-8-1",
 447   "coding-category-iso-8-2",
 448   "coding-category-iso-7-else",
 449   "coding-category-iso-8-else",
 450   "coding-category-ccl",
 451   "coding-category-big5",
 452   "coding-category-utf-8",
 453   "coding-category-utf-16-be",
 454   "coding-category-utf-16-le",
 455   "coding-category-raw-text",
 456   "coding-category-binary"
 457 };
 458
 459 /* Table of pointers to the coding systems corresponding to each
 460    coding category.  It has one slot per coding category.  */
 461 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
 462
 463 /* Table of coding category masks.  The Nth element is the mask of
 464    the coding category whose priority is Nth.  */
 465 static
 466 int coding_priorities[CODING_CATEGORY_IDX_MAX];
 467
 468 /* Flag to tell if we look up translation table on character code
 469    conversion.  */
 470 Lisp_Object Venable_character_translation;
 471 /* Standard translation table to look up on decoding (reading).  */
 472 Lisp_Object Vstandard_translation_table_for_decode;
 473 /* Standard translation table to look up on encoding (writing).  */
 474 Lisp_Object Vstandard_translation_table_for_encode;
 475
 476 Lisp_Object Qtranslation_table;
 477 Lisp_Object Qtranslation_table_id;
 478 Lisp_Object Qtranslation_table_for_decode;
 479 Lisp_Object Qtranslation_table_for_encode;
 480
 481 /* Alist of charsets vs revision number.  */
 482 Lisp_Object Vcharset_revision_alist;
 483
 484 /* Default coding systems used for process I/O.  */
 485 Lisp_Object Vdefault_process_coding_system;
 486
 487 /* Global flag to tell that we can't call post-read-conversion and
 488    pre-write-conversion functions.  Usually the value is zero, but it
 489    is set to 1 temporarily while such functions are running.  This is
 490    to avoid infinite recursive call.  */
 491 static int inhibit_pre_post_conversion;
 492
 493 /* Char-table containing safe coding systems of each character.  */
 494 Lisp_Object Vchar_coding_system_table;
 495 Lisp_Object Qchar_coding_system;
 496
 497 /* Return `safe-chars' property of coding system CODING.  Don't check
 498    validity of CODING.

        The property is looked up in the property list stored in slot 3
        of the coding spec vector that is the `coding-system' property
        of CODING->symbol.  If the property value is a char-table, it is
        returned as is; otherwise Qt is returned, which
        CODING_SAFE_CHAR_P treats as "every character is safe".  */
 499
 500 Lisp_Object
 501 coding_safe_chars (coding)
 502      struct coding_system *coding;
 503 {
 504   Lisp_Object coding_spec, plist, safe_chars;
 505
 506   coding_spec = Fget (coding->symbol, Qcoding_system);
 507   plist = XVECTOR (coding_spec)->contents[3];
 508   safe_chars = Fplist_get (plist, Qsafe_chars);
 509   return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt);
 510 }
 511
     /* Return nonzero if character C is safe according to SAFE_CHARS.
        SAFE_CHARS is either Qt, in which case every character is
        accepted, or a char-table, in which case C is accepted when its
        entry is non-nil.  The Qt test comes first so that
        CHAR_TABLE_REF is only applied to a char-table.  */
 512 #define CODING_SAFE_CHAR_P(safe_chars, c) \
 513   (EQ (safe_chars, Qt) || !NILP (CHAR_TABLE_REF (safe_chars, c)))
 514
 515 \f
 516 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 517
 518 /* Emacs' internal format for representation of multiple character
 519    sets is a kind of multi-byte encoding, i.e. characters are
 520    represented by variable-length sequences of one-byte codes.
 521
 522    ASCII characters and control characters (e.g. `tab', `newline') are
 523    represented by one-byte sequences which are their ASCII codes, in
 524    the range 0x00 through 0x7F.
 525
 526    8-bit characters of the range 0x80..0x9F are represented by
 527    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
 528    code + 0x20).
 529
 530    8-bit characters of the range 0xA0..0xFF are represented by
 531    one-byte sequences which are their 8-bit code.
 532
 533    The other characters are represented by a sequence of `base
 534    leading-code', optional `extended leading-code', and one or two
 535    `position-code's.  The length of the sequence is determined by the
 536    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
 537    whereas extended leading-code and position-code take the range 0xA0
 538    through 0xFF.  See `charset.h' for more details about leading-code
 539    and position-code.
 540
 541    --- CODE RANGE of Emacs' internal format ---
 542    character set        range
 543    -------------        -----
 544    ascii                0x00..0x7F
 545    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
 546    eight-bit-graphic    0xA0..0xFF
 547    ELSE                 0x81..0x9D + [0xA0..0xFF]+
 548    ---------------------------------------------
 549
 550    As this is the internal character representation, the format is
 551    usually not used externally (i.e. in a file or in a data sent to a
 552    process).  But, it is possible to have a text externally in this
 553    format (i.e. by encoding by the coding system `emacs-mule').
 554
 555    In that case, a sequence of one-byte codes has a slightly different
 556    form.
 557
 558    Firstly, all characters in eight-bit-control are represented by
 559    one-byte sequences which are their 8-bit code.
 560
 561    Next, character composition data are represented by the byte
 562    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
 563    where,
 564         METHOD is 0xF0 plus one of composition method (enum
 565         composition_method),
 566
 567         BYTES is 0xA0 plus the byte length of these composition data,
 568
 569         CHARS is 0xA0 plus the number of characters composed by these
 570         data,
 571
 572         COMPONENTs are characters of multibyte form or composition
 573         rules encoded by two-byte of ASCII codes.
 574
 575    In addition, for backward compatibility, the following formats are
 576    also recognized as composition data on decoding.
 577
 578    0x80 MSEQ ...
 579    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
 580
 581    Here,
 582         MSEQ is a multibyte form but in this special format:
 583           ASCII: 0xA0 ASCII_CODE+0x80,
 584           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
 585         RULE is a one byte code of the range 0xA0..0xF0 that
 586         represents a composition rule.
 587   */
 588
     /* Table with one `enum emacs_code_class_type' entry for each of the
        256 possible byte values.
        NOTE(review): presumably indexed by a byte of emacs-mule text and
        filled in during initialization elsewhere -- confirm.  */
 589 enum emacs_code_class_type emacs_code_class[256];
 590
 591 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 592    Check if a text is encoded in Emacs' internal format.  If it is,
 593    return CODING_CATEGORY_MASK_EMACS_MULE, else return 0.

        The text is rejected (0 is returned) when a byte, after undoing
        the old-style composition encoding if we are inside such a
        sequence, is ISO_CODE_ESC, ISO_CODE_SI or ISO_CODE_SO, or is in
        the range 0x81..0x9F without starting a sequence accepted by
        UNIBYTE_STR_AS_MULTIBYTE_P.  Reaching SRC_END without meeting
        such a byte means the text is accepted.  */
 594
 595 static int
 596 detect_coding_emacs_mule (src, src_end, multibytep)
 597       unsigned char *src, *src_end;
 598       int multibytep;
 599 {
 600   unsigned char c;
 601   int composing = 0;
 602   /* Dummy for ONE_MORE_BYTE.  */
 603   struct coding_system dummy_coding;
 604   struct coding_system *coding = &dummy_coding;
 605
 606   while (1)
 607     {
           /* Jumps to label_end_of_loop when the source is exhausted;
              that is the only way out of this loop other than the
              `return 0' statements below.  */
 608       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
 609
 610       if (composing)
 611         {
               /* Inside an old-style composition sequence.  A byte below
                  0xA0 ends the sequence.  0xA0 introduces an ASCII
                  component whose code follows with the high bit set.
                  Any other byte is shifted up by 0x20, so shift it
                  back before classifying it below.  */
 612           if (c < 0xA0)
 613             composing = 0;
 614           else if (c == 0xA0)
 615             {
 616               ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
 617               c &= 0x7F;
 618             }
 619           else
 620             c -= 0x20;
 621         }
 622
 623       if (c < 0x20)
 624         {
               /* These control codes make the text not emacs-mule.  */
 625           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
 626             return 0;
 627         }
 628       else if (c >= 0x80 && c < 0xA0)
 629         {
 630           if (c == 0x80)
 631             /* Old leading code for a composite character.  */
 632             composing = 1;
 633           else
 634             {
                   /* Validate the sequence starting at the byte before
                      `src' and skip over it.
                      NOTE(review): when MULTIBYTEP made the macro above
                      consume two bytes, src - 1 is the second of them,
                      not the leading code -- confirm this is intended.  */
 635               unsigned char *src_base = src - 1;
 636               int bytes;
 637
 638               if (!UNIBYTE_STR_AS_MULTIBYTE_P (src_base, src_end - src_base,
 639                                                bytes))
 640                 return 0;
 641               src = src_base + bytes;
 642             }
 643         }
 644     }
 645  label_end_of_loop:
       /* The whole source was scanned without finding a disqualifying
          byte.  */
 646   return CODING_CATEGORY_MASK_EMACS_MULE;
 647 }
 648
 649
 650 /* Record the starting position START and METHOD of one composition.
        Four ints are taken at index `used' of CODING->cmp_data->data:
        [0] is set to -1 here and later overwritten by
        CODING_ADD_COMPOSITION_END, [1] is char_offset + START, [2] is
        left for CODING_ADD_COMPOSITION_END to fill in, and [3] is METHOD.
        The index of this record is saved in CODING->cmp_data_start, and
        `used' is advanced by four.  No check is made here that the
        four ints are available.  */
 651
 652 #define CODING_ADD_COMPOSITION_START(coding, start, method)     \
 653   do {                                                          \
 654     struct composition_data *cmp_data = coding->cmp_data;       \
 655     int *data = cmp_data->data + cmp_data->used;                \
 656     coding->cmp_data_start = cmp_data->used;                    \
 657     data[0] = -1;                                               \
 658     data[1] = cmp_data->char_offset + start;                    \
 659     data[3] = (int) method;                                     \
 660     cmp_data->used += 4;                                        \
 661   } while (0)
 662
 663 /* Record the ending position END of the current composition.  In the
        record that starts at index CODING->cmp_data_start, set [0] to the
        number of ints used from that index up to the current `used', and
        set [2] to char_offset + END.  */
 664
 665 #define CODING_ADD_COMPOSITION_END(coding, end)                 \
 666   do {                                                          \
 667     struct composition_data *cmp_data = coding->cmp_data;       \
 668     int *data = cmp_data->data + coding->cmp_data_start;        \
 669     data[0] = cmp_data->used - coding->cmp_data_start;          \
 670     data[2] = cmp_data->char_offset + end;                      \
 671   } while (0)
 672
 673 /* Record one COMPONENT (alternate character or composition rule).
        It is stored at index `used' of CODING->cmp_data->data, and `used'
        is incremented.  The value of the expression is COMPONENT.  */
 674
 675 #define CODING_ADD_COMPOSITION_COMPONENT(coding, component)     \
 676   (coding->cmp_data->data[coding->cmp_data->used++] = component)
 677
 678
 679 /* Get one byte from a data pointed by SRC and increment SRC.  If SRC
 680    is not less than SRC_END, return -1 without incrementing SRC.  The
        caller should set variables `src' and `src_end' in advance.  */
 681
 682 #define SAFE_ONE_MORE_BYTE() (src >= src_end ? -1 : *src++)
 683
 684
 685 /* Decode a character represented as a component of composition
 686    sequence of Emacs 20 style at SRC.  Set C to that character, store
 687    its multibyte form sequence at P, and set P to the end of that
 688    sequence.  If no valid character is found, set C to -1.

        A component has one of these two forms:
          ASCII: 0xA0 ASCII_CODE+0x80, so 0x80 is subtracted from the
                 second byte to recover the ASCII code;
          other: LEADING_CODE+0x20 FOLLOWING-BYTE ..., so 0x20 is
                 subtracted from the first byte and the following bytes
                 are copied as they are.

        Bytes are fetched with SAFE_ONE_MORE_BYTE, so the caller should
        set variables `src' and `src_end' in advance.  C is also negative
        when the source is already exhausted at the first byte.  */
 689
 690 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(c, p)                \
 691   do {                                                          \
 692     int bytes;                                                  \
 693                                                                 \
 694     c = SAFE_ONE_MORE_BYTE ();                                  \
 695     if (c < 0)                                                  \
 696       break;                                                    \
 697     if (CHAR_HEAD_P (c))                                        \
 698       c = -1;                                                   \
 699     else if (c == 0xA0)                                         \
 700       {                                                         \
 701         c = SAFE_ONE_MORE_BYTE ();                              \
 702         if (c < 0xA0)                                           \
 703           c = -1;                                               \
 704         else                                                    \
 705           {                                                     \
 706             c -= 0x80;                                          \
 707             *p++ = c;                                           \
 708           }                                                     \
 709       }                                                         \
 710     else if (BASE_LEADING_CODE_P (c - 0x20))                    \
 711       {                                                         \
 712         unsigned char *p0 = p;                                  \
 713                                                                 \
 714         c -= 0x20;                                              \
 715         *p++ = c;                                               \
 716         bytes = BYTES_BY_CHAR_HEAD (c);                         \
 717         while (--bytes)                                         \
 718           {                                                     \
 719             c = SAFE_ONE_MORE_BYTE ();                          \
 720             if (c < 0)                                          \
 721               break;                                            \
 722             *p++ = c;                                           \
 723           }                                                     \
 724         if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes))     \
 725           c = STRING_CHAR (p0, bytes);                          \
 726         else                                                    \
 727           c = -1;                                               \
 728       }                                                         \
 729     else                                                        \
 730       c = -1;                                                   \
 731   } while (0)
 732
 733
 734 /* Fetch the next byte with SAFE_ONE_MORE_BYTE and interpret it as a
 735    composition rule of an Emacs 20 style composition sequence.  Set C
 736    to the encoded rule (GREF and NREF are set too), or to -1 if invalid.  */
 737
 738 #define DECODE_EMACS_MULE_COMPOSITION_RULE(c)           \
 739   do {                                                  \
 740     c = SAFE_ONE_MORE_BYTE ();                          \
 741     c -= 0xA0;                                          \
 742     if (c >= 0 && c < 81)                               \
 743       {                                                 \
 744         nref = c % 9, gref = c / 9;                     \
 745         c = COMPOSITION_ENCODE_RULE (gref, nref);       \
 746       }                                                 \
 747     else                                                \
 748       c = -1;                                           \
 749   } while (0)
 750
 751
 752 /* Decode composition sequence encoded by `emacs-mule' at the source
 753    pointed by SRC.  SRC_END is the end of source.  Store information
 754    of the composition in CODING->cmp_data.
 755
 756    For backward compatibility, decode also a composition sequence of
 757    Emacs 20 style.  In that case, the composition sequence contains
 758    characters that should be extracted into a buffer or string.  Store
 759    those characters at *DESTINATION in multibyte form.
 760
 761    If we encounter an invalid byte sequence, return 0.
 762    If we encounter an insufficient source or destination, or
 763    insufficient space in CODING->cmp_data, return -1.
 764    Otherwise, return consumed bytes in the source.
 765
        A new-style sequence is also rejected as invalid (return 0) when
        its COMPOSED-CHARS count is less than 1, or when it carries more
        components than the local COMPONENT array can hold
        (COMPOSITION_DATA_MAX_BUNCH_LENGTH).

        DST_END is the destination limit when DST_BYTES is nonzero;
        when DST_BYTES is zero the limit is the current source position
        instead.  */
 766 /* */
 767 static INLINE int
 768 decode_composition_emacs_mule (coding, src, src_end,
 769                                destination, dst_end, dst_bytes)
 770      struct coding_system *coding;
 771      unsigned char *src, *src_end, **destination, *dst_end;
 772      int dst_bytes;
 773 {
 774   unsigned char *dst = *destination;
 775   int method, data_len, nchars;
       /* SRC_BASE keeps the first byte of the sequence so that the
          consumed length can be computed; SRC itself skips that byte.  */
 776   unsigned char *src_base = src++;
 777   /* Store components of composition.  */
 778   int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH];
 779   int ncomponent;
 780   /* Store multibyte form of characters to be composed.  This is for
 781      Emacs 20 style composition sequence.  */
 782   unsigned char buf[MAX_COMPOSITION_COMPONENTS * MAX_MULTIBYTE_LENGTH];
 783   unsigned char *bufp = buf;
 784   int c, i, gref, nref;
 785
       /* Refuse to start unless a whole bunch still fits in cmp_data.  */
 786   if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
 787       >= COMPOSITION_DATA_SIZE)
 788     {
 789       coding->result = CODING_FINISH_INSUFFICIENT_CMP;
 790       return -1;
 791     }
 792
 793   ONE_MORE_BYTE (c);
 794   if (c - 0xF0 >= COMPOSITION_RELATIVE
 795            && c - 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS)
 796     {
 797       int with_rule;
 798
 799       method = c - 0xF0;
 800       with_rule = (method == COMPOSITION_WITH_RULE
 801                    || method == COMPOSITION_WITH_RULE_ALTCHARS);
 802       ONE_MORE_BYTE (c);
 803       data_len = c - 0xA0;
 804       if (data_len < 4
 805           || src_base + data_len > src_end)
 806         return 0;
 807       ONE_MORE_BYTE (c);
 808       nchars = c - 0xA0;
           /* Test the decoded count, not the raw byte: a composition
              must cover at least one character.  */
 809       if (nchars < 1)
 810         return 0;
 811       for (ncomponent = 0; src < src_base + data_len; ncomponent++)
 812         {
               /* DATA_LEN comes from the input, so bound the index
                  explicitly; a sequence with more components than
                  COMPONENT can hold can't be valid.  */
               if (ncomponent >= COMPOSITION_DATA_MAX_BUNCH_LENGTH)
                 return 0;
 813           if (ncomponent % 2 && with_rule)
 814             {
 815               ONE_MORE_BYTE (gref);
 816               gref -= 32;
 817               ONE_MORE_BYTE (nref);
 818               nref -= 32;
 819               c = COMPOSITION_ENCODE_RULE (gref, nref);
 820             }
 821           else
 822             {
 823               int bytes;
 824               if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
 825                 c = STRING_CHAR (src, bytes);
 826               else
 827                 c = *src, bytes = 1;
 828               src += bytes;
 829             }
 830           component[ncomponent] = c;
 831         }
 832     }
 833   else
 834     {
 835       /* This may be an old Emacs 20 style format.  See the comment at
 836          the section 2 of this file.  */
 837       while (src < src_end && !CHAR_HEAD_P (*src)) src++;
 838       if (src == src_end
 839           && !(coding->mode & CODING_MODE_LAST_BLOCK))
 840         goto label_end_of_loop;
 841
           /* Restrict decoding to the bytes scanned above and restart
              just after the first byte of the sequence.  */
 842       src_end = src;
 843       src = src_base + 1;
 844       if (c < 0xC0)
 845         {
 846           method = COMPOSITION_RELATIVE;
 847           for (ncomponent = 0; ncomponent < MAX_COMPOSITION_COMPONENTS;)
 848             {
 849               DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 850               if (c < 0)
 851                 break;
 852               component[ncomponent++] = c;
 853             }
 854           if (ncomponent < 2)
 855             return 0;
 856           nchars = ncomponent;
 857         }
 858       else if (c == 0xFF)
 859         {
 860           method = COMPOSITION_WITH_RULE;
 861           src++;
 862           DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 863           if (c < 0)
 864             return 0;
 865           component[0] = c;
 866           for (ncomponent = 1;
 867                ncomponent < MAX_COMPOSITION_COMPONENTS * 2 - 1;)
 868             {
 869               DECODE_EMACS_MULE_COMPOSITION_RULE (c);
 870               if (c < 0)
 871                 break;
 872               component[ncomponent++] = c;
 873               DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 874               if (c < 0)
 875                 break;
 876               component[ncomponent++] = c;
 877             }
 878           if (ncomponent < 3)
 879             return 0;
 880           nchars = (ncomponent + 1) / 2;
 881         }
 882       else
 883         return 0;
 884     }
 885
       /* Record the composition only if the characters collected in BUF
          (Emacs 20 style only) fit below the destination limit.  */
 886   if (buf == bufp || dst + (bufp - buf) <= (dst_bytes ? dst_end : src))
 887     {
 888       CODING_ADD_COMPOSITION_START (coding, coding->produced_char, method);
 889       for (i = 0; i < ncomponent; i++)
 890         CODING_ADD_COMPOSITION_COMPONENT (coding, component[i]);
 891       CODING_ADD_COMPOSITION_END (coding, coding->produced_char + nchars);
 892       if (buf < bufp)
 893         {
 894           unsigned char *p = buf;
 895           EMIT_BYTES (p, bufp);
 896           *destination += bufp - buf;
 897           coding->produced_char += nchars;
 898         }
 899       return (src - src_base);
 900     }
 901  label_end_of_loop:
 902   return -1;
 903 }
 904
 905 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".

        Decode the SRC_BYTES bytes at SOURCE and write the result at
        DESTINATION.  Each loop iteration handles one of:
          - '\r': turned into '\n' for CODING_EOL_CR; for CODING_EOL_CRLF
            the following byte must be '\n', otherwise the '\r' is kept
            (or decoding stops with CODING_FINISH_INCONSISTENT_EOL when
            CODING_MODE_INHIBIT_INCONSISTENT_EOL is set);
          - '\n': copied, unless the eol type is CR or CRLF and
            CODING_MODE_INHIBIT_INCONSISTENT_EOL is set;
          - 0x80: handed to decode_composition_emacs_mule;
          - anything else: copied as a multibyte sequence when
            UNIBYTE_STR_AS_MULTIBYTE_P accepts it, otherwise the single
            byte is converted with CHAR_STRING.
        On exit CODING->consumed, CODING->consumed_char and
        CODING->produced are updated; CODING->produced_char counts the
        characters written.  */
 906
 907 static void
 908 decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
 909      struct coding_system *coding;
 910      unsigned char *source, *destination;
 911      int src_bytes, dst_bytes;
 912 {
 913   unsigned char *src = source;
 914   unsigned char *src_end = source + src_bytes;
 915   unsigned char *dst = destination;
 916   unsigned char *dst_end = destination + dst_bytes;
 917   /* SRC_BASE remembers the start position in source in each loop.
 918      The loop will be exited when there's not enough source text, or
 919      when there's not enough destination area to produce a
 920      character.  */
 921   unsigned char *src_base;
 922
 923   coding->produced_char = 0;
 924   while ((src_base = src) < src_end)
 925     {
 926       unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p;
 927       int bytes;
 928
 929       if (*src == '\r')
 930         {
 931           int c = *src++;
 932
 933           if (coding->eol_type == CODING_EOL_CR)
 934             c = '\n';
 935           else if (coding->eol_type == CODING_EOL_CRLF)
 936             {
                   /* NOTE(review): ONE_MORE_BYTE is defined outside this
                      view; presumably it jumps to label_end_of_loop when
                      the source is exhausted -- confirm.  */
 937               ONE_MORE_BYTE (c);
 938               if (c != '\n')
 939                 {
 940                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
 941                     {
 942                       coding->result = CODING_FINISH_INCONSISTENT_EOL;
 943                       goto label_end_of_loop;
 944                     }
                       /* Lone CR: un-read the byte just fetched and emit
                          the CR itself.  */
 945                   src--;
 946                   c = '\r';
 947                 }
 948             }
               /* NOTE(review): no destination-space check on this path or
                  on the '\n' path below; only the generic path checks.  */
 949           *dst++ = c;
 950           coding->produced_char++;
 951           continue;
 952         }
 953       else if (*src == '\n')
 954         {
 955           if ((coding->eol_type == CODING_EOL_CR
 956                || coding->eol_type == CODING_EOL_CRLF)
 957               && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
 958             {
 959               coding->result = CODING_FINISH_INCONSISTENT_EOL;
 960               goto label_end_of_loop;
 961             }
 962           *dst++ = *src++;
 963           coding->produced_char++;
 964           continue;
 965         }
 966       else if (*src == 0x80)
 967         {
 968           /* Start of composition data.  */
 969           int consumed  = decode_composition_emacs_mule (coding, src, src_end,
 970                                                          &dst, dst_end,
 971                                                          dst_bytes);
               /* Negative: stop decoding here.  Positive: that many
                  source bytes were handled.  Zero: not a valid
                  composition, so treat the 0x80 byte as a character.  */
 972           if (consumed < 0)
 973             goto label_end_of_loop;
 974           else if (consumed > 0)
 975             {
 976               src += consumed;
 977               continue;
 978             }
 979           bytes = CHAR_STRING (*src, tmp);
 980           p = tmp;
 981           src++;
 982         }
 983       else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
 984         {
 985           p = src;
 986           src += bytes;
 987         }
 988       else
 989         {
 990           bytes = CHAR_STRING (*src, tmp);
 991           p = tmp;
 992           src++;
 993         }
           /* The destination limit is DST_END when DST_BYTES is nonzero,
              otherwise the current source position.  */
 994       if (dst + bytes >= (dst_bytes ? dst_end : src))
 995         {
 996           coding->result = CODING_FINISH_INSUFFICIENT_DST;
 997           break;
 998         }
 999       while (bytes--) *dst++ = *p++;
1000       coding->produced_char++;
1001     }
1002  label_end_of_loop:
       /* SRC_BASE, not SRC, is reported as consumed, so a character that
          was only partly processed is not counted.  */
1003   coding->consumed = coding->consumed_char = src_base - source;
1004   coding->produced = dst - destination;
1005 }
1006
1007
1008 /* Encode composition data stored at DATA into a special byte sequence
1009    starting by 0x80.  Update CODING->cmp_data_start and maybe
1010    CODING->cmp_data for the next call.

        The emitted bytes are:
          0x80, 0xF0 + METHOD (DATA[3]),
          0xA0 + total length of the sequence in bytes (header included),
          0xA0 + number of composed characters (DATA[2] - DATA[1]),
        followed by the components DATA[4] .. DATA[DATA[0] - 1] in
        multibyte form.  For COMPOSITION_WITH_RULE and
        COMPOSITION_WITH_RULE_ALTCHARS every rule is emitted as the two
        bytes 0x20 + GREF and 0x20 + NREF.

        The macro uses the variables `dst', `dst_end', `dst_bytes' and
        `src' of the expansion site.  When the destination is too small
        it sets CODING->result to CODING_FINISH_INSUFFICIENT_DST and
        jumps to `label_end_of_loop', which the expansion site must
        provide.  */
1011
1012 #define ENCODE_COMPOSITION_EMACS_MULE(coding, data)                     \
1013   do {                                                                  \
1014     unsigned char buf[1024], *p0 = buf, *p;                             \
1015     int len = data[0];                                                  \
1016     int i;                                                              \
1017                                                                         \
1018     buf[0] = 0x80;                                                      \
1019     buf[1] = 0xF0 + data[3];    /* METHOD */                            \
1020     buf[3] = 0xA0 + (data[2] - data[1]); /* COMPOSED-CHARS */           \
1021     p = buf + 4;                                                        \
1022     if (data[3] == COMPOSITION_WITH_RULE                                \
1023         || data[3] == COMPOSITION_WITH_RULE_ALTCHARS)                   \
1024       {                                                                 \
1025         p += CHAR_STRING (data[4], p);                                  \
1026         for (i = 5; i < len; i += 2)                                    \
1027           {                                                             \
1028             int gref, nref;                                             \
1029              COMPOSITION_DECODE_RULE (data[i], gref, nref);             \
1030             *p++ = 0x20 + gref;                                         \
1031             *p++ = 0x20 + nref;                                         \
1032             p += CHAR_STRING (data[i + 1], p);                          \
1033           }                                                             \
1034       }                                                                 \
1035     else                                                                \
1036       {                                                                 \
1037         for (i = 4; i < len; i++)                                       \
1038           p += CHAR_STRING (data[i], p);                                \
1039       }                                                                 \
1040     buf[2] = 0xA0 + (p - buf);  /* COMPONENTS-BYTES */                  \
1041                                                                         \
1042     if (dst + (p - buf) + 4 > (dst_bytes ? dst_end : src))              \
1043       {                                                                 \
1044         coding->result = CODING_FINISH_INSUFFICIENT_DST;                \
1045         goto label_end_of_loop;                                         \
1046       }                                                                 \
1047     while (p0 < p)                                                      \
1048       *dst++ = *p0++;                                                   \
1049     coding->cmp_data_start += data[0];                                  \
1050     if (coding->cmp_data_start == coding->cmp_data->used                \
1051         && coding->cmp_data->next)                                      \
1052       {                                                                 \
1053         coding->cmp_data = coding->cmp_data->next;                      \
1054         coding->cmp_data_start = 0;                                     \
1055       }                                                                 \
1056   } while (0)
1057
1058
     /* Forward declaration: encode_coding_emacs_mule below calls
        encode_eol when CODING carries no composition data.  The
        arguments are passed in the same order as its own (coding,
        source, destination, src_bytes, dst_bytes).  */
1059 static void encode_eol P_ ((struct coding_system *, unsigned char *,
1060                             unsigned char *, int, int));
1061
     /* Encode the SRC_BYTES bytes at SOURCE and write the result at
        DESTINATION (DST_BYTES bytes long; when DST_BYTES is zero,
        ENCODE_COMPOSITION_EMACS_MULE uses the source position as the
        destination limit).

        If CODING has no composition data, the work is delegated to
        encode_eol.  Otherwise each character is read with ONE_MORE_CHAR
        and emitted; a newline becomes CR LF or CR according to
        CODING->eol_type, and whenever the current character position
        equals the start position recorded in the pending composition
        data, that data is emitted first with
        ENCODE_COMPOSITION_EMACS_MULE.

        On exit CODING->consumed, CODING->produced and
        CODING->produced_char are updated; CODING->consumed_char is
        incremented once per character encoded.  */
1062 static void
1063 encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
1064      struct coding_system *coding;
1065      unsigned char *source, *destination;
1066      int src_bytes, dst_bytes;
1067 {
1068   unsigned char *src = source;
1069   unsigned char *src_end = source + src_bytes;
1070   unsigned char *dst = destination;
1071   unsigned char *dst_end = destination + dst_bytes;
1072   unsigned char *src_base;
1073   int c;
1074   int char_offset;
1075   int *data;
1076
       /* NOTE(review): never read directly in this function; presumably
          ONE_MORE_CHAR refers to it by name -- confirm before removing.  */
1077   Lisp_Object translation_table;
1078
1079   translation_table = Qnil;
1080
1081   /* Optimization for the case that there's no composition.  */
1082   if (!coding->cmp_data || coding->cmp_data->used == 0)
1083     {
1084       encode_eol (coding, source, destination, src_bytes, dst_bytes);
1085       return;
1086     }
1087
1088   char_offset = coding->cmp_data->char_offset;
1089   data = coding->cmp_data->data + coding->cmp_data_start;
       /* There is no loop condition: the loop is left only through a
          jump to label_end_of_loop.  ENCODE_COMPOSITION_EMACS_MULE
          does so when the destination is too small; ONE_MORE_CHAR and
          the EMIT_* macros presumably do the same on exhaustion --
          confirm against their definitions.  */
1090   while (1)
1091     {
1092       src_base = src;
1093
1094       /* If SRC starts a composition, encode the information about the
1095          composition in advance.  */
1096       if (coding->cmp_data_start < coding->cmp_data->used
1097           && char_offset + coding->consumed_char == data[1])
1098         {
1099           ENCODE_COMPOSITION_EMACS_MULE (coding, data);
               /* The macro may have advanced to the next cmp_data
                  block, so reload both cached values.  */
1100           char_offset = coding->cmp_data->char_offset;
1101           data = coding->cmp_data->data + coding->cmp_data_start;
1102         }
1103
1104       ONE_MORE_CHAR (c);
1105       if (c == '\n' && (coding->eol_type == CODING_EOL_CRLF
1106                         || coding->eol_type == CODING_EOL_CR))
1107         {
1108           if (coding->eol_type == CODING_EOL_CRLF)
1109             EMIT_TWO_BYTES ('\r', c);
1110           else
1111             EMIT_ONE_BYTE ('\r');
1112         }
1113       else if (SINGLE_BYTE_CHAR_P (c))
1114         EMIT_ONE_BYTE (c);
1115       else
             /* Multibyte character: copy its source bytes unchanged.  */
1116         EMIT_BYTES (src_base, src);
1117       coding->consumed_char++;
1118     }
1119  label_end_of_loop:
1120   coding->consumed = src_base - source;
1121   coding->produced = coding->produced_char = dst - destination;
1122   return;
1123 }
1124
1125 \f
1126 /*** 3. ISO2022 handlers ***/
1127
1128 /* The following note describes the coding system ISO2022 briefly.
1129    Since the intention of this note is to help understand the
1130    functions in this file, some parts are NOT ACCURATE or are OVERLY
1131    SIMPLIFIED.  For thorough understanding, please refer to the
1132    original document of ISO2022.  This is equivalent to the standard
1133    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
1134
1135    ISO2022 provides many mechanisms to encode several character sets
1136    in 7-bit and 8-bit environments.  For 7-bit environments, all text
1137    is encoded using bytes less than 128.  This may make the encoded
1138    text a little bit longer, but the text passes more easily through
1139    several types of gateway, some of which strip off the MSB (Most
1140    Significant Bit).
1141
1142    There are two kinds of character sets: control character sets and
1143    graphic character sets.  The former contain control characters such
1144    as `newline' and `escape' to provide control functions (control
1145    functions are also provided by escape sequences).  The latter
1146    contain graphic characters such as 'A' and '-'.  Emacs recognizes
1147    two control character sets and many graphic character sets.
1148
1149    Graphic character sets are classified into one of the following
1150    four classes, according to the number of bytes (DIMENSION) and
1151    number of characters in one dimension (CHARS) of the set:
1152    - DIMENSION1_CHARS94
1153    - DIMENSION1_CHARS96
1154    - DIMENSION2_CHARS94
1155    - DIMENSION2_CHARS96
1156
1157    In addition, each character set is assigned an identification tag,
1158    unique for each set, called the "final character" (denoted as <F>
1159    hereafter).  The <F> of each character set is decided by ECMA(*)
1160    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
1161    (0x30..0x3F are for private use only).
1162
1163    Note (*): ECMA = European Computer Manufacturers Association
1164
1165    Here are examples of graphic character sets [NAME(<F>)]:
1166         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
1167         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
1168         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
1169         o DIMENSION2_CHARS96 -- none for the moment
1170
1171    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
1172         C0 [0x00..0x1F] -- control character plane 0
1173         GL [0x20..0x7F] -- graphic character plane 0
1174         C1 [0x80..0x9F] -- control character plane 1
1175         GR [0xA0..0xFF] -- graphic character plane 1
1176
1177    A control character set is directly designated and invoked to C0 or
1178    C1 by an escape sequence.  The most common case is that:
1179    - ISO646's  control character set is designated/invoked to C0, and
1180    - ISO6429's control character set is designated/invoked to C1,
1181    and usually these designations/invocations are omitted in encoded
1182    text.  In a 7-bit environment, only C0 can be used, and a control
1183    character for C1 is encoded by an appropriate escape sequence to
1184    fit into the environment.  All control characters for C1 are
1185    defined to have corresponding escape sequences.
1186
1187    A graphic character set is at first designated to one of four
1188    graphic registers (G0 through G3), then these graphic registers are
1189    invoked to GL or GR.  These designations and invocations can be
1190    done independently.  The most common case is that G0 is invoked to
1191    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
1192    these invocations and designations are omitted in encoded text.
1193    In a 7-bit environment, only GL can be used.
1194
1195    When a graphic character set of CHARS94 is invoked to GL, codes
1196    0x20 and 0x7F of the GL area work as control characters SPACE and
1197    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
1198    be used.
1199
1200    There are two ways of invocation: locking-shift and single-shift.
1201    With locking-shift, the invocation lasts until the next different
1202    invocation, whereas with single-shift, the invocation affects the
1203    following character only and doesn't affect the locking-shift
1204    state.  Invocations are done by the following control characters or
1205    escape sequences:
1206
1207    ----------------------------------------------------------------------
1208    abbrev  function                  cntrl escape seq   description
1209    ----------------------------------------------------------------------
1210    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
1211    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
1212    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
1213    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
1214    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
1215    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
1216    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
1217    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
1218    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
1219    ----------------------------------------------------------------------
1220    (*) These are not used by any known coding system.
1221
1222    Control characters for these functions are defined by macros
1223    ISO_CODE_XXX in `coding.h'.
1224
1225    Designations are done by the following escape sequences:
1226    ----------------------------------------------------------------------
1227    escape sequence      description
1228    ----------------------------------------------------------------------
1229    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
1230    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
1231    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
1232    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
1233    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
1234    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
1235    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
1236    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
1237    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
1238    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
1239    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
1240    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
1241    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
1242    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
1243    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
1244    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
1245    ----------------------------------------------------------------------
1246
1247    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
1248    of dimension 1, chars 94, and final character <F>, etc...
1249
1250    Note (*): Although these designations are not allowed in ISO2022,
1251    Emacs accepts them on decoding, and produces them on encoding
1252    CHARS96 character sets in a coding system which is characterized as
1253    7-bit environment, non-locking-shift, and non-single-shift.
1254
1255    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
1256    '(' can be omitted.  We refer to this as "short-form" hereafter.
1257
1258    Now you may notice that there are a lot of ways of encoding the
1259    same multilingual text in ISO2022.  Actually, there exist many
1260    coding systems such as Compound Text (used in X11's inter client
1261    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
1262    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
1263    localized platforms), and all of these are variants of ISO2022.
1264
1265    In addition to the above, Emacs handles two more kinds of escape
1266    sequences: ISO6429's direction specification and Emacs' private
1267    sequence for specifying character composition.
1268
1269    ISO6429's direction specification takes the following form:
1270         o CSI ']'      -- end of the current direction
1271         o CSI '0' ']'  -- end of the current direction
1272         o CSI '1' ']'  -- start of left-to-right text
1273         o CSI '2' ']'  -- start of right-to-left text
1274    The control character CSI (0x9B: control sequence introducer) is
1275    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
1276
1277    Character composition specification takes the following form:
1278         o ESC '0' -- start relative composition
1279         o ESC '1' -- end composition
1280         o ESC '2' -- start rule-base composition (*)
1281         o ESC '3' -- start relative composition with alternate chars  (**)
1282         o ESC '4' -- start rule-base composition with alternate chars  (**)
1283   Since these are not standard escape sequences of any ISO standard,
1284   the use of them with these meanings is restricted to Emacs only.
1285
1286   (*) This form is used only in Emacs 20.5 and older versions,
1287   but the newer versions can safely decode it.
1288   (**) This form is used only in Emacs 21.1 and newer versions,
1289   and the older versions can't decode it.
1290
1291   Here's a list of example usages of these composition escape
1292   sequences (categorized by `enum composition_method').
1293
1294   COMPOSITION_RELATIVE:
1295         ESC 0 CHAR [ CHAR ] ESC 1
1296   COMPOSITION_WITH_RULE:
1297         ESC 2 CHAR [ RULE CHAR ] ESC 1
1298   COMPOSITION_WITH_ALTCHARS:
1299         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
1300   COMPOSITION_WITH_RULE_ALTCHARS:
1301         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
1302
     /* Table of 256 `enum iso_code_class_type' values.
        NOTE(review): presumably indexed by a byte value to give that
        byte's class in ISO2022 -- confirm where the table is filled.  */
1303 enum iso_code_class_type iso_code_class[256];
1304
     /* Nonzero if all of the following hold:
          - coding_system_table[IDX] is non-null;
          - CHARSET is CHARSET_ASCII, or C satisfies CODING_SAFE_CHAR_P
            for the value coding_safe_chars returns for that coding
            system;
          - CODING_SPEC_ISO_REQUESTED_DESIGNATION of that coding system
            for CHARSET is not CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION.
        Side effect: when CHARSET is not ASCII, the result of
        coding_safe_chars is stored in a variable named `safe_chars',
        which the expansion site must declare.  IDX is evaluated more
        than once.  */
1305 #define CHARSET_OK(idx, charset, c)                                     \
1306   (coding_system_table[idx]                                             \
1307    && (charset == CHARSET_ASCII                                         \
1308        || (safe_chars = coding_safe_chars (coding_system_table[idx]),   \
1309            CODING_SAFE_CHAR_P (safe_chars, c)))                         \
1310    && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \
1311                                               charset)                  \
1312        != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
1313
     /* Nonzero if CODING_SPEC_ISO_INITIAL_DESIGNATION of
        coding_system_table[IDX] for graphic register 1 is non-negative.
        Unlike CHARSET_OK, this does not check that
        coding_system_table[IDX] is non-null.
        NOTE(review): presumably a non-negative value means some charset
        is initially designated to G1 -- confirm in coding.h.  */
1314 #define SHIFT_OUT_OK(idx) \
1315   (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
1316
1317 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1318    Check if a text is encoded in ISO2022.  If it is, return an
1319    integer in which the appropriate flag bits, any of:
1320         CODING_CATEGORY_MASK_ISO_7
1321         CODING_CATEGORY_MASK_ISO_7_TIGHT
1322         CODING_CATEGORY_MASK_ISO_8_1
1323         CODING_CATEGORY_MASK_ISO_8_2
1324         CODING_CATEGORY_MASK_ISO_7_ELSE
1325         CODING_CATEGORY_MASK_ISO_8_ELSE
1326    are set.  If a code which should never appear in ISO2022 is found,
1327    returns 0.  */
1328
1329 static int
1330 detect_coding_iso2022 (src, src_end, multibytep)
1331      unsigned char *src, *src_end;
1332      int multibytep;
1333 {
1334   int mask = CODING_CATEGORY_MASK_ISO;
1335   int mask_found = 0;
1336   int reg[4], shift_out = 0, single_shifting = 0;
1337   int c, c1, charset;
1338   /* Dummy for ONE_MORE_BYTE.  */
1339   struct coding_system dummy_coding;
1340   struct coding_system *coding = &dummy_coding;
1341   Lisp_Object safe_chars;
1342
1343   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
1344   while (mask && src < src_end)
1345     {
1346       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1347       switch (c)
1348         {
1349         case ISO_CODE_ESC:
1350           if (inhibit_iso_escape_detection)
1351             break;
1352           single_shifting = 0;
1353           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1354           if (c >= '(' && c <= '/')
1355             {
1356               /* Designation sequence for a charset of dimension 1.  */
1357               ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
1358               if (c1 < ' ' || c1 >= 0x80
1359                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
1360                 /* Invalid designation sequence.  Just ignore.  */
1361                 break;
1362               reg[(c - '(') % 4] = charset;
1363             }
1364           else if (c == '$')
1365             {
1366               /* Designation sequence for a charset of dimension 2.  */
1367               ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1368               if (c >= '@' && c <= 'B')
1369                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
1370                 reg[0] = charset = iso_charset_table[1][0][c];
1371               else if (c >= '(' && c <= '/')
1372                 {
1373                   ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
1374                   if (c1 < ' ' || c1 >= 0x80
1375                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
1376                     /* Invalid designation sequence.  Just ignore.  */
1377                     break;
1378                   reg[(c - '(') % 4] = charset;
1379                 }
1380               else
1381                 /* Invalid designation sequence.  Just ignore.  */
1382                 break;
1383             }
1384           else if (c == 'N' || c == 'O')
1385             {
1386               /* ESC <Fe> for SS2 or SS3.  */
1387               mask &= CODING_CATEGORY_MASK_ISO_7_ELSE;
1388               break;
1389             }
1390           else if (c >= '0' && c <= '4')
1391             {
1392               /* ESC <Fp> for start/end composition.  */
1393               mask_found |= CODING_CATEGORY_MASK_ISO;
1394               break;
1395             }
1396           else
1397             /* Invalid escape sequence.  Just ignore.  */
1398             break;
1399
1400           /* We found a valid designation sequence for CHARSET.  */
1401           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
1402           c = MAKE_CHAR (charset, 0, 0);
1403           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset, c))
1404             mask_found |= CODING_CATEGORY_MASK_ISO_7;
1405           else
1406             mask &= ~CODING_CATEGORY_MASK_ISO_7;
1407           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset, c))
1408             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
1409           else
1410             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
1411           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset, c))
1412             mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
1413           else
1414             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
1415           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset, c))
1416             mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
1417           else
1418             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
1419           break;
1420
1421         case ISO_CODE_SO:
1422           if (inhibit_iso_escape_detection)
1423             break;
1424           single_shifting = 0;
1425           if (shift_out == 0
1426               && (reg[1] >= 0
1427                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
1428                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
1429             {
1430               /* Locking shift out.  */
1431               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
1432               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
1433             }
1434           break;
1435
1436         case ISO_CODE_SI:
1437           if (inhibit_iso_escape_detection)
1438             break;
1439           single_shifting = 0;
1440           if (shift_out == 1)
1441             {
1442               /* Locking shift in.  */
1443               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
1444               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
1445             }
1446           break;
1447
1448         case ISO_CODE_CSI:
1449           single_shifting = 0;
1450         case ISO_CODE_SS2:
1451         case ISO_CODE_SS3:
1452           {
1453             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
1454
1455             if (inhibit_iso_escape_detection)
1456               break;
1457             if (c != ISO_CODE_CSI)
1458               {
1459                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1460                     & CODING_FLAG_ISO_SINGLE_SHIFT)
1461                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1462                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1463                     & CODING_FLAG_ISO_SINGLE_SHIFT)
1464                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1465                 single_shifting = 1;
1466               }
1467             if (VECTORP (Vlatin_extra_code_table)
1468                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1469               {
1470                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1471                     & CODING_FLAG_ISO_LATIN_EXTRA)
1472                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1473                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1474                     & CODING_FLAG_ISO_LATIN_EXTRA)
1475                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1476               }
1477             mask &= newmask;
1478             mask_found |= newmask;
1479           }
1480           break;
1481
1482         default:
1483           if (c < 0x80)
1484             {
1485               single_shifting = 0;
1486               break;
1487             }
1488           else if (c < 0xA0)
1489             {
1490               single_shifting = 0;
1491               if (VECTORP (Vlatin_extra_code_table)
1492                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1493                 {
1494                   int newmask = 0;
1495
1496                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1497                       & CODING_FLAG_ISO_LATIN_EXTRA)
1498                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1499                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1500                       & CODING_FLAG_ISO_LATIN_EXTRA)
1501                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1502                   mask &= newmask;
1503                   mask_found |= newmask;
1504                 }
1505               else
1506                 return 0;
1507             }
1508           else
1509             {
1510               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
1511                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
1512               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
1513               /* Check the length of succeeding codes of the range
1514                  0xA0..0FF.  If the byte length is odd, we exclude
1515                  CODING_CATEGORY_MASK_ISO_8_2.  We can check this only
1516                  when we are not single shifting.  */
1517               if (!single_shifting
1518                   && mask & CODING_CATEGORY_MASK_ISO_8_2)
1519                 {
1520                   int i = 1;
1521                   while (src < src_end)
1522                     {
1523                       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1524                       if (c < 0xA0)
1525                         break;
1526                       i++;
1527                     }
1528
1529                   if (i & 1 && src < src_end)
1530                     mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
1531                   else
1532                     mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
1533                 }
1534             }
1535           break;
1536         }
1537     }
1538  label_end_of_loop:
1539   return (mask & mask_found);
1540 }
1541
1542 /* Decode a character whose charset is CHARSET, whose 1st position
1543    code is C1, and whose 2nd position code is C2, and yield the decoded
1544    character code.  If the variable `translation_table' is non-nil,
1545    yield the code obtained by passing the same arguments through
        translate_char with that table, instead of the plain MAKE_CHAR
        result.

        This macro expands to an expression.  It reads `translation_table'
        from the scope in which it is used, so such a variable must be
        visible at every use site.  */
1546
1547 #define DECODE_ISO_CHARACTER(charset, c1, c2)   \
1548   (NILP (translation_table)                     \
1549    ? MAKE_CHAR (charset, c1, c2)                \
1550    : translate_char (translation_table, -1, charset, c1, c2))
1551
1552 /* Set designation state into CODING.

        Look up the charset identified by DIMENSION, CHARS and FINAL_CHAR
        with ISO_CHARSET_TABLE and, if it is acceptable, record it as the
        charset designated to graphic register REG of `coding'.

        The designation is accepted when the charset is non-negative and
        either CODING_SPEC_ISO_REQUESTED_DESIGNATION for that charset equals
        REG or the charset's base character (MAKE_CHAR (charset, 0, 0))
        satisfies CODING_SAFE_CHAR_P on `safe_chars'.  On acceptance,
        last_invalid_designation_register is reset to -1, and if
        CODING_MODE_DIRECTION is set and the charset has a reverse charset,
        the reverse charset is the one recorded.

        Control jumps to `label_invalid_code' in three situations:
          - FINAL_CHAR is below '0' or not below 128;
          - the designation is acceptable but designates ASCII to register
            0 while last_invalid_designation_register is 0 (the sequence is
            then passed through as is; the register is reset to -1 first);
          - the designation is not acceptable, in which case
            last_invalid_designation_register is set to REG.

        This is a statement macro.  It uses `coding', `safe_chars' and the
        label `label_invalid_code' from the enclosing function, and it
        evaluates REG and FINAL_CHAR more than once.  */
1553 #define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
1554   do {                                                                     \
1555     int charset, c;                                                        \
1556                                                                            \
1557     if (final_char < '0' || final_char >= 128)                             \
1558       goto label_invalid_code;                                             \
1559     charset = ISO_CHARSET_TABLE (make_number (dimension),                  \
1560                                  make_number (chars),                      \
1561                                  make_number (final_char));                \
1562     c = MAKE_CHAR (charset, 0, 0);                                         \
1563     if (charset >= 0                                                       \
1564         && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
1565             || CODING_SAFE_CHAR_P (safe_chars, c)))                        \
1566       {                                                                    \
1567         if (coding->spec.iso2022.last_invalid_designation_register == 0    \
1568             && reg == 0                                                    \
1569             && charset == CHARSET_ASCII)                                   \
1570           {                                                                \
1571             /* We should insert this designation sequence as is so         \
1572                that it is surely written back to a file.  */               \
1573             coding->spec.iso2022.last_invalid_designation_register = -1;   \
1574             goto label_invalid_code;                                       \
1575           }                                                                \
1576         coding->spec.iso2022.last_invalid_designation_register = -1;       \
1577         if ((coding->mode & CODING_MODE_DIRECTION)                         \
1578             && CHARSET_REVERSE_CHARSET (charset) >= 0)                     \
1579           charset = CHARSET_REVERSE_CHARSET (charset);                     \
1580         CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;               \
1581       }                                                                    \
1582     else                                                                   \
1583       {                                                                    \
1584         coding->spec.iso2022.last_invalid_designation_register = reg;      \
1585         goto label_invalid_code;                                           \
1586       }                                                                    \
1587   } while (0)
1588
1589 /* Allocate a memory block for storing information about compositions.
1590    The block is chained to the already allocated blocks.

        The new block is obtained with xmalloc.  Its `char_offset' member
        is set to CHAR_OFFSET and its `used' member to 0.  It is linked
        after the block currently pointed to by CODING->cmp_data (if there
        is one) and then becomes CODING->cmp_data itself.
        CODING->cmp_data_start is reset to 0.  */
1591
1592 void
1593 coding_allocate_composition_data (coding, char_offset)
1594      struct coding_system *coding;
1595      int char_offset;
1596 {
1597   struct composition_data *cmp_data
1598     = (struct composition_data *) xmalloc (sizeof *cmp_data);
1599
1600   cmp_data->char_offset = char_offset;
1601   cmp_data->used = 0;
       /* Link the new block behind the current one; the current one may
          be NULL, in which case the new block has no predecessor.  */
1602   cmp_data->prev = coding->cmp_data;
1603   cmp_data->next = NULL;
1604   if (coding->cmp_data)
1605     coding->cmp_data->next = cmp_data;
       /* From now on the new block is the current one.  */
1606   coding->cmp_data = cmp_data;
1607   coding->cmp_data_start = 0;
1608 }
1609
1610 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
1611    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
1612    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
1613    ESC 3 : altchar composition :  ESC 3 ALT ... ESC 0 CHAR ... ESC 1
1614    ESC 4 : alt&rule composition : ESC 4 ALT RULE .. ALT ESC 0 CHAR ... ESC 1

        C1 is the byte that followed ESC.  Three cases are distinguished:

        - Composition is disabled (coding->composing is
          COMPOSITION_DISABLED): the two bytes ESC and C1 (masked with
          0x7f) are copied to DST and coding->produced_char grows by 2.

        - No composition is in progress: if coding->cmp_data is missing or
          nearly full, coding->result is set to
          CODING_FINISH_INSUFFICIENT_CMP and control jumps to
          `label_end_of_loop'.  Otherwise the composition method is derived
          from C1 ('0', '2', '3', anything else) and the start of a
          composition is recorded.

        - A composition is already in progress: if its method is one of the
          two ALTCHARS methods, it is switched to COMPOSITION_RELATIVE;
          otherwise nothing changes.

        This is a statement macro.  It uses `coding', `dst' and the label
        `label_end_of_loop' from the enclosing function, and it evaluates
        C1 more than once.
1615   */
1616
1617 #define DECODE_COMPOSITION_START(c1)                                       \
1618   do {                                                                     \
1619     if (coding->composing == COMPOSITION_DISABLED)                         \
1620       {                                                                    \
1621         *dst++ = ISO_CODE_ESC;                                             \
1622         *dst++ = c1 & 0x7f;                                                \
1623         coding->produced_char += 2;                                        \
1624       }                                                                    \
1625     else if (!COMPOSING_P (coding))                                        \
1626       {                                                                    \
1627         /* This is surely the start of a composition.  We must be sure     \
1628            that coding->cmp_data has enough space to store the             \
1629            information about the composition.  If not, terminate the       \
1630            current decoding loop, allocate one more memory block for       \
1631            coding->cmp_data in the caller, then start the decoding         \
1632            loop again.  We can't allocate memory here directly because     \
1633            it may cause buffer/string relocation.  */                      \
1634         if (!coding->cmp_data                                              \
1635             || (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \
1636                 >= COMPOSITION_DATA_SIZE))                                 \
1637           {                                                                \
1638             coding->result = CODING_FINISH_INSUFFICIENT_CMP;               \
1639             goto label_end_of_loop;                                        \
1640           }                                                                \
1641         coding->composing = (c1 == '0' ? COMPOSITION_RELATIVE              \
1642                              : c1 == '2' ? COMPOSITION_WITH_RULE           \
1643                              : c1 == '3' ? COMPOSITION_WITH_ALTCHARS       \
1644                              : COMPOSITION_WITH_RULE_ALTCHARS);            \
1645         CODING_ADD_COMPOSITION_START (coding, coding->produced_char,       \
1646                                       coding->composing);                  \
1647         coding->composition_rule_follows = 0;                              \
1648       }                                                                    \
1649     else                                                                   \
1650       {                                                                    \
1651         /* We are already handling a composition.  If the method is        \
1652            the following two, the codes following the current escape       \
1653            sequence are actual characters stored in a buffer.  */          \
1654         if (coding->composing == COMPOSITION_WITH_ALTCHARS                 \
1655             || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)        \
1656           {                                                                \
1657             coding->composing = COMPOSITION_RELATIVE;                      \
1658             coding->composition_rule_follows = 0;                          \
1659           }                                                                \
1660       }                                                                    \
1661   } while (0)
1662
1663 /* Handle composition end sequence ESC 1.

        C1 is the byte that followed ESC.  If composition is disabled
        (coding->composing is COMPOSITION_DISABLED), the two bytes ESC and
        C1 are copied to DST and coding->produced_char grows by 2.
        Otherwise the end of the composition is recorded at the current
        coding->produced_char and coding->composing becomes COMPOSITION_NO.

        This is a statement macro that uses `coding' and `dst' from the
        enclosing function.  */
1664
1665 #define DECODE_COMPOSITION_END(c1)                                      \
1666   do {                                                                  \
1667     if (coding->composing == COMPOSITION_DISABLED)                      \
1668       {                                                                 \
1669         *dst++ = ISO_CODE_ESC;                                          \
1670         *dst++ = c1;                                                    \
1671         coding->produced_char += 2;                                     \
1672       }                                                                 \
1673     else                                                                \
1674       {                                                                 \
1675         CODING_ADD_COMPOSITION_END (coding, coding->produced_char);     \
1676         coding->composing = COMPOSITION_NO;                             \
1677       }                                                                 \
1678   } while (0)
1679
1680 /* Decode a composition rule from the byte C1 (and maybe one more byte
1681    from SRC) and store one encoded composition rule in
1682    coding->cmp_data.

        C1 must be an lvalue: 32 is subtracted from it in place.  With the
        adjusted value:
          - below 81, the rule is in the old one-byte format; the global and
            new reference points are C1 / 9 and C1 % 9, with the value 4
            replaced by 10 in each;
          - from 81 up to 92, the rule is in the new two-byte format; one
            more byte is read into `c2' with ONE_MORE_BYTE and the rule is
            built from C1 - 81 and c2 - 32;
          - otherwise the rule stays 0.
        The rule is then added as a composition component and
        coding->composition_rule_follows is cleared.

        This is a statement macro.  Besides `coding' it uses the variable
        `c2' of the enclosing function, plus whatever ONE_MORE_BYTE
        requires.  */
1683
1684 #define DECODE_COMPOSITION_RULE(c1)                                     \
1685   do {                                                                  \
1686     int rule = 0;                                                       \
1687     (c1) -= 32;                                                         \
1688     if (c1 < 81)                /* old format (before ver.21) */        \
1689       {                                                                 \
1690         int gref = (c1) / 9;                                            \
1691         int nref = (c1) % 9;                                            \
1692         if (gref == 4) gref = 10;                                       \
1693         if (nref == 4) nref = 10;                                       \
1694         rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
1695       }                                                                 \
1696     else if (c1 < 93)           /* new format (after ver.21) */         \
1697       {                                                                 \
1698         ONE_MORE_BYTE (c2);                                             \
1699         rule = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);              \
1700       }                                                                 \
1701     CODING_ADD_COMPOSITION_COMPONENT (coding, rule);                    \
1702     coding->composition_rule_follows = 0;                               \
1703   } while (0)
1704
1705
1706 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".

        Decode the SRC_BYTES bytes of ISO 2022 text at SOURCE into
        DESTINATION (DST_BYTES bytes long) according to CODING.

        CODING->result is preset to CODING_FINISH_NORMAL and is changed
        when the loop stops for inconsistent end-of-line or for lack of
        composition data space.  On return, CODING->consumed and
        CODING->consumed_char both hold the offset in SOURCE at which the
        last loop iteration started, and CODING->produced holds the number
        of bytes stored in DESTINATION.  Each byte sequence that cannot be
        interpreted increments CODING->errors and has its first byte
        emitted as is.  */
1707
1708 static void
1709 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1710      struct coding_system *coding;
1711      unsigned char *source, *destination;
1712      int src_bytes, dst_bytes;
1713 {
1714   unsigned char *src = source;
1715   unsigned char *src_end = source + src_bytes;
1716   unsigned char *dst = destination;
1717   unsigned char *dst_end = destination + dst_bytes;
1718   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1719   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1720   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1721   /* SRC_BASE remembers the start position in source in each loop.
1722      The loop will be exited when there's not enough source code
1723      (within macro ONE_MORE_BYTE), or when there's not enough
1724      destination area to produce a character (within macro
1725      EMIT_CHAR).  */
1726   unsigned char *src_base;
1727   int c, charset;
1728   Lisp_Object translation_table;
1729   Lisp_Object safe_chars;
1730
1731   safe_chars = coding_safe_chars (coding);
1732
       /* Pick the translation table: none when character translation is
          disabled, else the one of CODING, else the standard one.  */
1733   if (NILP (Venable_character_translation))
1734     translation_table = Qnil;
1735   else
1736     {
1737       translation_table = coding->translation_table_for_decode;
1738       if (NILP (translation_table))
1739         translation_table = Vstandard_translation_table_for_decode;
1740     }
1741
1742   coding->result = CODING_FINISH_NORMAL;
1743
1744   while (1)
1745     {
1746       int c1, c2;
1747
1748       src_base = src;
1749       ONE_MORE_BYTE (c1);
1750
1751       /* We produce no character or one character.  */
           /* Cases that only change decoder state end with `continue';
              cases that have determined CHARSET and C1 of a character
              end with `break' and reach the code after this switch.  */
1752       switch (iso_code_class [c1])
1753         {
1754         case ISO_0x20_or_0x7F:
1755           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1756             {
1757               DECODE_COMPOSITION_RULE (c1);
1758               continue;
1759             }
1760           if (charset0 < 0 || CHARSET_CHARS (charset0) == 94)
1761             {
1762               /* This is SPACE or DEL.  */
1763               charset = CHARSET_ASCII;
1764               break;
1765             }
1766           /* This is a graphic character, we fall down ...  */
1767
1768         case ISO_graphic_plane_0:
1769           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1770             {
1771               DECODE_COMPOSITION_RULE (c1);
1772               continue;
1773             }
1774           charset = charset0;
1775           break;
1776
1777         case ISO_0xA0_or_0xFF:
1778           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1779               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1780             goto label_invalid_code;
1781           /* This is a graphic character, we fall down ... */
1782
1783         case ISO_graphic_plane_1:
1784           if (charset1 < 0)
1785             goto label_invalid_code;
1786           charset = charset1;
1787           break;
1788
1789         case ISO_control_0:
               /* A control code terminates any composition in progress.  */
1790           if (COMPOSING_P (coding))
1791             DECODE_COMPOSITION_END ('1');
1792
1793           /* All ISO2022 control characters in this class have the
1794              same representation in Emacs internal format.  */
               /* A bare LF is inconsistent when the expected end-of-line
                  format is CR or CRLF; stop if the caller asked for that
                  to be reported.  */
1795           if (c1 == '\n'
1796               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1797               && (coding->eol_type == CODING_EOL_CR
1798                   || coding->eol_type == CODING_EOL_CRLF))
1799             {
1800               coding->result = CODING_FINISH_INCONSISTENT_EOL;
1801               goto label_end_of_loop;
1802             }
1803           charset = CHARSET_ASCII;
1804           break;
1805
1806         case ISO_control_1:
1807           if (COMPOSING_P (coding))
1808             DECODE_COMPOSITION_END ('1');
1809           goto label_invalid_code;
1810
1811         case ISO_carriage_return:
1812           if (COMPOSING_P (coding))
1813             DECODE_COMPOSITION_END ('1');
1814
               /* Convert the end-of-line sequence: CR alone becomes LF for
                  CODING_EOL_CR, and CR LF becomes LF for CODING_EOL_CRLF.  */
1815           if (coding->eol_type == CODING_EOL_CR)
1816             c1 = '\n';
1817           else if (coding->eol_type == CODING_EOL_CRLF)
1818             {
1819               ONE_MORE_BYTE (c1);
1820               if (c1 != ISO_CODE_LF)
1821                 {
1822                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1823                     {
1824                       coding->result = CODING_FINISH_INCONSISTENT_EOL;
1825                       goto label_end_of_loop;
1826                     }
                       /* Not CR LF: push back the byte just read and
                          produce the CR itself.  */
1827                   src--;
1828                   c1 = '\r';
1829                 }
1830             }
1831           charset = CHARSET_ASCII;
1832           break;
1833
1834         case ISO_shift_out:
1835           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1836               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1837             goto label_invalid_code;
1838           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1839           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1840           continue;
1841
1842         case ISO_shift_in:
1843           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1844             goto label_invalid_code;
1845           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1846           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1847           continue;
1848
1849         case ISO_single_shift_2_7:
1850         case ISO_single_shift_2:
1851           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1852             goto label_invalid_code;
1853           /* SS2 is handled as an escape sequence of ESC 'N' */
1854           c1 = 'N';
1855           goto label_escape_sequence;
1856
1857         case ISO_single_shift_3:
1858           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1859             goto label_invalid_code;
1860           /* SS3 is handled as an escape sequence of ESC 'O' */
1861           c1 = 'O';
1862           goto label_escape_sequence;
1863
1864         case ISO_control_sequence_introducer:
1865           /* CSI is handled as an escape sequence of ESC '[' ...  */
1866           c1 = '[';
1867           goto label_escape_sequence;
1868
1869         case ISO_escape:
1870           ONE_MORE_BYTE (c1);
1871         label_escape_sequence:
1872           /* Escape sequences handled by Emacs are invocation,
1873              designation, direction specification, and character
1874              composition specification.  */
1875           switch (c1)
1876             {
1877             case '&':           /* revision of following character set */
                   /* The revision byte is only validated, then the ESC
                      sequence that must follow is processed as usual.  */
1878               ONE_MORE_BYTE (c1);
1879               if (!(c1 >= '@' && c1 <= '~'))
1880                 goto label_invalid_code;
1881               ONE_MORE_BYTE (c1);
1882               if (c1 != ISO_CODE_ESC)
1883                 goto label_invalid_code;
1884               ONE_MORE_BYTE (c1);
1885               goto label_escape_sequence;
1886
1887             case '$':           /* designation of 2-byte character set */
1888               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1889                 goto label_invalid_code;
1890               ONE_MORE_BYTE (c1);
1891               if (c1 >= '@' && c1 <= 'B')
1892                 {       /* designation of JISX0208.1978, GB2312.1980,
1893                            or JISX0208.1980 */
1894                   DECODE_DESIGNATION (0, 2, 94, c1);
1895                 }
1896               else if (c1 >= 0x28 && c1 <= 0x2B)
1897                 {       /* designation of DIMENSION2_CHARS94 character set */
1898                   ONE_MORE_BYTE (c2);
1899                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1900                 }
1901               else if (c1 >= 0x2C && c1 <= 0x2F)
1902                 {       /* designation of DIMENSION2_CHARS96 character set */
1903                   ONE_MORE_BYTE (c2);
1904                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1905                 }
1906               else
1907                 goto label_invalid_code;
1908               /* We must update these variables now.  */
1909               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1910               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1911               continue;
1912
1913             case 'n':           /* invocation of locking-shift-2 */
1914               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1915                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1916                 goto label_invalid_code;
1917               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1918               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1919               continue;
1920
1921             case 'o':           /* invocation of locking-shift-3 */
1922               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1923                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1924                 goto label_invalid_code;
1925               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
1926               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1927               continue;
1928
1929             case 'N':           /* invocation of single-shift-2 */
1930               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1931                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1932                 goto label_invalid_code;
                   /* The shift applies to the next character only: take
                      its charset from G2 without changing the invocation
                      state.  */
1933               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
1934               ONE_MORE_BYTE (c1);
1935               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1936                 goto label_invalid_code;
1937               break;
1938
1939             case 'O':           /* invocation of single-shift-3 */
1940               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1941                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1942                 goto label_invalid_code;
                   /* Same as single-shift-2 above, but using G3.  */
1943               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
1944               ONE_MORE_BYTE (c1);
1945               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1946                 goto label_invalid_code;
1947               break;
1948
1949             case '0': case '2': case '3': case '4': /* start composition */
1950               DECODE_COMPOSITION_START (c1);
1951               continue;
1952
1953             case '1':           /* end composition */
1954               DECODE_COMPOSITION_END (c1);
1955               continue;
1956
1957             case '[':           /* specification of direction */
1958               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
1959                 goto label_invalid_code;
1960               /* For the moment, nested direction is not supported.
1961                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
1962                  left-to-right, and nonzero means right-to-left.  */
1963               ONE_MORE_BYTE (c1);
1964               switch (c1)
1965                 {
1966                 case ']':       /* end of the current direction */
1967                   coding->mode &= ~CODING_MODE_DIRECTION;
                       /* NOTE(review): there is no `break' here, so control
                          falls through into the '0'/'1' case below, which
                          reads one more byte and requires it to be ']'.
                          Confirm that this fallthrough is intended.  */
1968
1969                 case '0':       /* end of the current direction */
1970                 case '1':       /* start of left-to-right direction */
1971                   ONE_MORE_BYTE (c1);
1972                   if (c1 == ']')
1973                     coding->mode &= ~CODING_MODE_DIRECTION;
1974                   else
1975                     goto label_invalid_code;
1976                   break;
1977
1978                 case '2':       /* start of right-to-left direction */
1979                   ONE_MORE_BYTE (c1);
1980                   if (c1 == ']')
1981                     coding->mode |= CODING_MODE_DIRECTION;
1982                   else
1983                     goto label_invalid_code;
1984                   break;
1985
1986                 default:
1987                   goto label_invalid_code;
1988                 }
1989               continue;
1990
1991             default:
1992               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1993                 goto label_invalid_code;
1994               if (c1 >= 0x28 && c1 <= 0x2B)
1995                 {       /* designation of DIMENSION1_CHARS94 character set */
1996                   ONE_MORE_BYTE (c2);
1997                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
1998                 }
1999               else if (c1 >= 0x2C && c1 <= 0x2F)
2000                 {       /* designation of DIMENSION1_CHARS96 character set */
2001                   ONE_MORE_BYTE (c2);
2002                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
2003                 }
2004               else
2005                 goto label_invalid_code;
2006               /* We must update these variables now.  */
2007               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
2008               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
2009               continue;
2010             }
2011         }
2012
2013       /* Now we know CHARSET and 1st position code C1 of a character.
2014          Produce a multibyte sequence for that character while getting
2015          2nd position code C2 if necessary.  */
2016       if (CHARSET_DIMENSION (charset) == 2)
2017         {
2018           ONE_MORE_BYTE (c2);
               /* C2 must lie in the same half (7-bit or 8-bit) as C1.  */
2019           if (c1 < 0x80 ? c2 < 0x20 || c2 >= 0x80 : c2 < 0xA0)
2020             /* C2 is not in a valid range.  */
2021             goto label_invalid_code;
2022         }
           /* NOTE(review): when CHARSET is not 2-dimensional, C2 has not
              been assigned on the paths that reach this point; presumably
              it is ignored for such charsets -- confirm.  */
2023       c = DECODE_ISO_CHARACTER (charset, c1, c2);
2024       EMIT_CHAR (c);
2025       continue;
2026
2027     label_invalid_code:
           /* Count the error, close any composition in progress, rewind to
              the start of the offending sequence and emit only its first
              byte unchanged.  The remaining bytes are examined again by
              the following iterations.  */
2028       coding->errors++;
2029       if (COMPOSING_P (coding))
2030         DECODE_COMPOSITION_END ('1');
2031       src = src_base;
2032       c = *src++;
2033       EMIT_CHAR (c);
2034     }
2035
2036  label_end_of_loop:
       /* Report progress relative to SRC_BASE rather than SRC, so that
          bytes read by an iteration that did not complete are not counted
          as consumed.  */
2037   coding->consumed = coding->consumed_char = src_base - source;
2038   coding->produced = dst - destination;
2039   return;
2040 }
2041
2042
2043 /* ISO2022 encoding stuff.  */
2044
2045 /*
2046    It is not enough to say just "ISO2022" on encoding, we have to
2047    specify more details.  In Emacs, each ISO2022 coding system
2048    variant has the following specifications:
2049         1. Initial designation to G0 through G3.
2050         2. Allows short-form designation?
2051         3. ASCII should be designated to G0 before control characters?
2052         4. ASCII should be designated to G0 at end of line?
2053         5. 7-bit environment or 8-bit environment?
2054         6. Use locking-shift?
2055         7. Use single-shift?
2056    And the following two are only for Japanese:
2057         8. Use ASCII in place of JISX0201-1976-Roman?
2058         9. Use JISX0208-1983 in place of JISX0208-1978?
2059    These specifications are encoded in `coding->flags' as flag bits
2060    defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
2061    details.
2062 */
2063
/* Emit at DST the escape sequence that designates CHARSET to graphic
   register REG, advance DST past it, and record the new designation
   in CODING.

   When CODING gives CHARSET a revision number below 255, the sequence
   ESC & <'@' + revision> is emitted first.  The short form (no
   intermediate character after `$') is used only for a multi-byte
   94-character CHARSET going to register 0 whose <final-char> is '@',
   'A', or 'B', and only if CODING has CODING_FLAG_ISO_SHORT_FORM
   set.  `dst' must be in scope at the point of use.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset);        \
    int revision = CODING_SPEC_ISO_REVISION_NUMBER (coding, charset);   \
    /* Intermediate character selecting REG; 94-character and           \
       96-character sets use different tables.  */                      \
    unsigned char intermediate_char                                     \
      = (unsigned char) ((CHARSET_CHARS (charset) == 94                 \
                          ? "()*+" : ",-./")[reg]);                     \
                                                                        \
    if (revision < 255)                                                 \
      {                                                                 \
        *dst++ = ISO_CODE_ESC;                                          \
        *dst++ = '&';                                                   \
        *dst++ = '@' + revision;                                        \
      }                                                                 \
    *dst++ = ISO_CODE_ESC;                                              \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      {                                                                 \
        *dst++ = '$';                                                   \
        /* Skip the intermediate character only in the short-form       \
           case described above.  */                                    \
        if (CHARSET_CHARS (charset) != 94                               \
            || ! (coding->flags & CODING_FLAG_ISO_SHORT_FORM)           \
            || reg != 0                                                 \
            || final_char < '@' || final_char > 'B')                    \
          *dst++ = intermediate_char;                                   \
      }                                                                 \
    else                                                                \
      *dst++ = intermediate_char;                                       \
    *dst++ = final_char;                                                \
    CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;                \
  } while (0)
2106
2107 /* The following two macros produce codes (control character or escape
2108    sequence) for ISO2022 single-shift functions (single-shift-2 and
2109    single-shift-3).  */
2110
/* Emit single-shift-2 at DST: the two bytes ESC N when CODING has
   CODING_FLAG_ISO_SEVEN_BITS set, otherwise the single byte
   ISO_CODE_SS2.  Then mark CODING as single-shifting so that the next
   character is produced in the shifted form.  `coding' and `dst' must
   be in scope.  */
#define ENCODE_SINGLE_SHIFT_2                                   \
  do {                                                          \
    if ((coding->flags & CODING_FLAG_ISO_SEVEN_BITS) == 0)      \
      *dst++ = ISO_CODE_SS2;                                    \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'N';                                           \
      }                                                         \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)
2119
/* Emit single-shift-3 at DST: the two bytes ESC O when CODING has
   CODING_FLAG_ISO_SEVEN_BITS set, otherwise the single byte
   ISO_CODE_SS3.  Then mark CODING as single-shifting so that the next
   character is produced in the shifted form.  `coding' and `dst' must
   be in scope.  */
#define ENCODE_SINGLE_SHIFT_3                                   \
  do {                                                          \
    if ((coding->flags & CODING_FLAG_ISO_SEVEN_BITS) == 0)      \
      *dst++ = ISO_CODE_SS3;                                    \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'O';                                           \
      }                                                         \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)
2128
2129 /* The following four macros produce codes (control character or
2130    escape sequence) for ISO2022 locking-shift functions (shift-in,
2131    shift-out, locking-shift-2, and locking-shift-3).  */
2132
/* Emit shift-in (ISO_CODE_SI) at DST and record in CODING that graphic
   register 0 is now invoked to graphic plane 0.  */
#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;         \
    *dst++ = ISO_CODE_SI;                               \
  } while (0)
2138
/* Emit shift-out (ISO_CODE_SO) at DST and record in CODING that graphic
   register 1 is now invoked to graphic plane 0.  */
#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;         \
    *dst++ = ISO_CODE_SO;                               \
  } while (0)
2144
/* Emit locking-shift-2 (ESC n) at DST and record in CODING that
   graphic register 2 is now invoked to graphic plane 0.  */
#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    *dst++ = ISO_CODE_ESC;                              \
    *dst++ = 'n';                                       \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;         \
  } while (0)
2150
/* Emit locking-shift-3 (ESC o) at DST and record in CODING that
   graphic register 3 is now invoked to graphic plane 0.  */
#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    *dst++ = ISO_CODE_ESC;                              \
    *dst++ = 'o';                                       \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;         \
  } while (0)
2156
/* Emit at DST the single byte for a DIMENSION1 character of CHARSET
   whose position-code is C1.

   The macro loops until the byte has been written.  If a single-shift
   is pending, the byte is written in the shifted form and the pending
   flag is cleared.  If CHARSET is already invoked to graphic plane 0
   or 1, C1 is written with the high bit cleared or set respectively.
   Otherwise encode_invocation_designation is called to produce the
   missing designation/invocation codes and the checks are retried.
   `coding' and `dst' must be in scope.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        *dst++ = ((coding->flags & CODING_FLAG_ISO_SEVEN_BITS)          \
                  ? c1 & 0x7F : c1 | 0x80);                             \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))           \
      {                                                                 \
        *dst++ = c1 & 0x7F;                                             \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))           \
      {                                                                 \
        *dst++ = c1 | 0x80;                                             \
        break;                                                          \
      }                                                                 \
    /* CHARSET is not reachable through either graphic plane yet.       \
       Designate and/or invoke it, then go round again to emit the      \
       character itself.  */                                            \
    dst = encode_invocation_designation (charset, coding, dst);         \
  } while (1)
2189
/* Emit at DST the two bytes for a DIMENSION2 character of CHARSET
   whose position-codes are C1 and C2.

   The macro loops until the bytes have been written.  If a
   single-shift is pending, both bytes are written in the shifted form
   and the pending flag is cleared.  If CHARSET is already invoked to
   graphic plane 0 or 1, both bytes are written with the high bit
   cleared or set respectively.  Otherwise
   encode_invocation_designation is called to produce the missing
   designation/invocation codes and the checks are retried.  `coding'
   and `dst' must be in scope.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
          {                                                             \
            *dst++ = c1 & 0x7F;                                         \
            *dst++ = c2 & 0x7F;                                         \
          }                                                             \
        else                                                            \
          {                                                             \
            *dst++ = c1 | 0x80;                                         \
            *dst++ = c2 | 0x80;                                         \
          }                                                             \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))           \
      {                                                                 \
        *dst++ = c1 & 0x7F;                                             \
        *dst++ = c2 & 0x7F;                                             \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))           \
      {                                                                 \
        *dst++ = c1 | 0x80;                                             \
        *dst++ = c2 | 0x80;                                             \
        break;                                                          \
      }                                                                 \
    /* CHARSET is not reachable through either graphic plane yet.       \
       Designate and/or invoke it, then go round again to emit the      \
       character itself.  */                                            \
    dst = encode_invocation_designation (charset, coding, dst);         \
  } while (1)
2222
/* Encode character C at DST.

   C is split into its charset and position-codes.  If the charset is
   not defined, the position-codes are copied to DST as they are (the
   second one only when it is non-negative).  Otherwise the character
   is handed to ENCODE_ISO_CHARACTER_DIMENSION1 or
   ENCODE_ISO_CHARACTER_DIMENSION2 according to the charset's
   dimension, after these substitutions:
     - ASCII becomes charset_latin_jisx0201 when CODING has
       CODING_FLAG_ISO_USE_ROMAN set;
     - charset_jisx0208 becomes charset_jisx0208_1978 when CODING has
       CODING_FLAG_ISO_USE_OLDJIS set.
   `coding' and `dst' must be in scope.  */
#define ENCODE_ISO_CHARACTER(c)                                         \
  do {                                                                  \
    int charset, c1, c2;                                                \
                                                                        \
    SPLIT_CHAR (c, charset, c1, c2);                                    \
    if (! CHARSET_DEFINED_P (charset))                                  \
      {                                                                 \
        *dst++ = c1;                                                    \
        if (c2 >= 0)                                                    \
          *dst++ = c2;                                                  \
      }                                                                 \
    else if (CHARSET_DIMENSION (charset) == 1)                          \
      {                                                                 \
        if (charset == CHARSET_ASCII                                    \
            && (coding->flags & CODING_FLAG_ISO_USE_ROMAN))             \
          charset = charset_latin_jisx0201;                             \
        ENCODE_ISO_CHARACTER_DIMENSION1 (charset, c1);                  \
      }                                                                 \
    else                                                                \
      {                                                                 \
        if (charset == charset_jisx0208                                 \
            && (coding->flags & CODING_FLAG_ISO_USE_OLDJIS))            \
          charset = charset_jisx0208_1978;                              \
        ENCODE_ISO_CHARACTER_DIMENSION2 (charset, c1, c2);              \
      }                                                                 \
  } while (0)
2252
2253
/* Do not encode character C itself.  Encode the substitution
   character CODING_INHIBIT_CHARACTER_SUBSTITUTION in its place: once
   normally, twice when the width of C's charset is greater than 1.  */

#define ENCODE_UNSAFE_CHARACTER(c)                                      \
  do {                                                                  \
    int n_substitutes = CHARSET_WIDTH (CHAR_CHARSET (c)) > 1 ? 2 : 1;   \
                                                                        \
    while (n_substitutes-- > 0)                                         \
      ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION);     \
  } while (0)
2262
2263
2264 /* Produce designation and invocation codes at a place pointed by DST
2265    to use CHARSET.  The element `spec.iso2022' of *CODING is updated.
2266    Return new DST.  */
2267
2268 unsigned char *
2269 encode_invocation_designation (charset, coding, dst)
2270      int charset;
2271      struct coding_system *coding;
2272      unsigned char *dst;
2273 {
2274   int reg;                      /* graphic register number */
2275
2276   /* At first, check designations.  */
2277   for (reg = 0; reg < 4; reg++)
2278     if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
2279       break;
2280
2281   if (reg >= 4)
2282     {
2283       /* CHARSET is not yet designated to any graphic registers.  */
2284       /* At first check the requested designation.  */
2285       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
2286       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
2287         /* Since CHARSET requests no special designation, designate it
2288            to graphic register 0.  */
2289         reg = 0;
2290
2291       ENCODE_DESIGNATION (charset, reg, coding);
2292     }
2293
2294   if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
2295       && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
2296     {
2297       /* Since the graphic register REG is not invoked to any graphic
2298          planes, invoke it to graphic plane 0.  */
2299       switch (reg)
2300         {
2301         case 0:                 /* graphic register 0 */
2302           ENCODE_SHIFT_IN;
2303           break;
2304
2305         case 1:                 /* graphic register 1 */
2306           ENCODE_SHIFT_OUT;
2307           break;
2308
2309         case 2:                 /* graphic register 2 */
2310           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2311             ENCODE_SINGLE_SHIFT_2;
2312           else
2313             ENCODE_LOCKING_SHIFT_2;
2314           break;
2315
2316         case 3:                 /* graphic register 3 */
2317           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2318             ENCODE_SINGLE_SHIFT_3;
2319           else
2320             ENCODE_LOCKING_SHIFT_3;
2321           break;
2322         }
2323     }
2324
2325   return dst;
2326 }
2327
/* Write at DST the two bytes that encode composition rule RULE and
   advance DST past them.  RULE is decoded into a global and a new
   reference point; the first byte is 32 + 81 + <global reference> and
   the second is 32 + <new reference>.  */

#define ENCODE_COMPOSITION_RULE(rule)                   \
  do {                                                  \
    int gref, nref;                                     \
                                                        \
    COMPOSITION_DECODE_RULE (rule, gref, nref);         \
    dst[0] = 32 + 81 + gref;                            \
    dst[1] = 32 + nref;                                 \
    dst += 2;                                           \
  } while (0)
2337
/* Write at DST the escape sequence that opens a composition: ESC 0
   for a relative composition, ESC 3 for one with alternate chars, and
   ESC 4 for any other method.  DATA points to the integers describing
   the composition (see the comment in coding.h for the format);
   DATA[3] is the composition method and is stored in
   CODING->composing.  For the non-relative methods the component
   cursor CODING->cmp_data_index is positioned 4 elements past
   CODING->cmp_data_start and CODING->composition_rule_follows is
   cleared.  */

#define ENCODE_COMPOSITION_START(coding, data)                          \
  do {                                                                  \
    coding->composing = data[3];                                        \
    *dst++ = ISO_CODE_ESC;                                              \
    if (coding->composing != COMPOSITION_RELATIVE)                      \
      {                                                                 \
        if (coding->composing == COMPOSITION_WITH_ALTCHARS)             \
          *dst++ = '3';                                                 \
        else                                                            \
          *dst++ = '4';                                                 \
        coding->composition_rule_follows = 0;                           \
        coding->cmp_data_index = coding->cmp_data_start + 4;            \
      }                                                                 \
    else                                                                \
      *dst++ = '0';                                                     \
  } while (0)
2357
/* Write at DST the sequence ESC 1 that closes the current composition
   and mark CODING as no longer composing.  CODING->cmp_data_start is
   advanced by DATA[0] to step over the composition just finished; if
   that exhausts the current composition data block and another block
   is chained after it, CODING moves on to the start of that block.  */

#define ENCODE_COMPOSITION_END(coding, data)                    \
  do {                                                          \
    coding->composing = COMPOSITION_NO;                         \
    coding->cmp_data_start += data[0];                          \
    *dst++ = ISO_CODE_ESC;                                      \
    *dst++ = '1';                                               \
    if (coding->cmp_data->next                                  \
        && coding->cmp_data_start == coding->cmp_data->used)    \
      {                                                         \
        coding->cmp_data_start = 0;                             \
        coding->cmp_data = coding->cmp_data->next;              \
      }                                                         \
  } while (0)
2373
/* Write the composition start sequence ESC 0 at DST and switch CODING
   to COMPOSITION_RELATIVE.  This does not open a new composition: it
   marks the point where the components (alternate chars and
   composition rules) just produced end and the composition's actual
   text, still to be read from SRC, begins.  */

#define ENCODE_COMPOSITION_FAKE_START(coding)   \
  do {                                          \
    coding->composing = COMPOSITION_RELATIVE;   \
    dst[0] = ISO_CODE_ESC;                      \
    dst[1] = '0';                               \
    dst += 2;                                   \
  } while (0)
2385
/* The following three macros produce codes for indicating direction
   of text.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER writes the control sequence
   introducer at DST: the two bytes ESC [ when CODING has
   CODING_FLAG_ISO_SEVEN_BITS set, otherwise the single byte
   ISO_CODE_CSI.  ENCODE_DIRECTION_R2L and ENCODE_DIRECTION_L2R write
   the introducer followed by `2 ]' and `0 ]' respectively.

   Each macro takes no argument, expands to a single statement, and
   expects `coding' and `dst' to be in scope at the point of use.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
  do {                                                  \
    /* Test the flag bit only; other bits of the flag   \
       word may be set as well.  */                     \
    if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
      *dst++ = ISO_CODE_ESC, *dst++ = '[';              \
    else                                                \
      *dst++ = ISO_CODE_CSI;                            \
  } while (0)

#define ENCODE_DIRECTION_R2L                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '2';                               \
    *dst++ = ']';                               \
  } while (0)

#define ENCODE_DIRECTION_L2R                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '0';                               \
    *dst++ = ']';                               \
  } while (0)
2401
/* Write at DST the codes that bring the graphic planes and registers
   back to their initial state: a shift-in if graphic plane 0 is not
   currently invoking register 0, then a designation for every
   register whose initial designation is non-negative and differs from
   its current one.  `coding' and `dst' must be in scope.  */
#define ENCODE_RESET_PLANE_AND_REGISTER                                 \
  do {                                                                  \
    int reg = 0;                                                        \
                                                                        \
    if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                    \
      ENCODE_SHIFT_IN;                                                  \
    while (reg < 4)                                                     \
      {                                                                 \
        if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0      \
            && (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg)       \
                != CODING_SPEC_ISO_DESIGNATION (coding, reg)))          \
          {                                                             \
            ENCODE_DESIGNATION                                          \
              (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg),       \
               reg, coding);                                            \
          }                                                             \
        reg++;                                                          \
      }                                                                 \
  } while (0)
2416
2417 /* Produce designation sequences of charsets in the line started from
2418    SRC to a place pointed by DST, and return updated DST.
2419
2420    If the current block ends before any end-of-line, we may fail to
2421    find all the necessary designations.  */
2422
2423 static unsigned char *
2424 encode_designation_at_bol (coding, translation_table, src, src_end, dst)
2425      struct coding_system *coding;
2426      Lisp_Object translation_table;
2427      unsigned char *src, *src_end, *dst;
2428 {
2429   int charset, c, found = 0, reg;
2430   /* Table of charsets to be designated to each graphic register.  */
2431   int r[4];
2432
2433   for (reg = 0; reg < 4; reg++)
2434     r[reg] = -1;
2435
2436   while (found < 4)
2437     {
2438       ONE_MORE_CHAR (c);
2439       if (c == '\n')
2440         break;
2441
2442       charset = CHAR_CHARSET (c);
2443       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
2444       if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
2445         {
2446           found++;
2447           r[reg] = charset;
2448         }
2449     }
2450
2451  label_end_of_loop:
2452   if (found)
2453     {
2454       for (reg = 0; reg < 4; reg++)
2455         if (r[reg] >= 0
2456             && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
2457           ENCODE_DESIGNATION (r[reg], reg, coding);
2458     }
2459
2460   return dst;
2461 }
2462
2463 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
2464
2465 static void
2466 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
2467      struct coding_system *coding;
2468      unsigned char *source, *destination;
2469      int src_bytes, dst_bytes;
2470 {
2471   unsigned char *src = source;
2472   unsigned char *src_end = source + src_bytes;
2473   unsigned char *dst = destination;
2474   unsigned char *dst_end = destination + dst_bytes;
2475   /* Since the maximum bytes produced by each loop is 20, we subtract 19
2476      from DST_END to assure overflow checking is necessary only at the
2477      head of loop.  */
2478   unsigned char *adjusted_dst_end = dst_end - 19;
2479   /* SRC_BASE remembers the start position in source in each loop.
2480      The loop will be exited when there's not enough source text to
2481      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2482      there's not enough destination area to produce encoded codes
2483      (within macro EMIT_BYTES).  */
2484   unsigned char *src_base;
2485   int c;
2486   Lisp_Object translation_table;
2487   Lisp_Object safe_chars;
2488
2489   safe_chars = coding_safe_chars (coding);
2490
2491   if (NILP (Venable_character_translation))
2492     translation_table = Qnil;
2493   else
2494     {
2495       translation_table = coding->translation_table_for_encode;
2496       if (NILP (translation_table))
2497         translation_table = Vstandard_translation_table_for_encode;
2498     }
2499
2500   coding->consumed_char = 0;
2501   coding->errors = 0;
2502   while (1)
2503     {
2504       src_base = src;
2505
2506       if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19)))
2507         {
2508           coding->result = CODING_FINISH_INSUFFICIENT_DST;
2509           break;
2510         }
2511
2512       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
2513           && CODING_SPEC_ISO_BOL (coding))
2514         {
2515           /* We have to produce designation sequences if any now.  */
2516           dst = encode_designation_at_bol (coding, translation_table,
2517                                            src, src_end, dst);
2518           CODING_SPEC_ISO_BOL (coding) = 0;
2519         }
2520
2521       /* Check composition start and end.  */
2522       if (coding->composing != COMPOSITION_DISABLED
2523           && coding->cmp_data_start < coding->cmp_data->used)
2524         {
2525           struct composition_data *cmp_data = coding->cmp_data;
2526           int *data = cmp_data->data + coding->cmp_data_start;
2527           int this_pos = cmp_data->char_offset + coding->consumed_char;
2528
2529           if (coding->composing == COMPOSITION_RELATIVE)
2530             {
2531               if (this_pos == data[2])
2532                 {
2533                   ENCODE_COMPOSITION_END (coding, data);
2534                   cmp_data = coding->cmp_data;
2535                   data = cmp_data->data + coding->cmp_data_start;
2536                 }
2537             }
2538           else if (COMPOSING_P (coding))
2539             {
2540               /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHAR  */
2541               if (coding->cmp_data_index == coding->cmp_data_start + data[0])
2542                 /* We have consumed components of the composition.
2543                    What follows in SRC is the composition's base
2544                    text.  */
2545                 ENCODE_COMPOSITION_FAKE_START (coding);
2546               else
2547                 {
2548                   int c = cmp_data->data[coding->cmp_data_index++];
2549                   if (coding->composition_rule_follows)
2550                     {
2551                       ENCODE_COMPOSITION_RULE (c);
2552                       coding->composition_rule_follows = 0;
2553                     }
2554                   else
2555                     {
2556                       if (coding->flags & CODING_FLAG_ISO_SAFE
2557                           && ! CODING_SAFE_CHAR_P (safe_chars, c))
2558                         ENCODE_UNSAFE_CHARACTER (c);
2559                       else
2560                         ENCODE_ISO_CHARACTER (c);
2561                       if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
2562                         coding->composition_rule_follows = 1;
2563                     }
2564                   continue;
2565                 }
2566             }
2567           if (!COMPOSING_P (coding))
2568             {
2569               if (this_pos == data[1])
2570                 {
2571                   ENCODE_COMPOSITION_START (coding, data);
2572                   continue;
2573                 }
2574             }
2575         }
2576
2577       ONE_MORE_CHAR (c);
2578
2579       /* Now encode the character C.  */
2580       if (c < 0x20 || c == 0x7F)
2581         {
2582           if (c == '\r')
2583             {
2584               if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2585                 {
2586                   if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2587                     ENCODE_RESET_PLANE_AND_REGISTER;
2588                   *dst++ = c;
2589                   continue;
2590                 }
2591               /* fall down to treat '\r' as '\n' ...  */
2592               c = '\n';
2593             }
2594           if (c == '\n')
2595             {
2596               if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
2597                 ENCODE_RESET_PLANE_AND_REGISTER;
2598               if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
2599                 bcopy (coding->spec.iso2022.initial_designation,
2600                        coding->spec.iso2022.current_designation,
2601                        sizeof coding->spec.iso2022.initial_designation);
2602               if (coding->eol_type == CODING_EOL_LF
2603                   || coding->eol_type == CODING_EOL_UNDECIDED)
2604                 *dst++ = ISO_CODE_LF;
2605               else if (coding->eol_type == CODING_EOL_CRLF)
2606                 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
2607               else
2608                 *dst++ = ISO_CODE_CR;
2609               CODING_SPEC_ISO_BOL (coding) = 1;
2610             }
2611           else
2612             {
2613               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2614                 ENCODE_RESET_PLANE_AND_REGISTER;
2615               *dst++ = c;
2616             }
2617         }
2618       else if (ASCII_BYTE_P (c))
2619         ENCODE_ISO_CHARACTER (c);
2620       else if (SINGLE_BYTE_CHAR_P (c))
2621         {
2622           *dst++ = c;
2623           coding->errors++;
2624         }
2625       else if (coding->flags & CODING_FLAG_ISO_SAFE
2626                && ! CODING_SAFE_CHAR_P (safe_chars, c))
2627         ENCODE_UNSAFE_CHARACTER (c);
2628       else
2629         ENCODE_ISO_CHARACTER (c);
2630
2631       coding->consumed_char++;
2632     }
2633
2634  label_end_of_loop:
2635   coding->consumed = src_base - source;
2636   coding->produced = coding->produced_char = dst - destination;
2637 }
2638
2639 \f
2640 /*** 4. SJIS and BIG5 handlers ***/
2641
2642 /* Although SJIS and BIG5 are not ISO coding systems, they are used
2643    quite widely.  So, for the moment, Emacs supports them in the bare
2644    C code.  But, in the future, they may be supported only by CCL.  */
2645
2646 /* SJIS is a coding system encoding three character sets: ASCII, right
2647    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
2648    as is.  A character of charset katakana-jisx0201 is encoded by
2649    "position-code + 0x80".  A character of charset japanese-jisx0208
2650    is encoded in 2-byte but two position-codes are divided and shifted
2651    so that it fits in the range below.
2652
2653    --- CODE RANGE of SJIS ---
2654    (character set)      (range)
2655    ASCII                0x00 .. 0x7F
2656    KATAKANA-JISX0201    0xA1 .. 0xDF
2657    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
2658             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
2659    -------------------------------
2660
2661 */
2662
2663 /* BIG5 is a coding system encoding two character sets: ASCII and
2664    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
2665    character set and is encoded in two bytes.
2666
2667    --- CODE RANGE of BIG5 ---
2668    (character set)      (range)
2669    ASCII                0x00 .. 0x7F
2670    Big5 (1st byte)      0xA1 .. 0xFE
2671         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
2672    --------------------------
2673
2674    Since the number of characters in Big5 is larger than maximum
2675    characters in Emacs' charset (96x96), it can't be handled as one
2676    charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
2677    and `charset-big5-2'.  Both are DIMENSION2 and CHARS94.  The former
2678    contains frequently used characters and the latter contains less
2679    frequently used characters.  */
2680
2681 /* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
2682    are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
   C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
2684    format.  CHARSET is `charset_big5_1' or `charset_big5_2'.  */
2685
/* Number of Big5 characters which have the same code in 1st byte:
   the 2nd byte takes 0x7F - 0x40 values in its low range plus
   0xFF - 0xA1 values in its high range.  */
#define BIG5_SAME_ROW ((0x7F - 0x40) + (0xFF - 0xA1))
2688
/* Decode the BIG5 byte pair B1, B2 into CHARSET (set to
   charset_big5_1 when B1 is below 0xC9, else charset_big5_2) and the
   position-codes C1 and C2.

   The byte pair is first turned into a linear index, BIG5_SAME_ROW
   entries per 1st byte; for charset_big5_2 the index is rebased to
   the first row of that charset (1st byte 0xC9); the index is then
   split into rows of 0xFF - 0xA1 position-codes starting at 0x21.

   B1 and B2 are read before C1 and C2 are written, so a caller may
   pass the same variables for both pairs.  Every argument is
   parenthesized in the expansion so that expression arguments group
   as written; B1 and B2 are evaluated more than once.  */
#define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = (((b1) - 0xA1) * BIG5_SAME_ROW                                  \
         + (b2) - ((b2) < 0x7F ? 0x40 : 0x62));                         \
    if ((b1) < 0xC9)                                                    \
      (charset) = charset_big5_1;                                       \
    else                                                                \
      {                                                                 \
        (charset) = charset_big5_2;                                     \
        temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                          \
      }                                                                 \
    (c1) = temp / (0xFF - 0xA1) + 0x21;                                 \
    (c2) = temp % (0xFF - 0xA1) + 0x21;                                 \
  } while (0)
2703
/* Encode the character of CHARSET (charset_big5_1 or charset_big5_2)
   with position-codes C1 and C2 into the BIG5 byte pair B1, B2.

   The position-codes are turned into a linear index, 0xFF - 0xA1
   entries per row; for charset_big5_2 the index is moved past the
   rows of charset_big5_1; the index is then split into a 1st byte and
   a 2nd-byte offset, and the offset is mapped into the 2nd byte's low
   range (from 0x40) or high range (from 0xA1).

   C1 and C2 are read before B1 and B2 are written, so a caller may
   pass the same variables for both pairs.  Every argument is
   parenthesized in the expansion so that expression arguments group
   as written; B2 is evaluated more than once.  */
#define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
  do {                                                                  \
    unsigned int temp = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);  \
    if ((charset) == charset_big5_2)                                    \
      temp += BIG5_SAME_ROW * (0xC9 - 0xA1);                            \
    (b1) = temp / BIG5_SAME_ROW + 0xA1;                                 \
    (b2) = temp % BIG5_SAME_ROW;                                        \
    (b2) += (b2) < 0x3F ? 0x40 : 0x62;                                  \
  } while (0)
2713
2714 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2715    Check if a text is encoded in SJIS.  If it is, return
2716    CODING_CATEGORY_MASK_SJIS, else return 0.  */
2717
2718 static int
2719 detect_coding_sjis (src, src_end, multibytep)
2720      unsigned char *src, *src_end;
2721      int multibytep;
2722 {
2723   int c;
2724   /* Dummy for ONE_MORE_BYTE.  */
2725   struct coding_system dummy_coding;
2726   struct coding_system *coding = &dummy_coding;
2727
2728   while (1)
2729     {
2730       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2731       if (c < 0x80)
2732         continue;
2733       if (c == 0x80 || c == 0xA0 || c > 0xEF)
2734         return 0;
2735       if (c <= 0x9F || c >= 0xE0)
2736         {
2737           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2738           if (c < 0x40 || c == 0x7F || c > 0xFC)
2739             return 0;
2740         }
2741     }
2742  label_end_of_loop:
2743   return CODING_CATEGORY_MASK_SJIS;
2744 }
2745
2746 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2747    Check if a text is encoded in BIG5.  If it is, return
2748    CODING_CATEGORY_MASK_BIG5, else return 0.  */
2749
2750 static int
2751 detect_coding_big5 (src, src_end, multibytep)
2752      unsigned char *src, *src_end;
2753      int multibytep;
2754 {
2755   int c;
2756   /* Dummy for ONE_MORE_BYTE.  */
2757   struct coding_system dummy_coding;
2758   struct coding_system *coding = &dummy_coding;
2759
2760   while (1)
2761     {
2762       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2763       if (c < 0x80)
2764         continue;
2765       if (c < 0xA1 || c > 0xFE)
2766         return 0;
2767       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2768       if (c < 0x40 || (c > 0x7F && c < 0xA1) || c > 0xFE)
2769         return 0;
2770     }
2771  label_end_of_loop:
2772   return CODING_CATEGORY_MASK_BIG5;
2773 }
2774
2775 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2776    Check if a text is encoded in UTF-8.  If it is, return
2777    CODING_CATEGORY_MASK_UTF_8, else return 0.  */
2778
2779 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
2780 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
2781 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
2782 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
2783 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
2784 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
2785 #define UTF_8_6_OCTET_LEADING_P(c) (((c) & 0xFE) == 0xFC)
2786
2787 static int
2788 detect_coding_utf_8 (src, src_end, multibytep)
2789      unsigned char *src, *src_end;
2790      int multibytep;
2791 {
2792   unsigned char c;
2793   int seq_maybe_bytes;
2794   /* Dummy for ONE_MORE_BYTE.  */
2795   struct coding_system dummy_coding;
2796   struct coding_system *coding = &dummy_coding;
2797
2798   while (1)
2799     {
2800       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2801       if (UTF_8_1_OCTET_P (c))
2802         continue;
2803       else if (UTF_8_2_OCTET_LEADING_P (c))
2804         seq_maybe_bytes = 1;
2805       else if (UTF_8_3_OCTET_LEADING_P (c))
2806         seq_maybe_bytes = 2;
2807       else if (UTF_8_4_OCTET_LEADING_P (c))
2808         seq_maybe_bytes = 3;
2809       else if (UTF_8_5_OCTET_LEADING_P (c))
2810         seq_maybe_bytes = 4;
2811       else if (UTF_8_6_OCTET_LEADING_P (c))
2812         seq_maybe_bytes = 5;
2813       else
2814         return 0;
2815
2816       do
2817         {
2818           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2819           if (!UTF_8_EXTRA_OCTET_P (c))
2820             return 0;
2821           seq_maybe_bytes--;
2822         }
2823       while (seq_maybe_bytes > 0);
2824     }
2825
2826  label_end_of_loop:
2827   return CODING_CATEGORY_MASK_UTF_8;
2828 }
2829
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
   Check if a text is encoded in UTF-16 by looking at its first two
   bytes.  If they are 0xFE 0xFF, return
   CODING_CATEGORY_MASK_UTF_16_BE; if they are 0xFF 0xFE, return
   CODING_CATEGORY_MASK_UTF_16_LE; else return 0.  */
2835
/* Nonzero if the 16-bit value VAL is 0xFFFE or 0xFFFF.  */
#define UTF_16_INVALID_P(val)   \
  (((val) == 0xFFFE)            \
   || ((val) == 0xFFFF))

/* Nonzero if VAL is a high surrogate, i.e. in the range
   0xD800..0xDBFF.  The six high-order bits are compared, so the mask
   is 0xFC00; masking with 0xD800 would also accept low surrogates
   and values such as 0xF800.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* Nonzero if VAL is a low surrogate, i.e. in the range
   0xDC00..0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)
2845
2846 static int
2847 detect_coding_utf_16 (src, src_end, multibytep)
2848      unsigned char *src, *src_end;
2849      int multibytep;
2850 {
2851   unsigned char c1, c2;
2852   /* Dummy for TWO_MORE_BYTES.  */
2853   struct coding_system dummy_coding;
2854   struct coding_system *coding = &dummy_coding;
2855
2856   ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
2857   ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep);
2858
2859   if ((c1 == 0xFF) && (c2 == 0xFE))
2860     return CODING_CATEGORY_MASK_UTF_16_LE;
2861   else if ((c1 == 0xFE) && (c2 == 0xFF))
2862     return CODING_CATEGORY_MASK_UTF_16_BE;
2863
2864  label_end_of_loop:
2865   return 0;
2866 }
2867
2868 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2869    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
2870
2871 static void
2872 decode_coding_sjis_big5 (coding, source, destination,
2873                          src_bytes, dst_bytes, sjis_p)
2874      struct coding_system *coding;
2875      unsigned char *source, *destination;
2876      int src_bytes, dst_bytes;
2877      int sjis_p;
2878 {
2879   unsigned char *src = source;
2880   unsigned char *src_end = source + src_bytes;
2881   unsigned char *dst = destination;
2882   unsigned char *dst_end = destination + dst_bytes;
2883   /* SRC_BASE remembers the start position in source in each loop.
2884      The loop will be exited when there's not enough source code
2885      (within macro ONE_MORE_BYTE), or when there's not enough
2886      destination area to produce a character (within macro
2887      EMIT_CHAR).  */
2888   unsigned char *src_base;
2889   Lisp_Object translation_table;
2890
2891   if (NILP (Venable_character_translation))
2892     translation_table = Qnil;
2893   else
2894     {
2895       translation_table = coding->translation_table_for_decode;
2896       if (NILP (translation_table))
2897         translation_table = Vstandard_translation_table_for_decode;
2898     }
2899
2900   coding->produced_char = 0;
2901   while (1)
2902     {
2903       int c, charset, c1, c2;
2904
2905       src_base = src;
2906       ONE_MORE_BYTE (c1);
2907
2908       if (c1 < 0x80)
2909         {
2910           charset = CHARSET_ASCII;
2911           if (c1 < 0x20)
2912             {
2913               if (c1 == '\r')
2914                 {
2915                   if (coding->eol_type == CODING_EOL_CRLF)
2916                     {
2917                       ONE_MORE_BYTE (c2);
2918                       if (c2 == '\n')
2919                         c1 = c2;
2920                       else if (coding->mode
2921                                & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2922                         {
2923                           coding->result = CODING_FINISH_INCONSISTENT_EOL;
2924                           goto label_end_of_loop;
2925                         }
2926                       else
2927                         /* To process C2 again, SRC is subtracted by 1.  */
2928                         src--;
2929                     }
2930                   else if (coding->eol_type == CODING_EOL_CR)
2931                     c1 = '\n';
2932                 }
2933               else if (c1 == '\n'
2934                        && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2935                        && (coding->eol_type == CODING_EOL_CR
2936                            || coding->eol_type == CODING_EOL_CRLF))
2937                 {
2938                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
2939                   goto label_end_of_loop;
2940                 }
2941             }
2942         }
2943       else
2944         {
2945           if (sjis_p)
2946             {
2947               if (c1 == 0x80 || c1 == 0xA0 || c1 > 0xEF)
2948                 goto label_invalid_code;
2949               if (c1 <= 0x9F || c1 >= 0xE0)
2950                 {
2951                   /* SJIS -> JISX0208 */
2952                   ONE_MORE_BYTE (c2);
2953                   if (c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
2954                     goto label_invalid_code;
2955                   DECODE_SJIS (c1, c2, c1, c2);
2956                   charset = charset_jisx0208;
2957                 }
2958               else
2959                 /* SJIS -> JISX0201-Kana */
2960                 charset = charset_katakana_jisx0201;
2961             }
2962           else
2963             {
2964               /* BIG5 -> Big5 */
2965               if (c1 < 0xA0 || c1 > 0xFE)
2966                 goto label_invalid_code;
2967               ONE_MORE_BYTE (c2);
2968               if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE)
2969                 goto label_invalid_code;
2970               DECODE_BIG5 (c1, c2, charset, c1, c2);
2971             }
2972         }
2973
2974       c = DECODE_ISO_CHARACTER (charset, c1, c2);
2975       EMIT_CHAR (c);
2976       continue;
2977
2978     label_invalid_code:
2979       coding->errors++;
2980       src = src_base;
2981       c = *src++;
2982       EMIT_CHAR (c);
2983     }
2984
2985  label_end_of_loop:
2986   coding->consumed = coding->consumed_char = src_base - source;
2987   coding->produced = dst - destination;
2988   return;
2989 }
2990
2991 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
2992    This function can encode charsets `ascii', `katakana-jisx0201',
2993    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
2994    are sure that all these charsets are registered as official charset
2995    (i.e. do not have extended leading-codes).  Characters of other
2996    charsets are produced without any encoding.  If SJIS_P is 1, encode
2997    SJIS text, else encode BIG5 text.  */
2998
2999 static void
3000 encode_coding_sjis_big5 (coding, source, destination,
3001                          src_bytes, dst_bytes, sjis_p)
3002      struct coding_system *coding;
3003      unsigned char *source, *destination;
3004      int src_bytes, dst_bytes;
3005      int sjis_p;
3006 {
3007   unsigned char *src = source;
3008   unsigned char *src_end = source + src_bytes;
3009   unsigned char *dst = destination;
3010   unsigned char *dst_end = destination + dst_bytes;
3011   /* SRC_BASE remembers the start position in source in each loop.
3012      The loop will be exited when there's not enough source text to
3013      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
3014      there's not enough destination area to produce encoded codes
3015      (within macro EMIT_BYTES).  */
3016   unsigned char *src_base;
3017   Lisp_Object translation_table;
3018
3019   if (NILP (Venable_character_translation))
3020     translation_table = Qnil;
3021   else
3022     {
3023       translation_table = coding->translation_table_for_encode;
3024       if (NILP (translation_table))
3025         translation_table = Vstandard_translation_table_for_encode;
3026     }
3027
3028   while (1)
3029     {
3030       int c, charset, c1, c2;
3031
3032       src_base = src;
3033       ONE_MORE_CHAR (c);
3034
3035       /* Now encode the character C.  */
3036       if (SINGLE_BYTE_CHAR_P (c))
3037         {
3038           switch (c)
3039             {
3040             case '\r':
3041               if (!coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
3042                 {
3043                   EMIT_ONE_BYTE (c);
3044                   break;
3045                 }
3046               c = '\n';
3047             case '\n':
3048               if (coding->eol_type == CODING_EOL_CRLF)
3049                 {
3050                   EMIT_TWO_BYTES ('\r', c);
3051                   break;
3052                 }
3053               else if (coding->eol_type == CODING_EOL_CR)
3054                 c = '\r';
3055             default:
3056               EMIT_ONE_BYTE (c);
3057             }
3058         }
3059       else
3060         {
3061           SPLIT_CHAR (c, charset, c1, c2);
3062           if (sjis_p)
3063             {
3064               if (charset == charset_jisx0208
3065                   || charset == charset_jisx0208_1978)
3066                 {
3067                   ENCODE_SJIS (c1, c2, c1, c2);
3068                   EMIT_TWO_BYTES (c1, c2);
3069                 }
3070               else if (charset == charset_katakana_jisx0201)
3071                 EMIT_ONE_BYTE (c1 | 0x80);
3072               else if (charset == charset_latin_jisx0201)
3073                 EMIT_ONE_BYTE (c1);
3074               else
3075                 /* There's no way other than producing the internal
3076                    codes as is.  */
3077                 EMIT_BYTES (src_base, src);
3078             }
3079           else
3080             {
3081               if (charset == charset_big5_1 || charset == charset_big5_2)
3082                 {
3083                   ENCODE_BIG5 (charset, c1, c2, c1, c2);
3084                   EMIT_TWO_BYTES (c1, c2);
3085                 }
3086               else
3087                 /* There's no way other than producing the internal
3088                    codes as is.  */
3089                 EMIT_BYTES (src_base, src);
3090             }
3091         }
3092       coding->consumed_char++;
3093     }
3094
3095  label_end_of_loop:
3096   coding->consumed = src_base - source;
3097   coding->produced = coding->produced_char = dst - destination;
3098 }
3099
3100 \f
3101 /*** 5. CCL handlers ***/
3102
3103 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3104    Check if a text is encoded in a coding system of which
3105    encoder/decoder are written in CCL program.  If it is, return
3106    CODING_CATEGORY_MASK_CCL, else return 0.  */
3107
3108 static int
3109 detect_coding_ccl (src, src_end, multibytep)
3110      unsigned char *src, *src_end;
3111      int multibytep;
3112 {
3113   unsigned char *valid;
3114   int c;
3115   /* Dummy for ONE_MORE_BYTE.  */
3116   struct coding_system dummy_coding;
3117   struct coding_system *coding = &dummy_coding;
3118
3119   /* No coding system is assigned to coding-category-ccl.  */
3120   if (!coding_system_table[CODING_CATEGORY_IDX_CCL])
3121     return 0;
3122
3123   valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
3124   while (1)
3125     {
3126       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
3127       if (! valid[c])
3128         return 0;
3129     }
3130  label_end_of_loop:
3131   return CODING_CATEGORY_MASK_CCL;
3132 }
3133
3134 \f
3135 /*** 6. End-of-line handlers ***/
3136
3137 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3138
3139 static void
3140 decode_eol (coding, source, destination, src_bytes, dst_bytes)
3141      struct coding_system *coding;
3142      unsigned char *source, *destination;
3143      int src_bytes, dst_bytes;
3144 {
3145   unsigned char *src = source;
3146   unsigned char *dst = destination;
3147   unsigned char *src_end = src + src_bytes;
3148   unsigned char *dst_end = dst + dst_bytes;
3149   Lisp_Object translation_table;
3150   /* SRC_BASE remembers the start position in source in each loop.
3151      The loop will be exited when there's not enough source code
3152      (within macro ONE_MORE_BYTE), or when there's not enough
3153      destination area to produce a character (within macro
3154      EMIT_CHAR).  */
3155   unsigned char *src_base;
3156   int c;
3157
3158   translation_table = Qnil;
3159   switch (coding->eol_type)
3160     {
3161     case CODING_EOL_CRLF:
3162       while (1)
3163         {
3164           src_base = src;
3165           ONE_MORE_BYTE (c);
3166           if (c == '\r')
3167             {
3168               ONE_MORE_BYTE (c);
3169               if (c != '\n')
3170                 {
3171                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
3172                     {
3173                       coding->result = CODING_FINISH_INCONSISTENT_EOL;
3174                       goto label_end_of_loop;
3175                     }
3176                   src--;
3177                   c = '\r';
3178                 }
3179             }
3180           else if (c == '\n'
3181                    && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
3182             {
3183               coding->result = CODING_FINISH_INCONSISTENT_EOL;
3184               goto label_end_of_loop;
3185             }
3186           EMIT_CHAR (c);
3187         }
3188       break;
3189
3190     case CODING_EOL_CR:
3191       while (1)
3192         {
3193           src_base = src;
3194           ONE_MORE_BYTE (c);
3195           if (c == '\n')
3196             {
3197               if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
3198                 {
3199                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
3200                   goto label_end_of_loop;
3201                 }
3202             }
3203           else if (c == '\r')
3204             c = '\n';
3205           EMIT_CHAR (c);
3206         }
3207       break;
3208
3209     default:                    /* no need for EOL handling */
3210       while (1)
3211         {
3212           src_base = src;
3213           ONE_MORE_BYTE (c);
3214           EMIT_CHAR (c);
3215         }
3216     }
3217
3218  label_end_of_loop:
3219   coding->consumed = coding->consumed_char = src_base - source;
3220   coding->produced = dst - destination;
3221   return;
3222 }
3223
3224 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
3225    format of end-of-line according to `coding->eol_type'.  It also
3226    convert multibyte form 8-bit characters to unibyte if
3227    CODING->src_multibyte is nonzero.  If `coding->mode &
3228    CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code '\r' in source text
3229    also means end-of-line.  */
3230
3231 static void
3232 encode_eol (coding, source, destination, src_bytes, dst_bytes)
3233      struct coding_system *coding;
3234      unsigned char *source, *destination;
3235      int src_bytes, dst_bytes;
3236 {
3237   unsigned char *src = source;
3238   unsigned char *dst = destination;
3239   unsigned char *src_end = src + src_bytes;
3240   unsigned char *dst_end = dst + dst_bytes;
3241   Lisp_Object translation_table;
3242   /* SRC_BASE remembers the start position in source in each loop.
3243      The loop will be exited when there's not enough source text to
3244      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
3245      there's not enough destination area to produce encoded codes
3246      (within macro EMIT_BYTES).  */
3247   unsigned char *src_base;
3248   int c;
3249   int selective_display = coding->mode & CODING_MODE_SELECTIVE_DISPLAY;
3250
3251   translation_table = Qnil;
3252   if (coding->src_multibyte
3253       && *(src_end - 1) == LEADING_CODE_8_BIT_CONTROL)
3254     {
3255       src_end--;
3256       src_bytes--;
3257       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
3258     }
3259
3260   if (coding->eol_type == CODING_EOL_CRLF)
3261     {
3262       while (src < src_end)
3263         {
3264           src_base = src;
3265           c = *src++;
3266           if (c >= 0x20)
3267             EMIT_ONE_BYTE (c);
3268           else if (c == '\n' || (c == '\r' && selective_display))
3269             EMIT_TWO_BYTES ('\r', '\n');
3270           else
3271             EMIT_ONE_BYTE (c);
3272         }
3273       src_base = src;
3274     label_end_of_loop:
3275       ;
3276     }
3277   else
3278     {
3279       if (!dst_bytes || src_bytes <= dst_bytes)
3280         {
3281           safe_bcopy (src, dst, src_bytes);
3282           src_base = src_end;
3283           dst += src_bytes;
3284         }
3285       else
3286         {
3287           if (coding->src_multibyte
3288               && *(src + dst_bytes - 1) == LEADING_CODE_8_BIT_CONTROL)
3289             dst_bytes--;
3290           safe_bcopy (src, dst, dst_bytes);
3291           src_base = src + dst_bytes;
3292           dst = destination + dst_bytes;
3293           coding->result = CODING_FINISH_INSUFFICIENT_DST;
3294         }
3295       if (coding->eol_type == CODING_EOL_CR)
3296         {
3297           for (src = destination; src < dst; src++)
3298             if (*src == '\n') *src = '\r';
3299         }
3300       else if (selective_display)
3301         {
3302           for (src = destination; src < dst; src++)
3303             if (*src == '\r') *src = '\n';
3304         }
3305     }
3306   if (coding->src_multibyte)
3307     dst = destination + str_as_unibyte (destination, dst - destination);
3308
3309   coding->consumed = src_base - source;
3310   coding->produced = dst - destination;
3311   coding->produced_char = coding->produced;
3312 }
3313
3314 \f
3315 /*** 7. C library functions ***/
3316
3317 /* In Emacs Lisp, a coding system is represented by a Lisp symbol which
3318    has a property `coding-system'.  The value of this property is a
3319    vector of length 5 (called the coding-vector).  Among elements of
3320    this vector, the first (element[0]) and the fifth (element[4])
3321    carry important information for decoding/encoding.  Before
3322    decoding/encoding, this information should be set in fields of a
3323    structure of type `coding_system'.
3324
3325    The value of the property `coding-system' can be a symbol of another
3326    subsidiary coding-system.  In that case, Emacs gets coding-vector
3327    from that symbol.
3328
3329    `element[0]' contains information to be set in `coding->type'.  The
3330    value and its meaning is as follows:
3331
3332    0 -- coding_type_emacs_mule
3333    1 -- coding_type_sjis
3334    2 -- coding_type_iso2022
3335    3 -- coding_type_big5
3336    4 -- coding_type_ccl encoder/decoder written in CCL
3337    nil -- coding_type_no_conversion
3338    t -- coding_type_undecided (automatic conversion on decoding,
3339                                no-conversion on encoding)
3340
3341    `element[4]' contains information to be set in `coding->flags' and
3342    `coding->spec'.  The meaning varies by `coding->type'.
3343
3344    If `coding->type' is `coding_type_iso2022', element[4] is a vector
   of length 32 (of which the first 17 sub-elements are used now).
3346    Meanings of these sub-elements are:
3347
3348    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
3349         If the value is an integer of valid charset, the charset is
3350         assumed to be designated to graphic register N initially.
3351
3352         If the value is minus, it is a minus value of charset which
3353         reserves graphic register N, which means that the charset is
3354         not designated initially but should be designated to graphic
3355         register N just before encoding a character in that charset.
3356
3357         If the value is nil, graphic register N is never used on
3358         encoding.
3359
   sub-element[N] where N is 4 through 16: to be set in `coding->flags'
3361         Each value takes t or nil.  See the section ISO2022 of
3362         `coding.h' for more information.
3363
3364    If `coding->type' is `coding_type_big5', element[4] is t to denote
3365    BIG5-ETen or nil to denote BIG5-HKU.
3366
3367    If `coding->type' takes the other value, element[4] is ignored.
3368
3369    Emacs Lisp's coding systems also carry information about format of
3370    end-of-line in a value of property `eol-type'.  If the value is
3371    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
3372    means CODING_EOL_CR.  If it is not integer, it should be a vector
3373    of subsidiary coding systems of which property `eol-type' has one
3374    of the above values.
3375
3376 */
3377
3378 /* Extract information for decoding/encoding from CODING_SYSTEM_SYMBOL
3379    and set it in CODING.  If CODING_SYSTEM_SYMBOL is invalid, CODING
3380    is setup so that no conversion is necessary and return -1, else
3381    return 0.  */
3382
3383 int
3384 setup_coding_system (coding_system, coding)
3385      Lisp_Object coding_system;
3386      struct coding_system *coding;
3387 {
3388   Lisp_Object coding_spec, coding_type, eol_type, plist;
3389   Lisp_Object val;
3390
3391   /* At first, zero clear all members.  */
3392   bzero (coding, sizeof (struct coding_system));
3393
3394   /* Initialize some fields required for all kinds of coding systems.  */
3395   coding->symbol = coding_system;
3396   coding->heading_ascii = -1;
3397   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
3398   coding->composing = COMPOSITION_DISABLED;
3399   coding->cmp_data = NULL;
3400
3401   if (NILP (coding_system))
3402     goto label_invalid_coding_system;
3403
3404   coding_spec = Fget (coding_system, Qcoding_system);
3405
3406   if (!VECTORP (coding_spec)
3407       || XVECTOR (coding_spec)->size != 5
3408       || !CONSP (XVECTOR (coding_spec)->contents[3]))
3409     goto label_invalid_coding_system;
3410
3411   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
3412   if (VECTORP (eol_type))
3413     {
3414       coding->eol_type = CODING_EOL_UNDECIDED;
3415       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
3416     }
3417   else if (XFASTINT (eol_type) == 1)
3418     {
3419       coding->eol_type = CODING_EOL_CRLF;
3420       coding->common_flags
3421         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3422     }
3423   else if (XFASTINT (eol_type) == 2)
3424     {
3425       coding->eol_type = CODING_EOL_CR;
3426       coding->common_flags
3427         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3428     }
3429   else
3430     coding->eol_type = CODING_EOL_LF;
3431
3432   coding_type = XVECTOR (coding_spec)->contents[0];
3433   /* Try short cut.  */
3434   if (SYMBOLP (coding_type))
3435     {
3436       if (EQ (coding_type, Qt))
3437         {
3438           coding->type = coding_type_undecided;
3439           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
3440         }
3441       else
3442         coding->type = coding_type_no_conversion;
3443       /* Initialize this member.  Any thing other than
3444          CODING_CATEGORY_IDX_UTF_16_BE and
3445          CODING_CATEGORY_IDX_UTF_16_LE are ok because they have
3446          special treatment in detect_eol.  */
3447       coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
3448
3449       return 0;
3450     }
3451
3452   /* Get values of coding system properties:
3453      `post-read-conversion', `pre-write-conversion',
3454      `translation-table-for-decode', `translation-table-for-encode'.  */
3455   plist = XVECTOR (coding_spec)->contents[3];
3456   /* Pre & post conversion functions should be disabled if
3457      inhibit_eol_conversion is nonzero.  This is the case that a code
3458      conversion function is called while those functions are running.  */
3459   if (! inhibit_pre_post_conversion)
3460     {
3461       coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
3462       coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
3463     }
3464   val = Fplist_get (plist, Qtranslation_table_for_decode);
3465   if (SYMBOLP (val))
3466     val = Fget (val, Qtranslation_table_for_decode);
3467   coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
3468   val = Fplist_get (plist, Qtranslation_table_for_encode);
3469   if (SYMBOLP (val))
3470     val = Fget (val, Qtranslation_table_for_encode);
3471   coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
3472   val = Fplist_get (plist, Qcoding_category);
3473   if (!NILP (val))
3474     {
3475       val = Fget (val, Qcoding_category_index);
3476       if (INTEGERP (val))
3477         coding->category_idx = XINT (val);
3478       else
3479         goto label_invalid_coding_system;
3480     }
3481   else
3482     goto label_invalid_coding_system;
3483
3484   /* If the coding system has non-nil `composition' property, enable
3485      composition handling.  */
3486   val = Fplist_get (plist, Qcomposition);
3487   if (!NILP (val))
3488     coding->composing = COMPOSITION_NO;
3489
3490   switch (XFASTINT (coding_type))
3491     {
3492     case 0:
3493       coding->type = coding_type_emacs_mule;
3494       coding->common_flags
3495         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3496       coding->composing = COMPOSITION_NO;
3497       if (!NILP (coding->post_read_conversion))
3498         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
3499       if (!NILP (coding->pre_write_conversion))
3500         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
3501       break;
3502
3503     case 1:
3504       coding->type = coding_type_sjis;
3505       coding->common_flags
3506         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3507       break;
3508
3509     case 2:
3510       coding->type = coding_type_iso2022;
3511       coding->common_flags
3512         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3513       {
3514         Lisp_Object val, temp;
3515         Lisp_Object *flags;
3516         int i, charset, reg_bits = 0;
3517
3518         val = XVECTOR (coding_spec)->contents[4];
3519
3520         if (!VECTORP (val) || XVECTOR (val)->size != 32)
3521           goto label_invalid_coding_system;
3522
3523         flags = XVECTOR (val)->contents;
3524         coding->flags
3525           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
3526              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
3527              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
3528              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
3529              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
3530              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
3531              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
3532              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
3533              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
3534              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
3535              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
3536              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
3537              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
3538              );
3539
3540         /* Invoke graphic register 0 to plane 0.  */
3541         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
3542         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
3543         CODING_SPEC_ISO_INVOCATION (coding, 1)
3544           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
3545         /* Not single shifting at first.  */
3546         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
3547         /* Beginning of buffer should also be regarded as bol. */
3548         CODING_SPEC_ISO_BOL (coding) = 1;
3549
3550         for (charset = 0; charset <= MAX_CHARSET; charset++)
3551           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
3552         val = Vcharset_revision_alist;
3553         while (CONSP (val))
3554           {
3555             charset = get_charset_id (Fcar_safe (XCAR (val)));
3556             if (charset >= 0
3557                 && (temp = Fcdr_safe (XCAR (val)), INTEGERP (temp))
3558                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
3559               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
3560             val = XCDR (val);
3561           }
3562
3563         /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
3564            FLAGS[REG] can be one of below:
3565                 integer CHARSET: CHARSET occupies register I,
3566                 t: designate nothing to REG initially, but can be used
3567                   by any charsets,
3568                 list of integer, nil, or t: designate the first
3569                   element (if integer) to REG initially, the remaining
3570                   elements (if integer) is designated to REG on request,
3571                   if an element is t, REG can be used by any charsets,
3572                 nil: REG is never used.  */
3573         for (charset = 0; charset <= MAX_CHARSET; charset++)
3574           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3575             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
3576         for (i = 0; i < 4; i++)
3577           {
3578             if (INTEGERP (flags[i])
3579                 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
3580                 || (charset = get_charset_id (flags[i])) >= 0)
3581               {
3582                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3583                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
3584               }
3585             else if (EQ (flags[i], Qt))
3586               {
3587                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3588                 reg_bits |= 1 << i;
3589                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3590               }
3591             else if (CONSP (flags[i]))
3592               {
3593                 Lisp_Object tail;
3594                 tail = flags[i];
3595
3596                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3597                 if (INTEGERP (XCAR (tail))
3598                     && (charset = XINT (XCAR (tail)),
3599                         CHARSET_VALID_P (charset))
3600                     || (charset = get_charset_id (XCAR (tail))) >= 0)
3601                   {
3602                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3603                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
3604                   }
3605                 else
3606                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3607                 tail = XCDR (tail);
3608                 while (CONSP (tail))
3609                   {
3610                     if (INTEGERP (XCAR (tail))
3611                         && (charset = XINT (XCAR (tail)),
3612                             CHARSET_VALID_P (charset))
3613                         || (charset = get_charset_id (XCAR (tail))) >= 0)
3614                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3615                         = i;
3616                     else if (EQ (XCAR (tail), Qt))
3617                       reg_bits |= 1 << i;
3618                     tail = XCDR (tail);
3619                   }
3620               }
3621             else
3622               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3623
3624             CODING_SPEC_ISO_DESIGNATION (coding, i)
3625               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
3626           }
3627
3628         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
3629           {
3630             /* REG 1 can be used only by locking shift in 7-bit env.  */
3631             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
3632               reg_bits &= ~2;
3633             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3634               /* Without any shifting, only REG 0 and 1 can be used.  */
3635               reg_bits &= 3;
3636           }
3637
3638         if (reg_bits)
3639           for (charset = 0; charset <= MAX_CHARSET; charset++)
3640             {
3641               if (CHARSET_VALID_P (charset)
3642                   && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3643                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
3644                 {
3645                   /* There exist some default graphic registers to be
3646                      used by CHARSET.  */
3647
3648                   /* We had better avoid designating a charset of
3649                      CHARS96 to REG 0 as far as possible.  */
3650                   if (CHARSET_CHARS (charset) == 96)
3651                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3652                       = (reg_bits & 2
3653                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3654                   else
3655                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3656                       = (reg_bits & 1
3657                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3658                 }
3659             }
3660       }
3661       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3662       coding->spec.iso2022.last_invalid_designation_register = -1;
3663       break;
3664
3665     case 3:
3666       coding->type = coding_type_big5;
3667       coding->common_flags
3668         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3669       coding->flags
3670         = (NILP (XVECTOR (coding_spec)->contents[4])
3671            ? CODING_FLAG_BIG5_HKU
3672            : CODING_FLAG_BIG5_ETEN);
3673       break;
3674
3675     case 4:
3676       coding->type = coding_type_ccl;
3677       coding->common_flags
3678         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3679       {
3680         val = XVECTOR (coding_spec)->contents[4];
3681         if (! CONSP (val)
3682             || setup_ccl_program (&(coding->spec.ccl.decoder),
3683                                   XCAR (val)) < 0
3684             || setup_ccl_program (&(coding->spec.ccl.encoder),
3685                                   XCDR (val)) < 0)
3686           goto label_invalid_coding_system;
3687
3688         bzero (coding->spec.ccl.valid_codes, 256);
3689         val = Fplist_get (plist, Qvalid_codes);
3690         if (CONSP (val))
3691           {
3692             Lisp_Object this;
3693
3694             for (; CONSP (val); val = XCDR (val))
3695               {
3696                 this = XCAR (val);
3697                 if (INTEGERP (this)
3698                     && XINT (this) >= 0 && XINT (this) < 256)
3699                   coding->spec.ccl.valid_codes[XINT (this)] = 1;
3700                 else if (CONSP (this)
3701                          && INTEGERP (XCAR (this))
3702                          && INTEGERP (XCDR (this)))
3703                   {
3704                     int start = XINT (XCAR (this));
3705                     int end = XINT (XCDR (this));
3706
3707                     if (start >= 0 && start <= end && end < 256)
3708                       while (start <= end)
3709                         coding->spec.ccl.valid_codes[start++] = 1;
3710                   }
3711               }
3712           }
3713       }
3714       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3715       coding->spec.ccl.cr_carryover = 0;
3716       coding->spec.ccl.eight_bit_carryover[0] = 0;
3717       break;
3718
3719     case 5:
3720       coding->type = coding_type_raw_text;
3721       break;
3722
3723     default:
3724       goto label_invalid_coding_system;
3725     }
3726   return 0;
3727
3728  label_invalid_coding_system:
3729   coding->type = coding_type_no_conversion;
3730   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3731   coding->common_flags = 0;
3732   coding->eol_type = CODING_EOL_LF;
3733   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3734   return -1;
3735 }
3736
3737 /* Free memory blocks allocated for storing composition information.  */
3738
3739 void
3740 coding_free_composition_data (coding)
3741      struct coding_system *coding;
3742 {
3743   struct composition_data *cmp_data = coding->cmp_data, *next;
3744
3745   if (!cmp_data)
3746     return;
3747   /* Memory blocks are chained.  At first, rewind to the first, then,
3748      free blocks one by one.  */
3749   while (cmp_data->prev)
3750     cmp_data = cmp_data->prev;
3751   while (cmp_data)
3752     {
3753       next = cmp_data->next;
3754       xfree (cmp_data);
3755       cmp_data = next;
3756     }
3757   coding->cmp_data = NULL;
3758 }
3759
3760 /* Set `char_offset' member of all memory blocks pointed by
3761    coding->cmp_data to POS.  */
3762
3763 void
3764 coding_adjust_composition_offset (coding, pos)
3765      struct coding_system *coding;
3766      int pos;
3767 {
3768   struct composition_data *cmp_data;
3769
3770   for (cmp_data = coding->cmp_data; cmp_data; cmp_data = cmp_data->next)
3771     cmp_data->char_offset = pos;
3772 }
3773
3774 /* Setup raw-text or one of its subsidiaries in the structure
3775    coding_system CODING according to the already setup value eol_type
3776    in CODING.  CODING should be setup for some coding system in
3777    advance.  */
3778
3779 void
3780 setup_raw_text_coding_system (coding)
3781      struct coding_system *coding;
3782 {
3783   if (coding->type != coding_type_raw_text)
3784     {
3785       coding->symbol = Qraw_text;
3786       coding->type = coding_type_raw_text;
3787       if (coding->eol_type != CODING_EOL_UNDECIDED)
3788         {
3789           Lisp_Object subsidiaries;
3790           subsidiaries = Fget (Qraw_text, Qeol_type);
3791
3792           if (VECTORP (subsidiaries)
3793               && XVECTOR (subsidiaries)->size == 3)
3794             coding->symbol
3795               = XVECTOR (subsidiaries)->contents[coding->eol_type];
3796         }
3797       setup_coding_system (coding->symbol, coding);
3798     }
3799   return;
3800 }
3801
3802 /* Emacs has a mechanism to automatically detect a coding system if it
3803    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
3804    it's impossible to distinguish some coding systems accurately
3805    because they use the same range of codes.  So, at first, coding
3806    systems are grouped into the categories listed below:
3807
3808    o coding-category-emacs-mule
3809
3810         The category for a coding system which has the same code range
3811         as Emacs' internal format.  Assigned the coding-system (Lisp
3812         symbol) `emacs-mule' by default.
3813
3814    o coding-category-sjis
3815
3816         The category for a coding system which has the same code range
3817         as SJIS.  Assigned the coding-system (Lisp
3818         symbol) `japanese-shift-jis' by default.
3819
3820    o coding-category-iso-7
3821
3822         The category for a coding system which has the same code range
3823         as ISO2022 of 7-bit environment.  This doesn't use any locking
3824         shift and single shift functions.  This can encode/decode all
3825         charsets.  Assigned the coding-system (Lisp symbol)
3826         `iso-2022-7bit' by default.
3827
3828    o coding-category-iso-7-tight
3829
3830         Same as coding-category-iso-7 except that this can
3831         encode/decode only the specified charsets.
3832
3833    o coding-category-iso-8-1
3834
3835         The category for a coding system which has the same code range
3836         as ISO2022 of 8-bit environment and graphic plane 1 used only
3837         for DIMENSION1 charset.  This doesn't use any locking shift
3838         and single shift functions.  Assigned the coding-system (Lisp
3839         symbol) `iso-latin-1' by default.
3840
3841    o coding-category-iso-8-2
3842
3843         The category for a coding system which has the same code range
3844         as ISO2022 of 8-bit environment and graphic plane 1 used only
3845         for DIMENSION2 charset.  This doesn't use any locking shift
3846         and single shift functions.  Assigned the coding-system (Lisp
3847         symbol) `japanese-iso-8bit' by default.
3848
3849    o coding-category-iso-7-else
3850
3851         The category for a coding system which has the same code range
3852         as ISO2022 of 7-bit environment but uses locking shift or
3853         single shift functions.  Assigned the coding-system (Lisp
3854         symbol) `iso-2022-7bit-lock' by default.
3855
3856    o coding-category-iso-8-else
3857
3858         The category for a coding system which has the same code range
3859         as ISO2022 of 8-bit environment but uses locking shift or
3860         single shift functions.  Assigned the coding-system (Lisp
3861         symbol) `iso-2022-8bit-ss2' by default.
3862
3863    o coding-category-big5
3864
3865         The category for a coding system which has the same code range
3866         as BIG5.  Assigned the coding-system (Lisp symbol)
3867         `cn-big5' by default.
3868
3869    o coding-category-utf-8
3870
3871         The category for a coding system which has the same code range
3872         as UTF-8 (cf. RFC2279).  Assigned the coding-system (Lisp
3873         symbol) `utf-8' by default.
3874
3875    o coding-category-utf-16-be
3876
3877         The category for a coding system in which a text has an
3878         Unicode signature (cf. Unicode Standard) in the order of BIG
3879         endian at the head.  Assigned the coding-system (Lisp symbol)
3880         `utf-16-be' by default.
3881
3882    o coding-category-utf-16-le
3883
3884         The category for a coding system in which a text has an
3885         Unicode signature (cf. Unicode Standard) in the order of
3886         LITTLE endian at the head.  Assigned the coding-system (Lisp
3887         symbol) `utf-16-le' by default.
3888
3889    o coding-category-ccl
3890
3891         The category for a coding system of which encoder/decoder is
3892         written in CCL programs.  The default value is nil, i.e., no
3893         coding system is assigned.
3894
3895    o coding-category-binary
3896
3897         The category for a coding system not categorized in any of the
3898         above.  Assigned the coding-system (Lisp symbol)
3899         `no-conversion' by default.
3900
3901    Each of them is a Lisp symbol and the value is an actual
3902    `coding-system' (this is also a Lisp symbol) assigned by a user.
3903    What Emacs does actually is to detect a category of coding system.
3904    Then, it uses a `coding-system' assigned to it.  If Emacs can't
3905    decide a single possible category, it selects a category of the
3906    highest priority.  Priorities of categories are also specified by a
3907    user in a Lisp variable `coding-category-list'.
3908
3909 */
3910
3911 static
3912 int ascii_skip_code[256];
3913
3914 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
3915    If it detects possible coding systems, return an integer in which
3916    appropriate flag bits are set.  Flag bits are defined by macros
3917    CODING_CATEGORY_MASK_XXX in `coding.h'.  If PRIORITIES is non-NULL,
3918    it should point the table `coding_priorities'.  In that case, only
3919    the flag bit for a coding system of the highest priority is set in
3920    the returned value.  If MULTIBYTEP is nonzero, 8-bit codes of the
3921    range 0x80..0x9F are in multibyte form.
3922
3923    How many ASCII characters are at the head is returned as *SKIP.  */
3924
3925 static int
3926 detect_coding_mask (source, src_bytes, priorities, skip, multibytep)
3927      unsigned char *source;
3928      int src_bytes, *priorities, *skip;
3929      int multibytep;
3930 {
3931   register unsigned char c;
3932   unsigned char *src = source, *src_end = source + src_bytes;
3933   unsigned int mask, utf16_examined_p, iso2022_examined_p;
3934   int i;
3935
3936   /* At first, skip all ASCII characters and control characters except
3937      for three ISO2022 specific control characters.  */
3938   ascii_skip_code[ISO_CODE_SO] = 0;
3939   ascii_skip_code[ISO_CODE_SI] = 0;
3940   ascii_skip_code[ISO_CODE_ESC] = 0;
3941
3942  label_loop_detect_coding:
3943   while (src < src_end && ascii_skip_code[*src]) src++;
3944   *skip = src - source;
3945
3946   if (src >= src_end)
3947     /* We found nothing other than ASCII.  There's nothing to do.  */
3948     return 0;
3949
3950   c = *src;
3951   /* The text seems to be encoded in some multilingual coding system.
3952      Now, try to find in which coding system the text is encoded.  */
3953   if (c < 0x80)
3954     {
3955       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
3956       /* C is an ISO2022 specific control code of C0.  */
3957       mask = detect_coding_iso2022 (src, src_end, multibytep);
3958       if (mask == 0)
3959         {
3960           /* No valid ISO2022 code follows C.  Try again.  */
3961           src++;
3962           if (c == ISO_CODE_ESC)
3963             ascii_skip_code[ISO_CODE_ESC] = 1;
3964           else
3965             ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
3966           goto label_loop_detect_coding;
3967         }
3968       if (priorities)
3969         {
3970           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3971             {
3972               if (mask & priorities[i])
3973                 return priorities[i];
3974             }
3975           return CODING_CATEGORY_MASK_RAW_TEXT;
3976         }
3977     }
3978   else
3979     {
3980       int try;
3981
3982       if (multibytep && c == LEADING_CODE_8_BIT_CONTROL)
3983         c = src[1] - 0x20;
3984
3985       if (c < 0xA0)
3986         {
3987           /* C is the first byte of SJIS character code,
3988              or a leading-code of Emacs' internal format (emacs-mule),
3989              or the first byte of UTF-16.  */
3990           try = (CODING_CATEGORY_MASK_SJIS
3991                   | CODING_CATEGORY_MASK_EMACS_MULE
3992                   | CODING_CATEGORY_MASK_UTF_16_BE
3993                   | CODING_CATEGORY_MASK_UTF_16_LE);
3994
3995           /* Or, if C is a special latin extra code,
3996              or is an ISO2022 specific control code of C1 (SS2 or SS3),
3997              or is an ISO2022 control-sequence-introducer (CSI),
3998              we should also consider the possibility of ISO2022 codings.  */
3999           if ((VECTORP (Vlatin_extra_code_table)
4000                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
4001               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
4002               || (c == ISO_CODE_CSI
4003                   && (src < src_end
4004                       && (*src == ']'
4005                           || ((*src == '0' || *src == '1' || *src == '2')
4006                               && src + 1 < src_end
4007                               && src[1] == ']')))))
4008             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
4009                      | CODING_CATEGORY_MASK_ISO_8BIT);
4010         }
4011       else
4012         /* C is a character of ISO2022 in graphic plane right,
4013            or a SJIS's 1-byte character code (i.e. JISX0201),
4014            or the first byte of BIG5's 2-byte code,
4015            or the first byte of UTF-8/16.  */
4016         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
4017                 | CODING_CATEGORY_MASK_ISO_8BIT
4018                 | CODING_CATEGORY_MASK_SJIS
4019                 | CODING_CATEGORY_MASK_BIG5
4020                 | CODING_CATEGORY_MASK_UTF_8
4021                 | CODING_CATEGORY_MASK_UTF_16_BE
4022                 | CODING_CATEGORY_MASK_UTF_16_LE);
4023
4024       /* Or, we may have to consider the possibility of CCL.  */
4025       if (coding_system_table[CODING_CATEGORY_IDX_CCL]
4026           && (coding_system_table[CODING_CATEGORY_IDX_CCL]
4027               ->spec.ccl.valid_codes)[c])
4028         try |= CODING_CATEGORY_MASK_CCL;
4029
4030       mask = 0;
4031       utf16_examined_p = iso2022_examined_p = 0;
4032       if (priorities)
4033         {
4034           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
4035             {
4036               if (!iso2022_examined_p
4037                   && (priorities[i] & try & CODING_CATEGORY_MASK_ISO))
4038                 {
4039                   mask |= detect_coding_iso2022 (src, src_end);
4040                   iso2022_examined_p = 1;
4041                 }
4042               else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
4043                 mask |= detect_coding_sjis (src, src_end, multibytep);
4044               else if (priorities[i] & try & CODING_CATEGORY_MASK_UTF_8)
4045                 mask |= detect_coding_utf_8 (src, src_end, multibytep);
4046               else if (!utf16_examined_p
4047                        && (priorities[i] & try &
4048                            CODING_CATEGORY_MASK_UTF_16_BE_LE))
4049                 {
4050                   mask |= detect_coding_utf_16 (src, src_end, multibytep);
4051                   utf16_examined_p = 1;
4052                 }
4053               else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
4054                 mask |= detect_coding_big5 (src, src_end, multibytep);
4055               else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
4056                 mask |= detect_coding_emacs_mule (src, src_end, multibytep);
4057               else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
4058                 mask |= detect_coding_ccl (src, src_end, multibytep);
4059               else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
4060                 mask |= CODING_CATEGORY_MASK_RAW_TEXT;
4061               else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
4062                 mask |= CODING_CATEGORY_MASK_BINARY;
4063               if (mask & priorities[i])
4064                 return priorities[i];
4065             }
4066           return CODING_CATEGORY_MASK_RAW_TEXT;
4067         }
4068       if (try & CODING_CATEGORY_MASK_ISO)
4069         mask |= detect_coding_iso2022 (src, src_end, multibytep);
4070       if (try & CODING_CATEGORY_MASK_SJIS)
4071         mask |= detect_coding_sjis (src, src_end, multibytep);
4072       if (try & CODING_CATEGORY_MASK_BIG5)
4073         mask |= detect_coding_big5 (src, src_end, multibytep);
4074       if (try & CODING_CATEGORY_MASK_UTF_8)
4075         mask |= detect_coding_utf_8 (src, src_end, multibytep);
4076       if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE)
4077         mask |= detect_coding_utf_16 (src, src_end, multibytep);
4078       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
4079         mask |= detect_coding_emacs_mule (src, src_end, multibytep);
4080       if (try & CODING_CATEGORY_MASK_CCL)
4081         mask |= detect_coding_ccl (src, src_end, multibytep);
4082     }
4083   return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
4084 }
4085
4086 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
4087    The information of the detected coding system is set in CODING.  */
4088
4089 void
4090 detect_coding (coding, src, src_bytes)
4091      struct coding_system *coding;
4092      unsigned char *src;
4093      int src_bytes;
4094 {
4095   unsigned int idx;
4096   int skip, mask;
4097   Lisp_Object val;
4098
4099   val = Vcoding_category_list;
4100   mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip,
4101                              coding->src_multibyte);
4102   coding->heading_ascii = skip;
4103
4104   if (!mask) return;
4105
4106   /* We found a single coding system of the highest priority in MASK.  */
4107   idx = 0;
4108   while (mask && ! (mask & 1)) mask >>= 1, idx++;
4109   if (! mask)
4110     idx = CODING_CATEGORY_IDX_RAW_TEXT;
4111
4112   val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[idx])->value;
4113
4114   if (coding->eol_type != CODING_EOL_UNDECIDED)
4115     {
4116       Lisp_Object tmp;
4117
4118       tmp = Fget (val, Qeol_type);
4119       if (VECTORP (tmp))
4120         val = XVECTOR (tmp)->contents[coding->eol_type];
4121     }
4122
4123   /* Setup this new coding system while preserving some slots.  */
4124   {
4125     int src_multibyte = coding->src_multibyte;
4126     int dst_multibyte = coding->dst_multibyte;
4127
4128     setup_coding_system (val, coding);
4129     coding->src_multibyte = src_multibyte;
4130     coding->dst_multibyte = dst_multibyte;
4131     coding->heading_ascii = skip;
4132   }
4133 }
4134
4135 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
4136    SOURCE is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
4137    CODING_EOL_CR, and CODING_EOL_UNDECIDED.
4138
4139    How many non-eol characters are at the head is returned as *SKIP.  */
4140
4141 #define MAX_EOL_CHECK_COUNT 3
4142
4143 static int
4144 detect_eol_type (source, src_bytes, skip)
4145      unsigned char *source;
4146      int src_bytes, *skip;
4147 {
4148   unsigned char *src = source, *src_end = src + src_bytes;
4149   unsigned char c;
4150   int total = 0;                /* How many end-of-lines are found so far.  */
4151   int eol_type = CODING_EOL_UNDECIDED;
4152   int this_eol_type;
4153
4154   *skip = 0;
4155
4156   while (src < src_end && total < MAX_EOL_CHECK_COUNT)
4157     {
4158       c = *src++;
4159       if (c == '\n' || c == '\r')
4160         {
4161           if (*skip == 0)
4162             *skip = src - 1 - source;
4163           total++;
4164           if (c == '\n')
4165             this_eol_type = CODING_EOL_LF;
4166           else if (src >= src_end || *src != '\n')
4167             this_eol_type = CODING_EOL_CR;
4168           else
4169             this_eol_type = CODING_EOL_CRLF, src++;
4170
4171           if (eol_type == CODING_EOL_UNDECIDED)
4172             /* This is the first end-of-line.  */
4173             eol_type = this_eol_type;
4174           else if (eol_type != this_eol_type)
4175             {
4176               /* The found type is different from what found before.  */
4177               eol_type = CODING_EOL_INCONSISTENT;
4178               break;
4179             }
4180         }
4181     }
4182
4183   if (*skip == 0)
4184     *skip = src_end - source;
4185   return eol_type;
4186 }
4187
4188 /* Like detect_eol_type, but detect EOL type in 2-octet
4189    big-endian/little-endian format for coding systems utf-16-be and
4190    utf-16-le.  */
4191
4192 static int
4193 detect_eol_type_in_2_octet_form (source, src_bytes, skip, big_endian_p)
4194      unsigned char *source;
4195      int src_bytes, *skip, big_endian_p;
4196 {
4197   unsigned char *src = source, *src_end = src + src_bytes;
4198   unsigned int c1, c2;
4199   int total = 0;                /* How many end-of-lines are found so far.  */
4200   int eol_type = CODING_EOL_UNDECIDED;
4201   int this_eol_type;
4202   int msb, lsb;
4203
4204   if (big_endian_p)
4205     msb = 0, lsb = 1;
4206   else
4207     msb = 1, lsb = 0;
4208
4209   *skip = 0;
4210
4211   while ((src + 1) < src_end && total < MAX_EOL_CHECK_COUNT)
4212     {
4213       c1 = (src[msb] << 8) | (src[lsb]);
4214       src += 2;
4215
4216       if (c1 == '\n' || c1 == '\r')
4217         {
4218           if (*skip == 0)
4219             *skip = src - 2 - source;
4220           total++;
4221           if (c1 == '\n')
4222             {
4223               this_eol_type = CODING_EOL_LF;
4224             }
4225           else
4226             {
4227               if ((src + 1) >= src_end)
4228                 {
4229                   this_eol_type = CODING_EOL_CR;
4230                 }
4231               else
4232                 {
4233                   c2 = (src[msb] << 8) | (src[lsb]);
4234                   if (c2 == '\n')
4235                     this_eol_type = CODING_EOL_CRLF, src += 2;
4236                   else
4237                     this_eol_type = CODING_EOL_CR;
4238                 }
4239             }
4240
4241           if (eol_type == CODING_EOL_UNDECIDED)
4242             /* This is the first end-of-line.  */
4243             eol_type = this_eol_type;
4244           else if (eol_type != this_eol_type)
4245             {
4246               /* The found type is different from what found before.  */
4247               eol_type = CODING_EOL_INCONSISTENT;
4248               break;
4249             }
4250         }
4251     }
4252
4253   if (*skip == 0)
4254     *skip = src_end - source;
4255   return eol_type;
4256 }
4257
4258 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
4259    is encoded.  If it detects an appropriate format of end-of-line, it
4260    sets the information in *CODING.  */
4261
4262 void
4263 detect_eol (coding, src, src_bytes)
4264      struct coding_system *coding;
4265      unsigned char *src;
4266      int src_bytes;
4267 {
4268   Lisp_Object val;
4269   int skip;
4270   int eol_type;
4271
4272   switch (coding->category_idx)
4273     {
4274     case CODING_CATEGORY_IDX_UTF_16_BE:
4275       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 1);
4276       break;
4277     case CODING_CATEGORY_IDX_UTF_16_LE:
4278       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 0);
4279       break;
4280     default:
4281       eol_type = detect_eol_type (src, src_bytes, &skip);
4282       break;
4283     }
4284
4285   if (coding->heading_ascii > skip)
4286     coding->heading_ascii = skip;
4287   else
4288     skip = coding->heading_ascii;
4289
4290   if (eol_type == CODING_EOL_UNDECIDED)
4291     return;
4292   if (eol_type == CODING_EOL_INCONSISTENT)
4293     {
4294 #if 0
4295       /* This code is suppressed until we find a better way to
4296          distinguish raw text file and binary file.  */
4297
4298       /* If we have already detected that the coding is raw-text, the
4299          coding should actually be no-conversion.  */
4300       if (coding->type == coding_type_raw_text)
4301         {
4302           setup_coding_system (Qno_conversion, coding);
4303           return;
4304         }
4305       /* Else, let's decode only text code anyway.  */
4306 #endif /* 0 */
4307       eol_type = CODING_EOL_LF;
4308     }
4309
4310   val = Fget (coding->symbol, Qeol_type);
4311   if (VECTORP (val) && XVECTOR (val)->size == 3)
4312     {
4313       int src_multibyte = coding->src_multibyte;
4314       int dst_multibyte = coding->dst_multibyte;
4315
4316       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
4317       coding->src_multibyte = src_multibyte;
4318       coding->dst_multibyte = dst_multibyte;
4319       coding->heading_ascii = skip;
4320     }
4321 }
4322
/* Number of bytes that decoding_buffer_size and encoding_buffer_size
   add on top of the scaled source length.  */
#define CONVERSION_BUFFER_EXTRA_ROOM 256

/* Factor by which decoding_buffer_size multiplies the source length.
   CODING is a pointer to a struct coding_system: 3 for ISO2022, the
   decoder's `buf_magnification' for CCL, and 2 otherwise.  CODING is
   parenthesized so that any pointer expression may be passed; it is
   evaluated more than once.  */
#define DECODING_BUFFER_MAG(coding)                             \
  ((coding)->type == coding_type_iso2022                        \
   ? 3                                                          \
   : ((coding)->type == coding_type_ccl                         \
      ? (coding)->spec.ccl.decoder.buf_magnification            \
      : 2))
4331
4332 /* Return maximum size (bytes) of a buffer enough for decoding
4333    SRC_BYTES of text encoded in CODING.  */
4334
4335 int
4336 decoding_buffer_size (coding, src_bytes)
4337      struct coding_system *coding;
4338      int src_bytes;
4339 {
4340   return (src_bytes * DECODING_BUFFER_MAG (coding)
4341           + CONVERSION_BUFFER_EXTRA_ROOM);
4342 }
4343
4344 /* Return maximum size (bytes) of a buffer enough for encoding
4345    SRC_BYTES of text to CODING.  */
4346
4347 int
4348 encoding_buffer_size (coding, src_bytes)
4349      struct coding_system *coding;
4350      int src_bytes;
4351 {
4352   int magnification;
4353
4354   if (coding->type == coding_type_ccl)
4355     magnification = coding->spec.ccl.encoder.buf_magnification;
4356   else if (CODING_REQUIRE_ENCODING (coding))
4357     magnification = 3;
4358   else
4359     magnification = 1;
4360
4361   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
4362 }
4363
/* Working buffer for code conversion.  */
struct conversion_buffer
{
  int size;                     /* Number of bytes DATA points to.  */
  int on_stack;                 /* 1 if allocated by alloca.  */
  unsigned char *data;
};

/* Don't use alloca for allocating memory space larger than this, lest
   we overflow their stack.  The value is parenthesized so that the
   macro behaves as a single operand in any expression.  */
#define MAX_ALLOCA (16*1024)

/* Allocate LEN bytes of memory for BUF (struct conversion_buffer).
   BUF is the structure itself, not a pointer to it.  Requests smaller
   than MAX_ALLOCA come from alloca, so the storage belongs to the
   function in which this macro is expanded; larger requests come from
   xmalloc.  LEN is evaluated more than once.  */
#define allocate_conversion_buffer(buf, len)            \
  do {                                                  \
    if ((len) < MAX_ALLOCA)                             \
      {                                                 \
        (buf).data = (unsigned char *) alloca (len);    \
        (buf).on_stack = 1;                             \
      }                                                 \
    else                                                \
      {                                                 \
        (buf).data = (unsigned char *) xmalloc (len);   \
        (buf).on_stack = 0;                             \
      }                                                 \
    (buf).size = (len);                                 \
  } while (0)
4391
4392 /* Double the allocated memory for *BUF.  */
4393 static void
4394 extend_conversion_buffer (buf)
4395      struct conversion_buffer *buf;
4396 {
4397   if (buf->on_stack)
4398     {
4399       unsigned char *save = buf->data;
4400       buf->data = (unsigned char *) xmalloc (buf->size * 2);
4401       bcopy (save, buf->data, buf->size);
4402       buf->on_stack = 0;
4403     }
4404   else
4405     {
4406       buf->data = (unsigned char *) xrealloc (buf->data, buf->size * 2);
4407     }
4408   buf->size *= 2;
4409 }
4410
4411 /* Free the allocated memory for BUF if it is not on stack.  */
4412 static void
4413 free_conversion_buffer (buf)
4414      struct conversion_buffer *buf;
4415 {
4416   if (!buf->on_stack)
4417     xfree (buf->data);
4418 }
4419
4420 int
4421 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
4422      struct coding_system *coding;
4423      unsigned char *source, *destination;
4424      int src_bytes, dst_bytes, encodep;
4425 {
4426   struct ccl_program *ccl
4427     = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
4428   unsigned char *dst = destination;
4429
4430   ccl->suppress_error = coding->suppress_error;
4431   ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4432   if (encodep)
4433     {
4434       /* On encoding, EOL format is converted within ccl_driver.  For
4435          that, setup proper information in the structure CCL.  */
4436       ccl->eol_type = coding->eol_type;
4437       if (ccl->eol_type ==CODING_EOL_UNDECIDED)
4438         ccl->eol_type = CODING_EOL_LF;
4439       ccl->cr_consumed = coding->spec.ccl.cr_carryover;
4440     }
4441   ccl->multibyte = coding->src_multibyte;
4442   if (coding->spec.ccl.eight_bit_carryover[0] != 0)
4443     {
4444       /* Move carryover bytes to DESTINATION.  */
4445       unsigned char *p = coding->spec.ccl.eight_bit_carryover;
4446       while (*p)
4447         *dst++ = *p++;
4448       coding->spec.ccl.eight_bit_carryover[0] = 0;
4449       if (dst_bytes)
4450         dst_bytes -= dst - destination;
4451     }
4452
4453   coding->produced = (ccl_driver (ccl, source, dst, src_bytes, dst_bytes,
4454                                   &(coding->consumed))
4455                       + dst - destination);
4456
4457   if (encodep)
4458     {
4459       coding->produced_char = coding->produced;
4460       coding->spec.ccl.cr_carryover = ccl->cr_consumed;
4461     }
4462   else if (!ccl->eight_bit_control)
4463     {
4464       /* The produced bytes forms a valid multibyte sequence. */
4465       coding->produced_char
4466         = multibyte_chars_in_text (destination, coding->produced);
4467       coding->spec.ccl.eight_bit_carryover[0] = 0;
4468     }
4469   else
4470     {
4471       /* On decoding, the destination should always multibyte.  But,
4472          CCL program might have been generated an invalid multibyte
4473          sequence.  Here we make such a sequence valid as
4474          multibyte.  */
4475       int bytes
4476         = dst_bytes ? dst_bytes : source + coding->consumed - destination;
4477
4478       if ((coding->consumed < src_bytes
4479            || !ccl->last_block)
4480           && coding->produced >= 1
4481           && destination[coding->produced - 1] >= 0x80)
4482         {
4483           /* We should not convert the tailing 8-bit codes to
4484              multibyte form even if they doesn't form a valid
4485              multibyte sequence.  They may form a valid sequence in
4486              the next call.  */
4487           int carryover = 0;
4488
4489           if (destination[coding->produced - 1] < 0xA0)
4490             carryover = 1;
4491           else if (coding->produced >= 2)
4492             {
4493               if (destination[coding->produced - 2] >= 0x80)
4494                 {
4495                   if (destination[coding->produced - 2] < 0xA0)
4496                     carryover = 2;
4497                   else if (coding->produced >= 3
4498                            && destination[coding->produced - 3] >= 0x80
4499                            && destination[coding->produced - 3] < 0xA0)
4500                     carryover = 3;
4501                 }
4502             }
4503           if (carryover > 0)
4504             {
4505               BCOPY_SHORT (destination + coding->produced - carryover,
4506                            coding->spec.ccl.eight_bit_carryover,
4507                            carryover);
4508               coding->spec.ccl.eight_bit_carryover[carryover] = 0;
4509               coding->produced -= carryover;
4510             }
4511         }
4512       coding->produced = str_as_multibyte (destination, bytes,
4513                                            coding->produced,
4514                                            &(coding->produced_char));
4515     }
4516
4517   switch (ccl->status)
4518     {
4519     case CCL_STAT_SUSPEND_BY_SRC:
4520       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
4521       break;
4522     case CCL_STAT_SUSPEND_BY_DST:
4523       coding->result = CODING_FINISH_INSUFFICIENT_DST;
4524       break;
4525     case CCL_STAT_QUIT:
4526     case CCL_STAT_INVALID_CMD:
4527       coding->result = CODING_FINISH_INTERRUPT;
4528       break;
4529     default:
4530       coding->result = CODING_FINISH_NORMAL;
4531       break;
4532     }
4533   return coding->result;
4534 }
4535
4536 /* Decode EOL format of the text at PTR of BYTES length destructively
4537    according to CODING->eol_type.  This is called after the CCL
4538    program produced a decoded text at PTR.  If we do CRLF->LF
4539    conversion, update CODING->produced and CODING->produced_char.  */
4540
4541 static void
4542 decode_eol_post_ccl (coding, ptr, bytes)
4543      struct coding_system *coding;
4544      unsigned char *ptr;
4545      int bytes;
4546 {
4547   Lisp_Object val, saved_coding_symbol;
4548   unsigned char *pend = ptr + bytes;
4549   int dummy;
4550
4551   /* Remember the current coding system symbol.  We set it back when
4552      an inconsistent EOL is found so that `last-coding-system-used' is
4553      set to the coding system that doesn't specify EOL conversion.  */
4554   saved_coding_symbol = coding->symbol;
4555
4556   coding->spec.ccl.cr_carryover = 0;
4557   if (coding->eol_type == CODING_EOL_UNDECIDED)
4558     {
4559       /* Here, to avoid the call of setup_coding_system, we directly
4560          call detect_eol_type.  */
4561       coding->eol_type = detect_eol_type (ptr, bytes, &dummy);
4562       if (coding->eol_type == CODING_EOL_INCONSISTENT)
4563         coding->eol_type = CODING_EOL_LF;
4564       if (coding->eol_type != CODING_EOL_UNDECIDED)
4565         {
4566           val = Fget (coding->symbol, Qeol_type);
4567           if (VECTORP (val) && XVECTOR (val)->size == 3)
4568             coding->symbol = XVECTOR (val)->contents[coding->eol_type];
4569         }
4570       coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4571     }
4572
4573   if (coding->eol_type == CODING_EOL_LF
4574       || coding->eol_type == CODING_EOL_UNDECIDED)
4575     {
4576       /* We have nothing to do.  */
4577       ptr = pend;
4578     }
4579   else if (coding->eol_type == CODING_EOL_CRLF)
4580     {
4581       unsigned char *pstart = ptr, *p = ptr;
4582
4583       if (! (coding->mode & CODING_MODE_LAST_BLOCK)
4584           && *(pend - 1) == '\r')
4585         {
4586           /* If the last character is CR, we can't handle it here
4587              because LF will be in the not-yet-decoded source text.
4588              Recorded that the CR is not yet processed.  */
4589           coding->spec.ccl.cr_carryover = 1;
4590           coding->produced--;
4591           coding->produced_char--;
4592           pend--;
4593         }
4594       while (ptr < pend)
4595         {
4596           if (*ptr == '\r')
4597             {
4598               if (ptr + 1 < pend && *(ptr + 1) == '\n')
4599                 {
4600                   *p++ = '\n';
4601                   ptr += 2;
4602                 }
4603               else
4604                 {
4605                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4606                     goto undo_eol_conversion;
4607                   *p++ = *ptr++;
4608                 }
4609             }
4610           else if (*ptr == '\n'
4611                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4612             goto undo_eol_conversion;
4613           else
4614             *p++ = *ptr++;
4615           continue;
4616
4617         undo_eol_conversion:
4618           /* We have faced with inconsistent EOL format at PTR.
4619              Convert all LFs before PTR back to CRLFs.  */
4620           for (p--, ptr--; p >= pstart; p--)
4621             {
4622               if (*p == '\n')
4623                 *ptr-- = '\n', *ptr-- = '\r';
4624               else
4625                 *ptr-- = *p;
4626             }
4627           /*  If carryover is recorded, cancel it because we don't
4628               convert CRLF anymore.  */
4629           if (coding->spec.ccl.cr_carryover)
4630             {
4631               coding->spec.ccl.cr_carryover = 0;
4632               coding->produced++;
4633               coding->produced_char++;
4634               pend++;
4635             }
4636           p = ptr = pend;
4637           coding->eol_type = CODING_EOL_LF;
4638           coding->symbol = saved_coding_symbol;
4639         }
4640       if (p < pend)
4641         {
4642           /* As each two-byte sequence CRLF was converted to LF, (PEND
4643              - P) is the number of deleted characters.  */
4644           coding->produced -= pend - p;
4645           coding->produced_char -= pend - p;
4646         }
4647     }
4648   else                  /* i.e. coding->eol_type == CODING_EOL_CR */
4649     {
4650       unsigned char *p = ptr;
4651
4652       for (; ptr < pend; ptr++)
4653         {
4654           if (*ptr == '\r')
4655             *ptr = '\n';
4656           else if (*ptr == '\n'
4657                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4658             {
4659               for (; p < ptr; p++)
4660                 {
4661                   if (*p == '\n')
4662                     *p = '\r';
4663                 }
4664               ptr = pend;
4665               coding->eol_type = CODING_EOL_LF;
4666               coding->symbol = saved_coding_symbol;
4667             }
4668         }
4669     }
4670 }
4671
4672 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
4673    decoding, it may detect coding system and format of end-of-line if
4674    those are not yet decided.  The source should be unibyte, the
4675    result is multibyte if CODING->dst_multibyte is nonzero, else
4676    unibyte.  */
4677
4678 int
4679 decode_coding (coding, source, destination, src_bytes, dst_bytes)
4680      struct coding_system *coding;
4681      unsigned char *source, *destination;
4682      int src_bytes, dst_bytes;
4683 {
4684   if (coding->type == coding_type_undecided)
4685     detect_coding (coding, source, src_bytes);
4686
4687   if (coding->eol_type == CODING_EOL_UNDECIDED
4688       && coding->type != coding_type_ccl)
4689     {
4690       detect_eol (coding, source, src_bytes);
4691       /* We had better recover the original eol format if we
4692          encounter an inconsistent eol format while decoding.  */
4693       coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4694     }
4695
4696   coding->produced = coding->produced_char = 0;
4697   coding->consumed = coding->consumed_char = 0;
4698   coding->errors = 0;
4699   coding->result = CODING_FINISH_NORMAL;
4700
4701   switch (coding->type)
4702     {
4703     case coding_type_sjis:
4704       decode_coding_sjis_big5 (coding, source, destination,
4705                                src_bytes, dst_bytes, 1);
4706       break;
4707
4708     case coding_type_iso2022:
4709       decode_coding_iso2022 (coding, source, destination,
4710                              src_bytes, dst_bytes);
4711       break;
4712
4713     case coding_type_big5:
4714       decode_coding_sjis_big5 (coding, source, destination,
4715                                src_bytes, dst_bytes, 0);
4716       break;
4717
4718     case coding_type_emacs_mule:
4719       decode_coding_emacs_mule (coding, source, destination,
4720                                 src_bytes, dst_bytes);
4721       break;
4722
4723     case coding_type_ccl:
4724       if (coding->spec.ccl.cr_carryover)
4725         {
4726           /* Set the CR which is not processed by the previous call of
4727              decode_eol_post_ccl in DESTINATION.  */
4728           *destination = '\r';
4729           coding->produced++;
4730           coding->produced_char++;
4731           dst_bytes--;
4732         }
4733       ccl_coding_driver (coding, source,
4734                          destination + coding->spec.ccl.cr_carryover,
4735                          src_bytes, dst_bytes, 0);
4736       if (coding->eol_type != CODING_EOL_LF)
4737         decode_eol_post_ccl (coding, destination, coding->produced);
4738       break;
4739
4740     default:
4741       decode_eol (coding, source, destination, src_bytes, dst_bytes);
4742     }
4743
4744   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
4745       && coding->mode & CODING_MODE_LAST_BLOCK
4746       && coding->consumed == src_bytes)
4747     coding->result = CODING_FINISH_NORMAL;
4748
4749   if (coding->mode & CODING_MODE_LAST_BLOCK
4750       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4751     {
4752       unsigned char *src = source + coding->consumed;
4753       unsigned char *dst = destination + coding->produced;
4754
4755       src_bytes -= coding->consumed;
4756       coding->errors++;
4757       if (COMPOSING_P (coding))
4758         DECODE_COMPOSITION_END ('1');
4759       while (src_bytes--)
4760         {
4761           int c = *src++;
4762           dst += CHAR_STRING (c, dst);
4763           coding->produced_char++;
4764         }
4765       coding->consumed = coding->consumed_char = src - source;
4766       coding->produced = dst - destination;
4767       coding->result = CODING_FINISH_NORMAL;
4768     }
4769
4770   if (!coding->dst_multibyte)
4771     {
4772       coding->produced = str_as_unibyte (destination, coding->produced);
4773       coding->produced_char = coding->produced;
4774     }
4775
4776   return coding->result;
4777 }
4778
4779 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  The
4780    multibyteness of the source is CODING->src_multibyte, the
4781    multibyteness of the result is always unibyte.  */
4782
4783 int
4784 encode_coding (coding, source, destination, src_bytes, dst_bytes)
4785      struct coding_system *coding;
4786      unsigned char *source, *destination;
4787      int src_bytes, dst_bytes;
4788 {
4789   coding->produced = coding->produced_char = 0;
4790   coding->consumed = coding->consumed_char = 0;
4791   coding->errors = 0;
4792   coding->result = CODING_FINISH_NORMAL;
4793
4794   switch (coding->type)
4795     {
4796     case coding_type_sjis:
4797       encode_coding_sjis_big5 (coding, source, destination,
4798                                src_bytes, dst_bytes, 1);
4799       break;
4800
4801     case coding_type_iso2022:
4802       encode_coding_iso2022 (coding, source, destination,
4803                              src_bytes, dst_bytes);
4804       break;
4805
4806     case coding_type_big5:
4807       encode_coding_sjis_big5 (coding, source, destination,
4808                                src_bytes, dst_bytes, 0);
4809       break;
4810
4811     case coding_type_emacs_mule:
4812       encode_coding_emacs_mule (coding, source, destination,
4813                                 src_bytes, dst_bytes);
4814       break;
4815
4816     case coding_type_ccl:
4817       ccl_coding_driver (coding, source, destination,
4818                          src_bytes, dst_bytes, 1);
4819       break;
4820
4821     default:
4822       encode_eol (coding, source, destination, src_bytes, dst_bytes);
4823     }
4824
4825   if (coding->mode & CODING_MODE_LAST_BLOCK
4826       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4827     {
4828       unsigned char *src = source + coding->consumed;
4829       unsigned char *src_end = src + src_bytes;
4830       unsigned char *dst = destination + coding->produced;
4831
4832       if (coding->type == coding_type_iso2022)
4833         ENCODE_RESET_PLANE_AND_REGISTER;
4834       if (COMPOSING_P (coding))
4835         *dst++ = ISO_CODE_ESC, *dst++ = '1';
4836       if (coding->consumed < src_bytes)
4837         {
4838           int len = src_bytes - coding->consumed;
4839
4840           BCOPY_SHORT (source + coding->consumed, dst, len);
4841           if (coding->src_multibyte)
4842             len = str_as_unibyte (dst, len);
4843           dst += len;
4844           coding->consumed = src_bytes;
4845         }
4846       coding->produced = coding->produced_char = dst - destination;
4847       coding->result = CODING_FINISH_NORMAL;
4848     }
4849
4850   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
4851       && coding->consumed == src_bytes)
4852     coding->result = CODING_FINISH_NORMAL;
4853
4854   return coding->result;
4855 }
4856
4857 /* Scan text in the region between *BEG and *END (byte positions),
4858    skip characters which we don't have to decode by coding system
4859    CODING at the head and tail, then set *BEG and *END to the region
4860    of the text we actually have to convert.  The caller should move
4861    the gap out of the region in advance if the region is from a
4862    buffer.
4863
4864    If STR is not NULL, *BEG and *END are indices into STR.  */
4865
4866 static void
4867 shrink_decoding_region (beg, end, coding, str)
4868      int *beg, *end;
4869      struct coding_system *coding;
4870      unsigned char *str;
4871 {
4872   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
4873   int eol_conversion;
4874   Lisp_Object translation_table;
4875
4876   if (coding->type == coding_type_ccl
4877       || coding->type == coding_type_undecided
4878       || coding->eol_type != CODING_EOL_LF
4879       || !NILP (coding->post_read_conversion)
4880       || coding->composing != COMPOSITION_DISABLED)
4881     {
4882       /* We can't skip any data.  */
4883       return;
4884     }
4885   if (coding->type == coding_type_no_conversion
4886       || coding->type == coding_type_raw_text
4887       || coding->type == coding_type_emacs_mule)
4888     {
4889       /* We need no conversion, but don't have to skip any data here.
4890          Decoding routine handles them effectively anyway.  */
4891       return;
4892     }
4893
4894   translation_table = coding->translation_table_for_decode;
4895   if (NILP (translation_table) && !NILP (Venable_character_translation))
4896     translation_table = Vstandard_translation_table_for_decode;
4897   if (CHAR_TABLE_P (translation_table))
4898     {
4899       int i;
4900       for (i = 0; i < 128; i++)
4901         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
4902           break;
4903       if (i < 128)
4904         /* Some ASCII character should be translated.  We give up
4905            shrinking.  */
4906         return;
4907     }
4908
4909   if (coding->heading_ascii >= 0)
4910     /* Detection routine has already found how much we can skip at the
4911        head.  */
4912     *beg += coding->heading_ascii;
4913
4914   if (str)
4915     {
4916       begp_orig = begp = str + *beg;
4917       endp_orig = endp = str + *end;
4918     }
4919   else
4920     {
4921       begp_orig = begp = BYTE_POS_ADDR (*beg);
4922       endp_orig = endp = begp + *end - *beg;
4923     }
4924
4925   eol_conversion = (coding->eol_type == CODING_EOL_CR
4926                     || coding->eol_type == CODING_EOL_CRLF);
4927
4928   switch (coding->type)
4929     {
4930     case coding_type_sjis:
4931     case coding_type_big5:
4932       /* We can skip all ASCII characters at the head.  */
4933       if (coding->heading_ascii < 0)
4934         {
4935           if (eol_conversion)
4936             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
4937           else
4938             while (begp < endp && *begp < 0x80) begp++;
4939         }
4940       /* We can skip all ASCII characters at the tail except for the
4941          second byte of SJIS or BIG5 code.  */
4942       if (eol_conversion)
4943         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
4944       else
4945         while (begp < endp && endp[-1] < 0x80) endp--;
4946       /* Do not consider LF as ascii if preceded by CR, since that
4947          confuses eol decoding. */
4948       if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4949         endp++;
4950       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
4951         endp++;
4952       break;
4953
4954     case coding_type_iso2022:
4955       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
4956         /* We can't skip any data.  */
4957         break;
4958       if (coding->heading_ascii < 0)
4959         {
4960           /* We can skip all ASCII characters at the head except for a
4961              few control codes.  */
4962           while (begp < endp && (c = *begp) < 0x80
4963                  && c != ISO_CODE_CR && c != ISO_CODE_SO
4964                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
4965                  && (!eol_conversion || c != ISO_CODE_LF))
4966             begp++;
4967         }
4968       switch (coding->category_idx)
4969         {
4970         case CODING_CATEGORY_IDX_ISO_8_1:
4971         case CODING_CATEGORY_IDX_ISO_8_2:
4972           /* We can skip all ASCII characters at the tail.  */
4973           if (eol_conversion)
4974             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
4975           else
4976             while (begp < endp && endp[-1] < 0x80) endp--;
4977           /* Do not consider LF as ascii if preceded by CR, since that
4978              confuses eol decoding. */
4979           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4980             endp++;
4981           break;
4982
4983         case CODING_CATEGORY_IDX_ISO_7:
4984         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
4985           {
4986             /* We can skip all characters at the tail except for 8-bit
4987                codes and ESC and the following 2-byte at the tail.  */
4988             unsigned char *eight_bit = NULL;
4989
4990             if (eol_conversion)
4991               while (begp < endp
4992                      && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
4993                 {
4994                   if (!eight_bit && c & 0x80) eight_bit = endp;
4995                   endp--;
4996                 }
4997             else
4998               while (begp < endp
4999                      && (c = endp[-1]) != ISO_CODE_ESC)
5000                 {
5001                   if (!eight_bit && c & 0x80) eight_bit = endp;
5002                   endp--;
5003                 }
5004             /* Do not consider LF as ascii if preceded by CR, since that
5005                confuses eol decoding. */
5006             if (begp < endp && endp < endp_orig
5007                 && endp[-1] == '\r' && endp[0] == '\n')
5008               endp++;
5009             if (begp < endp && endp[-1] == ISO_CODE_ESC)
5010               {
5011                 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
5012                   /* This is an ASCII designation sequence.  We can
5013                      surely skip the tail.  But, if we have
5014                      encountered an 8-bit code, skip only the codes
5015                      after that.  */
5016                   endp = eight_bit ? eight_bit : endp + 2;
5017                 else
5018                   /* Hmmm, we can't skip the tail.  */
5019                   endp = endp_orig;
5020               }
5021             else if (eight_bit)
5022               endp = eight_bit;
5023           }
5024         }
5025       break;
5026
5027     default:
5028       abort ();
5029     }
5030   *beg += begp - begp_orig;
5031   *end += endp - endp_orig;
5032   return;
5033 }
5034
5035 /* Like shrink_decoding_region but for encoding.  */
5036
5037 static void
5038 shrink_encoding_region (beg, end, coding, str)
5039      int *beg, *end;
5040      struct coding_system *coding;
5041      unsigned char *str;
5042 {
5043   unsigned char *begp_orig, *begp, *endp_orig, *endp;
5044   int eol_conversion;
5045   Lisp_Object translation_table;
5046
5047   if (coding->type == coding_type_ccl
5048       || coding->eol_type == CODING_EOL_CRLF
5049       || coding->eol_type == CODING_EOL_CR
5050       || coding->cmp_data && coding->cmp_data->used > 0)
5051     {
5052       /* We can't skip any data.  */
5053       return;
5054     }
5055   if (coding->type == coding_type_no_conversion
5056       || coding->type == coding_type_raw_text
5057       || coding->type == coding_type_emacs_mule
5058       || coding->type == coding_type_undecided)
5059     {
5060       /* We need no conversion, but don't have to skip any data here.
5061          Encoding routine handles them effectively anyway.  */
5062       return;
5063     }
5064
5065   translation_table = coding->translation_table_for_encode;
5066   if (NILP (translation_table) && !NILP (Venable_character_translation))
5067     translation_table = Vstandard_translation_table_for_encode;
5068   if (CHAR_TABLE_P (translation_table))
5069     {
5070       int i;
5071       for (i = 0; i < 128; i++)
5072         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
5073           break;
5074       if (i < 128)
5075         /* Some ASCII character should be translated.  We give up
5076            shrinking.  */
5077         return;
5078     }
5079
5080   if (str)
5081     {
5082       begp_orig = begp = str + *beg;
5083       endp_orig = endp = str + *end;
5084     }
5085   else
5086     {
5087       begp_orig = begp = BYTE_POS_ADDR (*beg);
5088       endp_orig = endp = begp + *end - *beg;
5089     }
5090
5091   eol_conversion = (coding->eol_type == CODING_EOL_CR
5092                     || coding->eol_type == CODING_EOL_CRLF);
5093
5094   /* Here, we don't have to check coding->pre_write_conversion because
5095      the caller is expected to have handled it already.  */
5096   switch (coding->type)
5097     {
5098     case coding_type_iso2022:
5099       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
5100         /* We can't skip any data.  */
5101         break;
5102       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
5103         {
5104           unsigned char *bol = begp;
5105           while (begp < endp && *begp < 0x80)
5106             {
5107               begp++;
5108               if (begp[-1] == '\n')
5109                 bol = begp;
5110             }
5111           begp = bol;
5112           goto label_skip_tail;
5113         }
5114       /* fall down ... */
5115
5116     case coding_type_sjis:
5117     case coding_type_big5:
5118       /* We can skip all ASCII characters at the head and tail.  */
5119       if (eol_conversion)
5120         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
5121       else
5122         while (begp < endp && *begp < 0x80) begp++;
5123     label_skip_tail:
5124       if (eol_conversion)
5125         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
5126       else
5127         while (begp < endp && *(endp - 1) < 0x80) endp--;
5128       break;
5129
5130     default:
5131       abort ();
5132     }
5133
5134   *beg += begp - begp_orig;
5135   *end += endp - endp_orig;
5136   return;
5137 }
5138
/* Shrinking a conversion region has a cost of its own, so it is
   attempted only for regions longer than this many bytes.  */
static int shrink_conversion_region_threshhold = 1024;

/* Shrink the region *BEG .. *END by shrink_encoding_region if ENCODEP
   is nonzero, by shrink_decoding_region otherwise, provided that the
   region is longer than shrink_conversion_region_threshhold.  */
#define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep)        \
  do {                                                                  \
    if (*(end) - *(beg) > shrink_conversion_region_threshhold)          \
      {                                                                 \
        if (encodep)                                                    \
          shrink_encoding_region (beg, end, coding, str);               \
        else                                                            \
          shrink_decoding_region (beg, end, coding, str);               \
      }                                                                 \
  } while (0)
5152
5153 static Lisp_Object
5154 code_convert_region_unwind (dummy)
5155      Lisp_Object dummy;
5156 {
5157   inhibit_pre_post_conversion = 0;
5158   return Qnil;
5159 }
5160
5161 /* Store information about all compositions in the range FROM and TO
5162    of OBJ in memory blocks pointed by CODING->cmp_data.  OBJ is a
5163    buffer or a string, defaults to the current buffer.  */
5164
5165 void
5166 coding_save_composition (coding, from, to, obj)
5167      struct coding_system *coding;
5168      int from, to;
5169      Lisp_Object obj;
5170 {
5171   Lisp_Object prop;
5172   int start, end;
5173
5174   if (coding->composing == COMPOSITION_DISABLED)
5175     return;
5176   if (!coding->cmp_data)
5177     coding_allocate_composition_data (coding, from);
5178   if (!find_composition (from, to, &start, &end, &prop, obj)
5179       || end > to)
5180     return;
5181   if (start < from
5182       && (!find_composition (end, to, &start, &end, &prop, obj)
5183           || end > to))
5184     return;
5185   coding->composing = COMPOSITION_NO;
5186   do
5187     {
5188       if (COMPOSITION_VALID_P (start, end, prop))
5189         {
5190           enum composition_method method = COMPOSITION_METHOD (prop);
5191           if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
5192               >= COMPOSITION_DATA_SIZE)
5193             coding_allocate_composition_data (coding, from);
5194           /* For relative composition, we remember start and end
5195              positions, for the other compositions, we also remember
5196              components.  */
5197           CODING_ADD_COMPOSITION_START (coding, start - from, method);
5198           if (method != COMPOSITION_RELATIVE)
5199             {
5200               /* We must store a*/
5201               Lisp_Object val, ch;
5202
5203               val = COMPOSITION_COMPONENTS (prop);
5204               if (CONSP (val))
5205                 while (CONSP (val))
5206                   {
5207                     ch = XCAR (val), val = XCDR (val);
5208                     CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
5209                   }
5210               else if (VECTORP (val) || STRINGP (val))
5211                 {
5212                   int len = (VECTORP (val)
5213                              ? XVECTOR (val)->size : XSTRING (val)->size);
5214                   int i;
5215                   for (i = 0; i < len; i++)
5216                     {
5217                       ch = (STRINGP (val)
5218                             ? Faref (val, make_number (i))
5219                             : XVECTOR (val)->contents[i]);
5220                       CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
5221                     }
5222                 }
5223               else              /* INTEGERP (val) */
5224                 CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (val));
5225             }
5226           CODING_ADD_COMPOSITION_END (coding, end - from);
5227         }
5228       start = end;
5229     }
5230   while (start < to
5231          && find_composition (start, to, &start, &end, &prop, obj)
5232          && end <= to);
5233
5234   /* Make coding->cmp_data point to the first memory block.  */
5235   while (coding->cmp_data->prev)
5236     coding->cmp_data = coding->cmp_data->prev;
5237   coding->cmp_data_start = 0;
5238 }
5239
5240 /* Reflect the saved information about compositions to OBJ.
5241    CODING->cmp_data points to a memory block for the information.  OBJ
5242    is a buffer or a string, defaults to the current buffer.

        The memory blocks are walked from the first one (reached by
        following the `prev' links) to the last one (following `next').
        Each record in a block is read as:
          data[0]    length of the record in ints, which is also the
                     step to the next record; scanning of a block stops
                     at the first record whose length is not positive,
          data[1]    start position passed to compose_text,
          data[2]    end position passed to compose_text,
          data[3]    composition method,
          data[4..]  components (not read for COMPOSITION_RELATIVE).

        A record whose component count is negative or larger than the
        local argument array is treated as invalid, and the rest of its
        block is ignored.  Nothing is done if CODING->cmp_data is NULL.  */
5243
5244 void
5245 coding_restore_composition (coding, obj)
5246      struct coding_system *coding;
5247      Lisp_Object obj;
5248 {
5249   struct composition_data *cmp_data = coding->cmp_data;
5250
5251   if (!cmp_data)
5252     return;
5253
       /* Rewind to the first memory block of the chain.  */
5254   while (cmp_data->prev)
5255     cmp_data = cmp_data->prev;
5256
5257   while (cmp_data)
5258     {
5259       int i;
5260
5261       for (i = 0; i < cmp_data->used && cmp_data->data[i] > 0;
5262            i += cmp_data->data[i])
5263         {
5264           int *data = cmp_data->data + i;
5265           enum composition_method method = (enum composition_method) data[3];
5266           Lisp_Object components;
5267
5268           if (method == COMPOSITION_RELATIVE)
5269             components = Qnil;
5270           else
5271             {
5272               int len = data[0] - 4, j;
5273               Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5274
                   /* Invalid composition data: the components would not
                      fit in ARGS (or the count is negative).  Give up on
                      this block rather than overrun the stack.  */
                   if (len < 0 || len > (int) (sizeof args / sizeof args[0]))
                     break;
5275               for (j = 0; j < len; j++)
5276                 args[j] = make_number (data[4 + j]);
                   /* Components are packed into a string for
                      COMPOSITION_WITH_ALTCHARS and into a vector for
                      every other non-relative method.  */
5277               components = (method == COMPOSITION_WITH_ALTCHARS
5278                             ? Fstring (len, args) : Fvector (len, args));
5279             }
5280           compose_text (data[1], data[2], components, Qnil, obj);
5281         }
5282       cmp_data = cmp_data->next;
5283     }
5284 }
5285
5286 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
5287    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
5288    coding system CODING, and return the status code of code conversion
5289    (currently always 0; the value has no meaning).
5290
5291    How many characters (and bytes) were converted into how many
5292    characters (and bytes) is recorded in the members `consumed',
5293    `consumed_char', `produced' and `produced_char' of the structure
        CODING.  On the early return taken when nothing but ASCII remains
        to be converted, only `produced' and `produced_char' are set.
5294
5295    If REPLACE is nonzero, we do various things as if the original text
5296    is deleted and a new text is inserted.  See the comments in
5297    replace_range (insdel.c) to know what we are doing.
5298
5299    If REPLACE is zero, it is assumed that the source text is unibyte.
5300    Otherwise, it is assumed that the source text is multibyte.

        Composition data set up for the conversion is always released with
        coding_free_composition_data before returning.  */
5301
5302 int
5303 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
5304      int from, from_byte, to, to_byte, encodep, replace;
5305      struct coding_system *coding;
5306 {
5307   int len = to - from, len_byte = to_byte - from_byte;
5308   int require, inserted, inserted_byte;
5309   int head_skip, tail_skip, total_skip = 0;
5310   Lisp_Object saved_coding_symbol;
5311   int first = 1;
5312   unsigned char *src, *dst;
5313   Lisp_Object deletion;
5314   int orig_point = PT, orig_len = len;
5315   int prev_Z;
5317
5318   deletion = Qnil;
5319   saved_coding_symbol = coding->symbol;
5320
       /* If point lies strictly inside the region, FROM is used as the
          original point from here on.  */
5321   if (from < PT && PT < to)
5322     {
5323       TEMP_SET_PT_BOTH (from, from_byte);
5324       orig_point = from;
5325     }
5326
5327   if (replace)
5328     {
5329       int saved_from = from;
5330       int saved_inhibit_modification_hooks;
5331
5332       prepare_to_modify_buffer (from, to, &from);
           /* FROM may have been changed by the call above; if so,
              recompute the other positions from it.  */
5333       if (saved_from != from)
5334         {
5335           to = from + len;
5336           from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
5337           len_byte = to_byte - from_byte;
5338         }
5339
5340       /* The code conversion routine can not preserve text properties
5341          for now.  So, we must remove all text properties in the
5342          region.  Here, we must suppress all modification hooks.  */
5343       saved_inhibit_modification_hooks = inhibit_modification_hooks;
5344       inhibit_modification_hooks = 1;
5345       Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
5346       inhibit_modification_hooks = saved_inhibit_modification_hooks;
5347     }
5348
5349   if (! encodep && CODING_REQUIRE_DETECTION (coding))
5350     {
5351       /* We must detect encoding of text and eol format.  */
5352
5353       if (from < GPT && to > GPT)
5354         move_gap_both (from, from_byte);
5355       if (coding->type == coding_type_undecided)
5356         {
5357           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
5358           if (coding->type == coding_type_undecided)
5359             {
5360               /* It seems that the text contains only ASCII, but we
5361                  should not leave it undecided because the deeper
5362                  decoding routine (decode_coding) tries to detect the
5363                  encodings again in vain.  */
5364               coding->type = coding_type_emacs_mule;
5365               coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
5366               /* As emacs-mule decoder will handle composition, we
5367                  need this setting to allocate coding->cmp_data
5368                  later.  */
5369               coding->composing = COMPOSITION_NO;
5370             }
5371         }
5372       if (coding->eol_type == CODING_EOL_UNDECIDED
5373           && coding->type != coding_type_ccl)
5374         {
5375           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
5376           if (coding->eol_type == CODING_EOL_UNDECIDED)
5377             coding->eol_type = CODING_EOL_LF;
5378           /* We had better recover the original eol format if we
5379              encounter an inconsistent eol format while decoding.  */
5380           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
5381         }
5382     }
5383
5384   /* Now we convert the text.  */
5385
5386   /* For encoding, we must process pre-write-conversion in advance.  */
5387   if (! inhibit_pre_post_conversion
5388       && encodep
5389       && SYMBOLP (coding->pre_write_conversion)
5390       && ! NILP (Ffboundp (coding->pre_write_conversion)))
5391     {
5392       /* The function in pre-write-conversion may put a new text in a
5393          new buffer.  */
5394       struct buffer *prev = current_buffer;
5395       Lisp_Object new;
5397
5398       record_unwind_protect (code_convert_region_unwind, Qnil);
5399       /* We should not call any more pre-write/post-read-conversion
5400          functions while this pre-write-conversion is running.  */
5401       inhibit_pre_post_conversion = 1;
5402       call2 (coding->pre_write_conversion,
5403              make_number (from), make_number (to));
5404       inhibit_pre_post_conversion = 0;
5405       /* Discard the unwind protect.  */
5406       specpdl_ptr--;
5407
5408       if (current_buffer != prev)
5409         {
               /* The function left us in another buffer: replace the
                  region in the original buffer with that buffer's
                  accessible text, kill the other buffer, and recompute
                  the region and the remembered point.  */
5410           len = ZV - BEGV;
5411           new = Fcurrent_buffer ();
5412           set_buffer_internal_1 (prev);
5413           del_range_2 (from, from_byte, to, to_byte, 0);
5414           TEMP_SET_PT_BOTH (from, from_byte);
5415           insert_from_buffer (XBUFFER (new), 1, len, 0);
5416           Fkill_buffer (new);
5417           if (orig_point >= to)
5418             orig_point += len - orig_len;
5419           else if (orig_point > from)
5420             orig_point = from;
5421           orig_len = len;
5422           to = from + len;
5423           from_byte = CHAR_TO_BYTE (from);
5424           to_byte = CHAR_TO_BYTE (to);
5425           len_byte = to_byte - from_byte;
5426           TEMP_SET_PT_BOTH (from, from_byte);
5427         }
5428     }
5429
5430   if (replace)
5431     deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
5432
5433   if (coding->composing != COMPOSITION_DISABLED)
5434     {
5435       if (encodep)
5436         coding_save_composition (coding, from, to, Fcurrent_buffer ());
5437       else
5438         coding_allocate_composition_data (coding, from);
5439     }
5440
5441   /* Try to skip the leading and trailing ASCII characters.  */
5442   if (coding->type != coding_type_ccl)
5443     {
5444       int from_byte_orig = from_byte, to_byte_orig = to_byte;
5445
5446       if (from < GPT && GPT < to)
5447         move_gap_both (from, from_byte);
5448       SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
5449       if (from_byte == to_byte
5450           && (encodep || NILP (coding->post_read_conversion))
5451           && ! CODING_REQUIRE_FLUSHING (coding))
5452         {
5453           coding->produced = len_byte;
5454           coding->produced_char = len;
5455           if (!replace)
5456             /* We must record and adjust for this new text now.  */
5457             adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
               /* Nothing is left to convert.  Release the composition
                  data set up above, exactly as the normal exit at the
                  end of this function does; otherwise it would be
                  leaked.  */
               coding_free_composition_data (coding);
5458           return 0;
5459         }
5460
5461       head_skip = from_byte - from_byte_orig;
5462       tail_skip = to_byte_orig - to_byte;
5463       total_skip = head_skip + tail_skip;
5464       from += head_skip;
5465       to -= tail_skip;
5466       len -= total_skip; len_byte -= total_skip;
5467     }
5468
5469   /* For conversion, we must put the gap before the text in addition to
5470      making the gap larger for efficient decoding.  The required gap
5471      size starts from 2000 which is the magic number used in make_gap.
5472      But, after one batch of conversion, it will be incremented if we
5473      find that it is not enough.  */
5474   require = 2000;
5475
5476   if (GAP_SIZE  < require)
5477     make_gap (require - GAP_SIZE);
5478   move_gap_both (from, from_byte);
5479
5480   inserted = inserted_byte = 0;
5481
       /* Take the source text out of the buffer contents: it now sits at
          the end of the gap (see the memory picture in the loop below).  */
5482   GAP_SIZE += len_byte;
5483   ZV -= len;
5484   Z -= len;
5485   ZV_BYTE -= len_byte;
5486   Z_BYTE -= len_byte;
5487
5488   if (GPT - BEG < BEG_UNCHANGED)
5489     BEG_UNCHANGED = GPT - BEG;
5490   if (Z - GPT < END_UNCHANGED)
5491     END_UNCHANGED = Z - GPT;
5492
5493   if (!encodep && coding->src_multibyte)
5494     {
5495       /* Decoding routines expect that the source text is unibyte.
5496          We must convert 8-bit characters of multibyte form to
5497          unibyte.  */
5498       int len_byte_orig = len_byte;
5499       len_byte = str_as_unibyte (GAP_END_ADDR - len_byte, len_byte);
           /* If the text got shorter, slide it so that it again ends at
              GAP_END_ADDR.  */
5500       if (len_byte < len_byte_orig)
5501         safe_bcopy (GAP_END_ADDR - len_byte_orig, GAP_END_ADDR - len_byte,
5502                     len_byte);
5503       coding->src_multibyte = 0;
5504     }
5505
5506   for (;;)
5507     {
5508       int result;
5509
5510       /* The buffer memory is now:
5511          +--------+converted-text+---------+-------original-text-------+---+
5512          |<-from->|<--inserted-->|---------|<--------len_byte--------->|---|
5513                   |<---------------------- GAP ----------------------->|  */
5514       src = GAP_END_ADDR - len_byte;
5515       dst = GPT_ADDR + inserted_byte;
5516
5517       if (encodep)
5518         result = encode_coding (coding, src, dst, len_byte, 0);
5519       else
5520         result = decode_coding (coding, src, dst, len_byte, 0);
5521
5522       /* The buffer memory is now:
5523          +--------+-------converted-text----+--+------original-text----+---+
5524          |<-from->|<-inserted->|<-produced->|--|<-(len_byte-consumed)->|---|
5525                   |<---------------------- GAP ----------------------->|  */
5526
5527       inserted += coding->produced_char;
5528       inserted_byte += coding->produced;
5529       len_byte -= coding->consumed;
5530
5531       if (result == CODING_FINISH_INSUFFICIENT_CMP)
5532         {
               /* Get more room for composition data and go on; SRC and
                  DST are recomputed at the top of the loop.  */
5533           coding_allocate_composition_data (coding, from + inserted);
5534           continue;
5535         }
5536
5537       src += coding->consumed;
5538       dst += coding->produced;
5539
5540       if (result == CODING_FINISH_NORMAL)
5541         {
5542           src += len_byte;
5543           break;
5544         }
5545       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
5546         {
5547           unsigned char *pend = dst, *p = pend - inserted_byte;
5548           Lisp_Object eol_type;
5549
5550           /* Encode LFs back to the original eol format (CR or CRLF).  */
5551           if (coding->eol_type == CODING_EOL_CR)
5552             {
5553               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
5554             }
5555           else
5556             {
5557               int count = 0;
5558
5559               while (p < pend) if (*p++ == '\n') count++;
5560               if (src - dst < count)
5561                 {
5562                   /* We don't have sufficient room for encoding LFs
5563                      back to CRLF.  We must record converted and
5564                      not-yet-converted text back to the buffer
5565                      content, enlarge the gap, then record them out of
5566                      the buffer contents again.  */
5567                   int add = len_byte + inserted_byte;
5568
5569                   GAP_SIZE -= add;
5570                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5571                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
5572                   make_gap (count - GAP_SIZE);
5573                   GAP_SIZE += add;
5574                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5575                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5576                   /* Don't forget to update SRC, DST, and PEND.  */
5577                   src = GAP_END_ADDR - len_byte;
5578                   dst = GPT_ADDR + inserted_byte;
5579                   pend = dst;
5580                 }
5581               inserted += count;
5582               inserted_byte += count;
5583               coding->produced += count;
                   /* Copy backward from the end, putting a CR before each
                      LF, until all COUNT LFs have been handled.  */
5584               p = dst = pend + count;
5585               while (count)
5586                 {
5587                   *--p = *--pend;
5588                   if (*p == '\n') count--, *--p = '\r';
5589                 }
5590             }
5591
5592           /* Suppress eol-format conversion in the further conversion.  */
5593           coding->eol_type = CODING_EOL_LF;
5594
5595           /* Set the coding system symbol to that for Unix-like EOL.  */
5596           eol_type = Fget (saved_coding_symbol, Qeol_type);
5597           if (VECTORP (eol_type)
5598               && XVECTOR (eol_type)->size == 3
5599               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
5600             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
5601           else
5602             coding->symbol = saved_coding_symbol;
5603
5604           continue;
5605         }
5606       if (len_byte <= 0)
5607         {
               /* All source bytes are consumed.  For a CCL coding system
                  we go around once more with CODING_MODE_LAST_BLOCK set
                  before finishing.  */
5608           if (coding->type != coding_type_ccl
5609               || coding->mode & CODING_MODE_LAST_BLOCK)
5610             break;
5611           coding->mode |= CODING_MODE_LAST_BLOCK;
5612           continue;
5613         }
5614       if (result == CODING_FINISH_INSUFFICIENT_SRC)
5615         {
5616           /* The source text ends in invalid codes.  Let's just
5617              make them valid buffer contents, and finish conversion.  */
5618           inserted += len_byte;
5619           inserted_byte += len_byte;
5620           while (len_byte--)
5621             *dst++ = *src++;
5622           break;
5623         }
5624       if (result == CODING_FINISH_INTERRUPT)
5625         {
5626           /* The conversion procedure was interrupted by a user.  */
5627           break;
5628         }
5629       /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST  */
5630       if (coding->consumed < 1)
5631         {
5632           /* It's quite strange to require more memory without
5633              consuming any bytes.  Perhaps CCL program bug.  */
5634           break;
5635         }
5636       if (first)
5637         {
5638           /* We have just done the first batch of conversion which was
5639              stopped because of insufficient gap.  Let's reconsider the
5640              required gap size (i.e. SRC - DST) now.
5641
5642              We have converted ORIG bytes (== coding->consumed) into
5643              NEW bytes (coding->produced).  To convert the remaining
5644              LEN bytes, we may need REQUIRE bytes of gap, where:
5645                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
5646                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
5647              Here, we are sure that NEW >= ORIG.  */
5648           float ratio = coding->produced - coding->consumed;
5649           ratio /= coding->consumed;
5650           require = len_byte * ratio;
5651           first = 0;
5652         }
5653       if ((src - dst) < (require + 2000))
5654         {
5655           /* See the comment above the previous call of make_gap.  */
5656           int add = len_byte + inserted_byte;
5657
5658           GAP_SIZE -= add;
5659           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5660           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5661           make_gap (require + 2000);
5662           GAP_SIZE += add;
5663           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5664           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5665         }
5666     }
5667   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
5668
5669   if (encodep && coding->dst_multibyte)
5670     {
5671       /* The output is unibyte.  We must convert 8-bit characters to
5672          multibyte form.  */
5673       if (inserted_byte * 2 > GAP_SIZE)
5674         {
5675           GAP_SIZE -= inserted_byte;
5676           ZV += inserted_byte; Z += inserted_byte;
5677           ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte;
5678           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5679           make_gap (inserted_byte - GAP_SIZE);
5680           GAP_SIZE += inserted_byte;
5681           ZV -= inserted_byte; Z -= inserted_byte;
5682           ZV_BYTE -= inserted_byte; Z_BYTE -= inserted_byte;
5683           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5684         }
5685       inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte);
5686     }
5687
5688   /* If we shrank the conversion area, adjust it now.  */
5689   if (total_skip > 0)
5690     {
5691       if (tail_skip > 0)
5692         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
5693       inserted += total_skip; inserted_byte += total_skip;
5694       GAP_SIZE += total_skip;
5695       GPT -= head_skip; GPT_BYTE -= head_skip;
5696       ZV -= total_skip; ZV_BYTE -= total_skip;
5697       Z -= total_skip; Z_BYTE -= total_skip;
5698       from -= head_skip; from_byte -= head_skip;
5699       to += tail_skip; to_byte += tail_skip;
5700     }
5701
       /* INSERTED is re-measured as the change of Z across
          adjust_after_replace.  */
5702   prev_Z = Z;
5703   adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
5704   inserted = Z - prev_Z;
5705
5706   if (!encodep && coding->cmp_data && coding->cmp_data->used)
5707     coding_restore_composition (coding, Fcurrent_buffer ());
5708   coding_free_composition_data (coding);
5709
5710   if (! inhibit_pre_post_conversion
5711       && ! encodep && ! NILP (coding->post_read_conversion))
5712     {
5713       Lisp_Object val;
5715
5716       if (from != PT)
5717         TEMP_SET_PT_BOTH (from, from_byte);
5718       prev_Z = Z;
5719       record_unwind_protect (code_convert_region_unwind, Qnil);
5720       /* We should not call any more pre-write/post-read-conversion
5721          functions while this post-read-conversion is running.  */
5722       inhibit_pre_post_conversion = 1;
5723       val = call1 (coding->post_read_conversion, make_number (inserted));
5724       inhibit_pre_post_conversion = 0;
5725       /* Discard the unwind protect.  */
5726       specpdl_ptr--;
5727       CHECK_NUMBER (val, 0);
           /* The returned number is only type-checked; the new length is
              derived from the change of Z.  */
5728       inserted += Z - prev_Z;
5729     }
5730
5731   if (orig_point >= from)
5732     {
5733       if (orig_point >= from + orig_len)
5734         orig_point += inserted - orig_len;
5735       else
5736         orig_point = from;
5737       TEMP_SET_PT (orig_point);
5738     }
5739
5740   if (replace)
5741     {
5742       signal_after_change (from, to - from, inserted);
5743       update_compositions (from, from + inserted, CHECK_BORDER);
5744     }
5745
5746   {
5747     coding->consumed = to_byte - from_byte;
5748     coding->consumed_char = to - from;
5749     coding->produced = inserted_byte;
5750     coding->produced_char = inserted;
5751   }
5752
5753   return 0;
5754 }
5755 /* Run CODING's pre-write-conversion function (if ENCODEP is nonzero)
        or post-read-conversion function (if ENCODEP is zero) on the text
        of the string STR, and return the resulting text as a string.

        STR is inserted into the work buffer " *code-converting-work*"
        after that buffer's multibyteness has been set to that of STR.
        The pre-write-conversion function is called with the two
        arguments BEG and Z; the post-read-conversion function is called
        with the single argument Z - BEG.  The value returned by the
        called function is ignored: the result is made by
        make_buffer_string from BEG to Z of the buffer that is current
        after the call.  The two unwind-protect entries recorded on entry
        are unwound by unbind_to before returning.  */
5756 Lisp_Object
5757 run_pre_post_conversion_on_str (str, coding, encodep)
5758      Lisp_Object str;
5759      struct coding_system *coding;
5760      int encodep;
5761 {
5762   int count = specpdl_ptr - specpdl;
5763   struct gcpro gcpro1;
5764   int multibyte = STRING_MULTIBYTE (str);
5765
5766   record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
5767   record_unwind_protect (code_convert_region_unwind, Qnil);
5768   GCPRO1 (str);
5769   temp_output_buffer_setup (" *code-converting-work*");
5770   set_buffer_internal (XBUFFER (Vstandard_output));
5771   /* We must insert the contents of STR as is without
5772      unibyte<->multibyte conversion.  For that, we adjust the
5773      multibyteness of the working buffer to that of STR.  */
5774   Ferase_buffer ();
5775   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
5776   insert_from_string (str, 0, 0,
5777                       XSTRING (str)->size, STRING_BYTES (XSTRING (str)), 0);
5778   UNGCPRO;
       /* No further pre-write/post-read-conversion functions should run
          while this one is running (same convention as in
          code_convert_region).  */
5779   inhibit_pre_post_conversion = 1;
5780   if (encodep)
5781     call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
5782   else
5783     {
5784       TEMP_SET_PT_BOTH (BEG, BEG_BYTE);
5785       call1 (coding->post_read_conversion, make_number (Z - BEG));
5786     }
5787   inhibit_pre_post_conversion = 0;
5788   str = make_buffer_string (BEG, Z, 1);
5789   return unbind_to (count, str);
5790 }
5791 /* Decode the string STR by the coding system CODING and return the
        decoded string.

        If no decoding turns out to be necessary, STR itself is returned
        when NOCOPY is nonzero, otherwise a copy made by Fcopy_sequence;
        NOCOPY is forced to 1 whenever STR has already been replaced by a
        fresh string (Fstring_as_unibyte or Fstring_as_multibyte).
        Otherwise the text is decoded into a temporary conversion buffer
        and a new string is built from the skipped head of STR, the
        decoded text and the skipped tail of STR.  If CODING has a
        post-read-conversion function that is a symbol with a function
        definition, the new string is finally passed through
        run_pre_post_conversion_on_str.

        The totals are stored in CODING->consumed, consumed_char, produced
        and produced_char.  CODING->src_multibyte and dst_multibyte are
        set here, and the type, eol_type, mode, composing and symbol
        members may be changed by detection and EOL recovery.  */
5792 Lisp_Object
5793 decode_coding_string (str, coding, nocopy)
5794      Lisp_Object str;
5795      struct coding_system *coding;
5796      int nocopy;
5797 {
5798   int len;
5799   struct conversion_buffer buf;
5800   int from, to_byte;
5801   struct gcpro gcpro1;
5802   Lisp_Object saved_coding_symbol;
5803   int result;
5804   int require_decoding;
5805   int shrinked_bytes = 0;
5806   Lisp_Object newstr;
5807   int consumed, consumed_char, produced, produced_char;
5808
5809   from = 0;
5810   to_byte = STRING_BYTES (XSTRING (str));
5811
5812   saved_coding_symbol = coding->symbol;
5813   coding->src_multibyte = STRING_MULTIBYTE (str);
5814   coding->dst_multibyte = 1;
5815   if (CODING_REQUIRE_DETECTION (coding))
5816     {
5817       /* See the comments in code_convert_region.  */
5818       if (coding->type == coding_type_undecided)
5819         {
5820           detect_coding (coding, XSTRING (str)->data, to_byte);
5821           if (coding->type == coding_type_undecided)
5822             {
5823               coding->type = coding_type_emacs_mule;
5824               coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
5825               /* As emacs-mule decoder will handle composition, we
5826                  need this setting to allocate coding->cmp_data
5827                  later.  */
5828               coding->composing = COMPOSITION_NO;
5829             }
5830         }
5831       if (coding->eol_type == CODING_EOL_UNDECIDED
5832           && coding->type != coding_type_ccl)
5833         {
5834           saved_coding_symbol = coding->symbol;
5835           detect_eol (coding, XSTRING (str)->data, to_byte);
5836           if (coding->eol_type == CODING_EOL_UNDECIDED)
5837             coding->eol_type = CODING_EOL_LF;
5838           /* We had better recover the original eol format if we
5839              encounter an inconsistent eol format while decoding.  */
5840           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
5841         }
5842     }
5843
       /* For these two coding types the result is a unibyte string.  */
5844   if (coding->type == coding_type_no_conversion
5845       || coding->type == coding_type_raw_text)
5846     coding->dst_multibyte = 0;
5847
5848   require_decoding = CODING_REQUIRE_DECODING (coding);
5849
5850   if (STRING_MULTIBYTE (str))
5851     {
5852       /* Decoding routines expect the source text to be unibyte.  */
5853       str = Fstring_as_unibyte (str);
5854       to_byte = STRING_BYTES (XSTRING (str));
5855       nocopy = 1;
5856       coding->src_multibyte = 0;
5857     }
5858
5859   /* Try to skip the heading and tailing ASCIIs.  */
5860   if (require_decoding && coding->type != coding_type_ccl)
5861     {
5862       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
5863                                 0);
5864       if (from == to_byte)
5865         require_decoding = 0;
           /* Number of bytes skipped at the head (FROM) plus at the
              tail.  */
5866       shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
5867     }
5868
5869   if (!require_decoding)
5870     {
5871       coding->consumed = STRING_BYTES (XSTRING (str));
5872       coding->consumed_char = XSTRING (str)->size;
5873       if (coding->dst_multibyte)
5874         {
5875           str = Fstring_as_multibyte (str);
5876           nocopy = 1;
5877         }
5878       coding->produced = STRING_BYTES (XSTRING (str));
5879       coding->produced_char = XSTRING (str)->size;
5880       return (nocopy ? str : Fcopy_sequence (str));
5881     }
5882
5883   if (coding->composing != COMPOSITION_DISABLED)
5884     coding_allocate_composition_data (coding, from);
5885   len = decoding_buffer_size (coding, to_byte - from);
5886   allocate_conversion_buffer (buf, len);
5887
       /* Decode repeatedly, accumulating the totals, until the decoder
          finishes normally or stops on insufficient source without
          having consumed anything.  */
5888   consumed = consumed_char = produced = produced_char = 0;
5889   while (1)
5890     {
5891       result = decode_coding (coding, XSTRING (str)->data + from + consumed,
5892                               buf.data + produced, to_byte - from - consumed,
5893                               buf.size - produced);
5894       consumed += coding->consumed;
5895       consumed_char += coding->consumed_char;
5896       produced += coding->produced;
5897       produced_char += coding->produced_char;
5898       if (result == CODING_FINISH_NORMAL
5899           || (result == CODING_FINISH_INSUFFICIENT_SRC
5900               && coding->consumed == 0))
5901         break;
5902       if (result == CODING_FINISH_INSUFFICIENT_CMP)
5903         coding_allocate_composition_data (coding, from + produced_char);
5904       else if (result == CODING_FINISH_INSUFFICIENT_DST)
5905         extend_conversion_buffer (&buf);
5906       else if (result == CODING_FINISH_INCONSISTENT_EOL)
5907         {
5908           Lisp_Object eol_type;
5909
5910           /* Recover the original EOL format.  */
5911           if (coding->eol_type == CODING_EOL_CR)
5912             {
5913               unsigned char *p;
5914               for (p = buf.data; p < buf.data + produced; p++)
5915                 if (*p == '\n') *p = '\r';
5916             }
5917           else if (coding->eol_type == CODING_EOL_CRLF)
5918             {
5919               int num_eol = 0;
5920               unsigned char *p0, *p1;
                   /* Count the LFs produced so far.  */
5921               for (p0 = buf.data, p1 = p0 + produced; p0 < p1; p0++)
5922                 if (*p0 == '\n') num_eol++;
                   /* NOTE(review): the buffer is extended only once here;
                      presumably one extension always gives room for
                      NUM_EOL more bytes -- confirm against
                      extend_conversion_buffer.  */
5923               if (produced + num_eol >= buf.size)
5924                 extend_conversion_buffer (&buf);
                   /* Copy backward from the end, putting a CR before each
                      LF.  */
5925               for (p0 = buf.data + produced, p1 = p0 + num_eol; p0 > buf.data;)
5926                 {
5927                   *--p1 = *--p0;
5928                   if (*p0 == '\n') *--p1 = '\r';
5929                 }
5930               produced += num_eol;
5931               produced_char += num_eol;
5932             }
5933           /* Suppress eol-format conversion in the further conversion.  */
5934           coding->eol_type = CODING_EOL_LF;
5935
5936           /* Set the coding system symbol to that for Unix-like EOL.  */
5937           eol_type = Fget (saved_coding_symbol, Qeol_type);
5938           if (VECTORP (eol_type)
5939               && XVECTOR (eol_type)->size == 3
5940               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
5941             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
5942           else
5943             coding->symbol = saved_coding_symbol;
5944
5945
5946         }
5947     }
5948
5949   coding->consumed = consumed;
5950   coding->consumed_char = consumed_char;
5951   coding->produced = produced;
5952   coding->produced_char = produced_char;
5953
       /* Build the result: skipped head of STR, decoded text, skipped
          tail of STR.  */
5954   if (coding->dst_multibyte)
5955     newstr = make_uninit_multibyte_string (produced_char + shrinked_bytes,
5956                                            produced + shrinked_bytes);
5957   else
5958     newstr = make_uninit_string (produced + shrinked_bytes);
5959   if (from > 0)
5960     bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
5961   bcopy (buf.data, XSTRING (newstr)->data + from, produced);
5962   if (shrinked_bytes > from)
5963     bcopy (XSTRING (str)->data + to_byte,
5964            XSTRING (newstr)->data + from + produced,
5965            shrinked_bytes - from);
5966   free_conversion_buffer (&buf);
5967
5968   if (coding->cmp_data && coding->cmp_data->used)
5969     coding_restore_composition (coding, newstr);
5970   coding_free_composition_data (coding);
5971
5972   if (SYMBOLP (coding->post_read_conversion)
5973       && !NILP (Ffboundp (coding->post_read_conversion)))
5974     newstr = run_pre_post_conversion_on_str (newstr, coding, 0);
5975
5976   return newstr;
5977 }
5978 /* Encode the string STR by the coding system CODING and return the
        encoded string, which is unibyte unless the early return for
        all-ASCII text (below) is taken.

        If CODING has a pre-write-conversion function that is a symbol
        with a function definition, STR is first passed through
        run_pre_post_conversion_on_str.  If CODING requires no encoding,
        or nothing but ASCII remains after skipping the leading and
        trailing ASCII characters, the (possibly converted) string itself
        is returned when NOCOPY is nonzero, otherwise a copy made by
        Fcopy_sequence.  Otherwise the text is encoded into a temporary
        conversion buffer and a new string is built from the skipped
        head, the encoded text and the skipped tail.

        The totals are stored in CODING->consumed, consumed_char, produced
        and produced_char, except on the early return taken when only
        ASCII remains.  Composition data saved for the conversion is
        released with coding_free_composition_data on every path that
        saved it.  */
5979 Lisp_Object
5980 encode_coding_string (str, coding, nocopy)
5981      Lisp_Object str;
5982      struct coding_system *coding;
5983      int nocopy;
5984 {
5985   int len;
5986   struct conversion_buffer buf;
5987   int from, to, to_byte;
5988   int result;
5989   int shrinked_bytes = 0;
5990   Lisp_Object newstr;
5991   int consumed, consumed_char, produced, produced_char;
5992
5993   if (SYMBOLP (coding->pre_write_conversion)
5994       && !NILP (Ffboundp (coding->pre_write_conversion)))
5995     str = run_pre_post_conversion_on_str (str, coding, 1);
5996
5997   from = 0;
5998   to = XSTRING (str)->size;
5999   to_byte = STRING_BYTES (XSTRING (str));
6000
6001   /* Encoding routines determine the multibyteness of the source text
6002      by coding->src_multibyte.  */
6003   coding->src_multibyte = STRING_MULTIBYTE (str);
6004   coding->dst_multibyte = 0;
6005   if (! CODING_REQUIRE_ENCODING (coding))
6006     {
6007       coding->consumed = STRING_BYTES (XSTRING (str));
6008       coding->consumed_char = XSTRING (str)->size;
6009       if (STRING_MULTIBYTE (str))
6010         {
               /* Fstring_as_unibyte gives us a new string, so no further
                  copy is needed.  */
6011           str = Fstring_as_unibyte (str);
6012           nocopy = 1;
6013         }
6014       coding->produced = STRING_BYTES (XSTRING (str));
6015       coding->produced_char = XSTRING (str)->size;
6016       return (nocopy ? str : Fcopy_sequence (str));
6017     }
6018
6019   if (coding->composing != COMPOSITION_DISABLED)
6020     coding_save_composition (coding, from, to, str);
6021
6022   /* Try to skip the leading and trailing ASCII characters.  */
6023   if (coding->type != coding_type_ccl)
6024     {
6025       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
6026                                 1);
6027       if (from == to_byte)
             {
               /* Nothing is left to encode.  Release the composition
                  data saved above, as the normal exit at the end of this
                  function does; otherwise it would be leaked.  */
               coding_free_composition_data (coding);
6028           return (nocopy ? str : Fcopy_sequence (str));
             }
           /* Number of bytes skipped at the head (FROM) plus at the
              tail.  */
6029       shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
6030     }
6031
6032   len = encoding_buffer_size (coding, to_byte - from);
6033   allocate_conversion_buffer (buf, len);
6034
       /* Encode repeatedly, accumulating the totals, until the encoder
          finishes normally or stops on insufficient source without
          having consumed anything.  */
6035   consumed = consumed_char = produced = produced_char = 0;
6036   while (1)
6037     {
6038       result = encode_coding (coding, XSTRING (str)->data + from + consumed,
6039                               buf.data + produced, to_byte - from - consumed,
6040                               buf.size - produced);
6041       consumed += coding->consumed;
6042       consumed_char += coding->consumed_char;
6043       produced += coding->produced;
6044       produced_char += coding->produced_char;
6045       if (result == CODING_FINISH_NORMAL
6046           || (result == CODING_FINISH_INSUFFICIENT_SRC
6047               && coding->consumed == 0))
6048         break;
6049       /* Now result should be CODING_FINISH_INSUFFICIENT_DST.  */
6050       extend_conversion_buffer (&buf);
6051     }
6052
6053   coding->consumed = consumed;
6054   coding->consumed_char = consumed_char;
6055   coding->produced = produced;
6056   coding->produced_char = produced_char;
6057
       /* Build the result: skipped head of STR, encoded text, skipped
          tail of STR.  */
6058   newstr = make_uninit_string (produced + shrinked_bytes);
6059   if (from > 0)
6060     bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
6061   bcopy (buf.data, XSTRING (newstr)->data + from, produced);
6062   if (shrinked_bytes > from)
6063     bcopy (XSTRING (str)->data + to_byte,
6064            XSTRING (newstr)->data + from + produced,
6065            shrinked_bytes - from);
6066
6067   free_conversion_buffer (&buf);
6068   coding_free_composition_data (coding);
6069
6070   return newstr;
6071 }
6072
6073 \f
6074 #ifdef emacs
6075 /*** 8. Emacs Lisp library functions ***/
6076
6077 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6078   "Return t if OBJECT is nil or a coding-system.\n\
6079 See the documentation of `make-coding-system' for information\n\
6080 about coding-system objects.")
6081   (obj)
6082      Lisp_Object obj;
6083 {
6084   if (NILP (obj))
6085     return Qt;
6086   if (!SYMBOLP (obj))
6087     return Qnil;
6088   /* Get coding-spec vector for OBJ.  */
6089   obj = Fget (obj, Qcoding_system);
6090   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
6091           ? Qt : Qnil);
6092 }
6093
6094 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6095        Sread_non_nil_coding_system, 1, 1, 0,
6096   "Read a coding system from the minibuffer, prompting with string PROMPT.")
6097   (prompt)
6098      Lisp_Object prompt;
6099 {
6100   Lisp_Object val;
6101   do
6102     {
6103       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6104                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
6105     }
6106   while (XSTRING (val)->size == 0);
6107   return (Fintern (val, Qnil));
6108 }
6109
6110 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
6111   "Read a coding system from the minibuffer, prompting with string PROMPT.\n\
6112 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.")
6113   (prompt, default_coding_system)
6114      Lisp_Object prompt, default_coding_system;
6115 {
6116   Lisp_Object val;
6117   if (SYMBOLP (default_coding_system))
6118     XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
6119   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6120                           Qt, Qnil, Qcoding_system_history,
6121                           default_coding_system, Qnil);
6122   return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
6123 }
6124
6125 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6126        1, 1, 0,
6127   "Check validity of CODING-SYSTEM.\n\
6128 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.\n\
6129 It is valid if it is a symbol with a non-nil `coding-system' property.\n\
6130 The value of property should be a vector of length 5.")
6131   (coding_system)
6132      Lisp_Object coding_system;
6133 {
6134   CHECK_SYMBOL (coding_system, 0);
6135   if (!NILP (Fcoding_system_p (coding_system)))
6136     return coding_system;
6137   while (1)
6138     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
6139 }
6140 \f
6141 Lisp_Object
6142 detect_coding_system (src, src_bytes, highest, multibytep)
6143      unsigned char *src;
6144      int src_bytes, highest;
6145      int multibytep;
6146 {
6147   int coding_mask, eol_type;
6148   Lisp_Object val, tmp;
6149   int dummy;
6150
6151   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy, multibytep);
6152   eol_type  = detect_eol_type (src, src_bytes, &dummy);
6153   if (eol_type == CODING_EOL_INCONSISTENT)
6154     eol_type = CODING_EOL_UNDECIDED;
6155
6156   if (!coding_mask)
6157     {
6158       val = Qundecided;
6159       if (eol_type != CODING_EOL_UNDECIDED)
6160         {
6161           Lisp_Object val2;
6162           val2 = Fget (Qundecided, Qeol_type);
6163           if (VECTORP (val2))
6164             val = XVECTOR (val2)->contents[eol_type];
6165         }
6166       return (highest ? val : Fcons (val, Qnil));
6167     }
6168
6169   /* At first, gather possible coding systems in VAL.  */
6170   val = Qnil;
6171   for (tmp = Vcoding_category_list; CONSP (tmp); tmp = XCDR (tmp))
6172     {
6173       Lisp_Object category_val, category_index;
6174
6175       category_index = Fget (XCAR (tmp), Qcoding_category_index);
6176       category_val = Fsymbol_value (XCAR (tmp));
6177       if (!NILP (category_val)
6178           && NATNUMP (category_index)
6179           && (coding_mask & (1 << XFASTINT (category_index))))
6180         {
6181           val = Fcons (category_val, val);
6182           if (highest)
6183             break;
6184         }
6185     }
6186   if (!highest)
6187     val = Fnreverse (val);
6188
6189   /* Then, replace the elements with subsidiary coding systems.  */
6190   for (tmp = val; CONSP (tmp); tmp = XCDR (tmp))
6191     {
6192       if (eol_type != CODING_EOL_UNDECIDED
6193           && eol_type != CODING_EOL_INCONSISTENT)
6194         {
6195           Lisp_Object eol;
6196           eol = Fget (XCAR (tmp), Qeol_type);
6197           if (VECTORP (eol))
6198             XCAR (tmp) = XVECTOR (eol)->contents[eol_type];
6199         }
6200     }
6201   return (highest ? XCAR (val) : val);
6202 }
6203
6204 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6205        2, 3, 0,
6206   "Detect coding system of the text in the region between START and END.\n\
6207 Return a list of possible coding systems ordered by priority.\n\
6208 \n\
6209 If only ASCII characters are found, it returns a list of single element\n\
6210 `undecided' or its subsidiary coding system according to a detected\n\
6211 end-of-line format.\n\
6212 \n\
6213 If optional argument HIGHEST is non-nil, return the coding system of\n\
6214 highest priority.")
6215   (start, end, highest)
6216      Lisp_Object start, end, highest;
6217 {
6218   int from, to;
6219   int from_byte, to_byte;
6220   int include_anchor_byte = 0;
6221
6222   CHECK_NUMBER_COERCE_MARKER (start, 0);
6223   CHECK_NUMBER_COERCE_MARKER (end, 1);
6224
6225   validate_region (&start, &end);
6226   from = XINT (start), to = XINT (end);
6227   from_byte = CHAR_TO_BYTE (from);
6228   to_byte = CHAR_TO_BYTE (to);
6229
6230   if (from < GPT && to >= GPT)
6231     move_gap_both (to, to_byte);
6232   /* If we an anchor byte `\0' follows the region, we include it in
6233      the detecting source.  Then code detectors can handle the tailing
6234      byte sequence more accurately.
6235
6236      Fix me: This is not an perfect solution.  It is better that we
6237      add one more argument, say LAST_BLOCK, to all detect_coding_XXX.
6238   */
6239   if (to == Z || (to == GPT && GAP_SIZE > 0))
6240     include_anchor_byte = 1;
6241   return detect_coding_system (BYTE_POS_ADDR (from_byte),
6242                                to_byte - from_byte + include_anchor_byte,
6243                                !NILP (highest),
6244                                !NILP (current_buffer
6245                                       ->enable_multibyte_characters));
6246 }
6247
6248 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6249        1, 2, 0,
6250   "Detect coding system of the text in STRING.\n\
6251 Return a list of possible coding systems ordered by priority.\n\
6252 \n\
6253 If only ASCII characters are found, it returns a list of single element\n\
6254 `undecided' or its subsidiary coding system according to a detected\n\
6255 end-of-line format.\n\
6256 \n\
6257 If optional argument HIGHEST is non-nil, return the coding system of\n\
6258 highest priority.")
6259   (string, highest)
6260      Lisp_Object string, highest;
6261 {
6262   CHECK_STRING (string, 0);
6263
6264   return detect_coding_system (XSTRING (string)->data,
6265                                /* "+ 1" is to include the anchor byte
6266                                   `\0'.  With this, code detectors can
6267                                   handle the tailing bytes more
6268                                   accurately.  */
6269                                STRING_BYTES (XSTRING (string)) + 1,
6270                                !NILP (highest),
6271                                STRING_MULTIBYTE (string));
6272 }
6273
6274 /* Return an intersection of lists L1 and L2.  */
6275
6276 static Lisp_Object
6277 intersection (l1, l2)
6278      Lisp_Object l1, l2;
6279 {
6280   Lisp_Object val;
6281
6282   for (val = Qnil; CONSP (l1); l1 = XCDR (l1))
6283     {
6284       if (!NILP (Fmemq (XCAR (l1), l2)))
6285         val = Fcons (XCAR (l1), val);
6286     }
6287   return val;
6288 }
6289
6290
6291 /*  Subroutine for Fsafe_coding_systems_region_internal.
6292
6293     Return a list of coding systems that safely encode the multibyte
6294     text between P and PEND.  SAFE_CODINGS, if non-nil, is a list of
6295     possible coding systems.  If it is nil, it means that we have not
6296     yet found any coding systems.
6297
6298     WORK_TABLE is a copy of the char-table Vchar_coding_system_table.  An
6299     element of WORK_TABLE is set to t once the element is looked up.
6300
6301     If a non-ASCII single byte char is found, set
6302     *single_byte_char_found to 1.  */
6303
6304 static Lisp_Object
6305 find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
6306      unsigned char *p, *pend;
6307      Lisp_Object safe_codings, work_table;
6308      int *single_byte_char_found;
6309 {
6310   int c, len, idx;
6311   Lisp_Object val;
6312
6313   while (p < pend)
6314     {
6315       c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
6316       p += len;
6317       if (ASCII_BYTE_P (c))
6318         /* We can ignore ASCII characters here.  */
6319         continue;
6320       if (SINGLE_BYTE_CHAR_P (c))
6321         *single_byte_char_found = 1;
6322       if (NILP (safe_codings))
6323         continue;
6324       /* Check the safe coding systems for C.  */
6325       val = char_table_ref_and_index (work_table, c, &idx);
6326       if (EQ (val, Qt))
6327         /* This element was already checked.  Ignore it.  */
6328         continue;
6329       /* Remember that we checked this element.  */
6330       CHAR_TABLE_SET (work_table, make_number (idx), Qt);
6331
6332       /* If there are some safe coding systems for C and we have
6333          already found the other set of coding systems for the
6334          different characters, get the intersection of them.  */
6335       if (!EQ (safe_codings, Qt) && !NILP (val))
6336         val = intersection (safe_codings, val);
6337       safe_codings = val;
6338     }
6339   return safe_codings;
6340 }
6341
6342
6343 /* Return a list of coding systems that safely encode the text between
6344    START and END.  If the text contains only ASCII or is unibyte,
6345    return t.  */
6346
6347 DEFUN ("find-coding-systems-region-internal",
6348        Ffind_coding_systems_region_internal,
6349        Sfind_coding_systems_region_internal, 2, 2, 0,
6350   "Internal use only.")
6351   (start, end)
6352      Lisp_Object start, end;
6353 {
6354   Lisp_Object work_table, safe_codings;
6355   int non_ascii_p = 0;
6356   int single_byte_char_found = 0;
6357   unsigned char *p1, *p1end, *p2, *p2end, *p;
6358
6359   if (STRINGP (start))
6360     {
6361       if (!STRING_MULTIBYTE (start))
6362         return Qt;
6363       p1 = XSTRING (start)->data, p1end = p1 + STRING_BYTES (XSTRING (start));
6364       p2 = p2end = p1end;
6365       if (XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
6366         non_ascii_p = 1;
6367     }
6368   else
6369     {
6370       int from, to, stop;
6371
6372       CHECK_NUMBER_COERCE_MARKER (start, 0);
6373       CHECK_NUMBER_COERCE_MARKER (end, 1);
6374       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6375         args_out_of_range (start, end);
6376       if (NILP (current_buffer->enable_multibyte_characters))
6377         return Qt;
6378       from = CHAR_TO_BYTE (XINT (start));
6379       to = CHAR_TO_BYTE (XINT (end));
6380       stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to;
6381       p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from);
6382       if (stop == to)
6383         p2 = p2end = p1end;
6384       else
6385         p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop);
6386       if (XINT (end) - XINT (start) != to - from)
6387         non_ascii_p = 1;
6388     }
6389
6390   if (!non_ascii_p)
6391     {
6392       /* We are sure that the text contains no multibyte character.
6393          Check if it contains eight-bit-graphic.  */
6394       p = p1;
6395       for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++);
6396       if (p == p1end)
6397         {
6398           for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);
6399           if (p == p2end)
6400             return Qt;
6401         }
6402     }
6403
6404   /* The text contains non-ASCII characters.  */
6405   work_table = Fcopy_sequence (Vchar_coding_system_table);
6406   safe_codings = find_safe_codings (p1, p1end, Qt, work_table,
6407                                     &single_byte_char_found);
6408   if (p2 < p2end)
6409     safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
6410                                       &single_byte_char_found);
6411
6412   if (!single_byte_char_found)
6413     {
6414       /* Append generic coding systems.  */
6415       Lisp_Object args[2];
6416       args[0] = safe_codings;
6417       args[1] = Fchar_table_extra_slot (Vchar_coding_system_table,
6418                                         make_number (0));
6419       safe_codings = Fappend (2, args);
6420     }
6421   else
6422     safe_codings = Fcons (Qraw_text,
6423                           Fcons (Qemacs_mule,
6424                                  Fcons (Qno_conversion, safe_codings)));
6425   return safe_codings;
6426 }
6427
6428
6429 Lisp_Object
6430 code_convert_region1 (start, end, coding_system, encodep)
6431      Lisp_Object start, end, coding_system;
6432      int encodep;
6433 {
6434   struct coding_system coding;
6435   int from, to;
6436
6437   CHECK_NUMBER_COERCE_MARKER (start, 0);
6438   CHECK_NUMBER_COERCE_MARKER (end, 1);
6439   CHECK_SYMBOL (coding_system, 2);
6440
6441   validate_region (&start, &end);
6442   from = XFASTINT (start);
6443   to = XFASTINT (end);
6444
6445   if (NILP (coding_system))
6446     return make_number (to - from);
6447
6448   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6449     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
6450
6451   coding.mode |= CODING_MODE_LAST_BLOCK;
6452   coding.src_multibyte = coding.dst_multibyte
6453     = !NILP (current_buffer->enable_multibyte_characters);
6454   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
6455                        &coding, encodep, 1);
6456   Vlast_coding_system_used = coding.symbol;
6457   return make_number (coding.produced_char);
6458 }
6459
6460 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
6461        3, 3, "r\nzCoding system: ",
6462   "Decode the current region from the specified coding system.\n\
6463 When called from a program, takes three arguments:\n\
6464 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
6465 This function sets `last-coding-system-used' to the precise coding system\n\
6466 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
6467 not fully specified.)\n\
6468 It returns the length of the decoded text.")
6469   (start, end, coding_system)
6470      Lisp_Object start, end, coding_system;
6471 {
6472   return code_convert_region1 (start, end, coding_system, 0);
6473 }
6474
6475 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
6476        3, 3, "r\nzCoding system: ",
6477   "Encode the current region into the specified coding system.\n\
6478 When called from a program, takes three arguments:\n\
6479 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
6480 This function sets `last-coding-system-used' to the precise coding system\n\
6481 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
6482 not fully specified.)\n\
6483 It returns the length of the encoded text.")
6484   (start, end, coding_system)
6485      Lisp_Object start, end, coding_system;
6486 {
6487   return code_convert_region1 (start, end, coding_system, 1);
6488 }
6489
6490 Lisp_Object
6491 code_convert_string1 (string, coding_system, nocopy, encodep)
6492      Lisp_Object string, coding_system, nocopy;
6493      int encodep;
6494 {
6495   struct coding_system coding;
6496
6497   CHECK_STRING (string, 0);
6498   CHECK_SYMBOL (coding_system, 1);
6499
6500   if (NILP (coding_system))
6501     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
6502
6503   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6504     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
6505
6506   coding.mode |= CODING_MODE_LAST_BLOCK;
6507   string = (encodep
6508             ? encode_coding_string (string, &coding, !NILP (nocopy))
6509             : decode_coding_string (string, &coding, !NILP (nocopy)));
6510   Vlast_coding_system_used = coding.symbol;
6511
6512   return string;
6513 }
6514
6515 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
6516        2, 3, 0,
6517   "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
6518 Optional arg NOCOPY non-nil means it is OK to return STRING itself\n\
6519 if the decoding operation is trivial.\n\
6520 This function sets `last-coding-system-used' to the precise coding system\n\
6521 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
6522 not fully specified.)")
6523   (string, coding_system, nocopy)
6524      Lisp_Object string, coding_system, nocopy;
6525 {
6526   return code_convert_string1 (string, coding_system, nocopy, 0);
6527 }
6528
6529 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
6530        2, 3, 0,
6531   "Encode STRING to CODING-SYSTEM, and return the result.\n\
6532 Optional arg NOCOPY non-nil means it is OK to return STRING itself\n\
6533 if the encoding operation is trivial.\n\
6534 This function sets `last-coding-system-used' to the precise coding system\n\
6535 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
6536 not fully specified.)")
6537   (string, coding_system, nocopy)
6538      Lisp_Object string, coding_system, nocopy;
6539 {
6540   return code_convert_string1 (string, coding_system, nocopy, 1);
6541 }
6542
6543 /* Encode or decode STRING according to CODING_SYSTEM.
6544    Do not set Vlast_coding_system_used.
6545
6546    This function is called only from macros DECODE_FILE and
6547    ENCODE_FILE, thus we ignore character composition.  */
6548
6549 Lisp_Object
6550 code_convert_string_norecord (string, coding_system, encodep)
6551      Lisp_Object string, coding_system;
6552      int encodep;
6553 {
6554   struct coding_system coding;
6555
6556   CHECK_STRING (string, 0);
6557   CHECK_SYMBOL (coding_system, 1);
6558
6559   if (NILP (coding_system))
6560     return string;
6561
6562   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6563     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
6564
6565   coding.composing = COMPOSITION_DISABLED;
6566   coding.mode |= CODING_MODE_LAST_BLOCK;
6567   return (encodep
6568           ? encode_coding_string (string, &coding, 1)
6569           : decode_coding_string (string, &coding, 1));
6570 }
6571 \f
6572 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
6573   "Decode a Japanese character which has CODE in shift_jis encoding.\n\
6574 Return the corresponding character.")
6575   (code)
6576      Lisp_Object code;
6577 {
6578   unsigned char c1, c2, s1, s2;
6579   Lisp_Object val;
6580
6581   CHECK_NUMBER (code, 0);
6582   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
6583   if (s1 == 0)
6584     {
6585       if (s2 < 0x80)
6586         XSETFASTINT (val, s2);
6587       else if (s2 >= 0xA0 || s2 <= 0xDF)
6588         XSETFASTINT (val, MAKE_CHAR (charset_katakana_jisx0201, s2, 0));
6589       else
6590         error ("Invalid Shift JIS code: %x", XFASTINT (code));
6591     }
6592   else
6593     {
6594       if ((s1 < 0x80 || s1 > 0x9F && s1 < 0xE0 || s1 > 0xEF)
6595           || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC))
6596         error ("Invalid Shift JIS code: %x", XFASTINT (code));
6597       DECODE_SJIS (s1, s2, c1, c2);
6598       XSETFASTINT (val, MAKE_CHAR (charset_jisx0208, c1, c2));
6599     }
6600   return val;
6601 }
6602
6603 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
6604   "Encode a Japanese character CHAR to shift_jis encoding.\n\
6605 Return the corresponding code in SJIS.")
6606   (ch)
6607      Lisp_Object ch;
6608 {
6609   int charset, c1, c2, s1, s2;
6610   Lisp_Object val;
6611
6612   CHECK_NUMBER (ch, 0);
6613   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
6614   if (charset == CHARSET_ASCII)
6615     {
6616       val = ch;
6617     }
6618   else if (charset == charset_jisx0208
6619            && c1 > 0x20 && c1 < 0x7F && c2 > 0x20 && c2 < 0x7F)
6620     {
6621       ENCODE_SJIS (c1, c2, s1, s2);
6622       XSETFASTINT (val, (s1 << 8) | s2);
6623     }
6624   else if (charset == charset_katakana_jisx0201
6625            && c1 > 0x20 && c2 < 0xE0)
6626     {
6627       XSETFASTINT (val, c1 | 0x80);
6628     }
6629   else
6630     error ("Can't encode to shift_jis: %d", XFASTINT (ch));
6631   return val;
6632 }
6633
6634 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
6635   "Decode a Big5 character which has CODE in BIG5 coding system.\n\
6636 Return the corresponding character.")
6637   (code)
6638      Lisp_Object code;
6639 {
6640   int charset;
6641   unsigned char b1, b2, c1, c2;
6642   Lisp_Object val;
6643
6644   CHECK_NUMBER (code, 0);
6645   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
6646   if (b1 == 0)
6647     {
6648       if (b2 >= 0x80)
6649         error ("Invalid BIG5 code: %x", XFASTINT (code));
6650       val = code;
6651     }
6652   else
6653     {
6654       if ((b1 < 0xA1 || b1 > 0xFE)
6655           || (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE))
6656         error ("Invalid BIG5 code: %x", XFASTINT (code));
6657       DECODE_BIG5 (b1, b2, charset, c1, c2);
6658       XSETFASTINT (val, MAKE_CHAR (charset, c1, c2));
6659     }
6660   return val;
6661 }
6662
6663 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
6664   "Encode the Big5 character CHAR to BIG5 coding system.\n\
6665 Return the corresponding character code in Big5.")
6666   (ch)
6667      Lisp_Object ch;
6668 {
6669   int charset, c1, c2, b1, b2;
6670   Lisp_Object val;
6671
6672   CHECK_NUMBER (ch, 0);
6673   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
6674   if (charset == CHARSET_ASCII)
6675     {
6676       val = ch;
6677     }
6678   else if ((charset == charset_big5_1
6679             && (XFASTINT (ch) >= 0x250a1 && XFASTINT (ch) <= 0x271ec))
6680            || (charset == charset_big5_2
6681                && XFASTINT (ch) >= 0x290a1 && XFASTINT (ch) <= 0x2bdb2))
6682     {
6683       ENCODE_BIG5 (charset, c1, c2, b1, b2);
6684       XSETFASTINT (val, (b1 << 8) | b2);
6685     }
6686   else
6687     error ("Can't encode to Big5: %d", XFASTINT (ch));
6688   return val;
6689 }
6690 \f
6691 DEFUN ("set-terminal-coding-system-internal",
6692        Fset_terminal_coding_system_internal,
6693        Sset_terminal_coding_system_internal, 1, 1, 0, "")
6694   (coding_system)
6695      Lisp_Object coding_system;
6696 {
6697   CHECK_SYMBOL (coding_system, 0);
6698   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
6699   /* We had better not send unsafe characters to terminal.  */
6700   terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
6701   /* Character composition should be disabled.  */
6702   terminal_coding.composing = COMPOSITION_DISABLED;
6703   /* Error notification should be suppressed.  */
6704   terminal_coding.suppress_error = 1;
6705   terminal_coding.src_multibyte = 1;
6706   terminal_coding.dst_multibyte = 0;
6707   return Qnil;
6708 }
6709
6710 DEFUN ("set-safe-terminal-coding-system-internal",
6711        Fset_safe_terminal_coding_system_internal,
6712        Sset_safe_terminal_coding_system_internal, 1, 1, 0, "")
6713   (coding_system)
6714      Lisp_Object coding_system;
6715 {
6716   CHECK_SYMBOL (coding_system, 0);
6717   setup_coding_system (Fcheck_coding_system (coding_system),
6718                        &safe_terminal_coding);
6719   /* Character composition should be disabled.  */
6720   safe_terminal_coding.composing = COMPOSITION_DISABLED;
6721   /* Error notification should be suppressed.  */
6722   terminal_coding.suppress_error = 1;
6723   safe_terminal_coding.src_multibyte = 1;
6724   safe_terminal_coding.dst_multibyte = 0;
6725   return Qnil;
6726 }
6727
6728 DEFUN ("terminal-coding-system",
6729        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
6730   "Return coding system specified for terminal output.")
6731   ()
6732 {
6733   return terminal_coding.symbol;
6734 }
6735
6736 DEFUN ("set-keyboard-coding-system-internal",
6737        Fset_keyboard_coding_system_internal,
6738        Sset_keyboard_coding_system_internal, 1, 1, 0, "")
6739   (coding_system)
6740      Lisp_Object coding_system;
6741 {
6742   CHECK_SYMBOL (coding_system, 0);
6743   setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
6744   /* Character composition should be disabled.  */
6745   keyboard_coding.composing = COMPOSITION_DISABLED;
6746   return Qnil;
6747 }
6748
6749 DEFUN ("keyboard-coding-system",
6750        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
6751   "Return coding system specified for decoding keyboard input.")
6752   ()
6753 {
6754   return keyboard_coding.symbol;
6755 }
6756
6757 \f
6758 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
6759        Sfind_operation_coding_system,  1, MANY, 0,
6760   "Choose a coding system for an operation based on the target name.\n\
6761 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).\n\
6762 DECODING-SYSTEM is the coding system to use for decoding\n\
6763 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
6764 for encoding (in case OPERATION does encoding).\n\
6765 \n\
6766 The first argument OPERATION specifies an I/O primitive:\n\
6767   For file I/O, `insert-file-contents' or `write-region'.\n\
6768   For process I/O, `call-process', `call-process-region', or `start-process'.\n\
6769   For network I/O, `open-network-stream'.\n\
6770 \n\
6771 The remaining arguments should be the same arguments that were passed\n\
6772 to the primitive.  Depending on which primitive, one of those arguments\n\
6773 is selected as the TARGET.  For example, if OPERATION does file I/O,\n\
6774 whichever argument specifies the file name is TARGET.\n\
6775 \n\
6776 TARGET has a meaning which depends on OPERATION:\n\
6777   For file I/O, TARGET is a file name.\n\
6778   For process I/O, TARGET is a process name.\n\
6779   For network I/O, TARGET is a service name or a port number\n\
6780 \n\
6781 This function looks up what specified for TARGET in,\n\
6782 `file-coding-system-alist', `process-coding-system-alist',\n\
6783 or `network-coding-system-alist' depending on OPERATION.\n\
6784 They may specify a coding system, a cons of coding systems,\n\
6785 or a function symbol to call.\n\
6786 In the last case, we call the function with one argument,\n\
6787 which is a list of all the arguments given to this function.")
6788   (nargs, args)
6789      int nargs;
6790      Lisp_Object *args;
6791 {
6792   Lisp_Object operation, target_idx, target, val;
6793   register Lisp_Object chain;
6794
6795   if (nargs < 2)
6796     error ("Too few arguments");
6797   operation = args[0];
6798   if (!SYMBOLP (operation)
6799       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
6800     error ("Invalid first argument");
6801   if (nargs < 1 + XINT (target_idx))
6802     error ("Too few arguments for operation: %s",
6803            XSYMBOL (operation)->name->data);
6804   target = args[XINT (target_idx) + 1];
6805   if (!(STRINGP (target)
6806         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
6807     error ("Invalid argument %d", XINT (target_idx) + 1);
6808
6809   chain = ((EQ (operation, Qinsert_file_contents)
6810             || EQ (operation, Qwrite_region))
6811            ? Vfile_coding_system_alist
6812            : (EQ (operation, Qopen_network_stream)
6813               ? Vnetwork_coding_system_alist
6814               : Vprocess_coding_system_alist));
6815   if (NILP (chain))
6816     return Qnil;
6817
6818   for (; CONSP (chain); chain = XCDR (chain))
6819     {
6820       Lisp_Object elt;
6821       elt = XCAR (chain);
6822
6823       if (CONSP (elt)
6824           && ((STRINGP (target)
6825                && STRINGP (XCAR (elt))
6826                && fast_string_match (XCAR (elt), target) >= 0)
6827               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
6828         {
6829           val = XCDR (elt);
6830           /* Here, if VAL is both a valid coding system and a valid
6831              function symbol, we return VAL as a coding system.  */
6832           if (CONSP (val))
6833             return val;
6834           if (! SYMBOLP (val))
6835             return Qnil;
6836           if (! NILP (Fcoding_system_p (val)))
6837             return Fcons (val, val);
6838           if (! NILP (Ffboundp (val)))
6839             {
6840               val = call1 (val, Flist (nargs, args));
6841               if (CONSP (val))
6842                 return val;
6843               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
6844                 return Fcons (val, val);
6845             }
6846           return Qnil;
6847         }
6848     }
6849   return Qnil;
6850 }
6851
6852 DEFUN ("update-coding-systems-internal",  Fupdate_coding_systems_internal,
6853        Supdate_coding_systems_internal, 0, 0, 0,
6854   "Update internal database for ISO2022 and CCL based coding systems.\n\
6855 When values of any coding categories are changed, you must\n\
6856 call this function")
6857   ()
6858 {
6859   int i;
6860
6861   for (i = CODING_CATEGORY_IDX_EMACS_MULE; i < CODING_CATEGORY_IDX_MAX; i++)
6862     {
6863       Lisp_Object val;
6864
6865       val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value;
6866       if (!NILP (val))
6867         {
6868           if (! coding_system_table[i])
6869             coding_system_table[i] = ((struct coding_system *)
6870                                       xmalloc (sizeof (struct coding_system)));
6871           setup_coding_system (val, coding_system_table[i]);
6872         }
6873       else if (coding_system_table[i])
6874         {
6875           xfree (coding_system_table[i]);
6876           coding_system_table[i] = NULL;
6877         }
6878     }
6879
6880   return Qnil;
6881 }
6882
6883 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
6884        Sset_coding_priority_internal, 0, 0, 0,
6885   "Update internal database for the current value of `coding-category-list'.\n\
6886 This function is internal use only.")
6887   ()
6888 {
6889   int i = 0, idx;
6890   Lisp_Object val;
6891
6892   val = Vcoding_category_list;
6893
6894   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
6895     {
6896       if (! SYMBOLP (XCAR (val)))
6897         break;
6898       idx = XFASTINT (Fget (XCAR (val), Qcoding_category_index));
6899       if (idx >= CODING_CATEGORY_IDX_MAX)
6900         break;
6901       coding_priorities[i++] = (1 << idx);
6902       val = XCDR (val);
6903     }
6904   /* If coding-category-list is valid and contains all coding
6905      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
6906      the following code saves Emacs from crashing.  */
6907   while (i < CODING_CATEGORY_IDX_MAX)
6908     coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
6909
6910   return Qnil;
6911 }
6912
6913 #endif /* emacs */
6914
6915 \f
6916 /*** 9. Post-amble ***/
6917
6918 void
6919 init_coding_once ()
6920 {
6921   int i;
6922
6923   /* Emacs' internal format specific initialize routine.  */
6924   for (i = 0; i <= 0x20; i++)
6925     emacs_code_class[i] = EMACS_control_code;
6926   emacs_code_class[0x0A] = EMACS_linefeed_code;
6927   emacs_code_class[0x0D] = EMACS_carriage_return_code;
6928   for (i = 0x21 ; i < 0x7F; i++)
6929     emacs_code_class[i] = EMACS_ascii_code;
6930   emacs_code_class[0x7F] = EMACS_control_code;
6931   for (i = 0x80; i < 0xFF; i++)
6932     emacs_code_class[i] = EMACS_invalid_code;
6933   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
6934   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
6935   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
6936   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
6937
6938   /* ISO2022 specific initialize routine.  */
6939   for (i = 0; i < 0x20; i++)
6940     iso_code_class[i] = ISO_control_0;
6941   for (i = 0x21; i < 0x7F; i++)
6942     iso_code_class[i] = ISO_graphic_plane_0;
6943   for (i = 0x80; i < 0xA0; i++)
6944     iso_code_class[i] = ISO_control_1;
6945   for (i = 0xA1; i < 0xFF; i++)
6946     iso_code_class[i] = ISO_graphic_plane_1;
6947   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
6948   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
6949   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
6950   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
6951   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
6952   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
6953   iso_code_class[ISO_CODE_ESC] = ISO_escape;
6954   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
6955   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
6956   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
6957
6958   setup_coding_system (Qnil, &keyboard_coding);
6959   setup_coding_system (Qnil, &terminal_coding);
6960   setup_coding_system (Qnil, &safe_terminal_coding);
6961   setup_coding_system (Qnil, &default_buffer_file_coding);
6962
6963   bzero (coding_system_table, sizeof coding_system_table);
6964
6965   bzero (ascii_skip_code, sizeof ascii_skip_code);
6966   for (i = 0; i < 128; i++)
6967     ascii_skip_code[i] = 1;
6968
6969 #if defined (MSDOS) || defined (WINDOWSNT)
6970   system_eol_type = CODING_EOL_CRLF;
6971 #else
6972   system_eol_type = CODING_EOL_LF;
6973 #endif
6974
6975   inhibit_pre_post_conversion = 0;
6976 }
6977
6978 #ifdef emacs
6979
6980 void
6981 syms_of_coding ()
6982 {
6983   Qtarget_idx = intern ("target-idx");
6984   staticpro (&Qtarget_idx);
6985
6986   Qcoding_system_history = intern ("coding-system-history");
6987   staticpro (&Qcoding_system_history);
6988   Fset (Qcoding_system_history, Qnil);
6989
6990   /* Target FILENAME is the first argument.  */
6991   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
6992   /* Target FILENAME is the third argument.  */
6993   Fput (Qwrite_region, Qtarget_idx, make_number (2));
6994
6995   Qcall_process = intern ("call-process");
6996   staticpro (&Qcall_process);
6997   /* Target PROGRAM is the first argument.  */
6998   Fput (Qcall_process, Qtarget_idx, make_number (0));
6999
7000   Qcall_process_region = intern ("call-process-region");
7001   staticpro (&Qcall_process_region);
7002   /* Target PROGRAM is the third argument.  */
7003   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
7004
7005   Qstart_process = intern ("start-process");
7006   staticpro (&Qstart_process);
7007   /* Target PROGRAM is the third argument.  */
7008   Fput (Qstart_process, Qtarget_idx, make_number (2));
7009
7010   Qopen_network_stream = intern ("open-network-stream");
7011   staticpro (&Qopen_network_stream);
7012   /* Target SERVICE is the fourth argument.  */
7013   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
7014
7015   Qcoding_system = intern ("coding-system");
7016   staticpro (&Qcoding_system);
7017
7018   Qeol_type = intern ("eol-type");
7019   staticpro (&Qeol_type);
7020
7021   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
7022   staticpro (&Qbuffer_file_coding_system);
7023
7024   Qpost_read_conversion = intern ("post-read-conversion");
7025   staticpro (&Qpost_read_conversion);
7026
7027   Qpre_write_conversion = intern ("pre-write-conversion");
7028   staticpro (&Qpre_write_conversion);
7029
7030   Qno_conversion = intern ("no-conversion");
7031   staticpro (&Qno_conversion);
7032
7033   Qundecided = intern ("undecided");
7034   staticpro (&Qundecided);
7035
7036   Qcoding_system_p = intern ("coding-system-p");
7037   staticpro (&Qcoding_system_p);
7038
7039   Qcoding_system_error = intern ("coding-system-error");
7040   staticpro (&Qcoding_system_error);
7041
7042   Fput (Qcoding_system_error, Qerror_conditions,
7043         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
7044   Fput (Qcoding_system_error, Qerror_message,
7045         build_string ("Invalid coding system"));
7046
7047   Qcoding_category = intern ("coding-category");
7048   staticpro (&Qcoding_category);
7049   Qcoding_category_index = intern ("coding-category-index");
7050   staticpro (&Qcoding_category_index);
7051
7052   Vcoding_category_table
7053     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
7054   staticpro (&Vcoding_category_table);
7055   {
7056     int i;
7057     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
7058       {
7059         XVECTOR (Vcoding_category_table)->contents[i]
7060           = intern (coding_category_name[i]);
7061         Fput (XVECTOR (Vcoding_category_table)->contents[i],
7062               Qcoding_category_index, make_number (i));
7063       }
7064   }
7065
7066   Qtranslation_table = intern ("translation-table");
7067   staticpro (&Qtranslation_table);
7068   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
7069
7070   Qtranslation_table_id = intern ("translation-table-id");
7071   staticpro (&Qtranslation_table_id);
7072
7073   Qtranslation_table_for_decode = intern ("translation-table-for-decode");
7074   staticpro (&Qtranslation_table_for_decode);
7075
7076   Qtranslation_table_for_encode = intern ("translation-table-for-encode");
7077   staticpro (&Qtranslation_table_for_encode);
7078
7079   Qsafe_chars = intern ("safe-chars");
7080   staticpro (&Qsafe_chars);
7081
7082   Qchar_coding_system = intern ("char-coding-system");
7083   staticpro (&Qchar_coding_system);
7084
7085   /* Intern this now in case it isn't already done.
7086      Setting this variable twice is harmless.
7087      But don't staticpro it here--that is done in alloc.c.  */
7088   Qchar_table_extra_slots = intern ("char-table-extra-slots");
7089   Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
7090   Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (1));
7091
7092   Qvalid_codes = intern ("valid-codes");
7093   staticpro (&Qvalid_codes);
7094
7095   Qemacs_mule = intern ("emacs-mule");
7096   staticpro (&Qemacs_mule);
7097
7098   Qraw_text = intern ("raw-text");
7099   staticpro (&Qraw_text);
7100
7101   defsubr (&Scoding_system_p);
7102   defsubr (&Sread_coding_system);
7103   defsubr (&Sread_non_nil_coding_system);
7104   defsubr (&Scheck_coding_system);
7105   defsubr (&Sdetect_coding_region);
7106   defsubr (&Sdetect_coding_string);
7107   defsubr (&Sfind_coding_systems_region_internal);
7108   defsubr (&Sdecode_coding_region);
7109   defsubr (&Sencode_coding_region);
7110   defsubr (&Sdecode_coding_string);
7111   defsubr (&Sencode_coding_string);
7112   defsubr (&Sdecode_sjis_char);
7113   defsubr (&Sencode_sjis_char);
7114   defsubr (&Sdecode_big5_char);
7115   defsubr (&Sencode_big5_char);
7116   defsubr (&Sset_terminal_coding_system_internal);
7117   defsubr (&Sset_safe_terminal_coding_system_internal);
7118   defsubr (&Sterminal_coding_system);
7119   defsubr (&Sset_keyboard_coding_system_internal);
7120   defsubr (&Skeyboard_coding_system);
7121   defsubr (&Sfind_operation_coding_system);
7122   defsubr (&Supdate_coding_systems_internal);
7123   defsubr (&Sset_coding_priority_internal);
7124
7125   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
7126     "List of coding systems.\n\
7127 \n\
7128 Do not alter the value of this variable manually.  This variable should be\n\
7129 updated by the functions `make-coding-system' and\n\
7130 `define-coding-system-alias'.");
7131   Vcoding_system_list = Qnil;
7132
7133   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
7134     "Alist of coding system names.\n\
7135 Each element is one element list of coding system name.\n\
7136 This variable is given to `completing-read' as TABLE argument.\n\
7137 \n\
7138 Do not alter the value of this variable manually.  This variable should be\n\
7139 updated by the functions `make-coding-system' and\n\
7140 `define-coding-system-alias'.");
7141   Vcoding_system_alist = Qnil;
7142
7143   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
7144     "List of coding-categories (symbols) ordered by priority.\n\
7145 \n\
7146 On detecting a coding system, Emacs tries code detection algorithms\n\
7147 associated with each coding-category one by one in this order.  When\n\
7148 one algorithm agrees with a byte sequence of source text, the coding\n\
7149 system bound to the corresponding coding-category is selected.");
7150   {
7151     int i;
7152
7153     Vcoding_category_list = Qnil;
7154     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
7155       Vcoding_category_list
7156         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
7157                  Vcoding_category_list);
7158   }
7159
7160   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
7161     "Specify the coding system for read operations.\n\
7162 It is useful to bind this variable with `let', but do not set it globally.\n\
7163 If the value is a coding system, it is used for decoding on read operation.\n\
7164 If not, an appropriate element is used from one of the coding system alists:\n\
7165 There are three such tables, `file-coding-system-alist',\n\
7166 `process-coding-system-alist', and `network-coding-system-alist'.");
7167   Vcoding_system_for_read = Qnil;
7168
7169   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
7170     "Specify the coding system for write operations.\n\
7171 Programs bind this variable with `let', but you should not set it globally.\n\
7172 If the value is a coding system, it is used for encoding of output,\n\
7173 when writing it to a file and when sending it to a file or subprocess.\n\
7174 \n\
7175 If this does not specify a coding system, an appropriate element\n\
7176 is used from one of the coding system alists:\n\
7177 There are three such tables, `file-coding-system-alist',\n\
7178 `process-coding-system-alist', and `network-coding-system-alist'.\n\
7179 For output to files, if the above procedure does not specify a coding system,\n\
7180 the value of `buffer-file-coding-system' is used.");
7181   Vcoding_system_for_write = Qnil;
7182
7183   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
7184     "Coding system used in the latest file or process I/O.");
7185   Vlast_coding_system_used = Qnil;
7186
7187   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
7188     "*Non-nil means always inhibit code conversion of end-of-line format.\n\
7189 See info node `Coding Systems' and info node `Text and Binary' concerning\n\
7190 such conversion.");
7191   inhibit_eol_conversion = 0;
7192
7193   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
7194     "Non-nil means process buffer inherits coding system of process output.\n\
7195 Bind it to t if the process output is to be treated as if it were a file\n\
7196 read from some filesystem.");
7197   inherit_process_coding_system = 0;
7198
7199   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
7200     "Alist to decide a coding system to use for a file I/O operation.\n\
7201 The format is ((PATTERN . VAL) ...),\n\
7202 where PATTERN is a regular expression matching a file name,\n\
7203 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
7204 If VAL is a coding system, it is used for both decoding and encoding\n\
7205 the file contents.\n\
7206 If VAL is a cons of coding systems, the car part is used for decoding,\n\
7207 and the cdr part is used for encoding.\n\
7208 If VAL is a function symbol, the function must return a coding system\n\
7209 or a cons of coding systems which are used as above.\n\
7210 \n\
7211 See also the function `find-operation-coding-system'\n\
7212 and the variable `auto-coding-alist'.");
7213   Vfile_coding_system_alist = Qnil;
7214
7215   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
7216     "Alist to decide a coding system to use for a process I/O operation.\n\
7217 The format is ((PATTERN . VAL) ...),\n\
7218 where PATTERN is a regular expression matching a program name,\n\
7219 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
7220 If VAL is a coding system, it is used for both decoding what received\n\
7221 from the program and encoding what sent to the program.\n\
7222 If VAL is a cons of coding systems, the car part is used for decoding,\n\
7223 and the cdr part is used for encoding.\n\
7224 If VAL is a function symbol, the function must return a coding system\n\
7225 or a cons of coding systems which are used as above.\n\
7226 \n\
7227 See also the function `find-operation-coding-system'.");
7228   Vprocess_coding_system_alist = Qnil;
7229
7230   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
7231     "Alist to decide a coding system to use for a network I/O operation.\n\
7232 The format is ((PATTERN . VAL) ...),\n\
7233 where PATTERN is a regular expression matching a network service name\n\
7234 or is a port number to connect to,\n\
7235 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
7236 If VAL is a coding system, it is used for both decoding what received\n\
7237 from the network stream and encoding what sent to the network stream.\n\
7238 If VAL is a cons of coding systems, the car part is used for decoding,\n\
7239 and the cdr part is used for encoding.\n\
7240 If VAL is a function symbol, the function must return a coding system\n\
7241 or a cons of coding systems which are used as above.\n\
7242 \n\
7243 See also the function `find-operation-coding-system'.");
7244   Vnetwork_coding_system_alist = Qnil;
7245
7246   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
7247     "Coding system to use with system messages.");
7248   Vlocale_coding_system = Qnil;
7249
7250   /* The eol mnemonics are reset in startup.el system-dependently.  */
7251   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
7252     "*String displayed in mode line for UNIX-like (LF) end-of-line format.");
7253   eol_mnemonic_unix = build_string (":");
7254
7255   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
7256     "*String displayed in mode line for DOS-like (CRLF) end-of-line format.");
7257   eol_mnemonic_dos = build_string ("\\");
7258
7259   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
7260     "*String displayed in mode line for MAC-like (CR) end-of-line format.");
7261   eol_mnemonic_mac = build_string ("/");
7262
7263   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
7264     "*String displayed in mode line when end-of-line format is not yet determined.");
7265   eol_mnemonic_undecided = build_string (":");
7266
7267   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
7268     "*Non-nil enables character translation while encoding and decoding.");
7269   Venable_character_translation = Qt;
7270
7271   DEFVAR_LISP ("standard-translation-table-for-decode",
7272     &Vstandard_translation_table_for_decode,
7273     "Table for translating characters while decoding.");
7274   Vstandard_translation_table_for_decode = Qnil;
7275
7276   DEFVAR_LISP ("standard-translation-table-for-encode",
7277     &Vstandard_translation_table_for_encode,
7278     "Table for translating characters while encoding.");
7279   Vstandard_translation_table_for_encode = Qnil;
7280
7281   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
7282     "Alist of charsets vs revision numbers.\n\
7283 While encoding, if a charset (car part of an element) is found,\n\
7284 designate it with the escape sequence identifying revision (cdr part of the element).");
7285   Vcharset_revision_alist = Qnil;
7286
7287   DEFVAR_LISP ("default-process-coding-system",
7288                &Vdefault_process_coding_system,
7289     "Cons of coding systems used for process I/O by default.\n\
7290 The car part is used for decoding a process output,\n\
7291 the cdr part is used for encoding a text to be sent to a process.");
7292   Vdefault_process_coding_system = Qnil;
7293
7294   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
7295     "Table of extra Latin codes in the range 128..159 (inclusive).\n\
7296 This is a vector of length 256.\n\
7297 If Nth element is non-nil, the existence of code N in a file\n\
7298 \(or output of subprocess) doesn't prevent it to be detected as\n\
7299 a coding system of ISO 2022 variant which has a flag\n\
7300 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file\n\
7301 or reading output of a subprocess.\n\
7302 Only 128th through 159th elements has a meaning.");
7303   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
7304
7305   DEFVAR_LISP ("select-safe-coding-system-function",
7306                &Vselect_safe_coding_system_function,
7307     "Function to call to select safe coding system for encoding a text.\n\
7308 \n\
7309 If set, this function is called to force a user to select a proper\n\
7310 coding system which can encode the text in the case that a default\n\
7311 coding system used in each operation can't encode the text.\n\
7312 \n\
7313 The default value is `select-safe-coding-system' (which see).");
7314   Vselect_safe_coding_system_function = Qnil;
7315
7316   DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table,
7317     "Char-table containing safe coding systems of each characters.\n\
7318 Each element doesn't include such generic coding systems that can\n\
7319 encode any characters.   They are in the first extra slot.");
7320   Vchar_coding_system_table = Fmake_char_table (Qchar_coding_system, Qnil);
7321
7322   DEFVAR_BOOL ("inhibit-iso-escape-detection",
7323                &inhibit_iso_escape_detection,
7324     "If non-nil, Emacs ignores ISO2022's escape sequence on code detection.\n\
7325 \n\
7326 By default, on reading a file, Emacs tries to detect how the text is\n\
7327 encoded.  This code detection is sensitive to escape sequences.  If\n\
7328 the sequence is valid as ISO2022, the code is determined as one of\n\
7329 the ISO2022 encodings, and the file is decoded by the corresponding\n\
7330 coding system (e.g. `iso-2022-7bit').\n\
7331 \n\
7332 However, there may be a case that you want to read escape sequences in\n\
7333 a file as is.  In such a case, you can set this variable to non-nil.\n\
7334 Then, as the code detection ignores any escape sequences, no file is\n\
7335 detected as encoded in some ISO2022 encoding.  The result is that all\n\
7336 escape sequences become visible in a buffer.\n\
7337 \n\
7338 The default value is nil, and it is strongly recommended not to change\n\
7339 it.  That is because many Emacs Lisp source files that contain\n\
7340 non-ASCII characters are encoded by the coding system `iso-2022-7bit'\n\
7341 in Emacs's distribution, and they won't be decoded correctly on\n\
7342 reading if you suppress escape sequence detection.\n\
7343 \n\
7344 The other way to read escape sequences in a file without decoding is\n\
7345 to explicitly specify some coding system that doesn't use ISO2022's\n\
7346 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].");
7347   inhibit_iso_escape_detection = 0;
7348 }
7349
7350 char *
7351 emacs_strerror (error_number)
7352      int error_number;
7353 {
7354   char *str;
7355
7356   synchronize_system_messages_locale ();
7357   str = strerror (error_number);
7358
7359   if (! NILP (Vlocale_coding_system))
7360     {
7361       Lisp_Object dec = code_convert_string_norecord (build_string (str),
7362                                                       Vlocale_coding_system,
7363                                                       0);
7364       str = (char *) XSTRING (dec)->data;
7365     }
7366
7367   return str;
7368 }
7369
7370 #endif /* emacs */
7371