src/coding.c

   1 /* Coding system handler (conversion, detection, and etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /*** TABLE OF CONTENTS ***
  23
  24   0. General comments
  25   1. Preamble
  26   2. Emacs' internal format (emacs-mule) handlers
  27   3. ISO2022 handlers
  28   4. Shift-JIS and BIG5 handlers
  29   5. CCL handlers
  30   6. End-of-line handlers
  31   7. C library functions
  32   8. Emacs Lisp library functions
  33   9. Post-amble
  34
  35 */
  36
  37 /*** 0. General comments ***/
  38
  39
  40 /*** GENERAL NOTE on CODING SYSTEMS ***
  41
  42   A coding system is an encoding mechanism for one or more character
  43   sets.  Here's a list of coding systems which Emacs can handle.  When
  44   we say "decode", it means converting some other coding system to
  45   Emacs' internal format (emacs-mule), and when we say "encode",
  46   it means converting the coding system emacs-mule to some other
  47   coding system.
  48
  49   0. Emacs' internal format (emacs-mule)
  50
  51   Emacs itself holds a multi-lingual character in buffers and strings
  52   in a special format.  Details are described in section 2.
  53
  54   1. ISO2022
  55
  56   The most famous coding system for multiple character sets.  X's
  57   Compound Text, various EUCs (Extended Unix Code), and coding
  58   systems used in Internet communication such as ISO-2022-JP are
  59   all variants of ISO2022.  Details are described in section 3.
  60
  61   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  62
  63   A coding system to encode character sets: ASCII, JISX0201, and
  64   JISX0208.  Widely used for PC's in Japan.  Details are described in
  65   section 4.
  66
  67   3. BIG5
  68
  69   A coding system to encode the character sets ASCII and Big5.  Widely
  70   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  71   described in section 4.  In this file, when we write "BIG5"
  72   (all uppercase), we mean the coding system, and when we write
  73   "Big5" (capitalized), we mean the character set.
  74
  75   4. Raw text
  76
  77   A coding system for text containing random 8-bit code.  Emacs does
  78   no code conversion on such text except for end-of-line format.
  79
  80   5. Other
  81
  82   If a user wants to read/write text encoded in a coding system not
  83   listed above, he can supply a decoder and an encoder for it as CCL
  84   (Code Conversion Language) programs.  Emacs executes the CCL program
  85   while reading/writing.
  86
  87   Emacs represents a coding system by a Lisp symbol that has a property
  88   `coding-system'.  But, before actually using the coding system, the
  89   information about it is set in a structure of type `struct
  90   coding_system' for rapid processing.  See section 6 for more details.
  91
  92 */
  93
  94 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  95
  96   How end-of-line of text is encoded depends on the operating system.
  97   For instance, Unix's format is just one byte of `line-feed' code,
  98   whereas DOS's format is two-byte sequence of `carriage-return' and
  99   `line-feed' codes.  MacOS's format is usually one byte of
 100   `carriage-return'.
 101
 102   Since text character encoding and end-of-line encoding are
 103   independent, any coding system described above can have any
 104   end-of-line format.  So Emacs has information about end-of-line
 105   format in each coding-system.  See section 6 for more details.
 106
 107 */
 108
 109 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 110
 111   These functions check if a text between SRC and SRC_END is encoded
 112   in the coding system category XXX.  Each returns an integer value in
 113   which appropriate flag bits for the category XXX are set.  The flag
 114   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 115   template for these functions.  If MULTIBYTEP is nonzero, 8-bit codes
 116   of the range 0x80..0x9F are in multibyte form.  */
 117 #if 0
 118 int
 119 detect_coding_emacs_mule (src, src_end, multibytep)
 120      unsigned char *src, *src_end;
 121      int multibytep;
 122 {
 123   ...
 124 }
 125 #endif
 126
 127 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 128
 129   These functions decode SRC_BYTES length of unibyte text at SOURCE
 130   encoded in CODING to Emacs' internal format.  The resulting
 131   multibyte text goes to a place pointed to by DESTINATION, the length
 132   of which should not exceed DST_BYTES.
 133
 134   These functions set the information about original and decoded texts
 135   in the members `produced', `produced_char', `consumed', and
 136   `consumed_char' of the structure *CODING.  They also set the member
 137   `result' to one of CODING_FINISH_XXX indicating how the decoding
 138   finished.
 139
 140   DST_BYTES zero means that the source area and destination area are
 141   overlapped, which means that we can produce a decoded text until it
 142   reaches the head of the not-yet-decoded source text.
 143
 144   Below is a template for these functions.  */
 145 #if 0
 146 static void
 147 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 148      struct coding_system *coding;
 149      unsigned char *source, *destination;
 150      int src_bytes, dst_bytes;
 151 {
 152   ...
 153 }
 154 #endif
 155
 156 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 157
 158   These functions encode SRC_BYTES length text at SOURCE from Emacs'
 159   internal multibyte format to CODING.  The resulting unibyte text
 160   goes to a place pointed to by DESTINATION, the length of which
 161   should not exceed DST_BYTES.
 162
 163   These functions set the information about original and encoded texts
 164   in the members `produced', `produced_char', `consumed', and
 165   `consumed_char' of the structure *CODING.  They also set the member
 166   `result' to one of CODING_FINISH_XXX indicating how the encoding
 167   finished.
 168
 169   DST_BYTES zero means that the source area and destination area are
 170   overlapped, which means that we can produce encoded text until it
 171   reaches the head of the not-yet-encoded source text.
 172
 173   Below is a template for these functions.  */
 174 #if 0
 175 static void
 176 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 177      struct coding_system *coding;
 178      unsigned char *source, *destination;
 179      int src_bytes, dst_bytes;
 180 {
 181   ...
 182 }
 183 #endif
 184
 185 /*** COMMONLY USED MACROS ***/
 186
 187 /* The following two macros ONE_MORE_BYTE and TWO_MORE_BYTES safely
 188    get one and two bytes from the source text respectively.  If there
 189    are not enough bytes in the source, they set `coding->result' to
 190    CODING_FINISH_INSUFFICIENT_SRC and jump to `label_end_of_loop'.  The
 191    caller should set variables `coding', `src' and `src_end' to
 192    appropriate pointers in advance.  These macros are called from
 193    decoding routines, thus the source text is assumed to be unibyte.  */
 194
 195 #define ONE_MORE_BYTE(c1)                                       \
 196   do {                                                          \
 197     if (src >= src_end)                                         \
 198       {                                                         \
 199         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 200         goto label_end_of_loop;                                 \
 201       }                                                         \
 202     c1 = *src++;                                                \
 203   } while (0)
 204
 205 #define TWO_MORE_BYTES(c1, c2)                                  \
 206   do {                                                          \
 207     if (src + 1 >= src_end)     /* Fewer than 2 bytes left.  */ \
 208       {                                                         \
 209         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 210         goto label_end_of_loop;                                 \
 211       }                                                         \
 212     c1 = *src++;                                                \
 213     c2 = *src++;                                                \
 214   } while (0)
 215
 216
 217 /* Like ONE_MORE_BYTE, but if MULTIBYTEP is nonzero and the byte read is
 218    LEADING_CODE_8_BIT_CONTROL, read one more byte and set C1 to it - 0x20.  */
 219
 220 #define ONE_MORE_BYTE_CHECK_MULTIBYTE(c1, multibytep)           \
 221   do {                                                          \
 222     if (src >= src_end)                                         \
 223       {                                                         \
 224         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 225         goto label_end_of_loop;                                 \
 226       }                                                         \
 227     c1 = *src++;                                                \
 228     if (multibytep && c1 == LEADING_CODE_8_BIT_CONTROL)         \
 229       c1 = *src++ - 0x20; /* NOTE(review): no src_end check.  */ \
 230   } while (0)
 231
 232 /* Set C to the next character at the source text pointed by `src'
 233    and advance `src' past it.  If no bytes are left in the source,
 234    set coding->result to CODING_FINISH_INSUFFICIENT_SRC and jump to
 235    `label_end_of_loop'.  If `translation_table' is non-nil, C is
 236    translated through it.  The caller should set `coding', `src',
 237    `src_end', and `translation_table' in advance.  This macro is used
 238    in encoding routines `encode_coding_XXX', thus the source text is
 239    multibyte except for 8-bit characters, which are in multibyte form
 240    if coding->src_multibyte is nonzero, else a single byte each.  */
 241
 242 #define ONE_MORE_CHAR(c)                                        \
 243   do {                                                          \
 244     int len = src_end - src;                                    \
 245     int bytes;                                                  \
 246     if (len <= 0)                                               \
 247       {                                                         \
 248         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 249         goto label_end_of_loop;                                 \
 250       }                                                         \
 251     if (coding->src_multibyte                                   \
 252         || UNIBYTE_STR_AS_MULTIBYTE_P (src, len, bytes))        \
 253       c = STRING_CHAR_AND_LENGTH (src, len, bytes);             \
 254     else                                                        \
 255       c = *src, bytes = 1;                                      \
 256     if (!NILP (translation_table))                              \
 257       c = translate_char (translation_table, c, -1, 0, 0);      \
 258     src += bytes;                                               \
 259   } while (0)
 260
 261
 262 /* Produce a multibyte form of character C to `dst' and increment
 263    coding->produced_char.  If there's not enough space at `dst', set
 264    coding->result to CODING_FINISH_INSUFFICIENT_DST and jump to
 265    `label_end_of_loop'.  While composing by a method other than
 266    COMPOSITION_RELATIVE and COMPOSITION_WITH_RULE, C is not written
 267    to `dst'.  While composing by a method other than
 268    COMPOSITION_RELATIVE, C is recorded in coding->cmp_data->data.
 269
 270    This macro is used in decoding routines.  */
 271
 272 #define EMIT_CHAR(c)                                                    \
 273   do {                                                                  \
 274     if (! COMPOSING_P (coding)                                          \
 275         || coding->composing == COMPOSITION_RELATIVE                    \
 276         || coding->composing == COMPOSITION_WITH_RULE)                  \
 277       {                                                                 \
 278         int bytes = CHAR_BYTES (c);                                     \
 279         if ((dst + bytes) > (dst_bytes ? dst_end : src))                \
 280           {                                                             \
 281             coding->result = CODING_FINISH_INSUFFICIENT_DST;            \
 282             goto label_end_of_loop;                                     \
 283           }                                                             \
 284         dst += CHAR_STRING (c, dst);                                    \
 285         coding->produced_char++;                                        \
 286       }                                                                 \
 287                                                                         \
 288     if (COMPOSING_P (coding)                                            \
 289         && coding->composing != COMPOSITION_RELATIVE)                   \
 290       {                                                                 \
 291         CODING_ADD_COMPOSITION_COMPONENT (coding, c);                   \
 292         coding->composition_rule_follows                                \
 293           = coding->composing != COMPOSITION_WITH_ALTCHARS;             \
 294       }                                                                 \
 295   } while (0)
 296
 297
 298 #define EMIT_ONE_BYTE(c)                                        \
 299   do {  /* Store the byte C at `dst' and advance `dst'.  */     \
 300     if (dst >= (dst_bytes ? dst_end : src))                     \
 301       { /* No room; if dst_bytes is zero the limit is `src'.  */ \
 302         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 303         goto label_end_of_loop;                                 \
 304       }                                                         \
 305     *dst++ = c;                                                 \
 306   } while (0)
 307
 308 #define EMIT_TWO_BYTES(c1, c2)                                  \
 309   do {  /* Store bytes C1 then C2 at `dst', or neither.  */     \
 310     if (dst + 2 > (dst_bytes ? dst_end : src))                  \
 311       { /* No room; if dst_bytes is zero the limit is `src'.  */ \
 312         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 313         goto label_end_of_loop;                                 \
 314       }                                                         \
 315     *dst++ = c1, *dst++ = c2;                                   \
 316   } while (0)
 317
 318 #define EMIT_BYTES(from, to)                                    \
 319   do {  /* Copy the bytes FROM..TO to `dst', or none of them.  */ \
 320     if (dst + (to - from) > (dst_bytes ? dst_end : src))        \
 321       { /* No room; if dst_bytes is zero the limit is `src'.  */ \
 322         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 323         goto label_end_of_loop;                                 \
 324       }                                                         \
 325     while (from < to)   /* FROM itself is advanced up to TO.  */ \
 326       *dst++ = *from++;                                         \
 327   } while (0)
 328
 329 \f
 330 /*** 1. Preamble ***/
 331
 332 #ifdef emacs
 333 #include <config.h>
 334 #endif
 335
 336 #include <stdio.h>
 337
 338 #ifdef emacs
 339
 340 #include "lisp.h"
 341 #include "buffer.h"
 342 #include "charset.h"
 343 #include "composite.h"
 344 #include "ccl.h"
 345 #include "coding.h"
 346 #include "window.h"
 347
 348 #else  /* not emacs */
 349
 350 #include "mulelib.h"
 351
 352 #endif /* not emacs */
 353
 354 Lisp_Object Qcoding_system, Qeol_type; /* Qcoding_system: property read by coding_safe_chars.  */
 355 Lisp_Object Qbuffer_file_coding_system;
 356 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 357 Lisp_Object Qno_conversion, Qundecided;
 358 Lisp_Object Qcoding_system_history;
 359 Lisp_Object Qsafe_chars;       /* Plist key looked up by coding_safe_chars.  */
 360 Lisp_Object Qvalid_codes;
 361
 362 extern Lisp_Object Qinsert_file_contents, Qwrite_region; /* Declared only; not defined here.  */
 363 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 364 Lisp_Object Qstart_process, Qopen_network_stream;
 365 Lisp_Object Qtarget_idx;
 366
 367 Lisp_Object Vselect_safe_coding_system_function;
 368
 369 /* Mnemonic string for each format of end-of-line.  */
 370 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 371 /* Mnemonic string to indicate that the format of end-of-line is not
 372    yet decided.  */
 373 Lisp_Object eol_mnemonic_undecided;
 374
 375 /* Format of end-of-line decided by system.  This is CODING_EOL_LF on
 376    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
 377 int system_eol_type;
 378
 379 #ifdef emacs
 380
 381 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 382
 383 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 384
 385 /* Coding systems emacs-mule and raw-text are for converting only
 386    the end-of-line format.  */
 387 Lisp_Object Qemacs_mule, Qraw_text;
 388
 389 /* Coding-systems are handed between Emacs Lisp programs and C internal
 390    routines by the following three variables.  */
 391 /* Coding-system for reading files and receiving data from process.  */
 392 Lisp_Object Vcoding_system_for_read;
 393 /* Coding-system for writing files and sending data to process.  */
 394 Lisp_Object Vcoding_system_for_write;
 395 /* Coding-system actually used in the latest I/O.  */
 396 Lisp_Object Vlast_coding_system_used;
 397
 398 /* A vector of length 256 which contains information about special
 399    Latin codes (especially for dealing with Microsoft codes).  */
 400 Lisp_Object Vlatin_extra_code_table;
 401
 402 /* Flag to inhibit code conversion of end-of-line format.  */
 403 int inhibit_eol_conversion;
 404
 405 /* Flag to inhibit ISO2022 escape sequence detection.  */
 406 int inhibit_iso_escape_detection;
 407
 408 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 409 int inherit_process_coding_system;
 410
 411 /* Coding system to be used to encode text for terminal display.  */
 412 struct coding_system terminal_coding;
 413
 414 /* Coding system to be used to encode text for terminal display when
 415    the terminal coding system is nil.  */
 416 struct coding_system safe_terminal_coding;
 417
 418 /* Coding system of what is sent from terminal keyboard.  */
 419 struct coding_system keyboard_coding;
 420
 421 /* Default coding system to be used to write a file.  */
 422 struct coding_system default_buffer_file_coding;
 423
 424 Lisp_Object Vfile_coding_system_alist;
 425 Lisp_Object Vprocess_coding_system_alist;
 426 Lisp_Object Vnetwork_coding_system_alist;
 427
 428 Lisp_Object Vlocale_coding_system;
 429
 430 #endif /* emacs */
 431
 432 Lisp_Object Qcoding_category, Qcoding_category_index;
 433
 434 /* List of symbols `coding-category-xxx' ordered by priority.  */
 435 Lisp_Object Vcoding_category_list;
 436
 437 /* Table of coding categories (Lisp symbols).  */
 438 Lisp_Object Vcoding_category_table;
 439
 440 /* Table of symbol names, one for each coding category.  */
 441 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
 442   "coding-category-emacs-mule",
 443   "coding-category-sjis",
 444   "coding-category-iso-7",
 445   "coding-category-iso-7-tight",
 446   "coding-category-iso-8-1",
 447   "coding-category-iso-8-2",
 448   "coding-category-iso-7-else",
 449   "coding-category-iso-8-else",
 450   "coding-category-ccl",
 451   "coding-category-big5",
 452   "coding-category-utf-8",
 453   "coding-category-utf-16-be",
 454   "coding-category-utf-16-le",
 455   "coding-category-raw-text",
 456   "coding-category-binary"
 457 };
 458
 459 /* Table of pointers to the coding systems corresponding to each
 460    coding category.  */
 461 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
 462
 463 /* Table of coding category masks.  The Nth element is the mask of the
 464    coding category whose priority is Nth.  */
 465 static
 466 int coding_priorities[CODING_CATEGORY_IDX_MAX];
 467
 468 /* Flag to tell if we look up a translation table on character code
 469    conversion.  */
 470 Lisp_Object Venable_character_translation;
 471 /* Standard translation table to look up on decoding (reading).  */
 472 Lisp_Object Vstandard_translation_table_for_decode;
 473 /* Standard translation table to look up on encoding (writing).  */
 474 Lisp_Object Vstandard_translation_table_for_encode;
 475
 476 Lisp_Object Qtranslation_table;
 477 Lisp_Object Qtranslation_table_id;
 478 Lisp_Object Qtranslation_table_for_decode;
 479 Lisp_Object Qtranslation_table_for_encode;
 480
 481 /* Alist of charsets vs revision number.  */
 482 Lisp_Object Vcharset_revision_alist;
 483
 484 /* Default coding systems used for process I/O.  */
 485 Lisp_Object Vdefault_process_coding_system;
 486
 487 /* Global flag to tell that we can't call post-read-conversion and
 488    pre-write-conversion functions.  Usually the value is zero, but it
 489    is set to 1 temporarily while such functions are running.  This is
 490    to avoid infinitely recursive calls.  */
 491 static int inhibit_pre_post_conversion;
 492
 493 /* Char-table containing the safe coding systems of each character.  */
 494 Lisp_Object Vchar_coding_system_table;
 495 Lisp_Object Qchar_coding_system;
 496
 497 /* Return the `safe-chars' property of coding system CODING if it is
 498    a char-table, otherwise Qt.  Don't check validity of CODING.  */
 499
 500 Lisp_Object
 501 coding_safe_chars (coding)
 502      struct coding_system *coding;
 503 {
 504   Lisp_Object coding_spec, plist, safe_chars;
 505
 506   coding_spec = Fget (coding->symbol, Qcoding_system);
 507   plist = XVECTOR (coding_spec)->contents[3]; /* Slot 3 is used as a plist.  */
 508   safe_chars = Fplist_get (plist, Qsafe_chars);
 509   return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt);
 510 }
 511
 512 #define CODING_SAFE_CHAR_P(safe_chars, c) /* 1 if SAFE_CHARS is Qt or has a non-nil entry for C.  */ \
 513   (EQ (safe_chars, Qt) ? 1 : !NILP (CHAR_TABLE_REF (safe_chars, c)))
 514
 515 \f
 516 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 517
 518 /* Emacs' internal format for representation of multiple character
 519    sets is a kind of multi-byte encoding, i.e. characters are
 520    represented by variable-length sequences of one-byte codes.
 521
 522    ASCII characters and control characters (e.g. `tab', `newline') are
 523    represented by one-byte sequences which are their ASCII codes, in
 524    the range 0x00 through 0x7F.
 525
 526    8-bit characters of the range 0x80..0x9F are represented by
 527    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
 528    code + 0x20).
 529
 530    8-bit characters of the range 0xA0..0xFF are represented by
 531    one-byte sequences which are their 8-bit code.
 532
 533    The other characters are represented by a sequence of `base
 534    leading-code', optional `extended leading-code', and one or two
 535    `position-code's.  The length of the sequence is determined by the
 536    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
 537    whereas extended leading-code and position-code take the range 0xA0
 538    through 0xFF.  See `charset.h' for more details about leading-code
 539    and position-code.
 540
 541    --- CODE RANGE of Emacs' internal format ---
 542    character set        range
 543    -------------        -----
 544    ascii                0x00..0x7F
 545    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
 546    eight-bit-graphic    0xA0..0xFF
 547    ELSE                 0x81..0x9D + [0xA0..0xFF]+
 548    ---------------------------------------------
 549
 550    As this is the internal character representation, the format is
 551    usually not used externally (i.e. in a file or in a data sent to a
 552    process).  But, it is possible to have a text externally in this
 553    format (i.e. by encoding by the coding system `emacs-mule').
 554
 555    In that case, a sequence of one-byte codes has a slightly different
 556    form.
 557
 558    At first, all characters in eight-bit-control are represented by
 559    one-byte sequences which are their 8-bit code.
 560
 561    Next, character composition data are represented by the byte
 562    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
 563    where,
 564         METHOD is 0xF0 plus one of composition method (enum
 565         composition_method),
 566
 567         BYTES is 0x20 plus a byte length of this composition data,
 568
 569         CHARS is 0x20 plus a number of characters composed by this
 570         data,
 571
 572         COMPONENTs are characters of multibyte form or composition
 573         rules encoded by two-byte of ASCII codes.
 574
 575    In addition, for backward compatibility, the following formats are
 576    also recognized as composition data on decoding.
 577
 578    0x80 MSEQ ...
 579    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
 580
 581    Here,
 582         MSEQ is a multibyte form but in this special format:
 583           ASCII: 0xA0 ASCII_CODE+0x80,
 584           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
 585         RULE is a one byte code of the range 0xA0..0xF0 that
 586         represents a composition rule.
 587   */
 588
 589 enum emacs_code_class_type emacs_code_class[256]; /* NOTE(review): presumably indexed by byte value -- confirm.  */
 590
 591 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 592    Return 0 if the text contains ESC, SI, SO, or an invalid multibyte
 593    sequence; otherwise return CODING_CATEGORY_MASK_EMACS_MULE.  */
 594
 595 static int
 596 detect_coding_emacs_mule (src, src_end, multibytep)
 597       unsigned char *src, *src_end;
 598       int multibytep;
 599 {
 600   unsigned char c;
 601   int composing = 0;
 602   /* Dummy so that the macro's store to coding->result has a target.  */
 603   struct coding_system dummy_coding;
 604   struct coding_system *coding = &dummy_coding;
 605
 606   while (1)
 607     {
 608       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
 609
 610       if (composing)
 611         {
 612           if (c < 0xA0)         /* Not a component byte: composition ends.  */
 613             composing = 0;
 614           else if (c == 0xA0)
 615             {
 616               ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
 617               c &= 0x7F;        /* After 0xA0, drop the high bit of the next byte.  */
 618             }
 619           else
 620             c -= 0x20;          /* Undo the +0x20 applied to a component's leading code.  */
 621         }
 622
 623       if (c < 0x20)
 624         {
 625           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
 626             return 0;           /* These control codes rule out this category.  */
 627         }
 628       else if (c >= 0x80 && c < 0xA0)
 629         {
 630           if (c == 0x80)
 631             /* Old leading code for a composite character.  */
 632             composing = 1;
 633           else
 634             {
 635               unsigned char *src_base = src - 1; /* Back up to the byte just read.  */
 636               int bytes;
 637
 638               if (!UNIBYTE_STR_AS_MULTIBYTE_P (src_base, src_end - src_base,
 639                                                bytes))
 640                 return 0;
 641               src = src_base + bytes;   /* Skip the whole validated sequence.  */
 642             }
 643         }
 644     }
 645  label_end_of_loop:     /* Reached from the macro when the source is exhausted.  */
 646   return CODING_CATEGORY_MASK_EMACS_MULE;
 647 }
 648
 649
 650 /* Record START and METHOD of one composition (4 ints; [2] is set at END).  */
 651
 652 #define CODING_ADD_COMPOSITION_START(coding, start, method)     \
 653   do {                                                          \
 654     struct composition_data *cmp_data = (coding)->cmp_data;     \
 655     int *data = cmp_data->data + cmp_data->used;                \
 656     (coding)->cmp_data_start = cmp_data->used;                  \
 657     data[0] = -1;                                               \
 658     data[1] = cmp_data->char_offset + (start);                  \
 659     data[3] = (int) (method);                                   \
 660     cmp_data->used += 4;                                        \
 661   } while (0)
 662
 663 /* Record the ending position END and the length of the current composition.  */
 664
 665 #define CODING_ADD_COMPOSITION_END(coding, end)                 \
 666   do {                                                          \
 667     struct composition_data *cmp_data = (coding)->cmp_data;     \
 668     int *data = cmp_data->data + (coding)->cmp_data_start;      \
 669     data[0] = cmp_data->used - (coding)->cmp_data_start;        \
 670     data[2] = cmp_data->char_offset + (end);                    \
 671   } while (0)
 672
 673 /* Record one COMPONENT (alternate character or composition rule).  */
 674
 675 #define CODING_ADD_COMPOSITION_COMPONENT(coding, component)     \
 676   ((coding)->cmp_data->data[(coding)->cmp_data->used++] = (component))
 677
 678
 679 /* Get one byte from the data pointed to by `src' and increment `src'.  If
 680    `src' is not less than `src_end', return -1 without incrementing `src'.  */
 681
 682 #define SAFE_ONE_MORE_BYTE() (src >= src_end ? -1 : *src++)
 683
 684
 685 /* Decode a character represented as a component of composition
 686    sequence of Emacs 20 style at `src' (bounded by `src_end').  Set C
 687    to that character, store its multibyte form at P, and advance P
 688    past it.  If no valid character is found, set C to -1.  */
 689
 690 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(c, p)                \
 691   do {                                                          \
 692     int bytes;                                                  \
 693                                                                 \
 694     c = SAFE_ONE_MORE_BYTE ();                                  \
 695     if (c < 0)                                                  \
 696       break;                                                    \
 697     if (CHAR_HEAD_P (c))                                        \
 698       c = -1;                                                   \
 699     else if (c == 0xA0)                                         \
 700       {                                                         \
 701         c = SAFE_ONE_MORE_BYTE ();                              \
 702         if (c < 0xA0)                                           \
 703           c = -1;                                               \
 704         else                                                    \
 705           {                                                     \
 706             c -= 0x80;  /* ASCII is stored as ASCII_CODE+0x80.  */ \
 707             *p++ = c;                                           \
 708           }                                                     \
 709       }                                                         \
 710     else if (BASE_LEADING_CODE_P (c - 0x20))                    \
 711       {                                                         \
 712         unsigned char *p0 = p;                                  \
 713                                                                 \
 714         c -= 0x20;                                              \
 715         *p++ = c;                                               \
 716         bytes = BYTES_BY_CHAR_HEAD (c);                         \
 717         while (--bytes)                                         \
 718           {                                                     \
 719             c = SAFE_ONE_MORE_BYTE ();                          \
 720             if (c < 0)                                          \
 721               break;                                            \
 722             *p++ = c;                                           \
 723           }                                                     \
 724         if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes))     \
 725           c = STRING_CHAR (p0, bytes);                          \
 726         else                                                    \
 727           c = -1;                                               \
 728       }                                                         \
 729     else                                                        \
 730       c = -1;                                                   \
 731   } while (0)
 732
 733
 734 /* Decode a composition rule represented as a component of a composition
 735    sequence of Emacs 20 style at SRC.  Set C to the rule.  If no
 736    valid rule is found, set C to -1.

        The rule occupies one byte.  After 0xA0 is subtracted, the value
        must lie in 0..80; it is split into GREF (value / 9) and NREF
        (value % 9), which COMPOSITION_ENCODE_RULE combines into the rule.
        GREF and NREF are not macro parameters: variables with those names
        must exist where the macro is expanded, and both are overwritten
        when a valid rule is found.  */
 737
 738 #define DECODE_EMACS_MULE_COMPOSITION_RULE(c)           \
 739   do {                                                  \
 740     c = SAFE_ONE_MORE_BYTE ();                          \
 741     c -= 0xA0;                                          \
 742     if (c < 0 || c >= 81)                               \
 743       c = -1;                                           \
 744     else                                                \
 745       {                                                 \
 746         gref = c / 9, nref = c % 9;                     \
 747         c = COMPOSITION_ENCODE_RULE (gref, nref);       \
 748       }                                                 \
 749   } while (0)
 750
 751
 752 /* Decode composition sequence encoded by `emacs-mule' at the source
 753    pointed by SRC.  SRC_END is the end of source.  Store information
 754    of the composition in CODING->cmp_data.
 755
 756    For backward compatibility, decode also a composition sequence of
 757    Emacs 20 style.  In that case, the composition sequence contains
 758    characters that should be extracted into a buffer or string.  Store
 759    those characters at *DESTINATION in multibyte form.
 760
 761    If we encounter an invalid byte sequence, return 0.  This includes
        a COMPOSED-CHARS count smaller than 1 and a sequence holding more
        components than COMPOSITION_DATA_MAX_BUNCH_LENGTH.
 762    If we encounter an insufficient source or destination, or
 763    insufficient space in CODING->cmp_data, return -1.
 764    Otherwise, return consumed bytes in the source.
 765
 766 */
 767 static INLINE int
 768 decode_composition_emacs_mule (coding, src, src_end,
 769                                destination, dst_end, dst_bytes)
 770      struct coding_system *coding;
 771      unsigned char *src, *src_end, **destination, *dst_end;
 772      int dst_bytes;
 773 {
 774   unsigned char *dst = *destination;
 775   int method, data_len, nchars;
       /* SRC_BASE stays on the first byte of the sequence; SRC moves on.  */
 776   unsigned char *src_base = src++;
 777   /* Store components of composition.  */
 778   int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH];
 779   int ncomponent;
 780   /* Store multibyte form of characters to be composed.  This is for
 781      Emacs 20 style composition sequence.  */
 782   unsigned char buf[MAX_COMPOSITION_COMPONENTS * MAX_MULTIBYTE_LENGTH];
 783   unsigned char *bufp = buf;
 784   int c, i, gref, nref;
 785
 786   if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
 787       >= COMPOSITION_DATA_SIZE)
 788     {
 789       coding->result = CODING_FINISH_INSUFFICIENT_CMP;
 790       return -1;
 791     }
 792
 793   ONE_MORE_BYTE (c);
 794   if (c - 0xF0 >= COMPOSITION_RELATIVE
 795       && c - 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS)
 796     {
 797       int with_rule;
 798
 799       method = c - 0xF0;
 800       with_rule = (method == COMPOSITION_WITH_RULE
 801                    || method == COMPOSITION_WITH_RULE_ALTCHARS);
 802       ONE_MORE_BYTE (c);
 803       data_len = c - 0xA0;
 804       if (data_len < 4
 805           || src_base + data_len > src_end)
 806         return 0;
 807       ONE_MORE_BYTE (c);
 808       nchars = c - 0xA0;
           /* Check the decoded count rather than the raw byte; a byte
              below 0xA1 would otherwise yield a zero or negative number
              of composed characters.  */
 809       if (nchars < 1)
 810         return 0;
 811       for (ncomponent = 0; src < src_base + data_len; ncomponent++)
 812         {
               /* A sequence with more components than this can't be
                  valid, and storing it would run past the end of
                  COMPONENT.  */
               if (ncomponent >= COMPOSITION_DATA_MAX_BUNCH_LENGTH)
                 return 0;

               /* With a rule-based method, every odd component is a
                  rule stored as two bytes, each offset by 32.  */
 813           if (ncomponent % 2 && with_rule)
 814             {
 815               ONE_MORE_BYTE (gref);
 816               gref -= 32;
 817               ONE_MORE_BYTE (nref);
 818               nref -= 32;
 819               c = COMPOSITION_ENCODE_RULE (gref, nref);
 820             }
 821           else
 822             {
 823               int bytes;
 824               if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
 825                 c = STRING_CHAR (src, bytes);
 826               else
 827                 c = *src, bytes = 1;
 828               src += bytes;
 829             }
 830           component[ncomponent] = c;
 831         }
 832     }
 833   else
 834     {
 835       /* This may be an old Emacs 20 style format.  See the comment at
 836          the section 2 of this file.  */
 837       while (src < src_end && !CHAR_HEAD_P (*src)) src++;
 838       if (src == src_end
 839           && !(coding->mode & CODING_MODE_LAST_BLOCK))
 840         goto label_end_of_loop;
 841
           /* From here on, SRC_END marks the end of this sequence only,
              and SRC is rewound to the byte just after SRC_BASE.  */
 842       src_end = src;
 843       src = src_base + 1;
 844       if (c < 0xC0)
 845         {
 846           method = COMPOSITION_RELATIVE;
 847           for (ncomponent = 0; ncomponent < MAX_COMPOSITION_COMPONENTS;)
 848             {
 849               DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 850               if (c < 0)
 851                 break;
 852               component[ncomponent++] = c;
 853             }
 854           if (ncomponent < 2)
 855             return 0;
 856           nchars = ncomponent;
 857         }
 858       else if (c == 0xFF)
 859         {
 860           method = COMPOSITION_WITH_RULE;
 861           src++;
 862           DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 863           if (c < 0)
 864             return 0;
 865           component[0] = c;
 866           for (ncomponent = 1;
 867                ncomponent < MAX_COMPOSITION_COMPONENTS * 2 - 1;)
 868             {
 869               DECODE_EMACS_MULE_COMPOSITION_RULE (c);
 870               if (c < 0)
 871                 break;
 872               component[ncomponent++] = c;
 873               DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 874               if (c < 0)
 875                 break;
 876               component[ncomponent++] = c;
 877             }
 878           if (ncomponent < 3)
 879             return 0;
               /* Components alternate CHAR RULE CHAR ..., so count only
                  the characters.  */
 880           nchars = (ncomponent + 1) / 2;
 881         }
 882       else
 883         return 0;
 884     }
 885
       /* Record the composition only if the characters collected in BUF
          (Emacs 20 style only) fit below the output limit.  */
 886   if (buf == bufp || dst + (bufp - buf) <= (dst_bytes ? dst_end : src))
 887     {
 888       CODING_ADD_COMPOSITION_START (coding, coding->produced_char, method);
 889       for (i = 0; i < ncomponent; i++)
 890         CODING_ADD_COMPOSITION_COMPONENT (coding, component[i]);
 891       CODING_ADD_COMPOSITION_END (coding, coding->produced_char + nchars);
 892       if (buf < bufp)
 893         {
 894           unsigned char *p = buf;
 895           EMIT_BYTES (p, bufp);
 896           *destination += bufp - buf;
 897           coding->produced_char += nchars;
 898         }
 899       return (src - src_base);
 900     }
 901  label_end_of_loop:
 902   return -1;
 903 }
 904
 905 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".

        Decode SRC_BYTES bytes at SOURCE into DESTINATION.  CR and LF
        bytes are handled according to CODING->eol_type, a sequence
        starting with 0x80 is handed to decode_composition_emacs_mule, a
        sequence accepted by UNIBYTE_STR_AS_MULTIBYTE_P is copied as is,
        and any other byte is converted by CHAR_STRING.

        On return, CODING->consumed and CODING->consumed_char are both
        the offset from SOURCE of the last loop start (SRC_BASE),
        CODING->produced is the number of bytes written to DESTINATION,
        and CODING->produced_char is the number of characters counted.  */
 906
 907 static void
 908 decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
 909      struct coding_system *coding;
 910      unsigned char *source, *destination;
 911      int src_bytes, dst_bytes;
 912 {
 913   unsigned char *src = source;
 914   unsigned char *src_end = source + src_bytes;
 915   unsigned char *dst = destination;
 916   unsigned char *dst_end = destination + dst_bytes;
 917   /* SRC_BASE remembers the start position in source in each loop.
 918      The loop will be exited when there's not enough source code, or
 919      when there's not enough destination area to produce a
 920      character.  */
 921   unsigned char *src_base;
 922
 923   coding->produced_char = 0;
 924   while ((src_base = src) < src_end)
 925     {
 926       unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p;
 927       int bytes;
 928
           /* NOTE(review): the two end-of-line branches below store to
              DST without the space check applied to other characters
              further down -- confirm that callers guarantee room.  */
 929       if (*src == '\r')
 930         {
 931           int c = *src++;
 932
 933           if (coding->eol_type == CODING_EOL_CR)
 934             c = '\n';
 935           else if (coding->eol_type == CODING_EOL_CRLF)
 936             {
 937               ONE_MORE_BYTE (c);
 938               if (c != '\n')
 939                 {
                       /* A CR that is not followed by LF is either
                          reported as inconsistent, or the byte just read
                          is pushed back and the CR is kept as is.  */
 940                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
 941                     {
 942                       coding->result = CODING_FINISH_INCONSISTENT_EOL;
 943                       goto label_end_of_loop;
 944                     }
 945                   src--;
 946                   c = '\r';
 947                 }
 948             }
 949           *dst++ = c;
 950           coding->produced_char++;
 951           continue;
 952         }
 953       else if (*src == '\n')
 954         {
               /* A bare LF is inconsistent when the expected end-of-line
                  is CR or CRLF.  */
 955           if ((coding->eol_type == CODING_EOL_CR
 956                || coding->eol_type == CODING_EOL_CRLF)
 957               && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
 958             {
 959               coding->result = CODING_FINISH_INCONSISTENT_EOL;
 960               goto label_end_of_loop;
 961             }
 962           *dst++ = *src++;
 963           coding->produced_char++;
 964           continue;
 965         }
 966       else if (*src == 0x80)
 967         {
 968           /* Start of composition data.  */
 969           int consumed  = decode_composition_emacs_mule (coding, src, src_end,
 970                                                          &dst, dst_end,
 971                                                          dst_bytes);
 972           if (consumed < 0)
 973             goto label_end_of_loop;
 974           else if (consumed > 0)
 975             {
 976               src += consumed;
 977               continue;
 978             }
               /* CONSUMED is 0: the sequence was rejected, so pass the
                  0x80 byte itself through CHAR_STRING.  */
 979           bytes = CHAR_STRING (*src, tmp);
 980           p = tmp;
 981           src++;
 982         }
 983       else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
 984         {
 985           p = src;
 986           src += bytes;
 987         }
 988       else
 989         {
 990           bytes = CHAR_STRING (*src, tmp);
 991           p = tmp;
 992           src++;
 993         }
           /* NOTE(review): when DST_BYTES is 0 the limit is SRC rather
              than DST_END -- presumably the conversion is then done in
              place; confirm with the callers.  */
 994       if (dst + bytes >= (dst_bytes ? dst_end : src))
 995         {
 996           coding->result = CODING_FINISH_INSUFFICIENT_DST;
 997           break;
 998         }
 999       while (bytes--) *dst++ = *p++;
1000       coding->produced_char++;
1001     }
1002  label_end_of_loop:
1003   coding->consumed = coding->consumed_char = src_base - source;
1004   coding->produced = dst - destination;
1005 }
1006
1007
1008 /* Encode composition data stored at DATA into a special byte sequence
1009    starting with 0x80.  Update CODING->cmp_data_start and maybe
1010    CODING->cmp_data for the next call.

        The sequence built in BUF is:
          0x80
          0xF0 + DATA[3]                (METHOD)
          0xA0 + length of the sequence (COMPONENTS-BYTES)
          0xA0 + (DATA[2] - DATA[1])    (COMPOSED-CHARS)
          the components DATA[4] .. DATA[DATA[0] - 1]
        For the two WITH_RULE methods the components alternate between
        characters and rules; a rule is written as the two bytes
        0x20 + GREF and 0x20 + NREF.  Characters are written by
        CHAR_STRING.

        The macro is not self-contained: it uses DST, DST_END, DST_BYTES
        and SRC of the place where it is expanded, and when the sequence
        does not fit it sets CODING->result to
        CODING_FINISH_INSUFFICIENT_DST and jumps to `label_end_of_loop'
        there.

        NOTE(review): BUF[2] is a single byte, so the length field can
        only represent sequences of up to 0x5F bytes -- confirm that the
        component data can never be longer than that.  */
1011
1012 #define ENCODE_COMPOSITION_EMACS_MULE(coding, data)                     \
1013   do {                                                                  \
1014     unsigned char buf[1024], *p0 = buf, *p;                             \
1015     int len = data[0];                                                  \
1016     int i;                                                              \
1017                                                                         \
1018     buf[0] = 0x80;                                                      \
1019     buf[1] = 0xF0 + data[3];    /* METHOD */                            \
1020     buf[3] = 0xA0 + (data[2] - data[1]); /* COMPOSED-CHARS */           \
1021     p = buf + 4;                                                        \
1022     if (data[3] == COMPOSITION_WITH_RULE                                \
1023         || data[3] == COMPOSITION_WITH_RULE_ALTCHARS)                   \
1024       {                                                                 \
1025         p += CHAR_STRING (data[4], p);                                  \
1026         for (i = 5; i < len; i += 2)                                    \
1027           {                                                             \
1028             int gref, nref;                                             \
1029              COMPOSITION_DECODE_RULE (data[i], gref, nref);             \
1030             *p++ = 0x20 + gref;                                         \
1031             *p++ = 0x20 + nref;                                         \
1032             p += CHAR_STRING (data[i + 1], p);                          \
1033           }                                                             \
1034       }                                                                 \
1035     else                                                                \
1036       {                                                                 \
1037         for (i = 4; i < len; i++)                                       \
1038           p += CHAR_STRING (data[i], p);                                \
1039       }                                                                 \
1040     buf[2] = 0xA0 + (p - buf);  /* COMPONENTS-BYTES */                  \
1041                                                                         \
1042     if (dst + (p - buf) + 4 > (dst_bytes ? dst_end : src))              \
1043       {                                                                 \
1044         coding->result = CODING_FINISH_INSUFFICIENT_DST;                \
1045         goto label_end_of_loop;                                         \
1046       }                                                                 \
1047     while (p0 < p)                                                      \
1048       *dst++ = *p0++;                                                   \
1049     coding->cmp_data_start += data[0];                                  \
1050     if (coding->cmp_data_start == coding->cmp_data->used                \
1051         && coding->cmp_data->next)                                      \
1052       {                                                                 \
1053         coding->cmp_data = coding->cmp_data->next;                      \
1054         coding->cmp_data_start = 0;                                     \
1055       }                                                                 \
1056   } while (0)
1057
1058
1059 static void encode_eol P_ ((struct coding_system *, unsigned char *,
1060                             unsigned char *, int, int));
1061
     /* Encode SRC_BYTES bytes of text at SOURCE into DESTINATION.

        If CODING has no composition data, the whole job is passed to
        encode_eol (declared just above).  Otherwise each character is
        read with ONE_MORE_CHAR and written out: a newline becomes CR LF
        or CR when CODING->eol_type is CODING_EOL_CRLF or CODING_EOL_CR,
        a single-byte character is written as one byte, and any other
        character is copied byte for byte.  Before the character at which
        a composition starts, the composition data is written by
        ENCODE_COMPOSITION_EMACS_MULE.

        On exit through `label_end_of_loop', CODING->consumed is the
        offset of SRC_BASE from SOURCE, and CODING->produced and
        CODING->produced_char are both the number of bytes written.  */
1062 static void
1063 encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
1064      struct coding_system *coding;
1065      unsigned char *source, *destination;
1066      int src_bytes, dst_bytes;
1067 {
1068   unsigned char *src = source;
1069   unsigned char *src_end = source + src_bytes;
1070   unsigned char *dst = destination;
1071   unsigned char *dst_end = destination + dst_bytes;
1072   unsigned char *src_base;
1073   int c;
1074   int char_offset;
1075   int *data;
1076
       /* NOTE(review): TRANSLATION_TABLE is never named again in this
          function; presumably ONE_MORE_CHAR reads it -- confirm before
          removing it.  */
1077   Lisp_Object translation_table;
1078
1079   translation_table = Qnil;
1080
1081   /* Optimization for the case that there's no composition.  */
1082   if (!coding->cmp_data || coding->cmp_data->used == 0)
1083     {
1084       encode_eol (coding, source, destination, src_bytes, dst_bytes);
1085       return;
1086     }
1087
1088   char_offset = coding->cmp_data->char_offset;
1089   data = coding->cmp_data->data + coding->cmp_data_start;
       /* The loop has no exit condition of its own.
          ENCODE_COMPOSITION_EMACS_MULE jumps to `label_end_of_loop' when
          the destination is too small; NOTE(review): presumably
          ONE_MORE_CHAR and the EMIT_* macros do the same at the end of
          the source or destination -- confirm in their definitions.  */
1090   while (1)
1091     {
1092       src_base = src;
1093
1094       /* If SRC starts a composition, encode the information about the
1095          composition in advance.  */
1096       if (coding->cmp_data_start < coding->cmp_data->used
1097           && char_offset + coding->consumed_char == data[1])
1098         {
1099           ENCODE_COMPOSITION_EMACS_MULE (coding, data);
               /* The macro may have advanced CODING->cmp_data_start or
                  switched CODING->cmp_data, so reload both locals.  */
1100           char_offset = coding->cmp_data->char_offset;
1101           data = coding->cmp_data->data + coding->cmp_data_start;
1102         }
1103
1104       ONE_MORE_CHAR (c);
1105       if (c == '\n' && (coding->eol_type == CODING_EOL_CRLF
1106                         || coding->eol_type == CODING_EOL_CR))
1107         {
1108           if (coding->eol_type == CODING_EOL_CRLF)
1109             EMIT_TWO_BYTES ('\r', c);
1110           else
1111             EMIT_ONE_BYTE ('\r');
1112         }
1113       else if (SINGLE_BYTE_CHAR_P (c))
1114         EMIT_ONE_BYTE (c);
1115       else
1116         EMIT_BYTES (src_base, src);
1117       coding->consumed_char++;
1118     }
1119  label_end_of_loop:
1120   coding->consumed = src_base - source;
1121   coding->produced = coding->produced_char = dst - destination;
1122   return;
1123 }
1124
1125 \f
1126 /*** 3. ISO2022 handlers ***/
1127
1128 /* The following note describes the coding system ISO2022 briefly.
1129    Since the intention of this note is to help understand the
1130    functions in this file, some parts are NOT ACCURATE or are OVERLY
1131    SIMPLIFIED.  For thorough understanding, please refer to the
1132    original document of ISO2022.  This is equivalent to the standard
1133    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
1134
1135    ISO2022 provides many mechanisms to encode several character sets
1136    in 7-bit and 8-bit environments.  For 7-bit environments, all text
1137    is encoded using bytes less than 128.  This may make the encoded
1138    text a little bit longer, but the text passes more easily through
1139    several types of gateway, some of which strip off the MSB (Most
1140    Significant Bit).
1141
1142    There are two kinds of character sets: control character sets and
1143    graphic character sets.  The former contain control characters such
1144    as `newline' and `escape' to provide control functions (control
1145    functions are also provided by escape sequences).  The latter
1146    contain graphic characters such as 'A' and '-'.  Emacs recognizes
1147    two control character sets and many graphic character sets.
1148
1149    Graphic character sets are classified into one of the following
1150    four classes, according to the number of bytes (DIMENSION) and
1151    number of characters in one dimension (CHARS) of the set:
1152    - DIMENSION1_CHARS94
1153    - DIMENSION1_CHARS96
1154    - DIMENSION2_CHARS94
1155    - DIMENSION2_CHARS96
1156
1157    In addition, each character set is assigned an identification tag,
1158    unique for each set, called the "final character" (denoted as <F>
1159    hereafter).  The <F> of each character set is decided by ECMA(*)
1160    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
1161    (0x30..0x3F are for private use only).
1162
1163    Note (*): ECMA = European Computer Manufacturers Association
1164
1165    Here are examples of graphic character sets [NAME(<F>)]:
1166         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
1167         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
1168         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
1169         o DIMENSION2_CHARS96 -- none for the moment
1170
1171    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
1172         C0 [0x00..0x1F] -- control character plane 0
1173         GL [0x20..0x7F] -- graphic character plane 0
1174         C1 [0x80..0x9F] -- control character plane 1
1175         GR [0xA0..0xFF] -- graphic character plane 1
1176
1177    A control character set is directly designated and invoked to C0 or
1178    C1 by an escape sequence.  The most common case is that:
1179    - ISO646's  control character set is designated/invoked to C0, and
1180    - ISO6429's control character set is designated/invoked to C1,
1181    and usually these designations/invocations are omitted in encoded
1182    text.  In a 7-bit environment, only C0 can be used, and a control
1183    character for C1 is encoded by an appropriate escape sequence to
1184    fit into the environment.  All control characters for C1 are
1185    defined to have corresponding escape sequences.
1186
1187    A graphic character set is at first designated to one of four
1188    graphic registers (G0 through G3), then these graphic registers are
1189    invoked to GL or GR.  These designations and invocations can be
1190    done independently.  The most common case is that G0 is invoked to
1191    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
1192    these invocations and designations are omitted in encoded text.
1193    In a 7-bit environment, only GL can be used.
1194
1195    When a graphic character set of CHARS94 is invoked to GL, codes
1196    0x20 and 0x7F of the GL area work as control characters SPACE and
1197    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
1198    be used.
1199
1200    There are two ways of invocation: locking-shift and single-shift.
1201    With locking-shift, the invocation lasts until the next different
1202    invocation, whereas with single-shift, the invocation affects the
1203    following character only and doesn't affect the locking-shift
1204    state.  Invocations are done by the following control characters or
1205    escape sequences:
1206
1207    ----------------------------------------------------------------------
1208    abbrev  function                  cntrl escape seq   description
1209    ----------------------------------------------------------------------
1210    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
1211    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
1212    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
1213    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
1214    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
1215    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
1216    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
1217    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
1218    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
1219    ----------------------------------------------------------------------
1220    (*) These are not used by any known coding system.
1221
1222    Control characters for these functions are defined by macros
1223    ISO_CODE_XXX in `coding.h'.
1224
1225    Designations are done by the following escape sequences:
1226    ----------------------------------------------------------------------
1227    escape sequence      description
1228    ----------------------------------------------------------------------
1229    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
1230    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
1231    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
1232    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
1233    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
1234    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
1235    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
1236    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
1237    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
1238    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
1239    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
1240    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
1241    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
1242    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
1243    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
1244    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
1245    ----------------------------------------------------------------------
1246
1247    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
1248    of dimension 1, chars 94, and final character <F>, etc...
1249
1250    Note (*): Although these designations are not allowed in ISO2022,
1251    Emacs accepts them on decoding, and produces them on encoding
1252    CHARS96 character sets in a coding system which is characterized as
1253    7-bit environment, non-locking-shift, and non-single-shift.
1254
1255    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
1256    '(' can be omitted.  We refer to this as "short-form" hereafter.
1257
1258    Now you may notice that there are a lot of ways of encoding the
1259    same multilingual text in ISO2022.  Actually, there exist many
1260    coding systems such as Compound Text (used in X11's inter-client
1261    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
1262    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
1263    localized platforms), and all of these are variants of ISO2022.
1264
1265    In addition to the above, Emacs handles two more kinds of escape
1266    sequences: ISO6429's direction specification and Emacs' private
1267    sequence for specifying character composition.
1268
1269    ISO6429's direction specification takes the following form:
1270         o CSI ']'      -- end of the current direction
1271         o CSI '0' ']'  -- end of the current direction
1272         o CSI '1' ']'  -- start of left-to-right text
1273         o CSI '2' ']'  -- start of right-to-left text
1274    The control character CSI (0x9B: control sequence introducer) is
1275    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
1276
1277    Character composition specification takes the following form:
1278         o ESC '0' -- start relative composition
1279         o ESC '1' -- end composition
1280         o ESC '2' -- start rule-base composition (*)
1281         o ESC '3' -- start relative composition with alternate chars  (**)
1282         o ESC '4' -- start rule-base composition with alternate chars  (**)
1283   Since these are not standard escape sequences of any ISO standard,
1284   the use of them with these meanings is restricted to Emacs only.
1285
1286   (*) This form is used only in Emacs 20.5 and older versions,
1287   but the newer versions can safely decode it.
1288   (**) This form is used only in Emacs 21.1 and newer versions,
1289   and the older versions can't decode it.
1290
1291   Here's a list of example usages of these composition escape
1292   sequences (categorized by `enum composition_method').
1293
1294   COMPOSITION_RELATIVE:
1295         ESC 0 CHAR [ CHAR ] ESC 1
1296   COMPOSITION_WITH_RULE:
1297         ESC 2 CHAR [ RULE CHAR ] ESC 1
1298   COMPOSITION_WITH_ALTCHARS:
1299         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
1300   COMPOSITION_WITH_RULE_ALTCHARS:
1301         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
1302
     /* Table holding one `enum iso_code_class_type' value for each index
        0..255.  NOTE(review): presumably indexed by byte value to
        classify the bytes of ISO2022 text; where it is filled is not
        visible near this definition -- confirm.  */
1303 enum iso_code_class_type iso_code_class[256];
1304
     /* Nonzero if all of the following hold: coding_system_table[IDX] is
        non-null; CHARSET is CHARSET_ASCII, or C passes
        CODING_SAFE_CHAR_P for the safe chars of that coding system; and
        the requested designation of CHARSET in that coding system is
        not CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION.

        Unless CHARSET is CHARSET_ASCII, the macro assigns to a variable
        named `safe_chars', which must exist where it is expanded.  */
1305 #define CHARSET_OK(idx, charset, c)                                     \
1306   (coding_system_table[idx]                                             \
1307    && (charset == CHARSET_ASCII                                         \
1308        || (safe_chars = coding_safe_chars (coding_system_table[idx]),   \
1309            CODING_SAFE_CHAR_P (safe_chars, c)))                         \
1310    && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \
1311                                               charset)                  \
1312        != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
1313
     /* Nonzero if the initial designation of register 1 in
        coding_system_table[IDX] is non-negative.  NOTE(review): unlike
        CHARSET_OK, the table entry is not tested for null here --
        confirm that the entries passed as IDX are always set.  */
1314 #define SHIFT_OUT_OK(idx) \
1315   (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
1316
1317 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1318    Check if a text is encoded in ISO2022.  If it is, return an
1319    integer in which the appropriate flag bits, any of:
1320         CODING_CATEGORY_MASK_ISO_7
1321         CODING_CATEGORY_MASK_ISO_7_TIGHT
1322         CODING_CATEGORY_MASK_ISO_8_1
1323         CODING_CATEGORY_MASK_ISO_8_2
1324         CODING_CATEGORY_MASK_ISO_7_ELSE
1325         CODING_CATEGORY_MASK_ISO_8_ELSE
1326    are set.  If a code which should never appear in ISO2022 is found,
1327    returns 0.  */
1328
1329 static int
1330 detect_coding_iso2022 (src, src_end, multibytep)
1331      unsigned char *src, *src_end;
1332      int multibytep;
1333 {
1334   int mask = CODING_CATEGORY_MASK_ISO;
1335   int mask_found = 0;
1336   int reg[4], shift_out = 0, single_shifting = 0;
1337   int c, c1, charset;
1338   /* Dummy for ONE_MORE_BYTE.  */
1339   struct coding_system dummy_coding;
1340   struct coding_system *coding = &dummy_coding;
1341   Lisp_Object safe_chars;
1342
1343   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
1344   while (mask && src < src_end)
1345     {
1346       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1347       switch (c)
1348         {
1349         case ISO_CODE_ESC:
1350           if (inhibit_iso_escape_detection)
1351             break;
1352           single_shifting = 0;
1353           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1354           if (c >= '(' && c <= '/')
1355             {
1356               /* Designation sequence for a charset of dimension 1.  */
1357               ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
1358               if (c1 < ' ' || c1 >= 0x80
1359                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
1360                 /* Invalid designation sequence.  Just ignore.  */
1361                 break;
1362               reg[(c - '(') % 4] = charset;
1363             }
1364           else if (c == '$')
1365             {
1366               /* Designation sequence for a charset of dimension 2.  */
1367               ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1368               if (c >= '@' && c <= 'B')
1369                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
1370                 reg[0] = charset = iso_charset_table[1][0][c];
1371               else if (c >= '(' && c <= '/')
1372                 {
1373                   ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
1374                   if (c1 < ' ' || c1 >= 0x80
1375                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
1376                     /* Invalid designation sequence.  Just ignore.  */
1377                     break;
1378                   reg[(c - '(') % 4] = charset;
1379                 }
1380               else
1381                 /* Invalid designation sequence.  Just ignore.  */
1382                 break;
1383             }
1384           else if (c == 'N' || c == 'O')
1385             {
1386               /* ESC <Fe> for SS2 or SS3.  */
1387               mask &= CODING_CATEGORY_MASK_ISO_7_ELSE;
1388               break;
1389             }
1390           else if (c >= '0' && c <= '4')
1391             {
1392               /* ESC <Fp> for start/end composition.  */
1393               mask_found |= CODING_CATEGORY_MASK_ISO;
1394               break;
1395             }
1396           else
1397             /* Invalid escape sequence.  Just ignore.  */
1398             break;
1399
1400           /* We found a valid designation sequence for CHARSET.  */
1401           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
1402           c = MAKE_CHAR (charset, 0, 0);
1403           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset, c))
1404             mask_found |= CODING_CATEGORY_MASK_ISO_7;
1405           else
1406             mask &= ~CODING_CATEGORY_MASK_ISO_7;
1407           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset, c))
1408             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
1409           else
1410             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
1411           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset, c))
1412             mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
1413           else
1414             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
1415           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset, c))
1416             mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
1417           else
1418             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
1419           break;
1420
1421         case ISO_CODE_SO:
1422           if (inhibit_iso_escape_detection)
1423             break;
1424           single_shifting = 0;
1425           if (shift_out == 0
1426               && (reg[1] >= 0
1427                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
1428                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
1429             {
1430               /* Locking shift out.  */
1431               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
1432               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
1433             }
1434           break;
1435
1436         case ISO_CODE_SI:
1437           if (inhibit_iso_escape_detection)
1438             break;
1439           single_shifting = 0;
1440           if (shift_out == 1)
1441             {
1442               /* Locking shift in.  */
1443               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
1444               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
1445             }
1446           break;
1447
1448         case ISO_CODE_CSI:
1449           single_shifting = 0;
1450         case ISO_CODE_SS2:
1451         case ISO_CODE_SS3:
1452           {
1453             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
1454
1455             if (inhibit_iso_escape_detection)
1456               break;
1457             if (c != ISO_CODE_CSI)
1458               {
1459                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1460                     & CODING_FLAG_ISO_SINGLE_SHIFT)
1461                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1462                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1463                     & CODING_FLAG_ISO_SINGLE_SHIFT)
1464                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1465                 single_shifting = 1;
1466               }
1467             if (VECTORP (Vlatin_extra_code_table)
1468                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1469               {
1470                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1471                     & CODING_FLAG_ISO_LATIN_EXTRA)
1472                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1473                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1474                     & CODING_FLAG_ISO_LATIN_EXTRA)
1475                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1476               }
1477             mask &= newmask;
1478             mask_found |= newmask;
1479           }
1480           break;
1481
1482         default:
1483           if (c < 0x80)
1484             {
1485               single_shifting = 0;
1486               break;
1487             }
1488           else if (c < 0xA0)
1489             {
1490               single_shifting = 0;
1491               if (VECTORP (Vlatin_extra_code_table)
1492                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1493                 {
1494                   int newmask = 0;
1495
1496                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1497                       & CODING_FLAG_ISO_LATIN_EXTRA)
1498                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1499                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1500                       & CODING_FLAG_ISO_LATIN_EXTRA)
1501                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1502                   mask &= newmask;
1503                   mask_found |= newmask;
1504                 }
1505               else
1506                 return 0;
1507             }
1508           else
1509             {
1510               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
1511                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
1512               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
1513               /* Check the length of succeeding codes of the range
1514                  0xA0..0FF.  If the byte length is odd, we exclude
1515                  CODING_CATEGORY_MASK_ISO_8_2.  We can check this only
1516                  when we are not single shifting.  */
1517               if (!single_shifting
1518                   && mask & CODING_CATEGORY_MASK_ISO_8_2)
1519                 {
1520                   int i = 1;
1521                   while (src < src_end)
1522                     {
1523                       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1524                       if (c < 0xA0)
1525                         break;
1526                       i++;
1527                     }
1528
1529                   if (i & 1 && src < src_end)
1530                     mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
1531                   else
1532                     mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
1533                 }
1534             }
1535           break;
1536         }
1537     }
1538  label_end_of_loop:
1539   return (mask & mask_found);
1540 }
1541
/* Return the character code of the character whose charset is
   CHARSET, whose 1st position code is C1, and whose 2nd position code
   is C2.  The variable `translation_table' is taken from the place
   where the macro is expanded: when it is nil the result is simply
   MAKE_CHAR (CHARSET, C1, C2), otherwise the value returned by
   translate_char for that table (the translated code) is used.  */

#define DECODE_ISO_CHARACTER(charset, c1, c2)                   \
  (!NILP (translation_table)                                    \
   ? translate_char (translation_table, -1, charset, c1, c2)    \
   : MAKE_CHAR (charset, c1, c2))
1551
/* Set designation state into CODING.

   Look up the charset identified by DIMENSION, CHARS and FINAL_CHAR
   with ISO_CHARSET_TABLE and record it as the charset designated to
   graphic register REG.  The designation is accepted only when the
   charset exists and either CODING requests that charset for REG or
   the charset's first character is safe according to `safe_chars'.
   When CODING_MODE_DIRECTION is set and the charset has a reverse
   charset, the reverse charset is recorded instead.

   In every rejected case control jumps to `label_invalid_code'.
   `coding', `safe_chars' and that label come from the place where the
   macro is expanded.  */
#define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
  do {                                                                     \
    int dd_charset, dd_char;                                               \
                                                                           \
    if ((final_char) < '0' || (final_char) >= 128)                         \
      goto label_invalid_code;                                             \
    dd_charset = ISO_CHARSET_TABLE (make_number (dimension),               \
                                    make_number (chars),                   \
                                    make_number (final_char));             \
    dd_char = MAKE_CHAR (dd_charset, 0, 0);                                \
    if (dd_charset < 0                                                     \
        || (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, dd_charset)     \
            != (reg)                                                       \
            && !(CODING_SAFE_CHAR_P (safe_chars, dd_char))))               \
      {                                                                    \
        /* No such charset, or CODING does not allow it: remember          \
           which register the rejected designation was aimed at.  */       \
        coding->spec.iso2022.last_invalid_designation_register = (reg);    \
        goto label_invalid_code;                                           \
      }                                                                    \
    if (coding->spec.iso2022.last_invalid_designation_register == 0        \
        && (reg) == 0                                                      \
        && dd_charset == CHARSET_ASCII)                                    \
      {                                                                    \
        /* ASCII is designated to G0 right after an invalid                \
           designation to G0.  We should insert this designation           \
           sequence as is so that it is surely written back to a           \
           file.  */                                                       \
        coding->spec.iso2022.last_invalid_designation_register = -1;       \
        goto label_invalid_code;                                           \
      }                                                                    \
    coding->spec.iso2022.last_invalid_designation_register = -1;           \
    if ((coding->mode & CODING_MODE_DIRECTION)                             \
        && CHARSET_REVERSE_CHARSET (dd_charset) >= 0)                      \
      dd_charset = CHARSET_REVERSE_CHARSET (dd_charset);                   \
    CODING_SPEC_ISO_DESIGNATION (coding, reg) = dd_charset;                \
  } while (0)
1588
1589 /* Allocate a memory block for storing information about compositions.
1590    The block is chained to the already allocated blocks.  */
1591
1592 void
1593 coding_allocate_composition_data (coding, char_offset)
1594      struct coding_system *coding;
1595      int char_offset;
1596 {
1597   struct composition_data *cmp_data
1598     = (struct composition_data *) xmalloc (sizeof *cmp_data);
1599
1600   cmp_data->char_offset = char_offset;
1601   cmp_data->used = 0;
1602   cmp_data->prev = coding->cmp_data;
1603   cmp_data->next = NULL;
1604   if (coding->cmp_data)
1605     coding->cmp_data->next = cmp_data;
1606   coding->cmp_data = cmp_data;
1607   coding->cmp_data_start = 0;
1608 }
1609
/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
   C1 is the byte that followed ESC.
   ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
   ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
   ESC 3 : altchar composition :  ESC 3 ALT ... ESC 0 CHAR ... ESC 1
   ESC 4 : alt&rule composition : ESC 4 ALT RULE .. ALT ESC 0 CHAR ... ESC 1

   The macro works on `coding' and `dst' of the place where it is
   expanded, and may jump to `label_end_of_loop' there.  */

#define DECODE_COMPOSITION_START(c1)                                       \
  do {                                                                     \
    if (coding->composing == COMPOSITION_DISABLED)                         \
      {                                                                    \
        /* Compositions are not decoded; copy the two bytes of the         \
           sequence to the destination.  */                                \
        dst[0] = ISO_CODE_ESC;                                             \
        dst[1] = c1 & 0x7f;                                                \
        dst += 2;                                                          \
        coding->produced_char += 2;                                        \
      }                                                                    \
    else if (COMPOSING_P (coding))                                         \
      {                                                                    \
        /* A composition is already in progress.  For the two              \
           methods with alternate characters, the codes following          \
           the current escape sequence are the actual characters           \
           stored in a buffer.  */                                         \
        if (coding->composing == COMPOSITION_WITH_ALTCHARS                 \
            || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)        \
          {                                                                \
            coding->composing = COMPOSITION_RELATIVE;                      \
            coding->composition_rule_follows = 0;                          \
          }                                                                \
      }                                                                    \
    else                                                                   \
      {                                                                    \
        /* This is surely the start of a composition.  coding->cmp_data    \
           must have room for the information about it.  If it has         \
           not, stop the current decoding loop so that the caller can      \
           allocate one more memory block for coding->cmp_data and         \
           start the decoding loop again.  Memory can't be allocated       \
           here directly because that may cause buffer/string              \
           relocation.  */                                                 \
        if (!coding->cmp_data                                              \
            || (COMPOSITION_DATA_SIZE                                      \
                <= (coding->cmp_data->used                                 \
                    + COMPOSITION_DATA_MAX_BUNCH_LENGTH)))                 \
          {                                                                \
            coding->result = CODING_FINISH_INSUFFICIENT_CMP;               \
            goto label_end_of_loop;                                        \
          }                                                                \
        if (c1 == '0')                                                     \
          coding->composing = COMPOSITION_RELATIVE;                        \
        else if (c1 == '2')                                                \
          coding->composing = COMPOSITION_WITH_RULE;                       \
        else if (c1 == '3')                                                \
          coding->composing = COMPOSITION_WITH_ALTCHARS;                   \
        else                                                               \
          coding->composing = COMPOSITION_WITH_RULE_ALTCHARS;              \
        CODING_ADD_COMPOSITION_START (coding, coding->produced_char,       \
                                      coding->composing);                  \
        coding->composition_rule_follows = 0;                              \
      }                                                                    \
  } while (0)
1662
/* Handle composition end sequence ESC 1.  C1 is the byte that
   followed ESC.  Unless composition handling is disabled, the end of
   the composition is recorded at the current character position and
   CODING leaves the composing state; when it is disabled, the two
   bytes of the sequence are copied to the destination instead.  The
   macro works on `coding' and `dst' of the place where it is
   expanded.  */

#define DECODE_COMPOSITION_END(c1)                                      \
  do {                                                                  \
    if (coding->composing != COMPOSITION_DISABLED)                      \
      {                                                                 \
        CODING_ADD_COMPOSITION_END (coding, coding->produced_char);     \
        coding->composing = COMPOSITION_NO;                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        dst[0] = ISO_CODE_ESC;                                          \
        dst[1] = c1;                                                    \
        dst += 2;                                                       \
        coding->produced_char += 2;                                     \
      }                                                                 \
  } while (0)
1679
/* Decode a composition rule from the byte C1 (and maybe one more byte
   from SRC) and store one encoded composition rule in
   coding->cmp_data.

   C1 must be a variable; it is modified (32 is subtracted from it).
   The second byte, when one is needed, is read with ONE_MORE_BYTE
   into the variable `c2' of the place where the macro is expanded.
   A value of C1 outside both formats stores the rule 0.  */

#define DECODE_COMPOSITION_RULE(c1)                                     \
  do {                                                                  \
    int encoded_rule = 0;                                               \
                                                                        \
    (c1) -= 32;                                                         \
    if ((c1) >= 81)                                                     \
      {                                                                 \
        if ((c1) < 93)          /* new format (after ver.21) */         \
          {                                                             \
            ONE_MORE_BYTE (c2);                                         \
            encoded_rule = COMPOSITION_ENCODE_RULE ((c1) - 81, c2 - 32); \
          }                                                             \
      }                                                                 \
    else                        /* old format (before ver.21) */        \
      {                                                                 \
        int global_ref = (c1) / 9;                                      \
        int new_ref = (c1) % 9;                                         \
                                                                        \
        /* In the old format the reference point 4 stands for 10.  */   \
        global_ref = (global_ref == 4 ? 10 : global_ref);               \
        new_ref = (new_ref == 4 ? 10 : new_ref);                        \
        encoded_rule = COMPOSITION_ENCODE_RULE (global_ref, new_ref);   \
      }                                                                 \
    CODING_ADD_COMPOSITION_COMPONENT (coding, encoded_rule);            \
    coding->composition_rule_follows = 0;                               \
  } while (0)
1704
1705
1706 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1707
1708 static void
1709 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1710      struct coding_system *coding;
1711      unsigned char *source, *destination;
1712      int src_bytes, dst_bytes;
1713 {
1714   unsigned char *src = source;
1715   unsigned char *src_end = source + src_bytes;
1716   unsigned char *dst = destination;
1717   unsigned char *dst_end = destination + dst_bytes;
1718   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1719   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1720   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1721   /* SRC_BASE remembers the start position in source in each loop.
1722      The loop will be exited when there's not enough source code
1723      (within macro ONE_MORE_BYTE), or when there's not enough
1724      destination area to produce a character (within macro
1725      EMIT_CHAR).  */
1726   unsigned char *src_base;
1727   int c, charset;
1728   Lisp_Object translation_table;
1729   Lisp_Object safe_chars;
1730
1731   safe_chars = coding_safe_chars (coding);
1732
1733   if (NILP (Venable_character_translation))
1734     translation_table = Qnil;
1735   else
1736     {
1737       translation_table = coding->translation_table_for_decode;
1738       if (NILP (translation_table))
1739         translation_table = Vstandard_translation_table_for_decode;
1740     }
1741
1742   coding->result = CODING_FINISH_NORMAL;
1743
1744   while (1)
1745     {
1746       int c1, c2;
1747
1748       src_base = src;
1749       ONE_MORE_BYTE (c1);
1750
1751       /* We produce no character or one character.  */
1752       switch (iso_code_class [c1])
1753         {
1754         case ISO_0x20_or_0x7F:
1755           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1756             {
1757               DECODE_COMPOSITION_RULE (c1);
1758               continue;
1759             }
1760           if (charset0 < 0 || CHARSET_CHARS (charset0) == 94)
1761             {
1762               /* This is SPACE or DEL.  */
1763               charset = CHARSET_ASCII;
1764               break;
1765             }
1766           /* This is a graphic character, we fall down ...  */
1767
1768         case ISO_graphic_plane_0:
1769           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1770             {
1771               DECODE_COMPOSITION_RULE (c1);
1772               continue;
1773             }
1774           charset = charset0;
1775           break;
1776
1777         case ISO_0xA0_or_0xFF:
1778           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1779               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1780             goto label_invalid_code;
1781           /* This is a graphic character, we fall down ... */
1782
1783         case ISO_graphic_plane_1:
1784           if (charset1 < 0)
1785             goto label_invalid_code;
1786           charset = charset1;
1787           break;
1788
1789         case ISO_control_0:
1790           if (COMPOSING_P (coding))
1791             DECODE_COMPOSITION_END ('1');
1792
1793           /* All ISO2022 control characters in this class have the
1794              same representation in Emacs internal format.  */
1795           if (c1 == '\n'
1796               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1797               && (coding->eol_type == CODING_EOL_CR
1798                   || coding->eol_type == CODING_EOL_CRLF))
1799             {
1800               coding->result = CODING_FINISH_INCONSISTENT_EOL;
1801               goto label_end_of_loop;
1802             }
1803           charset = CHARSET_ASCII;
1804           break;
1805
1806         case ISO_control_1:
1807           if (COMPOSING_P (coding))
1808             DECODE_COMPOSITION_END ('1');
1809           goto label_invalid_code;
1810
1811         case ISO_carriage_return:
1812           if (COMPOSING_P (coding))
1813             DECODE_COMPOSITION_END ('1');
1814
1815           if (coding->eol_type == CODING_EOL_CR)
1816             c1 = '\n';
1817           else if (coding->eol_type == CODING_EOL_CRLF)
1818             {
1819               ONE_MORE_BYTE (c1);
1820               if (c1 != ISO_CODE_LF)
1821                 {
1822                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1823                     {
1824                       coding->result = CODING_FINISH_INCONSISTENT_EOL;
1825                       goto label_end_of_loop;
1826                     }
1827                   src--;
1828                   c1 = '\r';
1829                 }
1830             }
1831           charset = CHARSET_ASCII;
1832           break;
1833
1834         case ISO_shift_out:
1835           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1836               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1837             goto label_invalid_code;
1838           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1839           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1840           continue;
1841
1842         case ISO_shift_in:
1843           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1844             goto label_invalid_code;
1845           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1846           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1847           continue;
1848
1849         case ISO_single_shift_2_7:
1850         case ISO_single_shift_2:
1851           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1852             goto label_invalid_code;
1853           /* SS2 is handled as an escape sequence of ESC 'N' */
1854           c1 = 'N';
1855           goto label_escape_sequence;
1856
1857         case ISO_single_shift_3:
1858           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1859             goto label_invalid_code;
1860           /* SS2 is handled as an escape sequence of ESC 'O' */
1861           c1 = 'O';
1862           goto label_escape_sequence;
1863
1864         case ISO_control_sequence_introducer:
1865           /* CSI is handled as an escape sequence of ESC '[' ...  */
1866           c1 = '[';
1867           goto label_escape_sequence;
1868
1869         case ISO_escape:
1870           ONE_MORE_BYTE (c1);
1871         label_escape_sequence:
1872           /* Escape sequences handled by Emacs are invocation,
1873              designation, direction specification, and character
1874              composition specification.  */
1875           switch (c1)
1876             {
1877             case '&':           /* revision of following character set */
1878               ONE_MORE_BYTE (c1);
1879               if (!(c1 >= '@' && c1 <= '~'))
1880                 goto label_invalid_code;
1881               ONE_MORE_BYTE (c1);
1882               if (c1 != ISO_CODE_ESC)
1883                 goto label_invalid_code;
1884               ONE_MORE_BYTE (c1);
1885               goto label_escape_sequence;
1886
1887             case '$':           /* designation of 2-byte character set */
1888               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1889                 goto label_invalid_code;
1890               ONE_MORE_BYTE (c1);
1891               if (c1 >= '@' && c1 <= 'B')
1892                 {       /* designation of JISX0208.1978, GB2312.1980,
1893                            or JISX0208.1980 */
1894                   DECODE_DESIGNATION (0, 2, 94, c1);
1895                 }
1896               else if (c1 >= 0x28 && c1 <= 0x2B)
1897                 {       /* designation of DIMENSION2_CHARS94 character set */
1898                   ONE_MORE_BYTE (c2);
1899                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1900                 }
1901               else if (c1 >= 0x2C && c1 <= 0x2F)
1902                 {       /* designation of DIMENSION2_CHARS96 character set */
1903                   ONE_MORE_BYTE (c2);
1904                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1905                 }
1906               else
1907                 goto label_invalid_code;
1908               /* We must update these variables now.  */
1909               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1910               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1911               continue;
1912
1913             case 'n':           /* invocation of locking-shift-2 */
1914               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1915                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1916                 goto label_invalid_code;
1917               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1918               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1919               continue;
1920
1921             case 'o':           /* invocation of locking-shift-3 */
1922               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1923                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1924                 goto label_invalid_code;
1925               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
1926               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1927               continue;
1928
1929             case 'N':           /* invocation of single-shift-2 */
1930               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1931                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1932                 goto label_invalid_code;
1933               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
1934               ONE_MORE_BYTE (c1);
1935               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1936                 goto label_invalid_code;
1937               break;
1938
1939             case 'O':           /* invocation of single-shift-3 */
1940               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1941                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1942                 goto label_invalid_code;
1943               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
1944               ONE_MORE_BYTE (c1);
1945               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1946                 goto label_invalid_code;
1947               break;
1948
1949             case '0': case '2': case '3': case '4': /* start composition */
1950               DECODE_COMPOSITION_START (c1);
1951               continue;
1952
1953             case '1':           /* end composition */
1954               DECODE_COMPOSITION_END (c1);
1955               continue;
1956
1957             case '[':           /* specification of direction */
1958               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
1959                 goto label_invalid_code;
1960               /* For the moment, nested direction is not supported.
1961                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
1962                  left-to-right, and nonzero means right-to-left.  */
1963               ONE_MORE_BYTE (c1);
1964               switch (c1)
1965                 {
1966                 case ']':       /* end of the current direction */
1967                   coding->mode &= ~CODING_MODE_DIRECTION;
1968
1969                 case '0':       /* end of the current direction */
1970                 case '1':       /* start of left-to-right direction */
1971                   ONE_MORE_BYTE (c1);
1972                   if (c1 == ']')
1973                     coding->mode &= ~CODING_MODE_DIRECTION;
1974                   else
1975                     goto label_invalid_code;
1976                   break;
1977
1978                 case '2':       /* start of right-to-left direction */
1979                   ONE_MORE_BYTE (c1);
1980                   if (c1 == ']')
1981                     coding->mode |= CODING_MODE_DIRECTION;
1982                   else
1983                     goto label_invalid_code;
1984                   break;
1985
1986                 default:
1987                   goto label_invalid_code;
1988                 }
1989               continue;
1990
1991             default:
1992               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1993                 goto label_invalid_code;
1994               if (c1 >= 0x28 && c1 <= 0x2B)
1995                 {       /* designation of DIMENSION1_CHARS94 character set */
1996                   ONE_MORE_BYTE (c2);
1997                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
1998                 }
1999               else if (c1 >= 0x2C && c1 <= 0x2F)
2000                 {       /* designation of DIMENSION1_CHARS96 character set */
2001                   ONE_MORE_BYTE (c2);
2002                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
2003                 }
2004               else
2005                 goto label_invalid_code;
2006               /* We must update these variables now.  */
2007               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
2008               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
2009               continue;
2010             }
2011         }
2012
2013       /* Now we know CHARSET and 1st position code C1 of a character.
2014          Produce a multibyte sequence for that character while getting
2015          2nd position code C2 if necessary.  */
2016       if (CHARSET_DIMENSION (charset) == 2)
2017         {
2018           ONE_MORE_BYTE (c2);
2019           if (c1 < 0x80 ? c2 < 0x20 || c2 >= 0x80 : c2 < 0xA0)
2020             /* C2 is not in a valid range.  */
2021             goto label_invalid_code;
2022         }
2023       c = DECODE_ISO_CHARACTER (charset, c1, c2);
2024       EMIT_CHAR (c);
2025       continue;
2026
2027     label_invalid_code:
2028       coding->errors++;
2029       if (COMPOSING_P (coding))
2030         DECODE_COMPOSITION_END ('1');
2031       src = src_base;
2032       c = *src++;
2033       EMIT_CHAR (c);
2034     }
2035
2036  label_end_of_loop:
2037   coding->consumed = coding->consumed_char = src_base - source;
2038   coding->produced = dst - destination;
2039   return;
2040 }
2041
2042
2043 /* ISO2022 encoding stuff.  */
2044
/*
   Saying just "ISO2022" is not enough for encoding; we have to
   specify more details.  In Emacs, each ISO2022 coding system
   variant has the following specifications:
        1. Initial designation to G0 through G3.
        2. Allows short-form designation?
        3. ASCII should be designated to G0 before control characters?
        4. ASCII should be designated to G0 at end of line?
        5. 7-bit environment or 8-bit environment?
        6. Use locking-shift?
        7. Use single-shift?
   And the following two are only for Japanese:
        8. Use ASCII in place of JISX0201-1976-Roman?
        9. Use JISX0208-1983 in place of JISX0208-1978?
   These specifications are encoded in `coding->flags' as flag bits
   defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
   details.
*/
2063
2064 /* Produce at DST the escape sequence that designates CHARSET to graphic register REG, advancing DST
2065    (the caller's variable).  ESC & <'@' + revision> is emitted first when CODING's revision number for
2066    CHARSET is below 255.  The short form (no intermediate char) is used only if CHARSET is not of dimension 1,
2067    has 94 chars, REG is 0, <final-char> is '@', 'A', or 'B', and CODING has CODING_FLAG_ISO_SHORT_FORM.
2068    Afterwards CHARSET is recorded as the current designation of REG.  */
2069 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
2070   do {                                                                  \
2071     unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset);        \
2072     char *intermediate_char_94 = "()*+";                                \
2073     char *intermediate_char_96 = ",-./";                                \
2074     int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset);    \
2075                                                                         \
2076     if (revision < 255)                                                 \
2077       {                                                                 \
2078         *dst++ = ISO_CODE_ESC;                                          \
2079         *dst++ = '&';                                                   \
2080         *dst++ = '@' + revision;                                        \
2081       }                                                                 \
2082     *dst++ = ISO_CODE_ESC;                                              \
2083     if (CHARSET_DIMENSION (charset) == 1)                               \
2084       {                                                                 \
2085         if (CHARSET_CHARS (charset) == 94)                              \
2086           *dst++ = (unsigned char) (intermediate_char_94[reg]);         \
2087         else                                                            \
2088           *dst++ = (unsigned char) (intermediate_char_96[reg]);         \
2089       }                                                                 \
2090     else                                                                \
2091       {                                                                 \
2092         *dst++ = '$';                                                   \
2093         if (CHARSET_CHARS (charset) == 94)                              \
2094           {                                                             \
2095             if (! (coding->flags & CODING_FLAG_ISO_SHORT_FORM)          \
2096                 || reg != 0                                             \
2097                 || final_char < '@' || final_char > 'B')                \
2098               *dst++ = (unsigned char) (intermediate_char_94[reg]);     \
2099           }                                                             \
2100         else                                                            \
2101           *dst++ = (unsigned char) (intermediate_char_96[reg]);         \
2102       }                                                                 \
2103     *dst++ = final_char;                                                \
2104     CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;                \
2105   } while (0)
2106
2107 /* Single-shift-2 and single-shift-3 of ISO2022.  Each macro produces
2108    the escape sequence when CODING_FLAG_ISO_SEVEN_BITS is set, or the
2109    control character otherwise, and marks a single shift as pending.  */
2110
2111 #define ENCODE_SINGLE_SHIFT_2                           \
2112   do {                                                  \
2113     if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)) \
2114       *dst++ = ISO_CODE_SS2;                            \
2115     else                                                \
2116       { *dst++ = ISO_CODE_ESC; *dst++ = 'N'; }          \
2117     CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
2118   } while (0)
2119
2120 #define ENCODE_SINGLE_SHIFT_3                           \
2121   do {                                                  \
2122     if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)) \
2123       *dst++ = ISO_CODE_SS3;                            \
2124     else                                                \
2125       { *dst++ = ISO_CODE_ESC; *dst++ = 'O'; }          \
2126     CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
2127   } while (0)
2128
2129 /* Locking-shift functions of ISO2022: shift-in, shift-out,
2130    locking-shift-2, and locking-shift-3.  Each macro records the
2131    graphic register now invoked to graphic plane 0 and produces the code.  */
2132
2133 #define ENCODE_SHIFT_IN                         \
2134   do {                                          \
2135     CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; \
2136     *dst++ = ISO_CODE_SI;                       \
2137   } while (0)
2138
2139 #define ENCODE_SHIFT_OUT                        \
2140   do {                                          \
2141     CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; \
2142     *dst++ = ISO_CODE_SO;                       \
2143   } while (0)
2144
2145 #define ENCODE_LOCKING_SHIFT_2                          \
2146   do {                                                  \
2147     CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;         \
2148     dst[0] = ISO_CODE_ESC, dst[1] = 'n', dst += 2;      \
2149   } while (0)
2150
2151 #define ENCODE_LOCKING_SHIFT_3                          \
2152   do {                                                  \
2153     CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;         \
2154     dst[0] = ISO_CODE_ESC, dst[1] = 'o', dst += 2;      \
2155   } while (0)
2156
2157 /* Produce codes for a DIMENSION1 character whose character set is CHARSET and whose position-code is C1,
2158    using the caller's `coding' and `dst'.  A pending single shift emits C1 & 0x7F under
2159    CODING_FLAG_ISO_SEVEN_BITS, else C1 | 0x80, and is cleared; graphic plane 0 emits C1 & 0x7F and plane 1
2160    C1 | 0x80.  C1 is pasted unparenthesized, so pass a simple operand.  The do/while (1) loops until emitted.  */
2161 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
2162   do {                                                                  \
2163     if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
2164       {                                                                 \
2165         if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
2166           *dst++ = c1 & 0x7F;                                           \
2167         else                                                            \
2168           *dst++ = c1 | 0x80;                                           \
2169         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
2170         break;                                                          \
2171       }                                                                 \
2172     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))      \
2173       {                                                                 \
2174         *dst++ = c1 & 0x7F;                                             \
2175         break;                                                          \
2176       }                                                                 \
2177     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))      \
2178       {                                                                 \
2179         *dst++ = c1 | 0x80;                                             \
2180         break;                                                          \
2181       }                                                                 \
2182     else                                                                \
2183       /* Since CHARSET is not yet invoked to any graphic planes, we     \
2184          must invoke it, or, at first, designate it to some graphic     \
2185          register.  Then repeat the loop to actually produce the        \
2186          character.  */                                                 \
2187       dst = encode_invocation_designation (charset, coding, dst);       \
2188   } while (1)
2189
2190 /* Produce codes for a DIMENSION2 character whose character set is CHARSET and whose position-codes are
2191    C1 and C2, using the caller's `coding' and `dst'.  Both bytes are masked with 0x7F (pending single shift
2192    under CODING_FLAG_ISO_SEVEN_BITS, or graphic plane 0) or or'ed with 0x80 (pending single shift without
2193    that flag, or graphic plane 1).  C1 and C2 are pasted unparenthesized.  The do/while (1) loops until emitted.  */
2194 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
2195   do {                                                                  \
2196     if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
2197       {                                                                 \
2198         if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
2199           *dst++ = c1 & 0x7F, *dst++ = c2 & 0x7F;                       \
2200         else                                                            \
2201           *dst++ = c1 | 0x80, *dst++ = c2 | 0x80;                       \
2202         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
2203         break;                                                          \
2204       }                                                                 \
2205     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))      \
2206       {                                                                 \
2207         *dst++ = c1 & 0x7F, *dst++= c2 & 0x7F;                          \
2208         break;                                                          \
2209       }                                                                 \
2210     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))      \
2211       {                                                                 \
2212         *dst++ = c1 | 0x80, *dst++= c2 | 0x80;                          \
2213         break;                                                          \
2214       }                                                                 \
2215     else                                                                \
2216       /* Since CHARSET is not yet invoked to any graphic planes, we     \
2217          must invoke it, or, at first, designate it to some graphic     \
2218          register.  Then repeat the loop to actually produce the        \
2219          character.  */                                                 \
2220       dst = encode_invocation_designation (charset, coding, dst);       \
2221   } while (1)
2222 /* Encode character C at DST (uses the caller's `coding' and `dst'), choosing the emitter by charset dimension.  */
2223 #define ENCODE_ISO_CHARACTER(c)                                 \
2224   do {                                                          \
2225     int charset, c1, c2;                                        \
2226     /* Break C into its charset and position codes.  */         \
2227     SPLIT_CHAR (c, charset, c1, c2);                            \
2228     if (CHARSET_DEFINED_P (charset))                            \
2229       {                                                         \
2230         if (CHARSET_DIMENSION (charset) == 1)                   \
2231           { /* USE_ROMAN: emit ASCII as latin-jisx0201.  */     \
2232             if (charset == CHARSET_ASCII                        \
2233                 && coding->flags & CODING_FLAG_ISO_USE_ROMAN)   \
2234               charset = charset_latin_jisx0201;                 \
2235             ENCODE_ISO_CHARACTER_DIMENSION1 (charset, c1);      \
2236           }                                                     \
2237         else                                                    \
2238           { /* USE_OLDJIS: emit jisx0208 as jisx0208-1978.  */  \
2239             if (charset == charset_jisx0208                     \
2240                 && coding->flags & CODING_FLAG_ISO_USE_OLDJIS)  \
2241               charset = charset_jisx0208_1978;                  \
2242             ENCODE_ISO_CHARACTER_DIMENSION2 (charset, c1, c2);  \
2243           }                                                     \
2244       }                                                         \
2245     else                                                        \
2246       { /* Undefined charset: copy C1, and C2 if >= 0, as is.  */ \
2247         *dst++ = c1;                                            \
2248         if (c2 >= 0)                                            \
2249           *dst++ = c2;                                          \
2250       }                                                         \
2251   } while (0)
2252
2253
2254 /* Instead of encoding character C, encode CODING_INHIBIT_CHARACTER_SUBSTITUTION once, or twice when
2255    CHARSET_WIDTH of C's charset exceeds 1 (the substitute is described as `?' -- confirm in coding.h).  */
2256 #define ENCODE_UNSAFE_CHARACTER(c)                                      \
2257   do {                                                                  \
2258     ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION);       \
2259     if (CHARSET_WIDTH (CHAR_CHARSET (c)) > 1)                           \
2260       ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION);     \
2261   } while (0)
2262
2263
2264 /* Make CHARSET usable for the codes that follow: produce at DST any
2265    designation and invocation codes still missing and update the
2266    element `spec.iso2022' of *CODING to match.  Return new DST.  */
2267
2268 unsigned char *
2269 encode_invocation_designation (charset, coding, dst)
2270      int charset;
2271      struct coding_system *coding;
2272      unsigned char *dst;
2273 {
2274   /* Graphic register that holds, or is going to hold, CHARSET.  */
2275   int gr = 0;
2276   /* Nonzero if the coding system has CODING_FLAG_ISO_SINGLE_SHIFT.  */
2277   int single_shift_p;
2278
2279   /* Search the four graphic registers for CHARSET.  */
2280   while (gr < 4 && charset != CODING_SPEC_ISO_DESIGNATION (coding, gr))
2281     gr++;
2282
2283   if (gr == 4)
2284     {
2285       /* No register holds CHARSET yet.  Pick the register it
2286          requests, or graphic register 0 if it requests none, and
2287          produce the designation sequence.  */
2288       gr = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
2289       if (gr == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
2290         gr = 0;
2291
2292       ENCODE_DESIGNATION (charset, gr, coding);
2293     }
2294
2295   /* A register already invoked to graphic plane 0 or 1 needs no
2296      invocation code.  */
2297   if (CODING_SPEC_ISO_INVOCATION (coding, 0) == gr
2298       || CODING_SPEC_ISO_INVOCATION (coding, 1) == gr)
2299     return dst;
2300
2301   /* Otherwise produce the code that invokes the register.  Registers
2302      2 and 3 use single shift if the coding system has that flag, else
2303      locking shift.  Any other value of GR produces no code.  */
2304   single_shift_p = (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT) != 0;
2305
2306   if (gr == 0)
2307     ENCODE_SHIFT_IN;
2308   else if (gr == 1)
2309     ENCODE_SHIFT_OUT;
2310   else if (gr == 2)
2311     {
2312       if (single_shift_p)
2313         ENCODE_SINGLE_SHIFT_2;
2314       else
2315         ENCODE_LOCKING_SHIFT_2;
2316     }
2317   else if (gr == 3)
2318     {
2319       if (single_shift_p)
2320         ENCODE_SINGLE_SHIFT_3;
2321       else
2322         ENCODE_LOCKING_SHIFT_3;
2323     }
2324
2325   return dst;
2326 }
2327
2328 /* Emit composition rule RULE at DST as two bytes, one for each value decoded by COMPOSITION_DECODE_RULE.  */
2329
2330 #define ENCODE_COMPOSITION_RULE(rule)                   \
2331   do {                                                  \
2332     int ref_g, ref_n;                                   \
2333     COMPOSITION_DECODE_RULE (rule, ref_g, ref_n);       \
2334     dst[0] = 32 + 81 + ref_g, dst[1] = 32 + ref_n;      \
2335     dst += 2;                                           \
2336   } while (0)
2337
2338 /* Produce codes for indicating the start of a composition sequence
2339    (ESC 0, ESC 3, or ESC 4).  DATA points to an array of integers
2340    which specify information about the composition.  See the comment
2341    in coding.h for the format of DATA.  DATA[3] becomes coding->composing: COMPOSITION_RELATIVE gives ESC 0,
2342    COMPOSITION_WITH_ALTCHARS ESC 3, else ESC 4; the last two also set cmp_data_index and clear composition_rule_follows.  */
2343 #define ENCODE_COMPOSITION_START(coding, data)                          \
2344   do {                                                                  \
2345     coding->composing = data[3];                                        \
2346     *dst++ = ISO_CODE_ESC;                                              \
2347     if (coding->composing == COMPOSITION_RELATIVE)                      \
2348       *dst++ = '0';                                                     \
2349     else                                                                \
2350       {                                                                 \
2351         *dst++ = (coding->composing == COMPOSITION_WITH_ALTCHARS        \
2352                   ? '3' : '4');                                         \
2353         coding->cmp_data_index = coding->cmp_data_start + 4;            \
2354         coding->composition_rule_follows = 0;                           \
2355       }                                                                 \
2356   } while (0)
2357
2358 /* Produce codes (ESC 1) for indicating the end of the current composition, then advance cmp_data_start by
2359    DATA[0]; when that reaches the end of the current cmp_data block and a next block exists, move on to it.  */
2360 #define ENCODE_COMPOSITION_END(coding, data)                    \
2361   do {                                                          \
2362     *dst++ = ISO_CODE_ESC;                                      \
2363     *dst++ = '1';                                               \
2364     coding->cmp_data_start += data[0];                          \
2365     coding->composing = COMPOSITION_NO;                         \
2366     if (coding->cmp_data_start == coding->cmp_data->used        \
2367         && coding->cmp_data->next)                              \
2368       {                                                         \
2369         coding->cmp_data = coding->cmp_data->next;              \
2370         coding->cmp_data_start = 0;                             \
2371       }                                                         \
2372   } while (0)
2373
2374 /* Emit ESC 0, the composition start sequence.  It does not open a
2375    new composition here: the components (alternate chars and
2376    composition rules) have just been produced, and the sequence
2377    announces that the actual text follows in SRC.  */
2378
2379 #define ENCODE_COMPOSITION_FAKE_START(coding)   \
2380   do {                                          \
2381     coding->composing = COMPOSITION_RELATIVE;   \
2382     dst[0] = ISO_CODE_ESC, dst[1] = '0';        \
2383     dst += 2;                                   \
2384   } while (0)
2385
2386 /* The following three macros produce codes for indicating direction of text: the control sequence
2387    introducer (ESC [ when CODING_FLAG_ISO_SEVEN_BITS is set, else CSI) followed by `2 ]' (R2L) or `0 ]' (L2R).  */
2388 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
2389   do {                                                  \
2390     if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
2391       *dst++ = ISO_CODE_ESC, *dst++ = '[';              \
2392     else                                                \
2393       *dst++ = ISO_CODE_CSI;                            \
2394   } while (0)
2395 /* The introducer is a statement macro taking no arguments, so these are statement macros too.  */
2396 #define ENCODE_DIRECTION_R2L    \
2397   do { ENCODE_CONTROL_SEQUENCE_INTRODUCER; *dst++ = '2'; *dst++ = ']'; } while (0)
2398
2399 #define ENCODE_DIRECTION_L2R    \
2400   do { ENCODE_CONTROL_SEQUENCE_INTRODUCER; *dst++ = '0'; *dst++ = ']'; } while (0)
2401
2402 /* Produce codes for designation and invocation to reset the graphic planes and registers to initial
2403    state: SI unless register 0 is invoked to plane 0, then each initial designation (>= 0) not now in effect.  */
2404 #define ENCODE_RESET_PLANE_AND_REGISTER                                     \
2405   do {                                                                      \
2406     int reg;                                                                \
2407     if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                        \
2408       ENCODE_SHIFT_IN;                                                      \
2409     for (reg = 0; reg < 4; reg++)                                           \
2410       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0            \
2411           && (CODING_SPEC_ISO_DESIGNATION (coding, reg)                     \
2412               != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg)))        \
2413         ENCODE_DESIGNATION                                                  \
2414           (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
2415   } while (0)
2416
2417 /* Look through the line starting at SRC for charsets that request a
2418    graphic register, produce the designation sequences they need at
2419    DST, and return updated DST.  The search ends at a newline, when
2420    the current block ends (so designations needed later in the line
2421    can be missed), or when all four registers have been claimed.  */
2422
2423 static unsigned char *
2424 encode_designation_at_bol (coding, translation_table, src, src_end, dst)
2425      struct coding_system *coding;
2426      Lisp_Object translation_table;
2427      unsigned char *src, *src_end, *dst;
2428 {
2429   /* WANTED[N] is the charset to designate to graphic register N, or
2430      -1 while no character has asked for that register.  */
2431   int wanted[4];
2432   int n_wanted = 0;
2433   int c, charset, gr;
2434
2435   wanted[0] = wanted[1] = wanted[2] = wanted[3] = -1;
2436
2437   while (n_wanted < 4)
2438     {
2439       ONE_MORE_CHAR (c);
2440       if (c == '\n')
2441         break;
2442
2443       charset = CHAR_CHARSET (c);
2444       gr = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
2445       if (gr == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION || wanted[gr] >= 0)
2446         continue;
2447       wanted[gr] = charset;
2448       n_wanted++;
2449     }
2450
2451  label_end_of_loop:
2452   if (n_wanted == 0)
2453     return dst;
2454
2455   for (gr = 0; gr < 4; gr++)
2456     if (wanted[gr] >= 0
2457         && CODING_SPEC_ISO_DESIGNATION (coding, gr) != wanted[gr])
2458       ENCODE_DESIGNATION (wanted[gr], gr, coding);
2459
2460   return dst;
2461 }
2462
2463 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  Encode SRC_BYTES bytes at SOURCE into
2464    ISO2022 codes at DESTINATION; on return coding->consumed, consumed_char, produced and produced_char are set.  */
2465 static void
2466 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
2467      struct coding_system *coding;
2468      unsigned char *source, *destination;
2469      int src_bytes, dst_bytes;
2470 {
2471   unsigned char *src = source;
2472   unsigned char *src_end = source + src_bytes;
2473   unsigned char *dst = destination;
2474   unsigned char *dst_end = destination + dst_bytes;
2475   /* Since the maximum bytes produced by each loop is 20, we subtract 19
2476      from DST_END to assure overflow checking is necessary only at the
2477      head of loop.  */
2478   unsigned char *adjusted_dst_end = dst_end - 19;
2479   /* SRC_BASE remembers the start position in source in each loop.
2480      The loop will be exited when there's not enough source text to
2481      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2482      the check at the head of the loop finds not enough destination
2483      area to produce encoded codes.  */
2484   unsigned char *src_base;
2485   int c;
2486   Lisp_Object translation_table;
2487   Lisp_Object safe_chars;
2488
2489   safe_chars = coding_safe_chars (coding);
2490
2491   if (NILP (Venable_character_translation))
2492     translation_table = Qnil;
2493   else
2494     {
2495       translation_table = coding->translation_table_for_encode;
2496       if (NILP (translation_table))
2497         translation_table = Vstandard_translation_table_for_encode;
2498     }
2499
2500   coding->consumed_char = 0;
2501   coding->errors = 0;
2502   while (1)
2503     {
2504       src_base = src;
2505       /* With DST_BYTES zero the limit is measured against SRC (presumably a shared buffer -- confirm in callers).  */
2506       if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19)))
2507         {
2508           coding->result = CODING_FINISH_INSUFFICIENT_DST;
2509           break;
2510         }
2511
2512       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
2513           && CODING_SPEC_ISO_BOL (coding))
2514         {
2515           /* We have to produce designation sequences if any now.  */
2516           dst = encode_designation_at_bol (coding, translation_table,
2517                                            src, src_end, dst);
2518           CODING_SPEC_ISO_BOL (coding) = 0;
2519         }
2520
2521       /* Check composition start and end.  */
2522       if (coding->composing != COMPOSITION_DISABLED
2523           && coding->cmp_data_start < coding->cmp_data->used)
2524         {
2525           struct composition_data *cmp_data = coding->cmp_data;
2526           int *data = cmp_data->data + coding->cmp_data_start;
2527           int this_pos = cmp_data->char_offset + coding->consumed_char;
2528
2529           if (coding->composing == COMPOSITION_RELATIVE)
2530             {
2531               if (this_pos == data[2])
2532                 {
2533                   ENCODE_COMPOSITION_END (coding, data);
2534                   cmp_data = coding->cmp_data;	/* The macro may have moved to the next block.  */
2535                   data = cmp_data->data + coding->cmp_data_start;
2536                 }
2537             }
2538           else if (COMPOSING_P (coding))
2539             {
2540               /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHARS  */
2541               if (coding->cmp_data_index == coding->cmp_data_start + data[0])
2542                 /* We have consumed components of the composition.
2543                    What follows in SRC is the composition's base
2544                    text.  */
2545                 ENCODE_COMPOSITION_FAKE_START (coding);
2546               else
2547                 {
2548                   int c = cmp_data->data[coding->cmp_data_index++];	/* Shadows the outer C.  */
2549                   if (coding->composition_rule_follows)
2550                     {
2551                       ENCODE_COMPOSITION_RULE (c);
2552                       coding->composition_rule_follows = 0;
2553                     }
2554                   else
2555                     {
2556                       if (coding->flags & CODING_FLAG_ISO_SAFE
2557                           && ! CODING_SAFE_CHAR_P (safe_chars, c))
2558                         ENCODE_UNSAFE_CHARACTER (c);
2559                       else
2560                         ENCODE_ISO_CHARACTER (c);
2561                       if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
2562                         coding->composition_rule_follows = 1;
2563                     }
2564                   continue;	/* A component was produced; consumed_char is not incremented.  */
2565                 }
2566             }
2567           if (!COMPOSING_P (coding))
2568             {
2569               if (this_pos == data[1])
2570                 {
2571                   ENCODE_COMPOSITION_START (coding, data);
2572                   continue;	/* Only the start sequence was produced.  */
2573                 }
2574             }
2575         }
2576
2577       ONE_MORE_CHAR (c);
2578
2579       /* Now encode the character C.  */
2580       if (c < 0x20 || c == 0x7F)
2581         {
2582           if (c == '\r')
2583             {
2584               if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2585                 {
2586                   if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2587                     ENCODE_RESET_PLANE_AND_REGISTER;
2588                   *dst++ = c;
2589                   continue;	/* Note: this path skips the consumed_char increment below.  */
2590                 }
2591               /* fall down to treat '\r' as '\n' ...  */
2592               c = '\n';
2593             }
2594           if (c == '\n')
2595             {
2596               if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
2597                 ENCODE_RESET_PLANE_AND_REGISTER;
2598               if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
2599                 bcopy (coding->spec.iso2022.initial_designation,
2600                        coding->spec.iso2022.current_designation,
2601                        sizeof coding->spec.iso2022.initial_designation);
2602               if (coding->eol_type == CODING_EOL_LF
2603                   || coding->eol_type == CODING_EOL_UNDECIDED)
2604                 *dst++ = ISO_CODE_LF;
2605               else if (coding->eol_type == CODING_EOL_CRLF)
2606                 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
2607               else
2608                 *dst++ = ISO_CODE_CR;
2609               CODING_SPEC_ISO_BOL (coding) = 1;
2610             }
2611           else
2612             {
2613               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2614                 ENCODE_RESET_PLANE_AND_REGISTER;
2615               *dst++ = c;
2616             }
2617         }
2618       else if (ASCII_BYTE_P (c))
2619         ENCODE_ISO_CHARACTER (c);
2620       else if (SINGLE_BYTE_CHAR_P (c))
2621         {	/* Copied through as is, but counted as an error.  */
2622           *dst++ = c;
2623           coding->errors++;
2624         }
2625       else if (coding->flags & CODING_FLAG_ISO_SAFE
2626                && ! CODING_SAFE_CHAR_P (safe_chars, c))
2627         ENCODE_UNSAFE_CHARACTER (c);
2628       else
2629         ENCODE_ISO_CHARACTER (c);
2630
2631       coding->consumed_char++;
2632     }
2633
2634  label_end_of_loop:
2635   coding->consumed = src_base - source;	/* Bytes before the iteration that ended the loop.  */
2636   coding->produced = coding->produced_char = dst - destination;
2637 }
2638
2639 \f
2640 /*** 4. SJIS and BIG5 handlers ***/
2641
2642 /* Although SJIS and BIG5 are not ISO coding systems, they are used
2643    quite widely.  So, for the moment, Emacs supports them in the bare
2644    C code.  But, in the future, they may be supported only by CCL.  */
2645
2646 /* SJIS is a coding system encoding three character sets: ASCII, right
2647    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
2648    as is.  A character of charset katakana-jisx0201 is encoded by
2649    "position-code + 0x80".  A character of charset japanese-jisx0208
2650    is encoded in 2-byte but two position-codes are divided and shifted
2651    so that it fits in the range below.
2652
2653    --- CODE RANGE of SJIS ---
2654    (character set)      (range)
2655    ASCII                0x00 .. 0x7F
2656    KATAKANA-JISX0201    0xA0 .. 0xDF
2657    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
2658             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
2659    -------------------------------
2660
2661 */
2662
2663 /* BIG5 is a coding system encoding two character sets: ASCII and
2664    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
2665    character set and is encoded in two bytes.
2666
2667    --- CODE RANGE of BIG5 ---
2668    (character set)      (range)
2669    ASCII                0x00 .. 0x7F
2670    Big5 (1st byte)      0xA1 .. 0xFE
2671         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
2672    --------------------------
2673
2674    Since the number of characters in Big5 is larger than maximum
2675    characters in Emacs' charset (96x96), it can't be handled as one
2676    charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
2677    and `charset-big5-2'.  Both are DIMENSION2 and CHARS94.  The former
2678    contains frequently used characters and the latter contains less
2679    frequently used characters.  */
2680
2681 /* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
2682    are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
2683    C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
2684    format.  CHARSET is `charset_big5_1' or `charset_big5_2'.  */
2685
2686 /* Number of Big5 characters which have the same code in 1st byte.  */
2687 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
2688 /* Decode Big5 bytes B1, B2 into CHARSET (charset_big5_1 if B1 < 0xC9, else charset_big5_2) and codes C1, C2.  */
2689 #define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
2690   do {                                                                  \
2691     unsigned int temp                                                   \
2692       = (((b1) - 0xA1) * BIG5_SAME_ROW                                  \
2693          + (b2) - ((b2) < 0x7F ? 0x40 : 0x62));                         \
2694     if ((b1) < 0xC9)                                                    \
2695       (charset) = charset_big5_1;                                       \
2696     else                                                                \
2697       { /* Offsets in charset_big5_2 count from row 0xC9.  */           \
2698         (charset) = charset_big5_2;                                     \
2699         temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                          \
2700       }                                                                 \
2701     (c1) = temp / (0xFF - 0xA1) + 0x21;                                 \
2702     (c2) = temp % (0xFF - 0xA1) + 0x21; } while (0)
2703 /* Encode CHARSET (charset_big5_1 or charset_big5_2) and codes C1, C2 into Big5 bytes B1, B2.  */
2704 #define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
2705   do {                                                                  \
2706     unsigned int temp = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);  \
2707     if ((charset) == charset_big5_2)                                    \
2708       temp += BIG5_SAME_ROW * (0xC9 - 0xA1);                            \
2709     (b1) = temp / BIG5_SAME_ROW + 0xA1;                                 \
2710     (b2) = temp % BIG5_SAME_ROW;                                        \
2711     (b2) += (b2) < 0x3F ? 0x40 : 0x62;                                  \
2712   } while (0)
2713
2714 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2715    Scan the text for a byte sequence that is invalid in SJIS.  Return
2716    0 on finding one, else return CODING_CATEGORY_MASK_SJIS.  */
2717
2718 static int
2719 detect_coding_sjis (src, src_end, multibytep)
2720      unsigned char *src, *src_end;
2721      int multibytep;
2722 {
2723   /* Dummy coding system for the ONE_MORE_BYTE macros.  */
2724   struct coding_system dummy_coding;
2725   struct coding_system *coding = &dummy_coding;
2726   int c;
2727
2728   for (;;)
2729     {
2730       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2731       if (c < 0x81)
2732         continue;
2733       if (c > 0x9F && (c < 0xE0 || c > 0xEF))	/* not a 1st byte */
2734         {
2735           if (c > 0xDF)
2736             return 0;
2737           continue;
2738         }
2739       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);	/* 2nd byte */
2740       if (c < 0x40 || c == 0x7F || c > 0xFC)
2741         return 0;
2742     }
2743  label_end_of_loop:
2744   return CODING_CATEGORY_MASK_SJIS;
2745 }
2746
2747 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2748    Check if a text is encoded in BIG5.  If it is, return
2749    CODING_CATEGORY_MASK_BIG5, else return 0.  */
2750
2751 static int
2752 detect_coding_big5 (src, src_end, multibytep)
2753      unsigned char *src, *src_end;
2754      int multibytep;
2755 {
2756   int c;
2757   /* Dummy for ONE_MORE_BYTE.  */
2758   struct coding_system dummy_coding;
2759   struct coding_system *coding = &dummy_coding;
2760
2761   while (1)
2762     {
2763       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2764       if (c >= 0xA1)
2765         {
2766           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2767           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
2768             return 0;
2769         }
2770     }
2771  label_end_of_loop:
2772   return CODING_CATEGORY_MASK_BIG5;
2773 }
2774
2775 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2776    Check if a text is encoded in UTF-8.  If it is, return
2777    CODING_CATEGORY_MASK_UTF_8, else return 0.  */
2778
2779 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
2780 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
2781 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
2782 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
2783 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
2784 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
2785 #define UTF_8_6_OCTET_LEADING_P(c) (((c) & 0xFE) == 0xFC)
2786
2787 static int
2788 detect_coding_utf_8 (src, src_end, multibytep)
2789      unsigned char *src, *src_end;
2790      int multibytep;
2791 {
2792   unsigned char c;
2793   int seq_maybe_bytes;
2794   /* Dummy for ONE_MORE_BYTE.  */
2795   struct coding_system dummy_coding;
2796   struct coding_system *coding = &dummy_coding;
2797
2798   while (1)
2799     {
2800       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2801       if (UTF_8_1_OCTET_P (c))
2802         continue;
2803       else if (UTF_8_2_OCTET_LEADING_P (c))
2804         seq_maybe_bytes = 1;
2805       else if (UTF_8_3_OCTET_LEADING_P (c))
2806         seq_maybe_bytes = 2;
2807       else if (UTF_8_4_OCTET_LEADING_P (c))
2808         seq_maybe_bytes = 3;
2809       else if (UTF_8_5_OCTET_LEADING_P (c))
2810         seq_maybe_bytes = 4;
2811       else if (UTF_8_6_OCTET_LEADING_P (c))
2812         seq_maybe_bytes = 5;
2813       else
2814         return 0;
2815
2816       do
2817         {
2818           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2819           if (!UTF_8_EXTRA_OCTET_P (c))
2820             return 0;
2821           seq_maybe_bytes--;
2822         }
2823       while (seq_maybe_bytes > 0);
2824     }
2825
2826  label_end_of_loop:
2827   return CODING_CATEGORY_MASK_UTF_8;
2828 }
2829
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
   Check if a text is encoded in UTF-16 by looking at its first two
   bytes.  If they are 0xFE 0xFF, return
   CODING_CATEGORY_MASK_UTF_16_BE; if they are 0xFF 0xFE, return
   CODING_CATEGORY_MASK_UTF_16_LE; else return 0.  */

/* Nonzero if the 16-bit value VAL is 0xFFFE or 0xFFFF.  */
#define UTF_16_INVALID_P(val)   \
  (((val) == 0xFFFE)            \
   || ((val) == 0xFFFF))

/* Nonzero if the 16-bit value VAL is a high surrogate, i.e. lies in
   0xD800..0xDBFF.  The top six bits select the surrogate block, so
   the mask has to be 0xFC00.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* Nonzero if the 16-bit value VAL is a low surrogate, i.e. lies in
   0xDC00..0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)
2845
2846 static int
2847 detect_coding_utf_16 (src, src_end, multibytep)
2848      unsigned char *src, *src_end;
2849      int multibytep;
2850 {
2851   unsigned char c1, c2;
2852   /* Dummy for TWO_MORE_BYTES.  */
2853   struct coding_system dummy_coding;
2854   struct coding_system *coding = &dummy_coding;
2855
2856   ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
2857   ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep);
2858
2859   if ((c1 == 0xFF) && (c2 == 0xFE))
2860     return CODING_CATEGORY_MASK_UTF_16_LE;
2861   else if ((c1 == 0xFE) && (c2 == 0xFF))
2862     return CODING_CATEGORY_MASK_UTF_16_BE;
2863
2864  label_end_of_loop:
2865   return 0;
2866 }
2867
2868 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2869    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
2870
2871 static void
2872 decode_coding_sjis_big5 (coding, source, destination,
2873                          src_bytes, dst_bytes, sjis_p)
2874      struct coding_system *coding;
2875      unsigned char *source, *destination;
2876      int src_bytes, dst_bytes;
2877      int sjis_p;
2878 {
2879   unsigned char *src = source;
2880   unsigned char *src_end = source + src_bytes;
2881   unsigned char *dst = destination;
2882   unsigned char *dst_end = destination + dst_bytes;
2883   /* SRC_BASE remembers the start position in source in each loop.
2884      The loop will be exited when there's not enough source code
2885      (within macro ONE_MORE_BYTE), or when there's not enough
2886      destination area to produce a character (within macro
2887      EMIT_CHAR).  */
2888   unsigned char *src_base;
2889   Lisp_Object translation_table;
2890
2891   if (NILP (Venable_character_translation))
2892     translation_table = Qnil;
2893   else
2894     {
2895       translation_table = coding->translation_table_for_decode;
2896       if (NILP (translation_table))
2897         translation_table = Vstandard_translation_table_for_decode;
2898     }
2899
2900   coding->produced_char = 0;
2901   while (1)
2902     {
2903       int c, charset, c1, c2;
2904
2905       src_base = src;
2906       ONE_MORE_BYTE (c1);
2907
2908       if (c1 < 0x80)
2909         {
2910           charset = CHARSET_ASCII;
2911           if (c1 < 0x20)
2912             {
2913               if (c1 == '\r')
2914                 {
2915                   if (coding->eol_type == CODING_EOL_CRLF)
2916                     {
2917                       ONE_MORE_BYTE (c2);
2918                       if (c2 == '\n')
2919                         c1 = c2;
2920                       else if (coding->mode
2921                                & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2922                         {
2923                           coding->result = CODING_FINISH_INCONSISTENT_EOL;
2924                           goto label_end_of_loop;
2925                         }
2926                       else
2927                         /* To process C2 again, SRC is subtracted by 1.  */
2928                         src--;
2929                     }
2930                   else if (coding->eol_type == CODING_EOL_CR)
2931                     c1 = '\n';
2932                 }
2933               else if (c1 == '\n'
2934                        && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2935                        && (coding->eol_type == CODING_EOL_CR
2936                            || coding->eol_type == CODING_EOL_CRLF))
2937                 {
2938                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
2939                   goto label_end_of_loop;
2940                 }
2941             }
2942         }
2943       else
2944         {
2945           if (sjis_p)
2946             {
2947               if (c1 >= 0xF0)
2948                 goto label_invalid_code;
2949               if (c1 < 0xA0 || c1 >= 0xE0)
2950                 {
2951                   /* SJIS -> JISX0208 */
2952                   ONE_MORE_BYTE (c2);
2953                   if (c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
2954                     goto label_invalid_code;
2955                   DECODE_SJIS (c1, c2, c1, c2);
2956                   charset = charset_jisx0208;
2957                 }
2958               else
2959                 /* SJIS -> JISX0201-Kana */
2960                 charset = charset_katakana_jisx0201;
2961             }
2962           else
2963             {
2964               /* BIG5 -> Big5 */
2965               if (c1 < 0xA1 || c1 > 0xFE)
2966                 goto label_invalid_code;
2967               ONE_MORE_BYTE (c2);
2968               if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE)
2969                 goto label_invalid_code;
2970               DECODE_BIG5 (c1, c2, charset, c1, c2);
2971             }
2972         }
2973
2974       c = DECODE_ISO_CHARACTER (charset, c1, c2);
2975       EMIT_CHAR (c);
2976       continue;
2977
2978     label_invalid_code:
2979       coding->errors++;
2980       src = src_base;
2981       c = *src++;
2982       EMIT_CHAR (c);
2983     }
2984
2985  label_end_of_loop:
2986   coding->consumed = coding->consumed_char = src_base - source;
2987   coding->produced = dst - destination;
2988   return;
2989 }
2990
2991 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
2992    This function can encode charsets `ascii', `katakana-jisx0201',
2993    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
2994    are sure that all these charsets are registered as official charset
2995    (i.e. do not have extended leading-codes).  Characters of other
2996    charsets are produced without any encoding.  If SJIS_P is 1, encode
2997    SJIS text, else encode BIG5 text.  */
2998
2999 static void
3000 encode_coding_sjis_big5 (coding, source, destination,
3001                          src_bytes, dst_bytes, sjis_p)
3002      struct coding_system *coding;
3003      unsigned char *source, *destination;
3004      int src_bytes, dst_bytes;
3005      int sjis_p;
3006 {
3007   unsigned char *src = source;
3008   unsigned char *src_end = source + src_bytes;
3009   unsigned char *dst = destination;
3010   unsigned char *dst_end = destination + dst_bytes;
3011   /* SRC_BASE remembers the start position in source in each loop.
3012      The loop will be exited when there's not enough source text to
3013      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
3014      there's not enough destination area to produce encoded codes
3015      (within macro EMIT_BYTES).  */
3016   unsigned char *src_base;
3017   Lisp_Object translation_table;
3018
3019   if (NILP (Venable_character_translation))
3020     translation_table = Qnil;
3021   else
3022     {
3023       translation_table = coding->translation_table_for_encode;
3024       if (NILP (translation_table))
3025         translation_table = Vstandard_translation_table_for_encode;
3026     }
3027
3028   while (1)
3029     {
3030       int c, charset, c1, c2;
3031
3032       src_base = src;
3033       ONE_MORE_CHAR (c);
3034
3035       /* Now encode the character C.  */
3036       if (SINGLE_BYTE_CHAR_P (c))
3037         {
3038           switch (c)
3039             {
3040             case '\r':
3041               if (!coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
3042                 {
3043                   EMIT_ONE_BYTE (c);
3044                   break;
3045                 }
3046               c = '\n';
3047             case '\n':
3048               if (coding->eol_type == CODING_EOL_CRLF)
3049                 {
3050                   EMIT_TWO_BYTES ('\r', c);
3051                   break;
3052                 }
3053               else if (coding->eol_type == CODING_EOL_CR)
3054                 c = '\r';
3055             default:
3056               EMIT_ONE_BYTE (c);
3057             }
3058         }
3059       else
3060         {
3061           SPLIT_CHAR (c, charset, c1, c2);
3062           if (sjis_p)
3063             {
3064               if (charset == charset_jisx0208
3065                   || charset == charset_jisx0208_1978)
3066                 {
3067                   ENCODE_SJIS (c1, c2, c1, c2);
3068                   EMIT_TWO_BYTES (c1, c2);
3069                 }
3070               else if (charset == charset_katakana_jisx0201)
3071                 EMIT_ONE_BYTE (c1 | 0x80);
3072               else if (charset == charset_latin_jisx0201)
3073                 EMIT_ONE_BYTE (c1);
3074               else
3075                 /* There's no way other than producing the internal
3076                    codes as is.  */
3077                 EMIT_BYTES (src_base, src);
3078             }
3079           else
3080             {
3081               if (charset == charset_big5_1 || charset == charset_big5_2)
3082                 {
3083                   ENCODE_BIG5 (charset, c1, c2, c1, c2);
3084                   EMIT_TWO_BYTES (c1, c2);
3085                 }
3086               else
3087                 /* There's no way other than producing the internal
3088                    codes as is.  */
3089                 EMIT_BYTES (src_base, src);
3090             }
3091         }
3092       coding->consumed_char++;
3093     }
3094
3095  label_end_of_loop:
3096   coding->consumed = src_base - source;
3097   coding->produced = coding->produced_char = dst - destination;
3098 }
3099
3100 \f
3101 /*** 5. CCL handlers ***/
3102
3103 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3104    Check if a text is encoded in a coding system of which
3105    encoder/decoder are written in CCL program.  If it is, return
3106    CODING_CATEGORY_MASK_CCL, else return 0.  */
3107
3108 static int
3109 detect_coding_ccl (src, src_end, multibytep)
3110      unsigned char *src, *src_end;
3111      int multibytep;
3112 {
3113   unsigned char *valid;
3114   int c;
3115   /* Dummy for ONE_MORE_BYTE.  */
3116   struct coding_system dummy_coding;
3117   struct coding_system *coding = &dummy_coding;
3118
3119   /* No coding system is assigned to coding-category-ccl.  */
3120   if (!coding_system_table[CODING_CATEGORY_IDX_CCL])
3121     return 0;
3122
3123   valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
3124   while (1)
3125     {
3126       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
3127       if (! valid[c])
3128         return 0;
3129     }
3130  label_end_of_loop:
3131   return CODING_CATEGORY_MASK_CCL;
3132 }
3133
3134 \f
3135 /*** 6. End-of-line handlers ***/
3136
3137 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3138
3139 static void
3140 decode_eol (coding, source, destination, src_bytes, dst_bytes)
3141      struct coding_system *coding;
3142      unsigned char *source, *destination;
3143      int src_bytes, dst_bytes;
3144 {
3145   unsigned char *src = source;
3146   unsigned char *dst = destination;
3147   unsigned char *src_end = src + src_bytes;
3148   unsigned char *dst_end = dst + dst_bytes;
3149   Lisp_Object translation_table;
3150   /* SRC_BASE remembers the start position in source in each loop.
3151      The loop will be exited when there's not enough source code
3152      (within macro ONE_MORE_BYTE), or when there's not enough
3153      destination area to produce a character (within macro
3154      EMIT_CHAR).  */
3155   unsigned char *src_base;
3156   int c;
3157
3158   translation_table = Qnil;
3159   switch (coding->eol_type)
3160     {
3161     case CODING_EOL_CRLF:
3162       while (1)
3163         {
3164           src_base = src;
3165           ONE_MORE_BYTE (c);
3166           if (c == '\r')
3167             {
3168               ONE_MORE_BYTE (c);
3169               if (c != '\n')
3170                 {
3171                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
3172                     {
3173                       coding->result = CODING_FINISH_INCONSISTENT_EOL;
3174                       goto label_end_of_loop;
3175                     }
3176                   src--;
3177                   c = '\r';
3178                 }
3179             }
3180           else if (c == '\n'
3181                    && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
3182             {
3183               coding->result = CODING_FINISH_INCONSISTENT_EOL;
3184               goto label_end_of_loop;
3185             }
3186           EMIT_CHAR (c);
3187         }
3188       break;
3189
3190     case CODING_EOL_CR:
3191       while (1)
3192         {
3193           src_base = src;
3194           ONE_MORE_BYTE (c);
3195           if (c == '\n')
3196             {
3197               if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
3198                 {
3199                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
3200                   goto label_end_of_loop;
3201                 }
3202             }
3203           else if (c == '\r')
3204             c = '\n';
3205           EMIT_CHAR (c);
3206         }
3207       break;
3208
3209     default:                    /* no need for EOL handling */
3210       while (1)
3211         {
3212           src_base = src;
3213           ONE_MORE_BYTE (c);
3214           EMIT_CHAR (c);
3215         }
3216     }
3217
3218  label_end_of_loop:
3219   coding->consumed = coding->consumed_char = src_base - source;
3220   coding->produced = dst - destination;
3221   return;
3222 }
3223
3224 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
3225    format of end-of-line according to `coding->eol_type'.  It also
3226    convert multibyte form 8-bit characters to unibyte if
3227    CODING->src_multibyte is nonzero.  If `coding->mode &
3228    CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code '\r' in source text
3229    also means end-of-line.  */
3230
3231 static void
3232 encode_eol (coding, source, destination, src_bytes, dst_bytes)
3233      struct coding_system *coding;
3234      unsigned char *source, *destination;
3235      int src_bytes, dst_bytes;
3236 {
3237   unsigned char *src = source;
3238   unsigned char *dst = destination;
3239   unsigned char *src_end = src + src_bytes;
3240   unsigned char *dst_end = dst + dst_bytes;
3241   Lisp_Object translation_table;
3242   /* SRC_BASE remembers the start position in source in each loop.
3243      The loop will be exited when there's not enough source text to
3244      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
3245      there's not enough destination area to produce encoded codes
3246      (within macro EMIT_BYTES).  */
3247   unsigned char *src_base;
3248   int c;
3249   int selective_display = coding->mode & CODING_MODE_SELECTIVE_DISPLAY;
3250
3251   translation_table = Qnil;
3252   if (coding->src_multibyte
3253       && *(src_end - 1) == LEADING_CODE_8_BIT_CONTROL)
3254     {
3255       src_end--;
3256       src_bytes--;
3257       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
3258     }
3259
3260   if (coding->eol_type == CODING_EOL_CRLF)
3261     {
3262       while (src < src_end)
3263         {
3264           src_base = src;
3265           c = *src++;
3266           if (c >= 0x20)
3267             EMIT_ONE_BYTE (c);
3268           else if (c == '\n' || (c == '\r' && selective_display))
3269             EMIT_TWO_BYTES ('\r', '\n');
3270           else
3271             EMIT_ONE_BYTE (c);
3272         }
3273       src_base = src;
3274     label_end_of_loop:
3275       ;
3276     }
3277   else
3278     {
3279       if (!dst_bytes || src_bytes <= dst_bytes)
3280         {
3281           safe_bcopy (src, dst, src_bytes);
3282           src_base = src_end;
3283           dst += src_bytes;
3284         }
3285       else
3286         {
3287           if (coding->src_multibyte
3288               && *(src + dst_bytes - 1) == LEADING_CODE_8_BIT_CONTROL)
3289             dst_bytes--;
3290           safe_bcopy (src, dst, dst_bytes);
3291           src_base = src + dst_bytes;
3292           dst = destination + dst_bytes;
3293           coding->result = CODING_FINISH_INSUFFICIENT_DST;
3294         }
3295       if (coding->eol_type == CODING_EOL_CR)
3296         {
3297           for (src = destination; src < dst; src++)
3298             if (*src == '\n') *src = '\r';
3299         }
3300       else if (selective_display)
3301         {
3302           for (src = destination; src < dst; src++)
3303             if (*src == '\r') *src = '\n';
3304         }
3305     }
3306   if (coding->src_multibyte)
3307     dst = destination + str_as_unibyte (destination, dst - destination);
3308
3309   coding->consumed = src_base - source;
3310   coding->produced = dst - destination;
3311   coding->produced_char = coding->produced;
3312 }
3313
3314 \f
3315 /*** 7. C library functions ***/
3316
3317 /* In Emacs Lisp, a coding system is represented by a Lisp symbol which
3318    has a property `coding-system'.  The value of this property is a
3319    vector of length 5 (called the coding-vector).  Among elements of
3320    this vector, the first (element[0]) and the fifth (element[4])
3321    carry important information for decoding/encoding.  Before
3322    decoding/encoding, this information should be set in fields of a
3323    structure of type `coding_system'.
3324
3325    The value of the property `coding-system' can be a symbol of another
3326    subsidiary coding-system.  In that case, Emacs gets coding-vector
3327    from that symbol.
3328
3329    `element[0]' contains information to be set in `coding->type'.  The
3330    value and its meaning is as follows:
3331
3332    0 -- coding_type_emacs_mule
3333    1 -- coding_type_sjis
3334    2 -- coding_type_iso2022
3335    3 -- coding_type_big5
3336    4 -- coding_type_ccl encoder/decoder written in CCL
3337    nil -- coding_type_no_conversion
3338    t -- coding_type_undecided (automatic conversion on decoding,
3339                                no-conversion on encoding)
3340
3341    `element[4]' contains information to be set in `coding->flags' and
3342    `coding->spec'.  The meaning varies by `coding->type'.
3343
   If `coding->type' is `coding_type_iso2022', element[4] is a vector
   of length 32 (of which the first 17 sub-elements are used now).
3346    Meanings of these sub-elements are:
3347
3348    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
3349         If the value is an integer of valid charset, the charset is
3350         assumed to be designated to graphic register N initially.
3351
3352         If the value is minus, it is a minus value of charset which
3353         reserves graphic register N, which means that the charset is
3354         not designated initially but should be designated to graphic
3355         register N just before encoding a character in that charset.
3356
3357         If the value is nil, graphic register N is never used on
3358         encoding.
3359
   sub-element[N] where N is 4 through 16: to be set in `coding->flags'
3361         Each value takes t or nil.  See the section ISO2022 of
3362         `coding.h' for more information.
3363
3364    If `coding->type' is `coding_type_big5', element[4] is t to denote
3365    BIG5-ETen or nil to denote BIG5-HKU.
3366
3367    If `coding->type' takes the other value, element[4] is ignored.
3368
3369    Emacs Lisp's coding systems also carry information about format of
3370    end-of-line in a value of property `eol-type'.  If the value is
3371    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
3372    means CODING_EOL_CR.  If it is not integer, it should be a vector
3373    of subsidiary coding systems of which property `eol-type' has one
3374    of the above values.
3375
3376 */
3377
3378 /* Extract information for decoding/encoding from CODING_SYSTEM_SYMBOL
3379    and set it in CODING.  If CODING_SYSTEM_SYMBOL is invalid, CODING
3380    is setup so that no conversion is necessary and return -1, else
3381    return 0.  */
3382
3383 int
3384 setup_coding_system (coding_system, coding)
3385      Lisp_Object coding_system;
3386      struct coding_system *coding;
3387 {
3388   Lisp_Object coding_spec, coding_type, eol_type, plist;
3389   Lisp_Object val;
3390
3391   /* At first, zero clear all members.  */
3392   bzero (coding, sizeof (struct coding_system));
3393
3394   /* Initialize some fields required for all kinds of coding systems.  */
3395   coding->symbol = coding_system;
3396   coding->heading_ascii = -1;
3397   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
3398   coding->composing = COMPOSITION_DISABLED;
3399   coding->cmp_data = NULL;
3400
3401   if (NILP (coding_system))
3402     goto label_invalid_coding_system;
3403
3404   coding_spec = Fget (coding_system, Qcoding_system);
3405
3406   if (!VECTORP (coding_spec)
3407       || XVECTOR (coding_spec)->size != 5
3408       || !CONSP (XVECTOR (coding_spec)->contents[3]))
3409     goto label_invalid_coding_system;
3410
3411   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
3412   if (VECTORP (eol_type))
3413     {
3414       coding->eol_type = CODING_EOL_UNDECIDED;
3415       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
3416     }
3417   else if (XFASTINT (eol_type) == 1)
3418     {
3419       coding->eol_type = CODING_EOL_CRLF;
3420       coding->common_flags
3421         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3422     }
3423   else if (XFASTINT (eol_type) == 2)
3424     {
3425       coding->eol_type = CODING_EOL_CR;
3426       coding->common_flags
3427         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3428     }
3429   else
3430     coding->eol_type = CODING_EOL_LF;
3431
3432   coding_type = XVECTOR (coding_spec)->contents[0];
3433   /* Try short cut.  */
3434   if (SYMBOLP (coding_type))
3435     {
3436       if (EQ (coding_type, Qt))
3437         {
3438           coding->type = coding_type_undecided;
3439           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
3440         }
3441       else
3442         coding->type = coding_type_no_conversion;
3443       /* Initialize this member.  Any thing other than
3444          CODING_CATEGORY_IDX_UTF_16_BE and
3445          CODING_CATEGORY_IDX_UTF_16_LE are ok because they have
3446          special treatment in detect_eol.  */
3447       coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
3448
3449       return 0;
3450     }
3451
3452   /* Get values of coding system properties:
3453      `post-read-conversion', `pre-write-conversion',
3454      `translation-table-for-decode', `translation-table-for-encode'.  */
3455   plist = XVECTOR (coding_spec)->contents[3];
3456   /* Pre & post conversion functions should be disabled if
3457      inhibit_eol_conversion is nonzero.  This is the case that a code
3458      conversion function is called while those functions are running.  */
3459   if (! inhibit_pre_post_conversion)
3460     {
3461       coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
3462       coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
3463     }
3464   val = Fplist_get (plist, Qtranslation_table_for_decode);
3465   if (SYMBOLP (val))
3466     val = Fget (val, Qtranslation_table_for_decode);
3467   coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
3468   val = Fplist_get (plist, Qtranslation_table_for_encode);
3469   if (SYMBOLP (val))
3470     val = Fget (val, Qtranslation_table_for_encode);
3471   coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
3472   val = Fplist_get (plist, Qcoding_category);
3473   if (!NILP (val))
3474     {
3475       val = Fget (val, Qcoding_category_index);
3476       if (INTEGERP (val))
3477         coding->category_idx = XINT (val);
3478       else
3479         goto label_invalid_coding_system;
3480     }
3481   else
3482     goto label_invalid_coding_system;
3483
3484   /* If the coding system has non-nil `composition' property, enable
3485      composition handling.  */
3486   val = Fplist_get (plist, Qcomposition);
3487   if (!NILP (val))
3488     coding->composing = COMPOSITION_NO;
3489
3490   switch (XFASTINT (coding_type))
3491     {
3492     case 0:
3493       coding->type = coding_type_emacs_mule;
3494       coding->common_flags
3495         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3496       coding->composing = COMPOSITION_NO;
3497       if (!NILP (coding->post_read_conversion))
3498         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
3499       if (!NILP (coding->pre_write_conversion))
3500         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
3501       break;
3502
3503     case 1:
3504       coding->type = coding_type_sjis;
3505       coding->common_flags
3506         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3507       break;
3508
3509     case 2:
3510       coding->type = coding_type_iso2022;
3511       coding->common_flags
3512         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3513       {
3514         Lisp_Object val, temp;
3515         Lisp_Object *flags;
3516         int i, charset, reg_bits = 0;
3517
3518         val = XVECTOR (coding_spec)->contents[4];
3519
3520         if (!VECTORP (val) || XVECTOR (val)->size != 32)
3521           goto label_invalid_coding_system;
3522
3523         flags = XVECTOR (val)->contents;
3524         coding->flags
3525           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
3526              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
3527              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
3528              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
3529              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
3530              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
3531              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
3532              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
3533              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
3534              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
3535              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
3536              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
3537              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
3538              );
3539
3540         /* Invoke graphic register 0 to plane 0.  */
3541         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
3542         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
3543         CODING_SPEC_ISO_INVOCATION (coding, 1)
3544           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
3545         /* Not single shifting at first.  */
3546         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
3547         /* Beginning of buffer should also be regarded as bol. */
3548         CODING_SPEC_ISO_BOL (coding) = 1;
3549
3550         for (charset = 0; charset <= MAX_CHARSET; charset++)
3551           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
3552         val = Vcharset_revision_alist;
3553         while (CONSP (val))
3554           {
3555             charset = get_charset_id (Fcar_safe (XCAR (val)));
3556             if (charset >= 0
3557                 && (temp = Fcdr_safe (XCAR (val)), INTEGERP (temp))
3558                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
3559               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
3560             val = XCDR (val);
3561           }
3562
3563         /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
3564            FLAGS[REG] can be one of below:
3565                 integer CHARSET: CHARSET occupies register I,
3566                 t: designate nothing to REG initially, but can be used
3567                   by any charsets,
3568                 list of integer, nil, or t: designate the first
3569                   element (if integer) to REG initially, the remaining
3570                   elements (if integer) is designated to REG on request,
3571                   if an element is t, REG can be used by any charsets,
3572                 nil: REG is never used.  */
3573         for (charset = 0; charset <= MAX_CHARSET; charset++)
3574           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3575             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
3576         for (i = 0; i < 4; i++)
3577           {
3578             if (INTEGERP (flags[i])
3579                 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
3580                 || (charset = get_charset_id (flags[i])) >= 0)
3581               {
3582                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3583                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
3584               }
3585             else if (EQ (flags[i], Qt))
3586               {
3587                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3588                 reg_bits |= 1 << i;
3589                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3590               }
3591             else if (CONSP (flags[i]))
3592               {
3593                 Lisp_Object tail;
3594                 tail = flags[i];
3595
3596                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3597                 if (INTEGERP (XCAR (tail))
3598                     && (charset = XINT (XCAR (tail)),
3599                         CHARSET_VALID_P (charset))
3600                     || (charset = get_charset_id (XCAR (tail))) >= 0)
3601                   {
3602                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3603                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
3604                   }
3605                 else
3606                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3607                 tail = XCDR (tail);
3608                 while (CONSP (tail))
3609                   {
3610                     if (INTEGERP (XCAR (tail))
3611                         && (charset = XINT (XCAR (tail)),
3612                             CHARSET_VALID_P (charset))
3613                         || (charset = get_charset_id (XCAR (tail))) >= 0)
3614                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3615                         = i;
3616                     else if (EQ (XCAR (tail), Qt))
3617                       reg_bits |= 1 << i;
3618                     tail = XCDR (tail);
3619                   }
3620               }
3621             else
3622               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3623
3624             CODING_SPEC_ISO_DESIGNATION (coding, i)
3625               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
3626           }
3627
3628         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
3629           {
3630             /* REG 1 can be used only by locking shift in 7-bit env.  */
3631             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
3632               reg_bits &= ~2;
3633             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3634               /* Without any shifting, only REG 0 and 1 can be used.  */
3635               reg_bits &= 3;
3636           }
3637
3638         if (reg_bits)
3639           for (charset = 0; charset <= MAX_CHARSET; charset++)
3640             {
3641               if (CHARSET_VALID_P (charset)
3642                   && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3643                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
3644                 {
3645                   /* There exist some default graphic registers to be
3646                      used by CHARSET.  */
3647
3648                   /* We had better avoid designating a charset of
3649                      CHARS96 to REG 0 as far as possible.  */
3650                   if (CHARSET_CHARS (charset) == 96)
3651                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3652                       = (reg_bits & 2
3653                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3654                   else
3655                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3656                       = (reg_bits & 1
3657                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3658                 }
3659             }
3660       }
3661       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3662       coding->spec.iso2022.last_invalid_designation_register = -1;
3663       break;
3664
3665     case 3:
3666       coding->type = coding_type_big5;
3667       coding->common_flags
3668         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3669       coding->flags
3670         = (NILP (XVECTOR (coding_spec)->contents[4])
3671            ? CODING_FLAG_BIG5_HKU
3672            : CODING_FLAG_BIG5_ETEN);
3673       break;
3674
3675     case 4:
3676       coding->type = coding_type_ccl;
3677       coding->common_flags
3678         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3679       {
3680         val = XVECTOR (coding_spec)->contents[4];
3681         if (! CONSP (val)
3682             || setup_ccl_program (&(coding->spec.ccl.decoder),
3683                                   XCAR (val)) < 0
3684             || setup_ccl_program (&(coding->spec.ccl.encoder),
3685                                   XCDR (val)) < 0)
3686           goto label_invalid_coding_system;
3687
3688         bzero (coding->spec.ccl.valid_codes, 256);
3689         val = Fplist_get (plist, Qvalid_codes);
3690         if (CONSP (val))
3691           {
3692             Lisp_Object this;
3693
3694             for (; CONSP (val); val = XCDR (val))
3695               {
3696                 this = XCAR (val);
3697                 if (INTEGERP (this)
3698                     && XINT (this) >= 0 && XINT (this) < 256)
3699                   coding->spec.ccl.valid_codes[XINT (this)] = 1;
3700                 else if (CONSP (this)
3701                          && INTEGERP (XCAR (this))
3702                          && INTEGERP (XCDR (this)))
3703                   {
3704                     int start = XINT (XCAR (this));
3705                     int end = XINT (XCDR (this));
3706
3707                     if (start >= 0 && start <= end && end < 256)
3708                       while (start <= end)
3709                         coding->spec.ccl.valid_codes[start++] = 1;
3710                   }
3711               }
3712           }
3713       }
3714       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3715       coding->spec.ccl.cr_carryover = 0;
3716       coding->spec.ccl.eight_bit_carryover[0] = 0;
3717       break;
3718
3719     case 5:
3720       coding->type = coding_type_raw_text;
3721       break;
3722
3723     default:
3724       goto label_invalid_coding_system;
3725     }
3726   return 0;
3727
3728  label_invalid_coding_system:
3729   coding->type = coding_type_no_conversion;
3730   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3731   coding->common_flags = 0;
3732   coding->eol_type = CODING_EOL_LF;
3733   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3734   return -1;
3735 }
3736
3737 /* Free memory blocks allocated for storing composition information.  */
3738
3739 void
3740 coding_free_composition_data (coding)
3741      struct coding_system *coding;
3742 {
3743   struct composition_data *cmp_data = coding->cmp_data, *next;
3744
3745   if (!cmp_data)
3746     return;
3747   /* Memory blocks are chained.  At first, rewind to the first, then,
3748      free blocks one by one.  */
3749   while (cmp_data->prev)
3750     cmp_data = cmp_data->prev;
3751   while (cmp_data)
3752     {
3753       next = cmp_data->next;
3754       xfree (cmp_data);
3755       cmp_data = next;
3756     }
3757   coding->cmp_data = NULL;
3758 }
3759
3760 /* Set `char_offset' member of all memory blocks pointed by
3761    coding->cmp_data to POS.  */
3762
3763 void
3764 coding_adjust_composition_offset (coding, pos)
3765      struct coding_system *coding;
3766      int pos;
3767 {
3768   struct composition_data *cmp_data;
3769
3770   for (cmp_data = coding->cmp_data; cmp_data; cmp_data = cmp_data->next)
3771     cmp_data->char_offset = pos;
3772 }
3773
3774 /* Setup raw-text or one of its subsidiaries in the structure
3775    coding_system CODING according to the already setup value eol_type
3776    in CODING.  CODING should be setup for some coding system in
3777    advance.  */
3778
3779 void
3780 setup_raw_text_coding_system (coding)
3781      struct coding_system *coding;
3782 {
3783   if (coding->type != coding_type_raw_text)
3784     {
3785       coding->symbol = Qraw_text;
3786       coding->type = coding_type_raw_text;
3787       if (coding->eol_type != CODING_EOL_UNDECIDED)
3788         {
3789           Lisp_Object subsidiaries;
3790           subsidiaries = Fget (Qraw_text, Qeol_type);
3791
3792           if (VECTORP (subsidiaries)
3793               && XVECTOR (subsidiaries)->size == 3)
3794             coding->symbol
3795               = XVECTOR (subsidiaries)->contents[coding->eol_type];
3796         }
3797       setup_coding_system (coding->symbol, coding);
3798     }
3799   return;
3800 }
3801
3802 /* Emacs has a mechanism to automatically detect a coding system if it
3803    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
3804    it's impossible to distinguish some coding systems accurately
3805    because they use the same range of codes.  So, at first, coding
3806    systems are categorized into the following categories:
3807
3808    o coding-category-emacs-mule
3809
3810         The category for a coding system which has the same code range
3811         as Emacs' internal format.  Assigned the coding-system (Lisp
3812         symbol) `emacs-mule' by default.
3813
3814    o coding-category-sjis
3815
3816         The category for a coding system which has the same code range
3817         as SJIS.  Assigned the coding-system (Lisp
3818         symbol) `japanese-shift-jis' by default.
3819
3820    o coding-category-iso-7
3821
3822         The category for a coding system which has the same code range
3823         as ISO2022 of 7-bit environment.  This doesn't use any locking
3824         shift and single shift functions.  This can encode/decode all
3825         charsets.  Assigned the coding-system (Lisp symbol)
3826         `iso-2022-7bit' by default.
3827
3828    o coding-category-iso-7-tight
3829
3830         Same as coding-category-iso-7 except that this can
3831         encode/decode only the specified charsets.
3832
3833    o coding-category-iso-8-1
3834
3835         The category for a coding system which has the same code range
3836         as ISO2022 of 8-bit environment and graphic plane 1 used only
3837         for DIMENSION1 charset.  This doesn't use any locking shift
3838         and single shift functions.  Assigned the coding-system (Lisp
3839         symbol) `iso-latin-1' by default.
3840
3841    o coding-category-iso-8-2
3842
3843         The category for a coding system which has the same code range
3844         as ISO2022 of 8-bit environment and graphic plane 1 used only
3845         for DIMENSION2 charset.  This doesn't use any locking shift
3846         and single shift functions.  Assigned the coding-system (Lisp
3847         symbol) `japanese-iso-8bit' by default.
3848
3849    o coding-category-iso-7-else
3850
3851         The category for a coding system which has the same code range
3852         as ISO2022 of 7-bit environment but uses locking shift or
3853         single shift functions.  Assigned the coding-system (Lisp
3854         symbol) `iso-2022-7bit-lock' by default.
3855
3856    o coding-category-iso-8-else
3857
3858         The category for a coding system which has the same code range
3859         as ISO2022 of 8-bit environment but uses locking shift or
3860         single shift functions.  Assigned the coding-system (Lisp
3861         symbol) `iso-2022-8bit-ss2' by default.
3862
3863    o coding-category-big5
3864
3865         The category for a coding system which has the same code range
3866         as BIG5.  Assigned the coding-system (Lisp symbol)
3867         `cn-big5' by default.
3868
3869    o coding-category-utf-8
3870
3871         The category for a coding system which has the same code range
3872         as UTF-8 (cf. RFC2279).  Assigned the coding-system (Lisp
3873         symbol) `utf-8' by default.
3874
3875    o coding-category-utf-16-be
3876
3877         The category for a coding system in which a text has an
3878         Unicode signature (cf. Unicode Standard) in the order of BIG
3879         endian at the head.  Assigned the coding-system (Lisp symbol)
3880         `utf-16-be' by default.
3881
3882    o coding-category-utf-16-le
3883
3884         The category for a coding system in which a text has an
3885         Unicode signature (cf. Unicode Standard) in the order of
3886         LITTLE endian at the head.  Assigned the coding-system (Lisp
3887         symbol) `utf-16-le' by default.
3888
3889    o coding-category-ccl
3890
3891         The category for a coding system of which encoder/decoder is
3892         written in CCL programs.  The default value is nil, i.e., no
3893         coding system is assigned.
3894
3895    o coding-category-binary
3896
3897         The category for a coding system not categorized in any of the
3898         above.  Assigned the coding-system (Lisp symbol)
3899         `no-conversion' by default.
3900
3901    Each of them is a Lisp symbol and the value is an actual
3902    `coding-system' (this is also a Lisp symbol) assigned by a user.
3903    What Emacs does actually is to detect a category of coding system.
3904    Then, it uses a `coding-system' assigned to it.  If Emacs can't
3905    decide a single possible category, it selects a category of the
3906    highest priority.  Priorities of categories are also specified by a
3907    user in a Lisp variable `coding-category-list'.
3908
3909 */
3910
/* Table indexed by byte value and consulted by detect_coding_mask: a
   nonzero entry means that byte is passed over during the initial
   scan of the text.  detect_coding_mask itself rewrites the entries
   for ISO_CODE_SO, ISO_CODE_SI and ISO_CODE_ESC; the other entries
   are presumably filled in elsewhere in this file.  */
static int ascii_skip_code[256];
3913
3914 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
3915    If it detects possible coding systems, return an integer in which
3916    appropriate flag bits are set.  Flag bits are defined by macros
3917    CODING_CATEGORY_MASK_XXX in `coding.h'.  If PRIORITIES is non-NULL,
3918    it should point the table `coding_priorities'.  In that case, only
3919    the flag bit for a coding system of the highest priority is set in
3920    the returned value.  If MULTIBYTEP is nonzero, 8-bit codes of the
3921    range 0x80..0x9F are in multibyte form.
3922
3923    How many ASCII characters are at the head is returned as *SKIP.  */
3924
3925 static int
3926 detect_coding_mask (source, src_bytes, priorities, skip, multibytep)
3927      unsigned char *source;
3928      int src_bytes, *priorities, *skip;
3929      int multibytep;
3930 {
3931   register unsigned char c;
3932   unsigned char *src = source, *src_end = source + src_bytes;
3933   unsigned int mask, utf16_examined_p, iso2022_examined_p;
3934   int i;
3935
3936   /* At first, skip all ASCII characters and control characters except
3937      for three ISO2022 specific control characters.  */
3938   ascii_skip_code[ISO_CODE_SO] = 0;
3939   ascii_skip_code[ISO_CODE_SI] = 0;
3940   ascii_skip_code[ISO_CODE_ESC] = 0;
3941
3942  label_loop_detect_coding:
3943   while (src < src_end && ascii_skip_code[*src]) src++;
3944   *skip = src - source;
3945
3946   if (src >= src_end)
3947     /* We found nothing other than ASCII.  There's nothing to do.  */
3948     return 0;
3949
3950   c = *src;
3951   /* The text seems to be encoded in some multilingual coding system.
3952      Now, try to find in which coding system the text is encoded.  */
3953   if (c < 0x80)
3954     {
3955       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
3956       /* C is an ISO2022 specific control code of C0.  */
3957       mask = detect_coding_iso2022 (src, src_end, multibytep);
3958       if (mask == 0)
3959         {
3960           /* No valid ISO2022 code follows C.  Try again.  */
3961           src++;
3962           if (c == ISO_CODE_ESC)
3963             ascii_skip_code[ISO_CODE_ESC] = 1;
3964           else
3965             ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
3966           goto label_loop_detect_coding;
3967         }
3968       if (priorities)
3969         {
3970           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3971             {
3972               if (mask & priorities[i])
3973                 return priorities[i];
3974             }
3975           return CODING_CATEGORY_MASK_RAW_TEXT;
3976         }
3977     }
3978   else
3979     {
3980       int try;
3981
3982       if (multibytep && c == LEADING_CODE_8_BIT_CONTROL)
3983         c = *src++ - 0x20;
3984
3985       if (c < 0xA0)
3986         {
3987           /* C is the first byte of SJIS character code,
3988              or a leading-code of Emacs' internal format (emacs-mule),
3989              or the first byte of UTF-16.  */
3990           try = (CODING_CATEGORY_MASK_SJIS
3991                   | CODING_CATEGORY_MASK_EMACS_MULE
3992                   | CODING_CATEGORY_MASK_UTF_16_BE
3993                   | CODING_CATEGORY_MASK_UTF_16_LE);
3994
3995           /* Or, if C is a special latin extra code,
3996              or is an ISO2022 specific control code of C1 (SS2 or SS3),
3997              or is an ISO2022 control-sequence-introducer (CSI),
3998              we should also consider the possibility of ISO2022 codings.  */
3999           if ((VECTORP (Vlatin_extra_code_table)
4000                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
4001               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
4002               || (c == ISO_CODE_CSI
4003                   && (src < src_end
4004                       && (*src == ']'
4005                           || ((*src == '0' || *src == '1' || *src == '2')
4006                               && src + 1 < src_end
4007                               && src[1] == ']')))))
4008             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
4009                      | CODING_CATEGORY_MASK_ISO_8BIT);
4010         }
4011       else
4012         /* C is a character of ISO2022 in graphic plane right,
4013            or a SJIS's 1-byte character code (i.e. JISX0201),
4014            or the first byte of BIG5's 2-byte code,
4015            or the first byte of UTF-8/16.  */
4016         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
4017                 | CODING_CATEGORY_MASK_ISO_8BIT
4018                 | CODING_CATEGORY_MASK_SJIS
4019                 | CODING_CATEGORY_MASK_BIG5
4020                 | CODING_CATEGORY_MASK_UTF_8
4021                 | CODING_CATEGORY_MASK_UTF_16_BE
4022                 | CODING_CATEGORY_MASK_UTF_16_LE);
4023
4024       /* Or, we may have to consider the possibility of CCL.  */
4025       if (coding_system_table[CODING_CATEGORY_IDX_CCL]
4026           && (coding_system_table[CODING_CATEGORY_IDX_CCL]
4027               ->spec.ccl.valid_codes)[c])
4028         try |= CODING_CATEGORY_MASK_CCL;
4029
4030       mask = 0;
4031       utf16_examined_p = iso2022_examined_p = 0;
4032       if (priorities)
4033         {
4034           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
4035             {
4036               if (!iso2022_examined_p
4037                   && (priorities[i] & try & CODING_CATEGORY_MASK_ISO))
4038                 {
4039                   mask |= detect_coding_iso2022 (src, src_end);
4040                   iso2022_examined_p = 1;
4041                 }
4042               else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
4043                 mask |= detect_coding_sjis (src, src_end, multibytep);
4044               else if (priorities[i] & try & CODING_CATEGORY_MASK_UTF_8)
4045                 mask |= detect_coding_utf_8 (src, src_end, multibytep);
4046               else if (!utf16_examined_p
4047                        && (priorities[i] & try &
4048                            CODING_CATEGORY_MASK_UTF_16_BE_LE))
4049                 {
4050                   mask |= detect_coding_utf_16 (src, src_end, multibytep);
4051                   utf16_examined_p = 1;
4052                 }
4053               else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
4054                 mask |= detect_coding_big5 (src, src_end, multibytep);
4055               else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
4056                 mask |= detect_coding_emacs_mule (src, src_end, multibytep);
4057               else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
4058                 mask |= detect_coding_ccl (src, src_end, multibytep);
4059               else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
4060                 mask |= CODING_CATEGORY_MASK_RAW_TEXT;
4061               else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
4062                 mask |= CODING_CATEGORY_MASK_BINARY;
4063               if (mask & priorities[i])
4064                 return priorities[i];
4065             }
4066           return CODING_CATEGORY_MASK_RAW_TEXT;
4067         }
4068       if (try & CODING_CATEGORY_MASK_ISO)
4069         mask |= detect_coding_iso2022 (src, src_end, multibytep);
4070       if (try & CODING_CATEGORY_MASK_SJIS)
4071         mask |= detect_coding_sjis (src, src_end, multibytep);
4072       if (try & CODING_CATEGORY_MASK_BIG5)
4073         mask |= detect_coding_big5 (src, src_end, multibytep);
4074       if (try & CODING_CATEGORY_MASK_UTF_8)
4075         mask |= detect_coding_utf_8 (src, src_end, multibytep);
4076       if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE)
4077         mask |= detect_coding_utf_16 (src, src_end, multibytep);
4078       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
4079         mask |= detect_coding_emacs_mule (src, src_end, multibytep);
4080       if (try & CODING_CATEGORY_MASK_CCL)
4081         mask |= detect_coding_ccl (src, src_end, multibytep);
4082     }
4083   return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
4084 }
4085
4086 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
4087    The information of the detected coding system is set in CODING.  */
4088
4089 void
4090 detect_coding (coding, src, src_bytes)
4091      struct coding_system *coding;
4092      unsigned char *src;
4093      int src_bytes;
4094 {
4095   unsigned int idx;
4096   int skip, mask;
4097   Lisp_Object val;
4098
4099   val = Vcoding_category_list;
4100   mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip,
4101                              coding->src_multibyte);
4102   coding->heading_ascii = skip;
4103
4104   if (!mask) return;
4105
4106   /* We found a single coding system of the highest priority in MASK.  */
4107   idx = 0;
4108   while (mask && ! (mask & 1)) mask >>= 1, idx++;
4109   if (! mask)
4110     idx = CODING_CATEGORY_IDX_RAW_TEXT;
4111
4112   val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[idx])->value;
4113
4114   if (coding->eol_type != CODING_EOL_UNDECIDED)
4115     {
4116       Lisp_Object tmp;
4117
4118       tmp = Fget (val, Qeol_type);
4119       if (VECTORP (tmp))
4120         val = XVECTOR (tmp)->contents[coding->eol_type];
4121     }
4122
4123   /* Setup this new coding system while preserving some slots.  */
4124   {
4125     int src_multibyte = coding->src_multibyte;
4126     int dst_multibyte = coding->dst_multibyte;
4127
4128     setup_coding_system (val, coding);
4129     coding->src_multibyte = src_multibyte;
4130     coding->dst_multibyte = dst_multibyte;
4131     coding->heading_ascii = skip;
4132   }
4133 }
4134
4135 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
4136    SOURCE is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
4137    CODING_EOL_CR, and CODING_EOL_UNDECIDED.
4138
4139    How many non-eol characters are at the head is returned as *SKIP.  */
4140
4141 #define MAX_EOL_CHECK_COUNT 3
4142
4143 static int
4144 detect_eol_type (source, src_bytes, skip)
4145      unsigned char *source;
4146      int src_bytes, *skip;
4147 {
4148   unsigned char *src = source, *src_end = src + src_bytes;
4149   unsigned char c;
4150   int total = 0;                /* How many end-of-lines are found so far.  */
4151   int eol_type = CODING_EOL_UNDECIDED;
4152   int this_eol_type;
4153
4154   *skip = 0;
4155
4156   while (src < src_end && total < MAX_EOL_CHECK_COUNT)
4157     {
4158       c = *src++;
4159       if (c == '\n' || c == '\r')
4160         {
4161           if (*skip == 0)
4162             *skip = src - 1 - source;
4163           total++;
4164           if (c == '\n')
4165             this_eol_type = CODING_EOL_LF;
4166           else if (src >= src_end || *src != '\n')
4167             this_eol_type = CODING_EOL_CR;
4168           else
4169             this_eol_type = CODING_EOL_CRLF, src++;
4170
4171           if (eol_type == CODING_EOL_UNDECIDED)
4172             /* This is the first end-of-line.  */
4173             eol_type = this_eol_type;
4174           else if (eol_type != this_eol_type)
4175             {
4176               /* The found type is different from what found before.  */
4177               eol_type = CODING_EOL_INCONSISTENT;
4178               break;
4179             }
4180         }
4181     }
4182
4183   if (*skip == 0)
4184     *skip = src_end - source;
4185   return eol_type;
4186 }
4187
4188 /* Like detect_eol_type, but detect EOL type in 2-octet
4189    big-endian/little-endian format for coding systems utf-16-be and
4190    utf-16-le.  */
4191
4192 static int
4193 detect_eol_type_in_2_octet_form (source, src_bytes, skip, big_endian_p)
4194      unsigned char *source;
4195      int src_bytes, *skip, big_endian_p;
4196 {
4197   unsigned char *src = source, *src_end = src + src_bytes;
4198   unsigned int c1, c2;
4199   int total = 0;                /* How many end-of-lines are found so far.  */
4200   int eol_type = CODING_EOL_UNDECIDED;
4201   int this_eol_type;
4202   int msb, lsb;
4203
4204   if (big_endian_p)
4205     msb = 0, lsb = 1;
4206   else
4207     msb = 1, lsb = 0;
4208
4209   *skip = 0;
4210
4211   while ((src + 1) < src_end && total < MAX_EOL_CHECK_COUNT)
4212     {
4213       c1 = (src[msb] << 8) | (src[lsb]);
4214       src += 2;
4215
4216       if (c1 == '\n' || c1 == '\r')
4217         {
4218           if (*skip == 0)
4219             *skip = src - 2 - source;
4220           total++;
4221           if (c1 == '\n')
4222             {
4223               this_eol_type = CODING_EOL_LF;
4224             }
4225           else
4226             {
4227               if ((src + 1) >= src_end)
4228                 {
4229                   this_eol_type = CODING_EOL_CR;
4230                 }
4231               else
4232                 {
4233                   c2 = (src[msb] << 8) | (src[lsb]);
4234                   if (c2 == '\n')
4235                     this_eol_type = CODING_EOL_CRLF, src += 2;
4236                   else
4237                     this_eol_type = CODING_EOL_CR;
4238                 }
4239             }
4240
4241           if (eol_type == CODING_EOL_UNDECIDED)
4242             /* This is the first end-of-line.  */
4243             eol_type = this_eol_type;
4244           else if (eol_type != this_eol_type)
4245             {
4246               /* The found type is different from what found before.  */
4247               eol_type = CODING_EOL_INCONSISTENT;
4248               break;
4249             }
4250         }
4251     }
4252
4253   if (*skip == 0)
4254     *skip = src_end - source;
4255   return eol_type;
4256 }
4257
4258 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
4259    is encoded.  If it detects an appropriate format of end-of-line, it
4260    sets the information in *CODING.  */
4261
4262 void
4263 detect_eol (coding, src, src_bytes)
4264      struct coding_system *coding;
4265      unsigned char *src;
4266      int src_bytes;
4267 {
4268   Lisp_Object val;
4269   int skip;
4270   int eol_type;
4271
4272   switch (coding->category_idx)
4273     {
4274     case CODING_CATEGORY_IDX_UTF_16_BE:
4275       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 1);
4276       break;
4277     case CODING_CATEGORY_IDX_UTF_16_LE:
4278       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 0);
4279       break;
4280     default:
4281       eol_type = detect_eol_type (src, src_bytes, &skip);
4282       break;
4283     }
4284
4285   if (coding->heading_ascii > skip)
4286     coding->heading_ascii = skip;
4287   else
4288     skip = coding->heading_ascii;
4289
4290   if (eol_type == CODING_EOL_UNDECIDED)
4291     return;
4292   if (eol_type == CODING_EOL_INCONSISTENT)
4293     {
4294 #if 0
4295       /* This code is suppressed until we find a better way to
4296          distinguish raw text file and binary file.  */
4297
4298       /* If we have already detected that the coding is raw-text, the
4299          coding should actually be no-conversion.  */
4300       if (coding->type == coding_type_raw_text)
4301         {
4302           setup_coding_system (Qno_conversion, coding);
4303           return;
4304         }
4305       /* Else, let's decode only text code anyway.  */
4306 #endif /* 0 */
4307       eol_type = CODING_EOL_LF;
4308     }
4309
4310   val = Fget (coding->symbol, Qeol_type);
4311   if (VECTORP (val) && XVECTOR (val)->size == 3)
4312     {
4313       int src_multibyte = coding->src_multibyte;
4314       int dst_multibyte = coding->dst_multibyte;
4315
4316       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
4317       coding->src_multibyte = src_multibyte;
4318       coding->dst_multibyte = dst_multibyte;
4319       coding->heading_ascii = skip;
4320     }
4321 }
4322
/* Number of bytes added to every conversion buffer size estimate.  */
#define CONVERSION_BUFFER_EXTRA_ROOM 256

/* Factor by which the source size is multiplied when estimating the
   buffer needed for decoding with CODING (a pointer to struct
   coding_system): 3 for ISO2022, the decoder's own
   `buf_magnification' for CCL, and 2 for everything else.  CODING is
   evaluated more than once, so it must have no side effects.  */
#define DECODING_BUFFER_MAG(coding)                             \
  ((coding)->type == coding_type_iso2022                        \
   ? 3                                                          \
   : ((coding)->type == coding_type_ccl                         \
      ? (coding)->spec.ccl.decoder.buf_magnification            \
      : 2))
4331
4332 /* Return maximum size (bytes) of a buffer enough for decoding
4333    SRC_BYTES of text encoded in CODING.  */
4334
4335 int
4336 decoding_buffer_size (coding, src_bytes)
4337      struct coding_system *coding;
4338      int src_bytes;
4339 {
4340   return (src_bytes * DECODING_BUFFER_MAG (coding)
4341           + CONVERSION_BUFFER_EXTRA_ROOM);
4342 }
4343
4344 /* Return maximum size (bytes) of a buffer enough for encoding
4345    SRC_BYTES of text to CODING.  */
4346
4347 int
4348 encoding_buffer_size (coding, src_bytes)
4349      struct coding_system *coding;
4350      int src_bytes;
4351 {
4352   int magnification;
4353
4354   if (coding->type == coding_type_ccl)
4355     magnification = coding->spec.ccl.encoder.buf_magnification;
4356   else if (CODING_REQUIRE_ENCODING (coding))
4357     magnification = 3;
4358   else
4359     magnification = 1;
4360
4361   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
4362 }
4363
/* Scratch buffer used while converting text.  */
struct conversion_buffer
{
  /* Number of bytes currently allocated for DATA.  */
  int size;
  /* Nonzero (1) when DATA was obtained with alloca and therefore must
     not be passed to xfree.  */
  int on_stack;
  /* The storage itself.  */
  unsigned char *data;
};
4371
/* Don't use alloca for allocating memory space larger than this, lest
   we overflow the stack.  The value is parenthesized so that the
   macro can be used safely inside any expression.  */
#define MAX_ALLOCA (16*1024)

/* Allocate LEN bytes of memory for BUF (struct conversion_buffer, not
   a pointer to one).  Requests smaller than MAX_ALLOCA are served by
   alloca, larger ones by xmalloc; BUF.on_stack records which.  This
   has to stay a macro because alloca'ed memory must belong to the
   calling function.  BUF and LEN are evaluated more than once, so
   they must have no side effects.  */
#define allocate_conversion_buffer(buf, len)            \
  do {                                                  \
    if ((len) < MAX_ALLOCA)                             \
      {                                                 \
        (buf).data = (unsigned char *) alloca (len);    \
        (buf).on_stack = 1;                             \
      }                                                 \
    else                                                \
      {                                                 \
        (buf).data = (unsigned char *) xmalloc (len);   \
        (buf).on_stack = 0;                             \
      }                                                 \
    (buf).size = (len);                                 \
  } while (0)
4391
4392 /* Double the allocated memory for *BUF.  */
4393 static void
4394 extend_conversion_buffer (buf)
4395      struct conversion_buffer *buf;
4396 {
4397   if (buf->on_stack)
4398     {
4399       unsigned char *save = buf->data;
4400       buf->data = (unsigned char *) xmalloc (buf->size * 2);
4401       bcopy (save, buf->data, buf->size);
4402       buf->on_stack = 0;
4403     }
4404   else
4405     {
4406       buf->data = (unsigned char *) xrealloc (buf->data, buf->size * 2);
4407     }
4408   buf->size *= 2;
4409 }
4410
4411 /* Free the allocated memory for BUF if it is not on stack.  */
4412 static void
4413 free_conversion_buffer (buf)
4414      struct conversion_buffer *buf;
4415 {
4416   if (!buf->on_stack)
4417     xfree (buf->data);
4418 }
4419
4420 int
4421 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
4422      struct coding_system *coding;
4423      unsigned char *source, *destination;
4424      int src_bytes, dst_bytes, encodep;
4425 {
4426   struct ccl_program *ccl
4427     = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
4428   unsigned char *dst = destination;
4429
4430   ccl->suppress_error = coding->suppress_error;
4431   ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4432   if (encodep)
4433     {
4434       /* On encoding, EOL format is converted within ccl_driver.  For
4435          that, setup proper information in the structure CCL.  */
4436       ccl->eol_type = coding->eol_type;
4437       if (ccl->eol_type ==CODING_EOL_UNDECIDED)
4438         ccl->eol_type = CODING_EOL_LF;
4439       ccl->cr_consumed = coding->spec.ccl.cr_carryover;
4440     }
4441   ccl->multibyte = coding->src_multibyte;
4442   if (coding->spec.ccl.eight_bit_carryover[0] != 0)
4443     {
4444       /* Move carryover bytes to DESTINATION.  */
4445       unsigned char *p = coding->spec.ccl.eight_bit_carryover;
4446       while (*p)
4447         *dst++ = *p++;
4448       coding->spec.ccl.eight_bit_carryover[0] = 0;
4449       if (dst_bytes)
4450         dst_bytes -= dst - destination;
4451     }
4452
4453   coding->produced = (ccl_driver (ccl, source, dst, src_bytes, dst_bytes,
4454                                   &(coding->consumed))
4455                       + dst - destination);
4456
4457   if (encodep)
4458     {
4459       coding->produced_char = coding->produced;
4460       coding->spec.ccl.cr_carryover = ccl->cr_consumed;
4461     }
4462   else
4463     {
4464       /* On decoding, the destination should always multibyte.  But,
4465          CCL program might have been generated an invalid multibyte
4466          sequence.  Here we make such a sequence valid as
4467          multibyte.  */
4468       int bytes
4469         = dst_bytes ? dst_bytes : source + coding->consumed - destination;
4470
4471       if ((coding->consumed < src_bytes
4472            || !ccl->last_block)
4473           && coding->produced >= 1
4474           && destination[coding->produced - 1] >= 0x80)
4475         {
4476           /* We should not convert the tailing 8-bit codes to
4477              multibyte form even if they doesn't form a valid
4478              multibyte sequence.  They may form a valid sequence in
4479              the next call.  */
4480           int carryover = 0;
4481
4482           if (destination[coding->produced - 1] < 0xA0)
4483             carryover = 1;
4484           else if (coding->produced >= 2)
4485             {
4486               if (destination[coding->produced - 2] >= 0x80)
4487                 {
4488                   if (destination[coding->produced - 2] < 0xA0)
4489                     carryover = 2;
4490                   else if (coding->produced >= 3
4491                            && destination[coding->produced - 3] >= 0x80
4492                            && destination[coding->produced - 3] < 0xA0)
4493                     carryover = 3;
4494                 }
4495             }
4496           if (carryover > 0)
4497             {
4498               BCOPY_SHORT (destination + coding->produced - carryover,
4499                            coding->spec.ccl.eight_bit_carryover,
4500                            carryover);
4501               coding->spec.ccl.eight_bit_carryover[carryover] = 0;
4502               coding->produced -= carryover;
4503             }
4504         }
4505       coding->produced = str_as_multibyte (destination, bytes,
4506                                            coding->produced,
4507                                            &(coding->produced_char));
4508     }
4509
4510   switch (ccl->status)
4511     {
4512     case CCL_STAT_SUSPEND_BY_SRC:
4513       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
4514       break;
4515     case CCL_STAT_SUSPEND_BY_DST:
4516       coding->result = CODING_FINISH_INSUFFICIENT_DST;
4517       break;
4518     case CCL_STAT_QUIT:
4519     case CCL_STAT_INVALID_CMD:
4520       coding->result = CODING_FINISH_INTERRUPT;
4521       break;
4522     default:
4523       coding->result = CODING_FINISH_NORMAL;
4524       break;
4525     }
4526   return coding->result;
4527 }
4528
4529 /* Decode EOL format of the text at PTR of BYTES length destructively
4530    according to CODING->eol_type.  This is called after the CCL
4531    program produced a decoded text at PTR.  If we do CRLF->LF
4532    conversion, update CODING->produced and CODING->produced_char.  */
4533
4534 static void
4535 decode_eol_post_ccl (coding, ptr, bytes)
4536      struct coding_system *coding;
4537      unsigned char *ptr;
4538      int bytes;
4539 {
4540   Lisp_Object val, saved_coding_symbol;
4541   unsigned char *pend = ptr + bytes;
4542   int dummy;
4543
4544   /* Remember the current coding system symbol.  We set it back when
4545      an inconsistent EOL is found so that `last-coding-system-used' is
4546      set to the coding system that doesn't specify EOL conversion.  */
4547   saved_coding_symbol = coding->symbol;
4548
4549   coding->spec.ccl.cr_carryover = 0;
4550   if (coding->eol_type == CODING_EOL_UNDECIDED)
4551     {
4552       /* Here, to avoid the call of setup_coding_system, we directly
4553          call detect_eol_type.  */
4554       coding->eol_type = detect_eol_type (ptr, bytes, &dummy);
4555       if (coding->eol_type == CODING_EOL_INCONSISTENT)
4556         coding->eol_type = CODING_EOL_LF;
4557       if (coding->eol_type != CODING_EOL_UNDECIDED)
4558         {
4559           val = Fget (coding->symbol, Qeol_type);
4560           if (VECTORP (val) && XVECTOR (val)->size == 3)
4561             coding->symbol = XVECTOR (val)->contents[coding->eol_type];
4562         }
4563       coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4564     }
4565
4566   if (coding->eol_type == CODING_EOL_LF
4567       || coding->eol_type == CODING_EOL_UNDECIDED)
4568     {
4569       /* We have nothing to do.  */
4570       ptr = pend;
4571     }
4572   else if (coding->eol_type == CODING_EOL_CRLF)
4573     {
4574       unsigned char *pstart = ptr, *p = ptr;
4575
4576       if (! (coding->mode & CODING_MODE_LAST_BLOCK)
4577           && *(pend - 1) == '\r')
4578         {
4579           /* If the last character is CR, we can't handle it here
4580              because LF will be in the not-yet-decoded source text.
4581              Recorded that the CR is not yet processed.  */
4582           coding->spec.ccl.cr_carryover = 1;
4583           coding->produced--;
4584           coding->produced_char--;
4585           pend--;
4586         }
4587       while (ptr < pend)
4588         {
4589           if (*ptr == '\r')
4590             {
4591               if (ptr + 1 < pend && *(ptr + 1) == '\n')
4592                 {
4593                   *p++ = '\n';
4594                   ptr += 2;
4595                 }
4596               else
4597                 {
4598                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4599                     goto undo_eol_conversion;
4600                   *p++ = *ptr++;
4601                 }
4602             }
4603           else if (*ptr == '\n'
4604                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4605             goto undo_eol_conversion;
4606           else
4607             *p++ = *ptr++;
4608           continue;
4609
4610         undo_eol_conversion:
4611           /* We have faced with inconsistent EOL format at PTR.
4612              Convert all LFs before PTR back to CRLFs.  */
4613           for (p--, ptr--; p >= pstart; p--)
4614             {
4615               if (*p == '\n')
4616                 *ptr-- = '\n', *ptr-- = '\r';
4617               else
4618                 *ptr-- = *p;
4619             }
4620           /*  If carryover is recorded, cancel it because we don't
4621               convert CRLF anymore.  */
4622           if (coding->spec.ccl.cr_carryover)
4623             {
4624               coding->spec.ccl.cr_carryover = 0;
4625               coding->produced++;
4626               coding->produced_char++;
4627               pend++;
4628             }
4629           p = ptr = pend;
4630           coding->eol_type = CODING_EOL_LF;
4631           coding->symbol = saved_coding_symbol;
4632         }
4633       if (p < pend)
4634         {
4635           /* As each two-byte sequence CRLF was converted to LF, (PEND
4636              - P) is the number of deleted characters.  */
4637           coding->produced -= pend - p;
4638           coding->produced_char -= pend - p;
4639         }
4640     }
4641   else                  /* i.e. coding->eol_type == CODING_EOL_CR */
4642     {
4643       unsigned char *p = ptr;
4644
4645       for (; ptr < pend; ptr++)
4646         {
4647           if (*ptr == '\r')
4648             *ptr = '\n';
4649           else if (*ptr == '\n'
4650                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4651             {
4652               for (; p < ptr; p++)
4653                 {
4654                   if (*p == '\n')
4655                     *p = '\r';
4656                 }
4657               ptr = pend;
4658               coding->eol_type = CODING_EOL_LF;
4659               coding->symbol = saved_coding_symbol;
4660             }
4661         }
4662     }
4663 }
4664
4665 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
4666    decoding, it may detect coding system and format of end-of-line if
4667    those are not yet decided.  The source should be unibyte, the
4668    result is multibyte if CODING->dst_multibyte is nonzero, else
4669    unibyte.  */
4670
4671 int
4672 decode_coding (coding, source, destination, src_bytes, dst_bytes)
4673      struct coding_system *coding;
4674      unsigned char *source, *destination;
4675      int src_bytes, dst_bytes;
4676 {
4677   if (coding->type == coding_type_undecided)
4678     detect_coding (coding, source, src_bytes);
4679
4680   if (coding->eol_type == CODING_EOL_UNDECIDED
4681       && coding->type != coding_type_ccl)
4682     {
4683       detect_eol (coding, source, src_bytes);
4684       /* We had better recover the original eol format if we
4685          encounter an inconsistent eol format while decoding.  */
4686       coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4687     }
4688
4689   coding->produced = coding->produced_char = 0;
4690   coding->consumed = coding->consumed_char = 0;
4691   coding->errors = 0;
4692   coding->result = CODING_FINISH_NORMAL;
4693
4694   switch (coding->type)
4695     {
4696     case coding_type_sjis:
4697       decode_coding_sjis_big5 (coding, source, destination,
4698                                src_bytes, dst_bytes, 1);
4699       break;
4700
4701     case coding_type_iso2022:
4702       decode_coding_iso2022 (coding, source, destination,
4703                              src_bytes, dst_bytes);
4704       break;
4705
4706     case coding_type_big5:
4707       decode_coding_sjis_big5 (coding, source, destination,
4708                                src_bytes, dst_bytes, 0);
4709       break;
4710
4711     case coding_type_emacs_mule:
4712       decode_coding_emacs_mule (coding, source, destination,
4713                                 src_bytes, dst_bytes);
4714       break;
4715
4716     case coding_type_ccl:
4717       if (coding->spec.ccl.cr_carryover)
4718         {
4719           /* Set the CR which is not processed by the previous call of
4720              decode_eol_post_ccl in DESTINATION.  */
4721           *destination = '\r';
4722           coding->produced++;
4723           coding->produced_char++;
4724           dst_bytes--;
4725         }
4726       ccl_coding_driver (coding, source,
4727                          destination + coding->spec.ccl.cr_carryover,
4728                          src_bytes, dst_bytes, 0);
4729       if (coding->eol_type != CODING_EOL_LF)
4730         decode_eol_post_ccl (coding, destination, coding->produced);
4731       break;
4732
4733     default:
4734       decode_eol (coding, source, destination, src_bytes, dst_bytes);
4735     }
4736
4737   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
4738       && coding->mode & CODING_MODE_LAST_BLOCK
4739       && coding->consumed == src_bytes)
4740     coding->result = CODING_FINISH_NORMAL;
4741
4742   if (coding->mode & CODING_MODE_LAST_BLOCK
4743       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4744     {
4745       unsigned char *src = source + coding->consumed;
4746       unsigned char *dst = destination + coding->produced;
4747
4748       src_bytes -= coding->consumed;
4749       coding->errors++;
4750       if (COMPOSING_P (coding))
4751         DECODE_COMPOSITION_END ('1');
4752       while (src_bytes--)
4753         {
4754           int c = *src++;
4755           dst += CHAR_STRING (c, dst);
4756           coding->produced_char++;
4757         }
4758       coding->consumed = coding->consumed_char = src - source;
4759       coding->produced = dst - destination;
4760       coding->result = CODING_FINISH_NORMAL;
4761     }
4762
4763   if (!coding->dst_multibyte)
4764     {
4765       coding->produced = str_as_unibyte (destination, coding->produced);
4766       coding->produced_char = coding->produced;
4767     }
4768
4769   return coding->result;
4770 }
4771
4772 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  The
4773    multibyteness of the source is CODING->src_multibyte, the
4774    multibyteness of the result is always unibyte.  */
4775
4776 int
4777 encode_coding (coding, source, destination, src_bytes, dst_bytes)
4778      struct coding_system *coding;
4779      unsigned char *source, *destination;
4780      int src_bytes, dst_bytes;
4781 {
4782   coding->produced = coding->produced_char = 0;
4783   coding->consumed = coding->consumed_char = 0;
4784   coding->errors = 0;
4785   coding->result = CODING_FINISH_NORMAL;
4786
4787   switch (coding->type)
4788     {
4789     case coding_type_sjis:
4790       encode_coding_sjis_big5 (coding, source, destination,
4791                                src_bytes, dst_bytes, 1);
4792       break;
4793
4794     case coding_type_iso2022:
4795       encode_coding_iso2022 (coding, source, destination,
4796                              src_bytes, dst_bytes);
4797       break;
4798
4799     case coding_type_big5:
4800       encode_coding_sjis_big5 (coding, source, destination,
4801                                src_bytes, dst_bytes, 0);
4802       break;
4803
4804     case coding_type_emacs_mule:
4805       encode_coding_emacs_mule (coding, source, destination,
4806                                 src_bytes, dst_bytes);
4807       break;
4808
4809     case coding_type_ccl:
4810       ccl_coding_driver (coding, source, destination,
4811                          src_bytes, dst_bytes, 1);
4812       break;
4813
4814     default:
4815       encode_eol (coding, source, destination, src_bytes, dst_bytes);
4816     }
4817
4818   if (coding->mode & CODING_MODE_LAST_BLOCK
4819       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4820     {
4821       unsigned char *src = source + coding->consumed;
4822       unsigned char *src_end = src + src_bytes;
4823       unsigned char *dst = destination + coding->produced;
4824
4825       if (coding->type == coding_type_iso2022)
4826         ENCODE_RESET_PLANE_AND_REGISTER;
4827       if (COMPOSING_P (coding))
4828         *dst++ = ISO_CODE_ESC, *dst++ = '1';
4829       if (coding->consumed < src_bytes)
4830         {
4831           int len = src_bytes - coding->consumed;
4832
4833           BCOPY_SHORT (source + coding->consumed, dst, len);
4834           if (coding->src_multibyte)
4835             len = str_as_unibyte (dst, len);
4836           dst += len;
4837           coding->consumed = src_bytes;
4838         }
4839       coding->produced = coding->produced_char = dst - destination;
4840       coding->result = CODING_FINISH_NORMAL;
4841     }
4842
4843   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
4844       && coding->consumed == src_bytes)
4845     coding->result = CODING_FINISH_NORMAL;
4846
4847   return coding->result;
4848 }
4849
4850 /* Scan text in the region between *BEG and *END (byte positions),
4851    skip characters which we don't have to decode by coding system
4852    CODING at the head and tail, then set *BEG and *END to the region
4853    of the text we actually have to convert.  The caller should move
4854    the gap out of the region in advance if the region is from a
4855    buffer.
4856
4857    If STR is not NULL, *BEG and *END are indices into STR.  */
4858
4859 static void
4860 shrink_decoding_region (beg, end, coding, str)
4861      int *beg, *end;
4862      struct coding_system *coding;
4863      unsigned char *str;
4864 {
4865   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
4866   int eol_conversion;
4867   Lisp_Object translation_table;
4868
4869   if (coding->type == coding_type_ccl
4870       || coding->type == coding_type_undecided
4871       || coding->eol_type != CODING_EOL_LF
4872       || !NILP (coding->post_read_conversion)
4873       || coding->composing != COMPOSITION_DISABLED)
4874     {
4875       /* We can't skip any data.  */
4876       return;
4877     }
4878   if (coding->type == coding_type_no_conversion
4879       || coding->type == coding_type_raw_text
4880       || coding->type == coding_type_emacs_mule)
4881     {
4882       /* We need no conversion, but don't have to skip any data here.
4883          Decoding routine handles them effectively anyway.  */
4884       return;
4885     }
4886
4887   translation_table = coding->translation_table_for_decode;
4888   if (NILP (translation_table) && !NILP (Venable_character_translation))
4889     translation_table = Vstandard_translation_table_for_decode;
4890   if (CHAR_TABLE_P (translation_table))
4891     {
4892       int i;
4893       for (i = 0; i < 128; i++)
4894         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
4895           break;
4896       if (i < 128)
4897         /* Some ASCII character should be translated.  We give up
4898            shrinking.  */
4899         return;
4900     }
4901
4902   if (coding->heading_ascii >= 0)
4903     /* Detection routine has already found how much we can skip at the
4904        head.  */
4905     *beg += coding->heading_ascii;
4906
4907   if (str)
4908     {
4909       begp_orig = begp = str + *beg;
4910       endp_orig = endp = str + *end;
4911     }
4912   else
4913     {
4914       begp_orig = begp = BYTE_POS_ADDR (*beg);
4915       endp_orig = endp = begp + *end - *beg;
4916     }
4917
4918   eol_conversion = (coding->eol_type == CODING_EOL_CR
4919                     || coding->eol_type == CODING_EOL_CRLF);
4920
4921   switch (coding->type)
4922     {
4923     case coding_type_sjis:
4924     case coding_type_big5:
4925       /* We can skip all ASCII characters at the head.  */
4926       if (coding->heading_ascii < 0)
4927         {
4928           if (eol_conversion)
4929             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
4930           else
4931             while (begp < endp && *begp < 0x80) begp++;
4932         }
4933       /* We can skip all ASCII characters at the tail except for the
4934          second byte of SJIS or BIG5 code.  */
4935       if (eol_conversion)
4936         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
4937       else
4938         while (begp < endp && endp[-1] < 0x80) endp--;
4939       /* Do not consider LF as ascii if preceded by CR, since that
4940          confuses eol decoding. */
4941       if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4942         endp++;
4943       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
4944         endp++;
4945       break;
4946
4947     case coding_type_iso2022:
4948       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
4949         /* We can't skip any data.  */
4950         break;
4951       if (coding->heading_ascii < 0)
4952         {
4953           /* We can skip all ASCII characters at the head except for a
4954              few control codes.  */
4955           while (begp < endp && (c = *begp) < 0x80
4956                  && c != ISO_CODE_CR && c != ISO_CODE_SO
4957                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
4958                  && (!eol_conversion || c != ISO_CODE_LF))
4959             begp++;
4960         }
4961       switch (coding->category_idx)
4962         {
4963         case CODING_CATEGORY_IDX_ISO_8_1:
4964         case CODING_CATEGORY_IDX_ISO_8_2:
4965           /* We can skip all ASCII characters at the tail.  */
4966           if (eol_conversion)
4967             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
4968           else
4969             while (begp < endp && endp[-1] < 0x80) endp--;
4970           /* Do not consider LF as ascii if preceded by CR, since that
4971              confuses eol decoding. */
4972           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4973             endp++;
4974           break;
4975
4976         case CODING_CATEGORY_IDX_ISO_7:
4977         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
4978           {
4979             /* We can skip all characters at the tail except for 8-bit
4980                codes and ESC and the following 2-byte at the tail.  */
4981             unsigned char *eight_bit = NULL;
4982
4983             if (eol_conversion)
4984               while (begp < endp
4985                      && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
4986                 {
4987                   if (!eight_bit && c & 0x80) eight_bit = endp;
4988                   endp--;
4989                 }
4990             else
4991               while (begp < endp
4992                      && (c = endp[-1]) != ISO_CODE_ESC)
4993                 {
4994                   if (!eight_bit && c & 0x80) eight_bit = endp;
4995                   endp--;
4996                 }
4997             /* Do not consider LF as ascii if preceded by CR, since that
4998                confuses eol decoding. */
4999             if (begp < endp && endp < endp_orig
5000                 && endp[-1] == '\r' && endp[0] == '\n')
5001               endp++;
5002             if (begp < endp && endp[-1] == ISO_CODE_ESC)
5003               {
5004                 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
5005                   /* This is an ASCII designation sequence.  We can
5006                      surely skip the tail.  But, if we have
5007                      encountered an 8-bit code, skip only the codes
5008                      after that.  */
5009                   endp = eight_bit ? eight_bit : endp + 2;
5010                 else
5011                   /* Hmmm, we can't skip the tail.  */
5012                   endp = endp_orig;
5013               }
5014             else if (eight_bit)
5015               endp = eight_bit;
5016           }
5017         }
5018       break;
5019
5020     default:
5021       abort ();
5022     }
5023   *beg += begp - begp_orig;
5024   *end += endp - endp_orig;
5025   return;
5026 }
5027
5028 /* Like shrink_decoding_region but for encoding.  */
5029
5030 static void
5031 shrink_encoding_region (beg, end, coding, str)
5032      int *beg, *end;
5033      struct coding_system *coding;
5034      unsigned char *str;
5035 {
5036   unsigned char *begp_orig, *begp, *endp_orig, *endp;
5037   int eol_conversion;
5038   Lisp_Object translation_table;
5039
5040   if (coding->type == coding_type_ccl
5041       || coding->eol_type == CODING_EOL_CRLF
5042       || coding->eol_type == CODING_EOL_CR
5043       || coding->cmp_data && coding->cmp_data->used > 0)
5044     {
5045       /* We can't skip any data.  */
5046       return;
5047     }
5048   if (coding->type == coding_type_no_conversion
5049       || coding->type == coding_type_raw_text
5050       || coding->type == coding_type_emacs_mule
5051       || coding->type == coding_type_undecided)
5052     {
5053       /* We need no conversion, but don't have to skip any data here.
5054          Encoding routine handles them effectively anyway.  */
5055       return;
5056     }
5057
5058   translation_table = coding->translation_table_for_encode;
5059   if (NILP (translation_table) && !NILP (Venable_character_translation))
5060     translation_table = Vstandard_translation_table_for_encode;
5061   if (CHAR_TABLE_P (translation_table))
5062     {
5063       int i;
5064       for (i = 0; i < 128; i++)
5065         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
5066           break;
5067       if (i < 128)
5068         /* Some ASCII character should be translated.  We give up
5069            shrinking.  */
5070         return;
5071     }
5072
5073   if (str)
5074     {
5075       begp_orig = begp = str + *beg;
5076       endp_orig = endp = str + *end;
5077     }
5078   else
5079     {
5080       begp_orig = begp = BYTE_POS_ADDR (*beg);
5081       endp_orig = endp = begp + *end - *beg;
5082     }
5083
5084   eol_conversion = (coding->eol_type == CODING_EOL_CR
5085                     || coding->eol_type == CODING_EOL_CRLF);
5086
5087   /* Here, we don't have to check coding->pre_write_conversion because
5088      the caller is expected to have handled it already.  */
5089   switch (coding->type)
5090     {
5091     case coding_type_iso2022:
5092       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
5093         /* We can't skip any data.  */
5094         break;
5095       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
5096         {
5097           unsigned char *bol = begp;
5098           while (begp < endp && *begp < 0x80)
5099             {
5100               begp++;
5101               if (begp[-1] == '\n')
5102                 bol = begp;
5103             }
5104           begp = bol;
5105           goto label_skip_tail;
5106         }
5107       /* fall down ... */
5108
5109     case coding_type_sjis:
5110     case coding_type_big5:
5111       /* We can skip all ASCII characters at the head and tail.  */
5112       if (eol_conversion)
5113         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
5114       else
5115         while (begp < endp && *begp < 0x80) begp++;
5116     label_skip_tail:
5117       if (eol_conversion)
5118         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
5119       else
5120         while (begp < endp && *(endp - 1) < 0x80) endp--;
5121       break;
5122
5123     default:
5124       abort ();
5125     }
5126
5127   *beg += begp - begp_orig;
5128   *end += endp - endp_orig;
5129   return;
5130 }
5131
/* Shrinking a conversion region has some overhead of its own, so it
   is attempted only for regions longer than this many bytes.  */
static int shrink_conversion_region_threshhold = 1024;

/* If the region between *BEG and *END is longer than
   shrink_conversion_region_threshhold, shrink it with
   shrink_encoding_region (if ENCODEP is nonzero) or with
   shrink_decoding_region (if ENCODEP is zero).  BEG and END are
   pointers to int; CODING and STR are passed on unchanged.  */
#define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep)        \
  do {                                                                  \
    if (shrink_conversion_region_threshhold < *(end) - *(beg))          \
      {                                                                 \
        if (!(encodep))                                                 \
          shrink_decoding_region (beg, end, coding, str);               \
        else                                                            \
          shrink_encoding_region (beg, end, coding, str);               \
      }                                                                 \
  } while (0)
5145
5146 static Lisp_Object
5147 code_convert_region_unwind (dummy)
5148      Lisp_Object dummy;
5149 {
5150   inhibit_pre_post_conversion = 0;
5151   return Qnil;
5152 }
5153
5154 /* Store information about all compositions in the range FROM and TO
5155    of OBJ in memory blocks pointed by CODING->cmp_data.  OBJ is a
5156    buffer or a string, defaults to the current buffer.  */
5157
5158 void
5159 coding_save_composition (coding, from, to, obj)
5160      struct coding_system *coding;
5161      int from, to;
5162      Lisp_Object obj;
5163 {
5164   Lisp_Object prop;
5165   int start, end;
5166
5167   if (coding->composing == COMPOSITION_DISABLED)
5168     return;
5169   if (!coding->cmp_data)
5170     coding_allocate_composition_data (coding, from);
5171   if (!find_composition (from, to, &start, &end, &prop, obj)
5172       || end > to)
5173     return;
5174   if (start < from
5175       && (!find_composition (end, to, &start, &end, &prop, obj)
5176           || end > to))
5177     return;
5178   coding->composing = COMPOSITION_NO;
5179   do
5180     {
5181       if (COMPOSITION_VALID_P (start, end, prop))
5182         {
5183           enum composition_method method = COMPOSITION_METHOD (prop);
5184           if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
5185               >= COMPOSITION_DATA_SIZE)
5186             coding_allocate_composition_data (coding, from);
5187           /* For relative composition, we remember start and end
5188              positions, for the other compositions, we also remember
5189              components.  */
5190           CODING_ADD_COMPOSITION_START (coding, start - from, method);
5191           if (method != COMPOSITION_RELATIVE)
5192             {
5193               /* We must store a*/
5194               Lisp_Object val, ch;
5195
5196               val = COMPOSITION_COMPONENTS (prop);
5197               if (CONSP (val))
5198                 while (CONSP (val))
5199                   {
5200                     ch = XCAR (val), val = XCDR (val);
5201                     CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
5202                   }
5203               else if (VECTORP (val) || STRINGP (val))
5204                 {
5205                   int len = (VECTORP (val)
5206                              ? XVECTOR (val)->size : XSTRING (val)->size);
5207                   int i;
5208                   for (i = 0; i < len; i++)
5209                     {
5210                       ch = (STRINGP (val)
5211                             ? Faref (val, make_number (i))
5212                             : XVECTOR (val)->contents[i]);
5213                       CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
5214                     }
5215                 }
5216               else              /* INTEGERP (val) */
5217                 CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (val));
5218             }
5219           CODING_ADD_COMPOSITION_END (coding, end - from);
5220         }
5221       start = end;
5222     }
5223   while (start < to
5224          && find_composition (start, to, &start, &end, &prop, obj)
5225          && end <= to);
5226
5227   /* Make coding->cmp_data point to the first memory block.  */
5228   while (coding->cmp_data->prev)
5229     coding->cmp_data = coding->cmp_data->prev;
5230   coding->cmp_data_start = 0;
5231 }
5232
5233 /* Replay the composition records saved in CODING->cmp_data onto OBJ.
5234    OBJ is a buffer or a string, defaults to the current buffer.  Nothing
5235    is done when CODING->cmp_data is null.  */
5236
5237 void
5238 coding_restore_composition (coding, obj)
5239      struct coding_system *coding;
5240      Lisp_Object obj;
5241 {
5242   struct composition_data *block = coding->cmp_data;
5243   int pos;
5244
5245   if (!block)
5246     return;
5247
5248   /* Rewind to the first memory block of the chain.  */
5249   for (; block->prev; block = block->prev)
5250     ;
5251
5252   for (; block; block = block->next)
5253     {
5254       /* A record starts with its own length (counted in ints); a
5255          non-positive length ends the scan of this block.  */
5256       pos = 0;
5257       while (pos < block->used && block->data[pos] > 0)
5258         {
5259           int *rec = block->data + pos;
5260           enum composition_method method = (enum composition_method) rec[3];
5261           Lisp_Object components = Qnil;
5262           if (method != COMPOSITION_RELATIVE)
5263             {
5264               /* Elements 4 and up hold the component codes.  */
5265               Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5266               int ncomp = rec[0] - 4, k;
5267
5268               for (k = 0; k < ncomp; k++)
5269                 args[k] = make_number (rec[4 + k]);
5270               components = (method == COMPOSITION_WITH_ALTCHARS
5271                             ? Fstring (ncomp, args) : Fvector (ncomp, args));
5272             }
5273           compose_text (rec[1], rec[2], components, Qnil, obj);
5274           pos += rec[0];
5275         }
5276     }
5277 }
5278
5279 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
5280    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
5281    coding system CODING, and return the status code of code conversion
5282    (currently, this value has no meaning; it is always 0).
5283
5284    How many characters (and bytes) are converted to how many
5285    characters (and bytes) are recorded in members of the structure
5286    CODING.  Composition data held in CODING is released on return.
5287
5288    If REPLACE is nonzero, we do various things as if the original text
5289    is deleted and a new text is inserted.  See the comments in
5290    replace_range (insdel.c) to know what we are doing.
5291
5292    If REPLACE is zero, it is assumed that the source text is unibyte.
5293    Otherwise, it is assumed that the source text is multibyte.  */
5294
5295 int
5296 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
5297      int from, from_byte, to, to_byte, encodep, replace;
5298      struct coding_system *coding;
5299 {
5300   int len = to - from, len_byte = to_byte - from_byte;
5301   int require, inserted, inserted_byte;
5302   int head_skip, tail_skip, total_skip = 0;
5303   Lisp_Object saved_coding_symbol;
5304   int first = 1;
5305   unsigned char *src, *dst;
5306   Lisp_Object deletion;
5307   int orig_point = PT, orig_len = len;
5308   int prev_Z;
5309   int multibyte_p = !NILP (current_buffer->enable_multibyte_characters);
5310
5311   deletion = Qnil;
5312   saved_coding_symbol = coding->symbol;
5313
5314   if (from < PT && PT < to)
5315     {
5316       TEMP_SET_PT_BOTH (from, from_byte);
5317       orig_point = from;
5318     }
5319
5320   if (replace)
5321     {
5322       int saved_from = from;
5323       int saved_inhibit_modification_hooks;
5324
5325       prepare_to_modify_buffer (from, to, &from);
5326       if (saved_from != from)
5327         {
5328           to = from + len;
5329           from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
5330           len_byte = to_byte - from_byte;
5331         }
5332
5333       /* The code conversion routine can not preserve text properties
5334          for now.  So, we must remove all text properties in the
5335          region.  Here, we must suppress all modification hooks.  */
5336       saved_inhibit_modification_hooks = inhibit_modification_hooks;
5337       inhibit_modification_hooks = 1;
5338       Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
5339       inhibit_modification_hooks = saved_inhibit_modification_hooks;
5340     }
5341
5342   if (! encodep && CODING_REQUIRE_DETECTION (coding))
5343     {
5344       /* We must detect encoding of text and eol format.  */
5345
5346       if (from < GPT && to > GPT)
5347         move_gap_both (from, from_byte);
5348       if (coding->type == coding_type_undecided)
5349         {
5350           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
5351           if (coding->type == coding_type_undecided)
5352             {
5353               /* It seems that the text contains only ASCII, but we
5354                  should not leave it undecided because the deeper
5355                  decoding routine (decode_coding) tries to detect the
5356                  encodings again in vain.  */
5357               coding->type = coding_type_emacs_mule;
5358               coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
5359               /* As emacs-mule decoder will handle composition, we
5360                  need this setting to allocate coding->cmp_data
5361                  later.  */
5362               coding->composing = COMPOSITION_NO;
5363             }
5364         }
5365       if (coding->eol_type == CODING_EOL_UNDECIDED
5366           && coding->type != coding_type_ccl)
5367         {
5368           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
5369           if (coding->eol_type == CODING_EOL_UNDECIDED)
5370             coding->eol_type = CODING_EOL_LF;
5371           /* We had better recover the original eol format if we
5372              encounter an inconsistent eol format while decoding.  */
5373           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
5374         }
5375     }
5376
5377   /* Now we convert the text.  */
5378
5379   /* For encoding, we must process pre-write-conversion in advance.  */
5380   if (! inhibit_pre_post_conversion
5381       && encodep
5382       && SYMBOLP (coding->pre_write_conversion)
5383       && ! NILP (Ffboundp (coding->pre_write_conversion)))
5384     {
5385       /* The function in pre-write-conversion may put a new text in a
5386          new buffer.  */
5387       struct buffer *prev = current_buffer;
5388       Lisp_Object new;
5389       int count = specpdl_ptr - specpdl;
5390
5391       record_unwind_protect (code_convert_region_unwind, Qnil);
5392       /* We should not call any more pre-write/post-read-conversion
5393          functions while this pre-write-conversion is running.  */
5394       inhibit_pre_post_conversion = 1;
5395       call2 (coding->pre_write_conversion,
5396              make_number (from), make_number (to));
5397       inhibit_pre_post_conversion = 0;
5398       /* Discard the unwind protect.  */
5399       specpdl_ptr--;
5400
5401       if (current_buffer != prev)
5402         {
5403           len = ZV - BEGV;
5404           new = Fcurrent_buffer ();
5405           set_buffer_internal_1 (prev);
5406           del_range_2 (from, from_byte, to, to_byte, 0);
5407           TEMP_SET_PT_BOTH (from, from_byte);
5408           insert_from_buffer (XBUFFER (new), 1, len, 0);
5409           Fkill_buffer (new);
5410           if (orig_point >= to)
5411             orig_point += len - orig_len;
5412           else if (orig_point > from)
5413             orig_point = from;
5414           orig_len = len;
5415           to = from + len;
5416           from_byte = CHAR_TO_BYTE (from);
5417           to_byte = CHAR_TO_BYTE (to);
5418           len_byte = to_byte - from_byte;
5419           TEMP_SET_PT_BOTH (from, from_byte);
5420         }
5421     }
5422
5423   if (replace)
5424     deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
5425   /* Save (when encoding) or make room for (when decoding) composition data.  */
5426   if (coding->composing != COMPOSITION_DISABLED)
5427     {
5428       if (encodep)
5429         coding_save_composition (coding, from, to, Fcurrent_buffer ());
5430       else
5431         coding_allocate_composition_data (coding, from);
5432     }
5433
5434   /* Try to skip the heading and tailing ASCIIs.  */
5435   if (coding->type != coding_type_ccl)
5436     {
5437       int from_byte_orig = from_byte, to_byte_orig = to_byte;
5438
5439       if (from < GPT && GPT < to)
5440         move_gap_both (from, from_byte);
5441       SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
5442       if (from_byte == to_byte
5443           && (encodep || NILP (coding->post_read_conversion))
5444           && ! CODING_REQUIRE_FLUSHING (coding))
5445         {
5446           coding->produced = len_byte;
5447           coding->produced_char = len;
5448           if (!replace)  /* We must record and adjust for this new text now.  */
5449             adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
5450           coding_free_composition_data (coding);  /* Set up above; don't leak it.  */
5451           return 0;
5452         }
5453
5454       head_skip = from_byte - from_byte_orig;
5455       tail_skip = to_byte_orig - to_byte;
5456       total_skip = head_skip + tail_skip;
5457       from += head_skip;
5458       to -= tail_skip;
5459       len -= total_skip; len_byte -= total_skip;
5460     }
5461
5462   /* For conversion, we must put the gap before the text in addition to
5463      making the gap larger for efficient decoding.  The required gap
5464      size starts from 2000 which is the magic number used in make_gap.
5465      But, after one batch of conversion, it will be incremented if we
5466      find that it is not enough.  */
5467   require = 2000;
5468
5469   if (GAP_SIZE  < require)
5470     make_gap (require - GAP_SIZE);
5471   move_gap_both (from, from_byte);
5472
5473   inserted = inserted_byte = 0;
5474   /* Take the source text out of the buffer contents by counting it as gap.  */
5475   GAP_SIZE += len_byte;
5476   ZV -= len;
5477   Z -= len;
5478   ZV_BYTE -= len_byte;
5479   Z_BYTE -= len_byte;
5480
5481   if (GPT - BEG < BEG_UNCHANGED)
5482     BEG_UNCHANGED = GPT - BEG;
5483   if (Z - GPT < END_UNCHANGED)
5484     END_UNCHANGED = Z - GPT;
5485
5486   if (!encodep && coding->src_multibyte)
5487     {
5488       /* Decoding routines expect that the source text is unibyte.
5489          We must convert 8-bit characters of multibyte form to
5490          unibyte.  */
5491       int len_byte_orig = len_byte;
5492       len_byte = str_as_unibyte (GAP_END_ADDR - len_byte, len_byte);
5493       if (len_byte < len_byte_orig)
5494         safe_bcopy (GAP_END_ADDR - len_byte_orig, GAP_END_ADDR - len_byte,
5495                     len_byte);
5496       coding->src_multibyte = 0;
5497     }
5498
5499   for (;;)
5500     {
5501       int result;
5502
5503       /* The buffer memory is now:
5504          +--------+converted-text+---------+-------original-text-------+---+
5505          |<-from->|<--inserted-->|---------|<--------len_byte--------->|---|
5506                   |<---------------------- GAP ----------------------->|  */
5507       src = GAP_END_ADDR - len_byte;
5508       dst = GPT_ADDR + inserted_byte;
5509
5510       if (encodep)
5511         result = encode_coding (coding, src, dst, len_byte, 0);
5512       else
5513         result = decode_coding (coding, src, dst, len_byte, 0);
5514
5515       /* The buffer memory is now:
5516          +--------+-------converted-text----+--+------original-text----+---+
5517          |<-from->|<-inserted->|<-produced->|--|<-(len_byte-consumed)->|---|
5518                   |<---------------------- GAP ----------------------->|  */
5519
5520       inserted += coding->produced_char;
5521       inserted_byte += coding->produced;
5522       len_byte -= coding->consumed;
5523
5524       if (result == CODING_FINISH_INSUFFICIENT_CMP)
5525         {
5526           coding_allocate_composition_data (coding, from + inserted);
5527           continue;
5528         }
5529
5530       src += coding->consumed;
5531       dst += coding->produced;
5532
5533       if (result == CODING_FINISH_NORMAL)
5534         {
5535           src += len_byte;
5536           break;
5537         }
5538       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
5539         {
5540           unsigned char *pend = dst, *p = pend - inserted_byte;
5541           Lisp_Object eol_type;
5542
5543           /* Encode LFs back to the original eol format (CR or CRLF).  */
5544           if (coding->eol_type == CODING_EOL_CR)
5545             {
5546               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
5547             }
5548           else
5549             {
5550               int count = 0;
5551
5552               while (p < pend) if (*p++ == '\n') count++;
5553               if (src - dst < count)
5554                 {
5555                   /* We don't have sufficient room for encoding LFs
5556                      back to CRLF.  We must record converted and
5557                      not-yet-converted text back to the buffer
5558                      content, enlarge the gap, then record them out of
5559                      the buffer contents again.  */
5560                   int add = len_byte + inserted_byte;
5561
5562                   GAP_SIZE -= add;
5563                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5564                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
5565                   make_gap (count - GAP_SIZE);
5566                   GAP_SIZE += add;
5567                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5568                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5569                   /* Don't forget to update SRC, DST, and PEND.  */
5570                   src = GAP_END_ADDR - len_byte;
5571                   dst = GPT_ADDR + inserted_byte;
5572                   pend = dst;
5573                 }
5574               inserted += count;
5575               inserted_byte += count;
5576               coding->produced += count;
5577               p = dst = pend + count;
5578               while (count)
5579                 {
5580                   *--p = *--pend;
5581                   if (*p == '\n') count--, *--p = '\r';
5582                 }
5583             }
5584
5585           /* Suppress eol-format conversion in the further conversion.  */
5586           coding->eol_type = CODING_EOL_LF;
5587
5588           /* Set the coding system symbol to that for Unix-like EOL.  */
5589           eol_type = Fget (saved_coding_symbol, Qeol_type);
5590           if (VECTORP (eol_type)
5591               && XVECTOR (eol_type)->size == 3
5592               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
5593             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
5594           else
5595             coding->symbol = saved_coding_symbol;
5596
5597           continue;
5598         }
5599       if (len_byte <= 0)
5600         {
5601           if (coding->type != coding_type_ccl
5602               || coding->mode & CODING_MODE_LAST_BLOCK)
5603             break;
5604           coding->mode |= CODING_MODE_LAST_BLOCK;
5605           continue;
5606         }
5607       if (result == CODING_FINISH_INSUFFICIENT_SRC)
5608         {
5609           /* The source text ends in invalid codes.  Let's just
5610              make them valid buffer contents, and finish conversion.  */
5611           inserted += len_byte;
5612           inserted_byte += len_byte;
5613           while (len_byte--)
5614             *dst++ = *src++;
5615           break;
5616         }
5617       if (result == CODING_FINISH_INTERRUPT)
5618         {
5619           /* The conversion procedure was interrupted by a user.  */
5620           break;
5621         }
5622       /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST  */
5623       if (coding->consumed < 1)
5624         {
5625           /* It's quite strange to require more memory without
5626              consuming any bytes.  Perhaps CCL program bug.  */
5627           break;
5628         }
5629       if (first)
5630         {
5631           /* We have just done the first batch of conversion which was
5632              stopped because of insufficient gap.  Let's reconsider the
5633              required gap size (i.e. SRC - DST) now.
5634
5635              We have converted ORIG bytes (== coding->consumed) into
5636              NEW bytes (coding->produced).  To convert the remaining
5637              LEN bytes, we may need REQUIRE bytes of gap, where:
5638                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
5639                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
5640              Here, we are sure that NEW >= ORIG.  */
5641           float ratio = coding->produced - coding->consumed;
5642           ratio /= coding->consumed;
5643           require = len_byte * ratio;
5644           first = 0;
5645         }
5646       if ((src - dst) < (require + 2000))
5647         {
5648           /* See the comment above the previous call of make_gap.  */
5649           int add = len_byte + inserted_byte;
5650
5651           GAP_SIZE -= add;
5652           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5653           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5654           make_gap (require + 2000);
5655           GAP_SIZE += add;
5656           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5657           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5658         }
5659     }
5660   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
5661
5662   if (encodep && coding->dst_multibyte)
5663     {
5664       /* The output is unibyte.  We must convert 8-bit characters to
5665          multibyte form.  */
5666       if (inserted_byte * 2 > GAP_SIZE)
5667         {
5668           GAP_SIZE -= inserted_byte;
5669           ZV += inserted_byte; Z += inserted_byte;
5670           ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte;
5671           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5672           make_gap (inserted_byte - GAP_SIZE);
5673           GAP_SIZE += inserted_byte;
5674           ZV -= inserted_byte; Z -= inserted_byte;
5675           ZV_BYTE -= inserted_byte; Z_BYTE -= inserted_byte;
5676           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5677         }
5678       inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte);
5679     }
5680
5681   /* If we shrank the conversion area, adjust it now.  */
5682   if (total_skip > 0)
5683     {
5684       if (tail_skip > 0)
5685         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
5686       inserted += total_skip; inserted_byte += total_skip;
5687       GAP_SIZE += total_skip;
5688       GPT -= head_skip; GPT_BYTE -= head_skip;
5689       ZV -= total_skip; ZV_BYTE -= total_skip;
5690       Z -= total_skip; Z_BYTE -= total_skip;
5691       from -= head_skip; from_byte -= head_skip;
5692       to += tail_skip; to_byte += tail_skip;
5693     }
5694   /* Recompute INSERTED from the change in Z made by adjust_after_replace.  */
5695   prev_Z = Z;
5696   adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
5697   inserted = Z - prev_Z;
5698
5699   if (!encodep && coding->cmp_data && coding->cmp_data->used)
5700     coding_restore_composition (coding, Fcurrent_buffer ());
5701   coding_free_composition_data (coding);
5702
5703   if (! inhibit_pre_post_conversion
5704       && ! encodep && ! NILP (coding->post_read_conversion))
5705     {
5706       Lisp_Object val;
5707       int count = specpdl_ptr - specpdl;
5708
5709       if (from != PT)
5710         TEMP_SET_PT_BOTH (from, from_byte);
5711       prev_Z = Z;
5712       record_unwind_protect (code_convert_region_unwind, Qnil);
5713       /* We should not call any more pre-write/post-read-conversion
5714          functions while this post-read-conversion is running.  */
5715       inhibit_pre_post_conversion = 1;
5716       val = call1 (coding->post_read_conversion, make_number (inserted));
5717       inhibit_pre_post_conversion = 0;
5718       /* Discard the unwind protect.  */
5719       specpdl_ptr--;
5720       CHECK_NUMBER (val, 0);
5721       inserted += Z - prev_Z;
5722     }
5723
5724   if (orig_point >= from)
5725     {
5726       if (orig_point >= from + orig_len)
5727         orig_point += inserted - orig_len;
5728       else
5729         orig_point = from;
5730       TEMP_SET_PT (orig_point);
5731     }
5732
5733   if (replace)
5734     {
5735       signal_after_change (from, to - from, inserted);
5736       update_compositions (from, from + inserted, CHECK_BORDER);
5737     }
5738   /* Report the totals for the whole region in CODING.  */
5739   {
5740     coding->consumed = to_byte - from_byte;
5741     coding->consumed_char = to - from;
5742     coding->produced = inserted_byte;
5743     coding->produced_char = inserted;
5744   }
5745
5746   return 0;
5747 }
5748 /* Run CODING's pre-write-conversion (if ENCODEP) or post-read-conversion
5749    on a work-buffer copy of STR; return the resulting buffer text.  */
5750
5751 Lisp_Object
5752 run_pre_post_conversion_on_str (str, coding, encodep)
5753      Lisp_Object str;
5754      struct coding_system *coding;
5755      int encodep;
5756 {
5757   int depth = specpdl_ptr - specpdl;
5758   int str_multibyte_p = STRING_MULTIBYTE (str);
5759   struct gcpro gcpro1;
5760
5761   record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
5762   record_unwind_protect (code_convert_region_unwind, Qnil);
5763   GCPRO1 (str);
5764   temp_output_buffer_setup (" *code-converting-work*");
5765   set_buffer_internal (XBUFFER (Vstandard_output));
5766   /* Give the work buffer the multibyteness of STR so that its contents
5767      are inserted as is, with no unibyte<->multibyte conversion.  */
5768   Ferase_buffer ();
5769   current_buffer->enable_multibyte_characters = str_multibyte_p ? Qt : Qnil;
5770   insert_from_string (str, 0, 0, XSTRING (str)->size,
5771                       STRING_BYTES (XSTRING (str)), 0);
5772   UNGCPRO;
5773   inhibit_pre_post_conversion = 1;
5774   if (! encodep)
5775     {
5776       TEMP_SET_PT_BOTH (BEG, BEG_BYTE);
5777       call1 (coding->post_read_conversion, make_number (Z - BEG));
5778     }
5779   else
5780     call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
5781   inhibit_pre_post_conversion = 0;
5782   return unbind_to (depth, make_buffer_string (BEG, Z, 1));
5783 }
5784 /* Decode the string STR by the coding system CODING and return the decoded text as a string.  */
5785 Lisp_Object
5786 decode_coding_string (str, coding, nocopy)
5787      Lisp_Object str;
5788      struct coding_system *coding;
5789      int nocopy;                /* Nonzero: STR itself may be returned when nothing is decoded.  */
5790 {
5791   int len;
5792   struct conversion_buffer buf;
5793   int from, to_byte;
5794   struct gcpro gcpro1;          /* NOTE(review): not referenced in this function.  */
5795   Lisp_Object saved_coding_symbol;
5796   int result;
5797   int require_decoding;
5798   int shrinked_bytes = 0;       /* Bytes skipped at head and tail, copied as is.  */
5799   Lisp_Object newstr;
5800   int consumed, consumed_char, produced, produced_char;  /* Running totals.  */
5801
5802   from = 0;
5803   to_byte = STRING_BYTES (XSTRING (str));
5804
5805   saved_coding_symbol = coding->symbol;
5806   coding->src_multibyte = STRING_MULTIBYTE (str);
5807   coding->dst_multibyte = 1;
5808   if (CODING_REQUIRE_DETECTION (coding))
5809     {
5810       /* See the comments in code_convert_region.  */
5811       if (coding->type == coding_type_undecided)
5812         {
5813           detect_coding (coding, XSTRING (str)->data, to_byte);
5814           if (coding->type == coding_type_undecided)
5815             {
5816               coding->type = coding_type_emacs_mule;
5817               coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
5818               /* As emacs-mule decoder will handle composition, we
5819                  need this setting to allocate coding->cmp_data
5820                  later.  */
5821               coding->composing = COMPOSITION_NO;
5822             }
5823         }
5824       if (coding->eol_type == CODING_EOL_UNDECIDED
5825           && coding->type != coding_type_ccl)
5826         {
5827           saved_coding_symbol = coding->symbol;
5828           detect_eol (coding, XSTRING (str)->data, to_byte);
5829           if (coding->eol_type == CODING_EOL_UNDECIDED)
5830             coding->eol_type = CODING_EOL_LF;
5831           /* We had better recover the original eol format if we
5832              encounter an inconsistent eol format while decoding.  */
5833           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
5834         }
5835     }
5836   /* For these two types the result is not made multibyte.  */
5837   if (coding->type == coding_type_no_conversion
5838       || coding->type == coding_type_raw_text)
5839     coding->dst_multibyte = 0;
5840
5841   require_decoding = CODING_REQUIRE_DECODING (coding);
5842
5843   if (STRING_MULTIBYTE (str))
5844     {
5845       /* Decoding routines expect the source text to be unibyte.  */
5846       str = Fstring_as_unibyte (str);
5847       to_byte = STRING_BYTES (XSTRING (str));
5848       nocopy = 1;
5849       coding->src_multibyte = 0;
5850     }
5851
5852   /* Try to skip the heading and tailing ASCIIs.  */
5853   if (require_decoding && coding->type != coding_type_ccl)
5854     {
5855       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
5856                                 0);
5857       if (from == to_byte)
5858         require_decoding = 0;
5859       shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
5860     }
5861   /* Nothing needs decoding: return STR (or a copy), as multibyte if dst_multibyte is set.  */
5862   if (!require_decoding)
5863     {
5864       coding->consumed = STRING_BYTES (XSTRING (str));
5865       coding->consumed_char = XSTRING (str)->size;
5866       if (coding->dst_multibyte)
5867         {
5868           str = Fstring_as_multibyte (str);
5869           nocopy = 1;
5870         }
5871       coding->produced = STRING_BYTES (XSTRING (str));
5872       coding->produced_char = XSTRING (str)->size;
5873       return (nocopy ? str : Fcopy_sequence (str));
5874     }
5875
5876   if (coding->composing != COMPOSITION_DISABLED)
5877     coding_allocate_composition_data (coding, from);
5878   len = decoding_buffer_size (coding, to_byte - from);
5879   allocate_conversion_buffer (buf, len);
5880   /* Call decode_coding repeatedly, handling whatever made it stop, until it finishes.  */
5881   consumed = consumed_char = produced = produced_char = 0;
5882   while (1)
5883     {
5884       result = decode_coding (coding, XSTRING (str)->data + from + consumed,
5885                               buf.data + produced, to_byte - from - consumed,
5886                               buf.size - produced);
5887       consumed += coding->consumed;
5888       consumed_char += coding->consumed_char;
5889       produced += coding->produced;
5890       produced_char += coding->produced_char;
5891       if (result == CODING_FINISH_NORMAL
5892           || (result == CODING_FINISH_INSUFFICIENT_SRC
5893               && coding->consumed == 0))
5894         break;
5895       if (result == CODING_FINISH_INSUFFICIENT_CMP)
5896         coding_allocate_composition_data (coding, from + produced_char);
5897       else if (result == CODING_FINISH_INSUFFICIENT_DST)
5898         extend_conversion_buffer (&buf);
5899       else if (result == CODING_FINISH_INCONSISTENT_EOL)
5900         {
5901           Lisp_Object eol_type;
5902
5903           /* Recover the original EOL format.  */
5904           if (coding->eol_type == CODING_EOL_CR)
5905             {
5906               unsigned char *p;
5907               for (p = buf.data; p < buf.data + produced; p++)
5908                 if (*p == '\n') *p = '\r';
5909             }
5910           else if (coding->eol_type == CODING_EOL_CRLF)
5911             {
5912               int num_eol = 0;
5913               unsigned char *p0, *p1;
5914               for (p0 = buf.data, p1 = p0 + produced; p0 < p1; p0++)
5915                 if (*p0 == '\n') num_eol++;
5916               if (produced + num_eol >= buf.size)
5917                 extend_conversion_buffer (&buf);
5918               for (p0 = buf.data + produced, p1 = p0 + num_eol; p0 > buf.data;)  /* Copy backward, in place.  */
5919                 {
5920                   *--p1 = *--p0;
5921                   if (*p0 == '\n') *--p1 = '\r';
5922                 }
5923               produced += num_eol;
5924               produced_char += num_eol;
5925             }
5926           /* Suppress eol-format conversion in the further conversion.  */
5927           coding->eol_type = CODING_EOL_LF;
5928
5929           /* Set the coding system symbol to that for Unix-like EOL.  */
5930           eol_type = Fget (saved_coding_symbol, Qeol_type);
5931           if (VECTORP (eol_type)
5932               && XVECTOR (eol_type)->size == 3
5933               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
5934             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
5935           else
5936             coding->symbol = saved_coding_symbol;
5937
5938
5939         }
5940     }
5941   /* Report the totals over all the decode_coding calls above.  */
5942   coding->consumed = consumed;
5943   coding->consumed_char = consumed_char;
5944   coding->produced = produced;
5945   coding->produced_char = produced_char;
5946   /* Build the result: skipped head, decoded text, skipped tail.  */
5947   if (coding->dst_multibyte)
5948     newstr = make_uninit_multibyte_string (produced_char + shrinked_bytes,
5949                                            produced + shrinked_bytes);
5950   else
5951     newstr = make_uninit_string (produced + shrinked_bytes);
5952   if (from > 0)
5953     bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
5954   bcopy (buf.data, XSTRING (newstr)->data + from, produced);
5955   if (shrinked_bytes > from)
5956     bcopy (XSTRING (str)->data + to_byte,
5957            XSTRING (newstr)->data + from + produced,
5958            shrinked_bytes - from);
5959   free_conversion_buffer (&buf);
5960
5961   if (coding->cmp_data && coding->cmp_data->used)
5962     coding_restore_composition (coding, newstr);
5963   coding_free_composition_data (coding);
5964   /* Finally let CODING's post-read-conversion function, if bound, process the result.  */
5965   if (SYMBOLP (coding->post_read_conversion)
5966       && !NILP (Ffboundp (coding->post_read_conversion)))
5967     newstr = run_pre_post_conversion_on_str (newstr, coding, 0);
5968
5969   return newstr;
5970 }
5971 /* Encode STR by CODING (after running CODING's pre-write-conversion, if
5972    bound, on it) and return the encoded string.  When nothing has to be
5973    converted, STR (or, if NOCOPY is zero, a copy of it) is returned.  */
5974
5975 Lisp_Object
5976 encode_coding_string (str, coding, nocopy)
5977      Lisp_Object str;
5978      struct coding_system *coding;
5979      int nocopy;
5980 {
5981   struct conversion_buffer buf;
5982   Lisp_Object newstr;
5983   int from, to, to_byte, len, result;
5984   int shrinked_bytes = 0;
5985   int consumed, consumed_char, produced, produced_char;
5986
5987   if (SYMBOLP (coding->pre_write_conversion)
5988       && !NILP (Ffboundp (coding->pre_write_conversion)))
5989     str = run_pre_post_conversion_on_str (str, coding, 1);
5990
5991   from = 0;
5992   to = XSTRING (str)->size;
5993   to_byte = STRING_BYTES (XSTRING (str));
5994
5995   /* Encoding routines determine the multibyteness of the source text
5996      by coding->src_multibyte.  */
5997   coding->src_multibyte = STRING_MULTIBYTE (str);
5998   coding->dst_multibyte = 0;
5999   if (! CODING_REQUIRE_ENCODING (coding))
6000     {
6001       coding->consumed = STRING_BYTES (XSTRING (str));
6002       coding->consumed_char = XSTRING (str)->size;
6003       if (STRING_MULTIBYTE (str))
6004         {
6005           str = Fstring_as_unibyte (str);
6006           nocopy = 1;
6007         }
6008       coding->produced = STRING_BYTES (XSTRING (str));
6009       coding->produced_char = XSTRING (str)->size;
6010       return (nocopy ? str : Fcopy_sequence (str));
6011     }
6012
6013   if (coding->composing != COMPOSITION_DISABLED)
6014     coding_save_composition (coding, from, to, str);
6015
6016   /* Try to skip the heading and tailing ASCIIs.  */
6017   if (coding->type != coding_type_ccl)
6018     {
6019       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data, 1);
6020       if (from == to_byte)
6021         {
6022           /* Everything was skipped.  Release the composition data saved
6023              above, as the normal exit does, instead of leaking it.  */
6024           coding_free_composition_data (coding);
6025           return (nocopy ? str : Fcopy_sequence (str));
6026         }
6027       shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
6028     }
6029
6030   len = encoding_buffer_size (coding, to_byte - from);
6031   allocate_conversion_buffer (buf, len);
6032   consumed = consumed_char = produced = produced_char = 0;
6033   while (1)
6034     {
6035       result = encode_coding (coding, XSTRING (str)->data + from + consumed,
6036                               buf.data + produced, to_byte - from - consumed,
6037                               buf.size - produced);
6038       consumed += coding->consumed;
6039       consumed_char += coding->consumed_char;
6040       produced += coding->produced;
6041       produced_char += coding->produced_char;
6042       if (result == CODING_FINISH_NORMAL
6043           || (result == CODING_FINISH_INSUFFICIENT_SRC
6044               && coding->consumed == 0))
6045         break;
6046       /* Now result should be CODING_FINISH_INSUFFICIENT_DST.  */
6047       extend_conversion_buffer (&buf);
6048     }
6049   coding->consumed = consumed;
6050   coding->consumed_char = consumed_char;
6051   coding->produced = produced;
6052   coding->produced_char = produced_char;
6053
6054   newstr = make_uninit_string (produced + shrinked_bytes);
6055   if (from > 0)
6056     bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
6057   bcopy (buf.data, XSTRING (newstr)->data + from, produced);
6058   if (shrinked_bytes > from)
6059     bcopy (XSTRING (str)->data + to_byte,
6060            XSTRING (newstr)->data + from + produced, shrinked_bytes - from);
6061   free_conversion_buffer (&buf);
6062   coding_free_composition_data (coding);
6063   return newstr;
6064 }
6065
6066 \f
6067 #ifdef emacs
6068 /*** 8. Emacs Lisp library functions ***/
6069
6070 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6071   "Return t if OBJECT is nil or a coding-system.\n\
6072 See the documentation of `make-coding-system' for information\n\
6073 about coding-system objects.")
6074   (obj)
6075      Lisp_Object obj;
6076 {
6077   if (NILP (obj))
6078     return Qt;
6079   if (!SYMBOLP (obj))
6080     return Qnil;
6081   /* Get coding-spec vector for OBJ.  */
6082   obj = Fget (obj, Qcoding_system);
6083   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
6084           ? Qt : Qnil);
6085 }
6086
6087 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6088        Sread_non_nil_coding_system, 1, 1, 0,
6089   "Read a coding system from the minibuffer, prompting with string PROMPT.")
6090   (prompt)
6091      Lisp_Object prompt;
6092 {
6093   Lisp_Object val;
6094   do
6095     {
6096       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6097                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
6098     }
6099   while (XSTRING (val)->size == 0);
6100   return (Fintern (val, Qnil));
6101 }
6102
6103 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
6104   "Read a coding system from the minibuffer, prompting with string PROMPT.\n\
6105 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.")
6106   (prompt, default_coding_system)
6107      Lisp_Object prompt, default_coding_system;
6108 {
6109   Lisp_Object val;
6110   if (SYMBOLP (default_coding_system))
6111     XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
6112   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6113                           Qt, Qnil, Qcoding_system_history,
6114                           default_coding_system, Qnil);
6115   return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
6116 }
6117
6118 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6119        1, 1, 0,
6120   "Check validity of CODING-SYSTEM.\n\
6121 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.\n\
6122 It is valid if it is a symbol with a non-nil `coding-system' property.\n\
6123 The value of property should be a vector of length 5.")
6124   (coding_system)
6125      Lisp_Object coding_system;
6126 {
6127   CHECK_SYMBOL (coding_system, 0);
6128   if (!NILP (Fcoding_system_p (coding_system)))
6129     return coding_system;
6130   while (1)
6131     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
6132 }
6133 \f
6134 Lisp_Object
6135 detect_coding_system (src, src_bytes, highest, multibytep)
6136      unsigned char *src;
6137      int src_bytes, highest;
6138      int multibytep;
6139 {
6140   int coding_mask, eol_type;
6141   Lisp_Object val, tmp;
6142   int dummy;
6143
6144   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy, multibytep);
6145   eol_type  = detect_eol_type (src, src_bytes, &dummy);
6146   if (eol_type == CODING_EOL_INCONSISTENT)
6147     eol_type = CODING_EOL_UNDECIDED;
6148
6149   if (!coding_mask)
6150     {
6151       val = Qundecided;
6152       if (eol_type != CODING_EOL_UNDECIDED)
6153         {
6154           Lisp_Object val2;
6155           val2 = Fget (Qundecided, Qeol_type);
6156           if (VECTORP (val2))
6157             val = XVECTOR (val2)->contents[eol_type];
6158         }
6159       return (highest ? val : Fcons (val, Qnil));
6160     }
6161
6162   /* At first, gather possible coding systems in VAL.  */
6163   val = Qnil;
6164   for (tmp = Vcoding_category_list; CONSP (tmp); tmp = XCDR (tmp))
6165     {
6166       Lisp_Object category_val, category_index;
6167
6168       category_index = Fget (XCAR (tmp), Qcoding_category_index);
6169       category_val = Fsymbol_value (XCAR (tmp));
6170       if (!NILP (category_val)
6171           && NATNUMP (category_index)
6172           && (coding_mask & (1 << XFASTINT (category_index))))
6173         {
6174           val = Fcons (category_val, val);
6175           if (highest)
6176             break;
6177         }
6178     }
6179   if (!highest)
6180     val = Fnreverse (val);
6181
6182   /* Then, replace the elements with subsidiary coding systems.  */
6183   for (tmp = val; CONSP (tmp); tmp = XCDR (tmp))
6184     {
6185       if (eol_type != CODING_EOL_UNDECIDED
6186           && eol_type != CODING_EOL_INCONSISTENT)
6187         {
6188           Lisp_Object eol;
6189           eol = Fget (XCAR (tmp), Qeol_type);
6190           if (VECTORP (eol))
6191             XCAR (tmp) = XVECTOR (eol)->contents[eol_type];
6192         }
6193     }
6194   return (highest ? XCAR (val) : val);
6195 }
6196
6197 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6198        2, 3, 0,
6199   "Detect coding system of the text in the region between START and END.\n\
6200 Return a list of possible coding systems ordered by priority.\n\
6201 \n\
6202 If only ASCII characters are found, it returns a list of single element\n\
6203 `undecided' or its subsidiary coding system according to a detected\n\
6204 end-of-line format.\n\
6205 \n\
6206 If optional argument HIGHEST is non-nil, return the coding system of\n\
6207 highest priority.")
6208   (start, end, highest)
6209      Lisp_Object start, end, highest;
6210 {
6211   int from, to;
6212   int from_byte, to_byte;
6213
6214   CHECK_NUMBER_COERCE_MARKER (start, 0);
6215   CHECK_NUMBER_COERCE_MARKER (end, 1);
6216
6217   validate_region (&start, &end);
6218   from = XINT (start), to = XINT (end);
6219   from_byte = CHAR_TO_BYTE (from);
6220   to_byte = CHAR_TO_BYTE (to);
6221
6222   if (from < GPT && to >= GPT)
6223     move_gap_both (to, to_byte);
6224
6225   return detect_coding_system (BYTE_POS_ADDR (from_byte),
6226                                to_byte - from_byte,
6227                                !NILP (highest),
6228                                !NILP (current_buffer
6229                                       ->enable_multibyte_characters));
6230 }
6231
6232 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6233        1, 2, 0,
6234   "Detect coding system of the text in STRING.\n\
6235 Return a list of possible coding systems ordered by priority.\n\
6236 \n\
6237 If only ASCII characters are found, it returns a list of single element\n\
6238 `undecided' or its subsidiary coding system according to a detected\n\
6239 end-of-line format.\n\
6240 \n\
6241 If optional argument HIGHEST is non-nil, return the coding system of\n\
6242 highest priority.")
6243   (string, highest)
6244      Lisp_Object string, highest;
6245 {
6246   CHECK_STRING (string, 0);
6247
6248   return detect_coding_system (XSTRING (string)->data,
6249                                STRING_BYTES (XSTRING (string)),
6250                                !NILP (highest),
6251                                STRING_MULTIBYTE (string));
6252 }
6253
6254 /* Return an intersection of lists L1 and L2.  */
6255
6256 static Lisp_Object
6257 intersection (l1, l2)
6258      Lisp_Object l1, l2;
6259 {
6260   Lisp_Object val;
6261
6262   for (val = Qnil; CONSP (l1); l1 = XCDR (l1))
6263     {
6264       if (!NILP (Fmemq (XCAR (l1), l2)))
6265         val = Fcons (XCAR (l1), val);
6266     }
6267   return val;
6268 }
6269
6270
6271 /*  Subroutine for Fsafe_coding_systems_region_internal.
6272
6273     Return a list of coding systems that safely encode the multibyte
6274     text between P and PEND.  SAFE_CODINGS, if non-nil, is a list of
6275     possible coding systems.  If it is nil, it means that we have not
6276     yet found any coding systems.
6277
6278     WORK_TABLE is a copy of the char-table Vchar_coding_system_table.  An
6279     element of WORK_TABLE is set to t once the element is looked up.
6280
6281     If a non-ASCII single byte char is found, set
6282     *single_byte_char_found to 1.  */
6283
6284 static Lisp_Object
6285 find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
6286      unsigned char *p, *pend;
6287      Lisp_Object safe_codings, work_table;
6288      int *single_byte_char_found;
6289 {
6290   int c, len, idx;
6291   Lisp_Object val;
6292
6293   while (p < pend)
6294     {
6295       c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
6296       p += len;
6297       if (ASCII_BYTE_P (c))
6298         /* We can ignore ASCII characters here.  */
6299         continue;
6300       if (SINGLE_BYTE_CHAR_P (c))
6301         *single_byte_char_found = 1;
6302       if (NILP (safe_codings))
6303         continue;
6304       /* Check the safe coding systems for C.  */
6305       val = char_table_ref_and_index (work_table, c, &idx);
6306       if (EQ (val, Qt))
6307         /* This element was already checked.  Ignore it.  */
6308         continue;
6309       /* Remember that we checked this element.  */
6310       CHAR_TABLE_SET (work_table, make_number (idx), Qt);
6311
6312       /* If there are some safe coding systems for C and we have
6313          already found the other set of coding systems for the
6314          different characters, get the intersection of them.  */
6315       if (!EQ (safe_codings, Qt) && !NILP (val))
6316         val = intersection (safe_codings, val);
6317       safe_codings = val;
6318     }
6319   return safe_codings;
6320 }
6321
6322
6323 /* Return a list of coding systems that safely encode the text between
6324    START and END.  If the text contains only ASCII or is unibyte,
6325    return t.  */
6326
6327 DEFUN ("find-coding-systems-region-internal",
6328        Ffind_coding_systems_region_internal,
6329        Sfind_coding_systems_region_internal, 2, 2, 0,
6330   "Internal use only.")
6331   (start, end)
6332      Lisp_Object start, end;
6333 {
6334   Lisp_Object work_table, safe_codings;
6335   int non_ascii_p = 0;
6336   int single_byte_char_found = 0;
6337   unsigned char *p1, *p1end, *p2, *p2end, *p;
6338
6339   if (STRINGP (start))
6340     {
6341       if (!STRING_MULTIBYTE (start))
6342         return Qt;
6343       p1 = XSTRING (start)->data, p1end = p1 + STRING_BYTES (XSTRING (start));
6344       p2 = p2end = p1end;
6345       if (XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
6346         non_ascii_p = 1;
6347     }
6348   else
6349     {
6350       int from, to, stop;
6351
6352       CHECK_NUMBER_COERCE_MARKER (start, 0);
6353       CHECK_NUMBER_COERCE_MARKER (end, 1);
6354       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6355         args_out_of_range (start, end);
6356       if (NILP (current_buffer->enable_multibyte_characters))
6357         return Qt;
6358       from = CHAR_TO_BYTE (XINT (start));
6359       to = CHAR_TO_BYTE (XINT (end));
6360       stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to;
6361       p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from);
6362       if (stop == to)
6363         p2 = p2end = p1end;
6364       else
6365         p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop);
6366       if (XINT (end) - XINT (start) != to - from)
6367         non_ascii_p = 1;
6368     }
6369
6370   if (!non_ascii_p)
6371     {
6372       /* We are sure that the text contains no multibyte character.
6373          Check if it contains eight-bit-graphic.  */
6374       p = p1;
6375       for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++);
6376       if (p == p1end)
6377         {
6378           for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);
6379           if (p == p2end)
6380             return Qt;
6381         }
6382     }
6383
6384   /* The text contains non-ASCII characters.  */
6385   work_table = Fcopy_sequence (Vchar_coding_system_table);
6386   safe_codings = find_safe_codings (p1, p1end, Qt, work_table,
6387                                     &single_byte_char_found);
6388   if (p2 < p2end)
6389     safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
6390                                       &single_byte_char_found);
6391
6392   if (!single_byte_char_found)
6393     {
6394       /* Append generic coding systems.  */
6395       Lisp_Object args[2];
6396       args[0] = safe_codings;
6397       args[1] = Fchar_table_extra_slot (Vchar_coding_system_table,
6398                                         make_number (0));
6399       safe_codings = Fappend (2, args);
6400     }
6401   else
6402     safe_codings = Fcons (Qraw_text,
6403                           Fcons (Qemacs_mule,
6404                                  Fcons (Qno_conversion, safe_codings)));
6405   return safe_codings;
6406 }
6407
6408
6409 Lisp_Object
6410 code_convert_region1 (start, end, coding_system, encodep)
6411      Lisp_Object start, end, coding_system;
6412      int encodep;
6413 {
6414   struct coding_system coding;
6415   int from, to;
6416
6417   CHECK_NUMBER_COERCE_MARKER (start, 0);
6418   CHECK_NUMBER_COERCE_MARKER (end, 1);
6419   CHECK_SYMBOL (coding_system, 2);
6420
6421   validate_region (&start, &end);
6422   from = XFASTINT (start);
6423   to = XFASTINT (end);
6424
6425   if (NILP (coding_system))
6426     return make_number (to - from);
6427
6428   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6429     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
6430
6431   coding.mode |= CODING_MODE_LAST_BLOCK;
6432   coding.src_multibyte = coding.dst_multibyte
6433     = !NILP (current_buffer->enable_multibyte_characters);
6434   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
6435                        &coding, encodep, 1);
6436   Vlast_coding_system_used = coding.symbol;
6437   return make_number (coding.produced_char);
6438 }
6439
/* Lisp entry point for decoding a region of the current buffer.  All
   the work is done by code_convert_region1, called with ENCODEP = 0.  */
DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
       3, 3, "r\nzCoding system: ",
  "Decode the current region from the specified coding system.\n\
When called from a program, takes three arguments:\n\
START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
This function sets `last-coding-system-used' to the precise coding system\n\
used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
not fully specified.)\n\
It returns the length of the decoded text.")
  (start, end, coding_system)
     Lisp_Object start, end, coding_system;
{
  return code_convert_region1 (start, end, coding_system, 0);
}
6454
/* Lisp entry point for encoding a region of the current buffer.  All
   the work is done by code_convert_region1, called with ENCODEP = 1.  */
DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
       3, 3, "r\nzCoding system: ",
  "Encode the current region into the specified coding system.\n\
When called from a program, takes three arguments:\n\
START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
This function sets `last-coding-system-used' to the precise coding system\n\
used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
not fully specified.)\n\
It returns the length of the encoded text.")
  (start, end, coding_system)
     Lisp_Object start, end, coding_system;
{
  return code_convert_region1 (start, end, coding_system, 1);
}
6469
6470 Lisp_Object
6471 code_convert_string1 (string, coding_system, nocopy, encodep)
6472      Lisp_Object string, coding_system, nocopy;
6473      int encodep;
6474 {
6475   struct coding_system coding;
6476
6477   CHECK_STRING (string, 0);
6478   CHECK_SYMBOL (coding_system, 1);
6479
6480   if (NILP (coding_system))
6481     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
6482
6483   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6484     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
6485
6486   coding.mode |= CODING_MODE_LAST_BLOCK;
6487   string = (encodep
6488             ? encode_coding_string (string, &coding, !NILP (nocopy))
6489             : decode_coding_string (string, &coding, !NILP (nocopy)));
6490   Vlast_coding_system_used = coding.symbol;
6491
6492   return string;
6493 }
6494
/* Lisp entry point for decoding a string.  All the work is done by
   code_convert_string1, called with ENCODEP = 0.  */
DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
       2, 3, 0,
  "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
Optional arg NOCOPY non-nil means it is OK to return STRING itself\n\
if the decoding operation is trivial.\n\
This function sets `last-coding-system-used' to the precise coding system\n\
used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
not fully specified.)")
  (string, coding_system, nocopy)
     Lisp_Object string, coding_system, nocopy;
{
  return code_convert_string1 (string, coding_system, nocopy, 0);
}
6508
/* Lisp entry point for encoding a string.  All the work is done by
   code_convert_string1, called with ENCODEP = 1.  */
DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
       2, 3, 0,
  "Encode STRING to CODING-SYSTEM, and return the result.\n\
Optional arg NOCOPY non-nil means it is OK to return STRING itself\n\
if the encoding operation is trivial.\n\
This function sets `last-coding-system-used' to the precise coding system\n\
used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
not fully specified.)")
  (string, coding_system, nocopy)
     Lisp_Object string, coding_system, nocopy;
{
  return code_convert_string1 (string, coding_system, nocopy, 1);
}
6522
6523 /* Encode or decode STRING according to CODING_SYSTEM.
6524    Do not set Vlast_coding_system_used.
6525
6526    This function is called only from macros DECODE_FILE and
6527    ENCODE_FILE, thus we ignore character composition.  */
6528
6529 Lisp_Object
6530 code_convert_string_norecord (string, coding_system, encodep)
6531      Lisp_Object string, coding_system;
6532      int encodep;
6533 {
6534   struct coding_system coding;
6535
6536   CHECK_STRING (string, 0);
6537   CHECK_SYMBOL (coding_system, 1);
6538
6539   if (NILP (coding_system))
6540     return string;
6541
6542   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6543     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
6544
6545   coding.composing = COMPOSITION_DISABLED;
6546   coding.mode |= CODING_MODE_LAST_BLOCK;
6547   return (encodep
6548           ? encode_coding_string (string, &coding, 1)
6549           : decode_coding_string (string, &coding, 1));
6550 }
6551 \f
6552 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
6553   "Decode a Japanese character which has CODE in shift_jis encoding.\n\
6554 Return the corresponding character.")
6555   (code)
6556      Lisp_Object code;
6557 {
6558   unsigned char c1, c2, s1, s2;
6559   Lisp_Object val;
6560
6561   CHECK_NUMBER (code, 0);
6562   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
6563   if (s1 == 0)
6564     {
6565       if (s2 < 0x80)
6566         XSETFASTINT (val, s2);
6567       else if (s2 >= 0xA0 || s2 <= 0xDF)
6568         XSETFASTINT (val, MAKE_CHAR (charset_katakana_jisx0201, s2, 0));
6569       else
6570         error ("Invalid Shift JIS code: %x", XFASTINT (code));
6571     }
6572   else
6573     {
6574       if ((s1 < 0x80 || s1 > 0x9F && s1 < 0xE0 || s1 > 0xEF)
6575           || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC))
6576         error ("Invalid Shift JIS code: %x", XFASTINT (code));
6577       DECODE_SJIS (s1, s2, c1, c2);
6578       XSETFASTINT (val, MAKE_CHAR (charset_jisx0208, c1, c2));
6579     }
6580   return val;
6581 }
6582
6583 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
6584   "Encode a Japanese character CHAR to shift_jis encoding.\n\
6585 Return the corresponding code in SJIS.")
6586   (ch)
6587      Lisp_Object ch;
6588 {
6589   int charset, c1, c2, s1, s2;
6590   Lisp_Object val;
6591
6592   CHECK_NUMBER (ch, 0);
6593   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
6594   if (charset == CHARSET_ASCII)
6595     {
6596       val = ch;
6597     }
6598   else if (charset == charset_jisx0208
6599            && c1 > 0x20 && c1 < 0x7F && c2 > 0x20 && c2 < 0x7F)
6600     {
6601       ENCODE_SJIS (c1, c2, s1, s2);
6602       XSETFASTINT (val, (s1 << 8) | s2);
6603     }
6604   else if (charset == charset_katakana_jisx0201
6605            && c1 > 0x20 && c2 < 0xE0)
6606     {
6607       XSETFASTINT (val, c1 | 0x80);
6608     }
6609   else
6610     error ("Can't encode to shift_jis: %d", XFASTINT (ch));
6611   return val;
6612 }
6613
6614 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
6615   "Decode a Big5 character which has CODE in BIG5 coding system.\n\
6616 Return the corresponding character.")
6617   (code)
6618      Lisp_Object code;
6619 {
6620   int charset;
6621   unsigned char b1, b2, c1, c2;
6622   Lisp_Object val;
6623
6624   CHECK_NUMBER (code, 0);
6625   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
6626   if (b1 == 0)
6627     {
6628       if (b2 >= 0x80)
6629         error ("Invalid BIG5 code: %x", XFASTINT (code));
6630       val = code;
6631     }
6632   else
6633     {
6634       if ((b1 < 0xA1 || b1 > 0xFE)
6635           || (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE))
6636         error ("Invalid BIG5 code: %x", XFASTINT (code));
6637       DECODE_BIG5 (b1, b2, charset, c1, c2);
6638       XSETFASTINT (val, MAKE_CHAR (charset, c1, c2));
6639     }
6640   return val;
6641 }
6642
6643 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
6644   "Encode the Big5 character CHAR to BIG5 coding system.\n\
6645 Return the corresponding character code in Big5.")
6646   (ch)
6647      Lisp_Object ch;
6648 {
6649   int charset, c1, c2, b1, b2;
6650   Lisp_Object val;
6651
6652   CHECK_NUMBER (ch, 0);
6653   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
6654   if (charset == CHARSET_ASCII)
6655     {
6656       val = ch;
6657     }
6658   else if ((charset == charset_big5_1
6659             && (XFASTINT (ch) >= 0x250a1 && XFASTINT (ch) <= 0x271ec))
6660            || (charset == charset_big5_2
6661                && XFASTINT (ch) >= 0x290a1 && XFASTINT (ch) <= 0x2bdb2))
6662     {
6663       ENCODE_BIG5 (charset, c1, c2, b1, b2);
6664       XSETFASTINT (val, (b1 << 8) | b2);
6665     }
6666   else
6667     error ("Can't encode to Big5: %d", XFASTINT (ch));
6668   return val;
6669 }
6670 \f
/* Set up the global `terminal_coding' from CODING_SYSTEM (which must
   pass Fcheck_coding_system) and then adjust it for terminal output.
   Always returns nil.  */
DEFUN ("set-terminal-coding-system-internal",
       Fset_terminal_coding_system_internal,
       Sset_terminal_coding_system_internal, 1, 1, 0, "")
  (coding_system)
     Lisp_Object coding_system;
{
  CHECK_SYMBOL (coding_system, 0);
  setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
  /* We had better not send unsafe characters to terminal.  */
  terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
  /* Character composition should be disabled.  */
  terminal_coding.composing = COMPOSITION_DISABLED;
  /* Error notification should be suppressed.  */
  terminal_coding.suppress_error = 1;
  /* Mark the source as multibyte and the destination as unibyte.  */
  terminal_coding.src_multibyte = 1;
  terminal_coding.dst_multibyte = 0;
  return Qnil;
}
6689
6690 DEFUN ("set-safe-terminal-coding-system-internal",
6691        Fset_safe_terminal_coding_system_internal,
6692        Sset_safe_terminal_coding_system_internal, 1, 1, 0, "")
6693   (coding_system)
6694      Lisp_Object coding_system;
6695 {
6696   CHECK_SYMBOL (coding_system, 0);
6697   setup_coding_system (Fcheck_coding_system (coding_system),
6698                        &safe_terminal_coding);
6699   /* Character composition should be disabled.  */
6700   safe_terminal_coding.composing = COMPOSITION_DISABLED;
6701   /* Error notification should be suppressed.  */
6702   terminal_coding.suppress_error = 1;
6703   safe_terminal_coding.src_multibyte = 1;
6704   safe_terminal_coding.dst_multibyte = 0;
6705   return Qnil;
6706 }
6707
/* Accessor: return the `symbol' member of the global
   `terminal_coding'.  */
DEFUN ("terminal-coding-system",
       Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
  "Return coding system specified for terminal output.")
  ()
{
  return terminal_coding.symbol;
}
6715
/* Set up the global `keyboard_coding' from CODING_SYSTEM (which must
   pass Fcheck_coding_system) and disable composition on it.  Always
   returns nil.  */
DEFUN ("set-keyboard-coding-system-internal",
       Fset_keyboard_coding_system_internal,
       Sset_keyboard_coding_system_internal, 1, 1, 0, "")
  (coding_system)
     Lisp_Object coding_system;
{
  CHECK_SYMBOL (coding_system, 0);
  setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
  /* Character composition should be disabled.  */
  keyboard_coding.composing = COMPOSITION_DISABLED;
  return Qnil;
}
6728
/* Accessor: return the `symbol' member of the global
   `keyboard_coding'.  */
DEFUN ("keyboard-coding-system",
       Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
  "Return coding system specified for decoding keyboard input.")
  ()
{
  return keyboard_coding.symbol;
}
6736
6737 \f
6738 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
6739        Sfind_operation_coding_system,  1, MANY, 0,
6740   "Choose a coding system for an operation based on the target name.\n\
6741 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).\n\
6742 DECODING-SYSTEM is the coding system to use for decoding\n\
6743 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
6744 for encoding (in case OPERATION does encoding).\n\
6745 \n\
6746 The first argument OPERATION specifies an I/O primitive:\n\
6747   For file I/O, `insert-file-contents' or `write-region'.\n\
6748   For process I/O, `call-process', `call-process-region', or `start-process'.\n\
6749   For network I/O, `open-network-stream'.\n\
6750 \n\
6751 The remaining arguments should be the same arguments that were passed\n\
6752 to the primitive.  Depending on which primitive, one of those arguments\n\
6753 is selected as the TARGET.  For example, if OPERATION does file I/O,\n\
6754 whichever argument specifies the file name is TARGET.\n\
6755 \n\
6756 TARGET has a meaning which depends on OPERATION:\n\
6757   For file I/O, TARGET is a file name.\n\
6758   For process I/O, TARGET is a process name.\n\
6759   For network I/O, TARGET is a service name or a port number\n\
6760 \n\
6761 This function looks up what specified for TARGET in,\n\
6762 `file-coding-system-alist', `process-coding-system-alist',\n\
6763 or `network-coding-system-alist' depending on OPERATION.\n\
6764 They may specify a coding system, a cons of coding systems,\n\
6765 or a function symbol to call.\n\
6766 In the last case, we call the function with one argument,\n\
6767 which is a list of all the arguments given to this function.")
6768   (nargs, args)
6769      int nargs;
6770      Lisp_Object *args;
6771 {
6772   Lisp_Object operation, target_idx, target, val;
6773   register Lisp_Object chain;
6774
6775   if (nargs < 2)
6776     error ("Too few arguments");
6777   operation = args[0];
6778   if (!SYMBOLP (operation)
6779       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
6780     error ("Invalid first argument");
6781   if (nargs < 1 + XINT (target_idx))
6782     error ("Too few arguments for operation: %s",
6783            XSYMBOL (operation)->name->data);
6784   target = args[XINT (target_idx) + 1];
6785   if (!(STRINGP (target)
6786         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
6787     error ("Invalid argument %d", XINT (target_idx) + 1);
6788
6789   chain = ((EQ (operation, Qinsert_file_contents)
6790             || EQ (operation, Qwrite_region))
6791            ? Vfile_coding_system_alist
6792            : (EQ (operation, Qopen_network_stream)
6793               ? Vnetwork_coding_system_alist
6794               : Vprocess_coding_system_alist));
6795   if (NILP (chain))
6796     return Qnil;
6797
6798   for (; CONSP (chain); chain = XCDR (chain))
6799     {
6800       Lisp_Object elt;
6801       elt = XCAR (chain);
6802
6803       if (CONSP (elt)
6804           && ((STRINGP (target)
6805                && STRINGP (XCAR (elt))
6806                && fast_string_match (XCAR (elt), target) >= 0)
6807               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
6808         {
6809           val = XCDR (elt);
6810           /* Here, if VAL is both a valid coding system and a valid
6811              function symbol, we return VAL as a coding system.  */
6812           if (CONSP (val))
6813             return val;
6814           if (! SYMBOLP (val))
6815             return Qnil;
6816           if (! NILP (Fcoding_system_p (val)))
6817             return Fcons (val, val);
6818           if (! NILP (Ffboundp (val)))
6819             {
6820               val = call1 (val, Flist (nargs, args));
6821               if (CONSP (val))
6822                 return val;
6823               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
6824                 return Fcons (val, val);
6825             }
6826           return Qnil;
6827         }
6828     }
6829   return Qnil;
6830 }
6831
6832 DEFUN ("update-coding-systems-internal",  Fupdate_coding_systems_internal,
6833        Supdate_coding_systems_internal, 0, 0, 0,
6834   "Update internal database for ISO2022 and CCL based coding systems.\n\
6835 When values of any coding categories are changed, you must\n\
6836 call this function")
6837   ()
6838 {
6839   int i;
6840
6841   for (i = CODING_CATEGORY_IDX_EMACS_MULE; i < CODING_CATEGORY_IDX_MAX; i++)
6842     {
6843       Lisp_Object val;
6844
6845       val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value;
6846       if (!NILP (val))
6847         {
6848           if (! coding_system_table[i])
6849             coding_system_table[i] = ((struct coding_system *)
6850                                       xmalloc (sizeof (struct coding_system)));
6851           setup_coding_system (val, coding_system_table[i]);
6852         }
6853       else if (coding_system_table[i])
6854         {
6855           xfree (coding_system_table[i]);
6856           coding_system_table[i] = NULL;
6857         }
6858     }
6859
6860   return Qnil;
6861 }
6862
6863 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
6864        Sset_coding_priority_internal, 0, 0, 0,
6865   "Update internal database for the current value of `coding-category-list'.\n\
6866 This function is internal use only.")
6867   ()
6868 {
6869   int i = 0, idx;
6870   Lisp_Object val;
6871
6872   val = Vcoding_category_list;
6873
6874   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
6875     {
6876       if (! SYMBOLP (XCAR (val)))
6877         break;
6878       idx = XFASTINT (Fget (XCAR (val), Qcoding_category_index));
6879       if (idx >= CODING_CATEGORY_IDX_MAX)
6880         break;
6881       coding_priorities[i++] = (1 << idx);
6882       val = XCDR (val);
6883     }
6884   /* If coding-category-list is valid and contains all coding
6885      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
6886      the following code saves Emacs from crashing.  */
6887   while (i < CODING_CATEGORY_IDX_MAX)
6888     coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
6889
6890   return Qnil;
6891 }
6892
6893 #endif /* emacs */
6894
6895 \f
6896 /*** 9. Post-amble ***/
6897
6898 void
6899 init_coding_once ()
6900 {
6901   int i;
6902
6903   /* Emacs' internal format specific initialize routine.  */
6904   for (i = 0; i <= 0x20; i++)
6905     emacs_code_class[i] = EMACS_control_code;
6906   emacs_code_class[0x0A] = EMACS_linefeed_code;
6907   emacs_code_class[0x0D] = EMACS_carriage_return_code;
6908   for (i = 0x21 ; i < 0x7F; i++)
6909     emacs_code_class[i] = EMACS_ascii_code;
6910   emacs_code_class[0x7F] = EMACS_control_code;
6911   for (i = 0x80; i < 0xFF; i++)
6912     emacs_code_class[i] = EMACS_invalid_code;
6913   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
6914   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
6915   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
6916   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
6917
6918   /* ISO2022 specific initialize routine.  */
6919   for (i = 0; i < 0x20; i++)
6920     iso_code_class[i] = ISO_control_0;
6921   for (i = 0x21; i < 0x7F; i++)
6922     iso_code_class[i] = ISO_graphic_plane_0;
6923   for (i = 0x80; i < 0xA0; i++)
6924     iso_code_class[i] = ISO_control_1;
6925   for (i = 0xA1; i < 0xFF; i++)
6926     iso_code_class[i] = ISO_graphic_plane_1;
6927   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
6928   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
6929   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
6930   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
6931   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
6932   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
6933   iso_code_class[ISO_CODE_ESC] = ISO_escape;
6934   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
6935   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
6936   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
6937
6938   setup_coding_system (Qnil, &keyboard_coding);
6939   setup_coding_system (Qnil, &terminal_coding);
6940   setup_coding_system (Qnil, &safe_terminal_coding);
6941   setup_coding_system (Qnil, &default_buffer_file_coding);
6942
6943   bzero (coding_system_table, sizeof coding_system_table);
6944
6945   bzero (ascii_skip_code, sizeof ascii_skip_code);
6946   for (i = 0; i < 128; i++)
6947     ascii_skip_code[i] = 1;
6948
6949 #if defined (MSDOS) || defined (WINDOWSNT)
6950   system_eol_type = CODING_EOL_CRLF;
6951 #else
6952   system_eol_type = CODING_EOL_LF;
6953 #endif
6954
6955   inhibit_pre_post_conversion = 0;
6956 }
6957
6958 #ifdef emacs
6959
6960 void
6961 syms_of_coding ()
6962 {
6963   Qtarget_idx = intern ("target-idx");
6964   staticpro (&Qtarget_idx);
6965
6966   Qcoding_system_history = intern ("coding-system-history");
6967   staticpro (&Qcoding_system_history);
6968   Fset (Qcoding_system_history, Qnil);
6969
6970   /* Target FILENAME is the first argument.  */
6971   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
6972   /* Target FILENAME is the third argument.  */
6973   Fput (Qwrite_region, Qtarget_idx, make_number (2));
6974
6975   Qcall_process = intern ("call-process");
6976   staticpro (&Qcall_process);
6977   /* Target PROGRAM is the first argument.  */
6978   Fput (Qcall_process, Qtarget_idx, make_number (0));
6979
6980   Qcall_process_region = intern ("call-process-region");
6981   staticpro (&Qcall_process_region);
6982   /* Target PROGRAM is the third argument.  */
6983   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
6984
6985   Qstart_process = intern ("start-process");
6986   staticpro (&Qstart_process);
6987   /* Target PROGRAM is the third argument.  */
6988   Fput (Qstart_process, Qtarget_idx, make_number (2));
6989
6990   Qopen_network_stream = intern ("open-network-stream");
6991   staticpro (&Qopen_network_stream);
6992   /* Target SERVICE is the fourth argument.  */
6993   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
6994
6995   Qcoding_system = intern ("coding-system");
6996   staticpro (&Qcoding_system);
6997
6998   Qeol_type = intern ("eol-type");
6999   staticpro (&Qeol_type);
7000
7001   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
7002   staticpro (&Qbuffer_file_coding_system);
7003
7004   Qpost_read_conversion = intern ("post-read-conversion");
7005   staticpro (&Qpost_read_conversion);
7006
7007   Qpre_write_conversion = intern ("pre-write-conversion");
7008   staticpro (&Qpre_write_conversion);
7009
7010   Qno_conversion = intern ("no-conversion");
7011   staticpro (&Qno_conversion);
7012
7013   Qundecided = intern ("undecided");
7014   staticpro (&Qundecided);
7015
7016   Qcoding_system_p = intern ("coding-system-p");
7017   staticpro (&Qcoding_system_p);
7018
7019   Qcoding_system_error = intern ("coding-system-error");
7020   staticpro (&Qcoding_system_error);
7021
7022   Fput (Qcoding_system_error, Qerror_conditions,
7023         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
7024   Fput (Qcoding_system_error, Qerror_message,
7025         build_string ("Invalid coding system"));
7026
7027   Qcoding_category = intern ("coding-category");
7028   staticpro (&Qcoding_category);
7029   Qcoding_category_index = intern ("coding-category-index");
7030   staticpro (&Qcoding_category_index);
7031
7032   Vcoding_category_table
7033     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
7034   staticpro (&Vcoding_category_table);
7035   {
7036     int i;
7037     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
7038       {
7039         XVECTOR (Vcoding_category_table)->contents[i]
7040           = intern (coding_category_name[i]);
7041         Fput (XVECTOR (Vcoding_category_table)->contents[i],
7042               Qcoding_category_index, make_number (i));
7043       }
7044   }
7045
7046   Qtranslation_table = intern ("translation-table");
7047   staticpro (&Qtranslation_table);
7048   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
7049
7050   Qtranslation_table_id = intern ("translation-table-id");
7051   staticpro (&Qtranslation_table_id);
7052
7053   Qtranslation_table_for_decode = intern ("translation-table-for-decode");
7054   staticpro (&Qtranslation_table_for_decode);
7055
7056   Qtranslation_table_for_encode = intern ("translation-table-for-encode");
7057   staticpro (&Qtranslation_table_for_encode);
7058
7059   Qsafe_chars = intern ("safe-chars");
7060   staticpro (&Qsafe_chars);
7061
7062   Qchar_coding_system = intern ("char-coding-system");
7063   staticpro (&Qchar_coding_system);
7064
7065   /* Intern this now in case it isn't already done.
7066      Setting this variable twice is harmless.
7067      But don't staticpro it here--that is done in alloc.c.  */
7068   Qchar_table_extra_slots = intern ("char-table-extra-slots");
7069   Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
7070   Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (1));
7071
7072   Qvalid_codes = intern ("valid-codes");
7073   staticpro (&Qvalid_codes);
7074
7075   Qemacs_mule = intern ("emacs-mule");
7076   staticpro (&Qemacs_mule);
7077
7078   Qraw_text = intern ("raw-text");
7079   staticpro (&Qraw_text);
7080
7081   defsubr (&Scoding_system_p);
7082   defsubr (&Sread_coding_system);
7083   defsubr (&Sread_non_nil_coding_system);
7084   defsubr (&Scheck_coding_system);
7085   defsubr (&Sdetect_coding_region);
7086   defsubr (&Sdetect_coding_string);
7087   defsubr (&Sfind_coding_systems_region_internal);
7088   defsubr (&Sdecode_coding_region);
7089   defsubr (&Sencode_coding_region);
7090   defsubr (&Sdecode_coding_string);
7091   defsubr (&Sencode_coding_string);
7092   defsubr (&Sdecode_sjis_char);
7093   defsubr (&Sencode_sjis_char);
7094   defsubr (&Sdecode_big5_char);
7095   defsubr (&Sencode_big5_char);
7096   defsubr (&Sset_terminal_coding_system_internal);
7097   defsubr (&Sset_safe_terminal_coding_system_internal);
7098   defsubr (&Sterminal_coding_system);
7099   defsubr (&Sset_keyboard_coding_system_internal);
7100   defsubr (&Skeyboard_coding_system);
7101   defsubr (&Sfind_operation_coding_system);
7102   defsubr (&Supdate_coding_systems_internal);
7103   defsubr (&Sset_coding_priority_internal);
7104
7105   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
7106     "List of coding systems.\n\
7107 \n\
7108 Do not alter the value of this variable manually.  This variable should be\n\
7109 updated by the functions `make-coding-system' and\n\
7110 `define-coding-system-alias'.");
7111   Vcoding_system_list = Qnil;
7112
7113   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
7114     "Alist of coding system names.\n\
7115 Each element is one element list of coding system name.\n\
7116 This variable is given to `completing-read' as TABLE argument.\n\
7117 \n\
7118 Do not alter the value of this variable manually.  This variable should be\n\
7119 updated by the functions `make-coding-system' and\n\
7120 `define-coding-system-alias'.");
7121   Vcoding_system_alist = Qnil;
7122
7123   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
7124     "List of coding-categories (symbols) ordered by priority.");
7125   {
7126     int i;
7127
7128     Vcoding_category_list = Qnil;
7129     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
7130       Vcoding_category_list
7131         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
7132                  Vcoding_category_list);
7133   }
7134
7135   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
7136     "Specify the coding system for read operations.\n\
7137 It is useful to bind this variable with `let', but do not set it globally.\n\
7138 If the value is a coding system, it is used for decoding on read operation.\n\
7139 If not, an appropriate element is used from one of the coding system alists:\n\
7140 There are three such tables, `file-coding-system-alist',\n\
7141 `process-coding-system-alist', and `network-coding-system-alist'.");
7142   Vcoding_system_for_read = Qnil;
7143
7144   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
7145     "Specify the coding system for write operations.\n\
7146 Programs bind this variable with `let', but you should not set it globally.\n\
7147 If the value is a coding system, it is used for encoding of output,\n\
7148 when writing it to a file and when sending it to a file or subprocess.\n\
7149 \n\
7150 If this does not specify a coding system, an appropriate element\n\
7151 is used from one of the coding system alists:\n\
7152 There are three such tables, `file-coding-system-alist',\n\
7153 `process-coding-system-alist', and `network-coding-system-alist'.\n\
7154 For output to files, if the above procedure does not specify a coding system,\n\
7155 the value of `buffer-file-coding-system' is used.");
7156   Vcoding_system_for_write = Qnil;
7157
7158   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
7159     "Coding system used in the latest file or process I/O.");
7160   Vlast_coding_system_used = Qnil;
7161
7162   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
7163     "*Non-nil means always inhibit code conversion of end-of-line format.\n\
7164 See info node `Coding Systems' and info node `Text and Binary' concerning\n\
7165 such conversion.");
7166   inhibit_eol_conversion = 0;
7167
7168   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
7169     "Non-nil means process buffer inherits coding system of process output.\n\
7170 Bind it to t if the process output is to be treated as if it were a file\n\
7171 read from some filesystem.");
7172   inherit_process_coding_system = 0;
7173
7174   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
7175     "Alist to decide a coding system to use for a file I/O operation.\n\
7176 The format is ((PATTERN . VAL) ...),\n\
7177 where PATTERN is a regular expression matching a file name,\n\
7178 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
7179 If VAL is a coding system, it is used for both decoding and encoding\n\
7180 the file contents.\n\
7181 If VAL is a cons of coding systems, the car part is used for decoding,\n\
7182 and the cdr part is used for encoding.\n\
7183 If VAL is a function symbol, the function must return a coding system\n\
7184 or a cons of coding systems which are used as above.\n\
7185 \n\
7186 See also the function `find-operation-coding-system'\n\
7187 and the variable `auto-coding-alist'.");
7188   Vfile_coding_system_alist = Qnil;
7189
7190   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
7191     "Alist to decide a coding system to use for a process I/O operation.\n\
7192 The format is ((PATTERN . VAL) ...),\n\
7193 where PATTERN is a regular expression matching a program name,\n\
7194 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
7195 If VAL is a coding system, it is used for both decoding what received\n\
7196 from the program and encoding what sent to the program.\n\
7197 If VAL is a cons of coding systems, the car part is used for decoding,\n\
7198 and the cdr part is used for encoding.\n\
7199 If VAL is a function symbol, the function must return a coding system\n\
7200 or a cons of coding systems which are used as above.\n\
7201 \n\
7202 See also the function `find-operation-coding-system'.");
7203   Vprocess_coding_system_alist = Qnil;
7204
7205   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
7206     "Alist to decide a coding system to use for a network I/O operation.\n\
7207 The format is ((PATTERN . VAL) ...),\n\
7208 where PATTERN is a regular expression matching a network service name\n\
7209 or is a port number to connect to,\n\
7210 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
7211 If VAL is a coding system, it is used for both decoding what received\n\
7212 from the network stream and encoding what sent to the network stream.\n\
7213 If VAL is a cons of coding systems, the car part is used for decoding,\n\
7214 and the cdr part is used for encoding.\n\
7215 If VAL is a function symbol, the function must return a coding system\n\
7216 or a cons of coding systems which are used as above.\n\
7217 \n\
7218 See also the function `find-operation-coding-system'.");
7219   Vnetwork_coding_system_alist = Qnil;
7220
7221   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
7222     "Coding system to use with system messages.");
7223   Vlocale_coding_system = Qnil;
7224
7225   /* The eol mnemonics are reset in startup.el system-dependently.  */
7226   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
7227     "*String displayed in mode line for UNIX-like (LF) end-of-line format.");
7228   eol_mnemonic_unix = build_string (":");
7229
7230   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
7231     "*String displayed in mode line for DOS-like (CRLF) end-of-line format.");
7232   eol_mnemonic_dos = build_string ("\\");
7233
7234   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
7235     "*String displayed in mode line for MAC-like (CR) end-of-line format.");
7236   eol_mnemonic_mac = build_string ("/");
7237
7238   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
7239     "*String displayed in mode line when end-of-line format is not yet determined.");
7240   eol_mnemonic_undecided = build_string (":");
7241
7242   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
7243     "*Non-nil enables character translation while encoding and decoding.");
7244   Venable_character_translation = Qt;
7245
7246   DEFVAR_LISP ("standard-translation-table-for-decode",
7247     &Vstandard_translation_table_for_decode,
7248     "Table for translating characters while decoding.");
7249   Vstandard_translation_table_for_decode = Qnil;
7250
7251   DEFVAR_LISP ("standard-translation-table-for-encode",
7252     &Vstandard_translation_table_for_encode,
7253     "Table for translating characters while encoding.");
7254   Vstandard_translation_table_for_encode = Qnil;
7255
7256   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
7257     "Alist of charsets vs revision numbers.\n\
7258 While encoding, if a charset (car part of an element) is found,\n\
7259 designate it with the escape sequence identifying revision (cdr part of the element).");
7260   Vcharset_revision_alist = Qnil;
7261
7262   DEFVAR_LISP ("default-process-coding-system",
7263                &Vdefault_process_coding_system,
7264     "Cons of coding systems used for process I/O by default.\n\
7265 The car part is used for decoding a process output,\n\
7266 the cdr part is used for encoding a text to be sent to a process.");
7267   Vdefault_process_coding_system = Qnil;
7268
7269   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
7270     "Table of extra Latin codes in the range 128..159 (inclusive).\n\
7271 This is a vector of length 256.\n\
7272 If Nth element is non-nil, the existence of code N in a file\n\
7273 \(or output of subprocess) doesn't prevent it to be detected as\n\
7274 a coding system of ISO 2022 variant which has a flag\n\
7275 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file\n\
7276 or reading output of a subprocess.\n\
7277 Only 128th through 159th elements has a meaning.");
7278   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
7279
7280   DEFVAR_LISP ("select-safe-coding-system-function",
7281                &Vselect_safe_coding_system_function,
7282     "Function to call to select safe coding system for encoding a text.\n\
7283 \n\
7284 If set, this function is called to force a user to select a proper\n\
7285 coding system which can encode the text in the case that a default\n\
7286 coding system used in each operation can't encode the text.\n\
7287 \n\
7288 The default value is `select-safe-coding-system' (which see).");
7289   Vselect_safe_coding_system_function = Qnil;
7290
7291   DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table,
7292     "Char-table containing safe coding systems of each characters.\n\
7293 Each element doesn't include such generic coding systems that can\n\
7294 encode any characters.   They are in the first extra slot.");
7295   Vchar_coding_system_table = Fmake_char_table (Qchar_coding_system, Qnil);
7296
7297   DEFVAR_BOOL ("inhibit-iso-escape-detection",
7298                &inhibit_iso_escape_detection,
7299     "If non-nil, Emacs ignores ISO2022's escape sequence on code detection.\n\
7300 \n\
7301 By default, on reading a file, Emacs tries to detect how the text is\n\
7302 encoded.  This code detection is sensitive to escape sequences.  If\n\
7303 the sequence is valid as ISO2022, the code is determined as one of\n\
7304 the ISO2022 encodings, and the file is decoded by the corresponding\n\
7305 coding system (e.g. `iso-2022-7bit').\n\
7306 \n\
7307 However, there may be a case that you want to read escape sequences in\n\
7308 a file as is.  In such a case, you can set this variable to non-nil.\n\
7309 Then, as the code detection ignores any escape sequences, no file is\n\
7310 detected as encoded in some ISO2022 encoding.  The result is that all\n\
7311 escape sequences become visible in a buffer.\n\
7312 \n\
7313 The default value is nil, and it is strongly recommended not to change\n\
7314 it.  That is because many Emacs Lisp source files that contain\n\
7315 non-ASCII characters are encoded by the coding system `iso-2022-7bit'\n\
7316 in Emacs's distribution, and they won't be decoded correctly on\n\
7317 reading if you suppress escape sequence detection.\n\
7318 \n\
7319 The other way to read escape sequences in a file without decoding is\n\
7320 to explicitly specify some coding system that doesn't use ISO2022's\n\
7321 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].");
7322   inhibit_iso_escape_detection = 0;
7323 }
7324
7325 char *
7326 emacs_strerror (error_number)
7327      int error_number;
7328 {
7329   char *str;
7330
7331   synchronize_system_messages_locale ();
7332   str = strerror (error_number);
7333
7334   if (! NILP (Vlocale_coding_system))
7335     {
7336       Lisp_Object dec = code_convert_string_norecord (build_string (str),
7337                                                       Vlocale_coding_system,
7338                                                       0);
7339       str = (char *) XSTRING (dec)->data;
7340     }
7341
7342   return str;
7343 }
7344
7345 #endif /* emacs */
7346