src/coding.c

   1 /* Coding system handler (conversion, detection, and etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /*** TABLE OF CONTENTS ***
  23
  24   0. General comments
  25   1. Preamble
  26   2. Emacs' internal format (emacs-mule) handlers
  27   3. ISO2022 handlers
  28   4. Shift-JIS and BIG5 handlers
  29   5. CCL handlers
  30   6. End-of-line handlers
  31   7. C library functions
  32   8. Emacs Lisp library functions
  33   9. Post-amble
  34
  35 */
  36
  37 /*** 0. General comments ***/
  38
  39
  40 /*** GENERAL NOTE on CODING SYSTEMS ***
  41
  42   A coding system is an encoding mechanism for one or more character
  43   sets.  Here's a list of coding systems which Emacs can handle.  When
  44   we say "decode", it means converting some other coding system to
  45   Emacs' internal format (emacs-mule), and when we say "encode",
  46   it means converting the coding system emacs-mule to some other
  47   coding system.
  48
  49   0. Emacs' internal format (emacs-mule)
  50
  51   Emacs itself holds a multi-lingual character in buffers and strings
  52   in a special format.  Details are described in section 2.
  53
  54   1. ISO2022
  55
  56   The most famous coding system for multiple character sets.  X's
  57   Compound Text, various EUCs (Extended Unix Code), and coding
  58   systems used in Internet communication such as ISO-2022-JP are
  59   all variants of ISO2022.  Details are described in section 3.
  60
  61   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  62
  63   A coding system to encode character sets: ASCII, JISX0201, and
  64   JISX0208.  Widely used for PC's in Japan.  Details are described in
  65   section 4.
  66
  67   3. BIG5
  68
  69   A coding system to encode the character sets ASCII and Big5.  Widely
  70   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  71   described in section 4.  In this file, when we write "BIG5"
  72   (all uppercase), we mean the coding system, and when we write
  73   "Big5" (capitalized), we mean the character set.
  74
  75   4. Raw text
  76
  77   A coding system for text containing random 8-bit code.  Emacs does
  78   no code conversion on such text except for end-of-line format.
  79
  80   5. Other
  81
  82   If a user wants to read/write text encoded in a coding system not
  83   listed above, he can supply a decoder and an encoder for it as CCL
  84   (Code Conversion Language) programs.  Emacs executes the CCL program
  85   while reading/writing.
  86
  87   Emacs represents a coding system by a Lisp symbol that has a property
  88   `coding-system'.  But, before actually using the coding system, the
  89   information about it is set in a structure of type `struct
  90   coding_system' for rapid processing.  See section 6 for more details.
  91
  92 */
  93
  94 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  95
  96   How end-of-line of text is encoded depends on the operating system.
  97   For instance, Unix's format is just one byte of `line-feed' code,
  98   whereas DOS's format is two-byte sequence of `carriage-return' and
  99   `line-feed' codes.  MacOS's format is usually one byte of
 100   `carriage-return'.
 101
 102   Since text character encoding and end-of-line encoding are
 103   independent, any coding system described above can have any
 104   end-of-line format.  So Emacs has information about end-of-line
 105   format in each coding-system.  See section 6 for more details.
 106
 107 */
 108
 109 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 110
 111   These functions check if a text between SRC and SRC_END is encoded
 112   in the coding system category XXX.  Each returns an integer value in
 113   which appropriate flag bits for the category XXX are set.  The flag
 114   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 115   template for these functions.  If MULTIBYTEP is nonzero, 8-bit codes
 116   of the range 0x80..0x9F are in multibyte form.  */
 117 #if 0
 118 int
 119 detect_coding_emacs_mule (src, src_end, multibytep)
 120      unsigned char *src, *src_end;
 121      int multibytep;
 122 {
 123   ...
 124 }
 125 #endif
 126
 127 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 128
 129   These functions decode SRC_BYTES length of unibyte text at SOURCE
 130   encoded in CODING to Emacs' internal format.  The resulting
 131   multibyte text goes to a place pointed to by DESTINATION, the length
 132   of which should not exceed DST_BYTES.
 133
 134   These functions set the information about original and decoded texts
 135   in the members `produced', `produced_char', `consumed', and
 136   `consumed_char' of the structure *CODING.  They also set the member
 137   `result' to one of CODING_FINISH_XXX indicating how the decoding
 138   finished.
 139
 140   DST_BYTES zero means that the source area and destination area are
 141   overlapped, which means that we can produce a decoded text until it
 142   reaches the head of the not-yet-decoded source text.
 143
 144   Below is a template for these functions.  */
 145 #if 0
 146 static void
 147 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 148      struct coding_system *coding;
 149      unsigned char *source, *destination;
 150      int src_bytes, dst_bytes;
 151 {
 152   ...
 153 }
 154 #endif
 155
 156 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 157
 158   These functions encode SRC_BYTES length text at SOURCE from Emacs'
 159   internal multibyte format to CODING.  The resulting unibyte text
 160   goes to a place pointed to by DESTINATION, the length of which
 161   should not exceed DST_BYTES.
 162
 163   These functions set the information about original and encoded texts
 164   in the members `produced', `produced_char', `consumed', and
 165   `consumed_char' of the structure *CODING.  They also set the member
 166   `result' to one of CODING_FINISH_XXX indicating how the encoding
 167   finished.
 168
 169   DST_BYTES zero means that the source area and destination area are
 170   overlapped, which means that we can produce encoded text until it
 171   reaches the head of the not-yet-encoded source text.
 172
 173   Below is a template for these functions.  */
 174 #if 0
 175 static void
 176 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 177      struct coding_system *coding;
 178      unsigned char *source, *destination;
 179      int src_bytes, dst_bytes;
 180 {
 181   ...
 182 }
 183 #endif
 184
 185 /*** COMMONLY USED MACROS ***/
 186
 187 /* The following two macros ONE_MORE_BYTE and TWO_MORE_BYTES safely
 188    get one and two bytes from the source text respectively.  If
 189    there are not enough bytes in the source, they set
 190    coding->result to CODING_FINISH_INSUFFICIENT_SRC and jump to
 191    `label_end_of_loop' without consuming anything.  The caller should
 192    set variables `coding', `src' and `src_end' to appropriate
 193    pointers in advance.  These macros are called from decoding
        routines `decode_coding_XXX', thus it is assumed that the source
        text is unibyte.

        ONE_MORE_BYTE stores the byte at `src' in C1 and advances `src'
        by one.  */
 194
 195 #define ONE_MORE_BYTE(c1)                                       \
 196   do {                                                          \
 197     if (src >= src_end)                                         \
 198       {                                                         \
 199         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 200         goto label_end_of_loop;                                 \
 201       }                                                         \
 202     c1 = *src++;                                                \
 203   } while (0)
 204
     /* Store the next two source bytes in C1 and C2 (in that order) and
        advance `src' by two.  Unless at least two bytes remain before
        `src_end', consume nothing, set coding->result to
        CODING_FINISH_INSUFFICIENT_SRC and jump to `label_end_of_loop'.
        The caller provides `coding', `src' and `src_end'.  */
 205 #define TWO_MORE_BYTES(c1, c2)                                  \
 206   do {                                                          \
 207     if (src + 1 >= src_end)                                     \
 208       {                                                         \
 209         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 210         goto label_end_of_loop;                                 \
 211       }                                                         \
 212     c1 = *src++;                                                \
 213     c2 = *src++;                                                \
 214   } while (0)
 215
 216
 217 /* Like ONE_MORE_BYTE, but 8-bit bytes of data at SRC are in multibyte
 218    form if MULTIBYTEP is nonzero.  In that case a byte
        LEADING_CODE_8_BIT_CONTROL introduces a two-byte sequence whose
        second byte is the 8-bit code plus 0x20, and C1 is set to the
        8-bit code itself.  If the source ends between those two bytes,
        this is handled like any other shortage of source (coding->result
        is set to CODING_FINISH_INSUFFICIENT_SRC and control jumps to
        `label_end_of_loop') rather than reading the byte at `src_end'.
        The caller provides `coding', `src' and `src_end'.  */
 219
 220 #define ONE_MORE_BYTE_CHECK_MULTIBYTE(c1, multibytep)           \
 221   do {                                                          \
 222     if (src >= src_end)                                         \
 223       {                                                         \
 224         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 225         goto label_end_of_loop;                                 \
 226       }                                                         \
 227     c1 = *src++;                                                \
 228     if (multibytep && c1 == LEADING_CODE_8_BIT_CONTROL)         \
           {                                                         \
             /* The second byte must exist before it is read.  */    \
             if (src >= src_end)                                     \
               {                                                     \
                 coding->result = CODING_FINISH_INSUFFICIENT_SRC;    \
                 goto label_end_of_loop;                             \
               }                                                     \
 229         c1 = *src++ - 0x20;                                     \
           }                                                         \
 230   } while (0)
 231
 232 /* Set C to the next character at the source text pointed to by `src'
 233    and advance `src' past it.  If there are no more bytes in the
 234    source, set coding->result to CODING_FINISH_INSUFFICIENT_SRC and
 235    jump to `label_end_of_loop'.  The caller should set variables
 236    `coding', `src', `src_end', and `translation_table' to appropriate
 237    values in advance.  This macro is used in encoding routines
 238    `encode_coding_XXX', thus it assumes that the source text is in
 239    multibyte form except for 8-bit characters.  8-bit characters are
 240    in multibyte form if coding->src_multibyte is nonzero, else they
        are represented by a single byte.  If `translation_table' is
        non-nil, C is then translated through it with translate_char.  */
 241
 242 #define ONE_MORE_CHAR(c)                                        \
 243   do {                                                          \
 244     int len = src_end - src;                                    \
 245     int bytes;                                                  \
 246     if (len <= 0)                                               \
 247       {                                                         \
 248         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 249         goto label_end_of_loop;                                 \
 250       }                                                         \
 251     if (coding->src_multibyte                                   \
 252         || UNIBYTE_STR_AS_MULTIBYTE_P (src, len, bytes))        \
 253       c = STRING_CHAR_AND_LENGTH (src, len, bytes);             \
 254     else                                                        \
 255       c = *src, bytes = 1;                                      \
 256     if (!NILP (translation_table))                              \
 257       c = translate_char (translation_table, c, -1, 0, 0);      \
 258     src += bytes;                                               \
 259   } while (0)
 260
 261
 262 /* Produce a multibyte form of character C to `dst'.  Jump to
 263    `label_end_of_loop' if there's not enough space at `dst'.
 264
 265    If we are now in the middle of a composition sequence, the decoded
 266    character may be ALTCHAR (for the current composition).  In that
 267    case, the character goes to coding->cmp_data->data instead of
 268    `dst'.
 269
        More precisely: C is written to `dst' (and coding->produced_char
        is incremented) when no composition is in progress, or when the
        composition method is COMPOSITION_RELATIVE or
        COMPOSITION_WITH_RULE.  C is recorded as a composition component
        when a composition of any method other than COMPOSITION_RELATIVE
        is in progress; coding->composition_rule_follows is then set to
        nonzero unless the method is COMPOSITION_WITH_ALTCHARS.

        The space limit is `dst_end' when `dst_bytes' is nonzero,
        otherwise `src' (source and destination overlap).  On shortage,
        coding->result is set to CODING_FINISH_INSUFFICIENT_DST.

 270    This macro is used in decoding routines.  */
 271
 272 #define EMIT_CHAR(c)                                                    \
 273   do {                                                                  \
 274     if (! COMPOSING_P (coding)                                          \
 275         || coding->composing == COMPOSITION_RELATIVE                    \
 276         || coding->composing == COMPOSITION_WITH_RULE)                  \
 277       {                                                                 \
 278         int bytes = CHAR_BYTES (c);                                     \
 279         if ((dst + bytes) > (dst_bytes ? dst_end : src))                \
 280           {                                                             \
 281             coding->result = CODING_FINISH_INSUFFICIENT_DST;            \
 282             goto label_end_of_loop;                                     \
 283           }                                                             \
 284         dst += CHAR_STRING (c, dst);                                    \
 285         coding->produced_char++;                                        \
 286       }                                                                 \
 287                                                                         \
 288     if (COMPOSING_P (coding)                                            \
 289         && coding->composing != COMPOSITION_RELATIVE)                   \
 290       {                                                                 \
 291         CODING_ADD_COMPOSITION_COMPONENT (coding, c);                   \
 292         coding->composition_rule_follows                                \
 293           = coding->composing != COMPOSITION_WITH_ALTCHARS;             \
 294       }                                                                 \
 295   } while (0)
 296
 297
     /* Store the single byte C at `dst' and advance `dst'.  If `dst' has
        already reached the limit (`dst_end' when `dst_bytes' is nonzero,
        otherwise `src'), store nothing, set coding->result to
        CODING_FINISH_INSUFFICIENT_DST and jump to `label_end_of_loop'.  */
 298 #define EMIT_ONE_BYTE(c)                                        \
 299   do {                                                          \
 300     if (dst >= (dst_bytes ? dst_end : src))                     \
 301       {                                                         \
 302         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 303         goto label_end_of_loop;                                 \
 304       }                                                         \
 305     *dst++ = c;                                                 \
 306   } while (0)
 307
     /* Store the bytes C1 and C2, in that order, at `dst' and advance
        `dst' by two.  If two bytes do not fit before the limit (`dst_end'
        when `dst_bytes' is nonzero, otherwise `src'), store nothing, set
        coding->result to CODING_FINISH_INSUFFICIENT_DST and jump to
        `label_end_of_loop'.  */
 308 #define EMIT_TWO_BYTES(c1, c2)                                  \
 309   do {                                                          \
 310     if (dst + 2 > (dst_bytes ? dst_end : src))                  \
 311       {                                                         \
 312         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 313         goto label_end_of_loop;                                 \
 314       }                                                         \
 315     *dst++ = c1, *dst++ = c2;                                   \
 316   } while (0)
 317
     /* Copy the bytes from FROM up to (but not including) TO to `dst',
        advancing both `dst' and FROM; FROM must therefore be a modifiable
        pointer variable, and it equals TO afterwards.  TO is evaluated
        more than once.  If TO - FROM bytes do not fit before the limit
        (`dst_end' when `dst_bytes' is nonzero, otherwise `src'), copy
        nothing, set coding->result to CODING_FINISH_INSUFFICIENT_DST and
        jump to `label_end_of_loop'.  */
 318 #define EMIT_BYTES(from, to)                                    \
 319   do {                                                          \
 320     if (dst + (to - from) > (dst_bytes ? dst_end : src))        \
 321       {                                                         \
 322         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 323         goto label_end_of_loop;                                 \
 324       }                                                         \
 325     while (from < to)                                           \
 326       *dst++ = *from++;                                         \
 327   } while (0)
 328
 329 \f
 330 /*** 1. Preamble ***/
 331
 332 #ifdef emacs
 333 #include <config.h>
 334 #endif
 335
 336 #include <stdio.h>
 337
 338 #ifdef emacs
 339
 340 #include "lisp.h"
 341 #include "buffer.h"
 342 #include "charset.h"
 343 #include "composite.h"
 344 #include "ccl.h"
 345 #include "coding.h"
 346 #include "window.h"
 347
 348 #else  /* not emacs */
 349
 350 #include "mulelib.h"
 351
 352 #endif /* not emacs */
 353
 354 Lisp_Object Qcoding_system, Qeol_type;
 355 Lisp_Object Qbuffer_file_coding_system;
 356 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 357 Lisp_Object Qno_conversion, Qundecided;
 358 Lisp_Object Qcoding_system_history;
 359 Lisp_Object Qsafe_chars;
 360 Lisp_Object Qvalid_codes;
 361
 362 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 363 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 364 Lisp_Object Qstart_process, Qopen_network_stream;
 365 Lisp_Object Qtarget_idx;
 366
 367 Lisp_Object Vselect_safe_coding_system_function;
 368
 369 /* Mnemonic string for each format of end-of-line.  */
 370 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 371 /* Mnemonic string to indicate format of end-of-line is not yet
 372    decided.  */
 373 Lisp_Object eol_mnemonic_undecided;
 374
 375 /* Format of end-of-line decided by system.  This is CODING_EOL_LF on
 376    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
 377 int system_eol_type;
 378
 379 #ifdef emacs
 380
 381 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 382
 383 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 384
 385 /* Coding system emacs-mule and raw-text are for converting only
 386    end-of-line format.  */
 387 Lisp_Object Qemacs_mule, Qraw_text;
 388
 389 /* Coding-systems are handed between Emacs Lisp programs and C internal
 390    routines by the following three variables.  */
 391 /* Coding-system for reading files and receiving data from process.  */
 392 Lisp_Object Vcoding_system_for_read;
 393 /* Coding-system for writing files and sending data to process.  */
 394 Lisp_Object Vcoding_system_for_write;
 395 /* Coding-system actually used in the latest I/O.  */
 396 Lisp_Object Vlast_coding_system_used;
 397
 398 /* A vector of length 256 which contains information about special
 399    Latin codes (especially for dealing with Microsoft codes).  */
 400 Lisp_Object Vlatin_extra_code_table;
 401
 402 /* Flag to inhibit code conversion of end-of-line format.  */
 403 int inhibit_eol_conversion;
 404
 405 /* Flag to inhibit ISO2022 escape sequence detection.  */
 406 int inhibit_iso_escape_detection;
 407
 408 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 409 int inherit_process_coding_system;
 410
 411 /* Coding system to be used to encode text for terminal display.  */
 412 struct coding_system terminal_coding;
 413
 414 /* Coding system to be used to encode text for terminal display when
 415    terminal coding system is nil.  */
 416 struct coding_system safe_terminal_coding;
 417
 418 /* Coding system of what is sent from terminal keyboard.  */
 419 struct coding_system keyboard_coding;
 420
 421 /* Default coding system to be used to write a file.  */
 422 struct coding_system default_buffer_file_coding;
 423
 424 Lisp_Object Vfile_coding_system_alist;
 425 Lisp_Object Vprocess_coding_system_alist;
 426 Lisp_Object Vnetwork_coding_system_alist;
 427
 428 Lisp_Object Vlocale_coding_system;
 429
 430 #endif /* emacs */
 431
 432 Lisp_Object Qcoding_category, Qcoding_category_index;
 433
 434 /* List of symbols `coding-category-xxx' ordered by priority.  */
 435 Lisp_Object Vcoding_category_list;
 436
 437 /* Table of coding categories (Lisp symbols).  */
 438 Lisp_Object Vcoding_category_table;
 439
 440 /* Table of the names of the symbols for the coding categories.  It
        has CODING_CATEGORY_IDX_MAX elements.  NOTE(review): presumably
        the order here must match the CODING_CATEGORY_IDX_XXX values
        declared elsewhere -- confirm before reordering.  */
 441 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
 442   "coding-category-emacs-mule",
 443   "coding-category-sjis",
 444   "coding-category-iso-7",
 445   "coding-category-iso-7-tight",
 446   "coding-category-iso-8-1",
 447   "coding-category-iso-8-2",
 448   "coding-category-iso-7-else",
 449   "coding-category-iso-8-else",
 450   "coding-category-ccl",
 451   "coding-category-big5",
 452   "coding-category-utf-8",
 453   "coding-category-utf-16-be",
 454   "coding-category-utf-16-le",
 455   "coding-category-raw-text",
 456   "coding-category-binary"
 457 };
 458
 459 /* Table of pointers to coding systems corresponding to each coding
 460    category.  */
 461 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
 462
 463 /* Table of coding category masks.  The Nth element is the mask for
 464    the coding category whose priority is Nth.  */
 465 static
 466 int coding_priorities[CODING_CATEGORY_IDX_MAX];
 467
 468 /* Flag to tell if we look up translation table on character code
 469    conversion.  */
 470 Lisp_Object Venable_character_translation;
 471 /* Standard translation table to look up on decoding (reading).  */
 472 Lisp_Object Vstandard_translation_table_for_decode;
 473 /* Standard translation table to look up on encoding (writing).  */
 474 Lisp_Object Vstandard_translation_table_for_encode;
 475
 476 Lisp_Object Qtranslation_table;
 477 Lisp_Object Qtranslation_table_id;
 478 Lisp_Object Qtranslation_table_for_decode;
 479 Lisp_Object Qtranslation_table_for_encode;
 480
 481 /* Alist of charsets vs revision number.  */
 482 Lisp_Object Vcharset_revision_alist;
 483
 484 /* Default coding systems used for process I/O.  */
 485 Lisp_Object Vdefault_process_coding_system;
 486
 487 /* Global flag to tell that we can't call post-read-conversion and
 488    pre-write-conversion functions.  Usually the value is zero, but it
 489    is set to 1 temporarily while such functions are running.  This is
 490    to avoid infinite recursive call.  */
 491 static int inhibit_pre_post_conversion;
 492
 493 /* Char-table containing safe coding systems of each character.  */
 494 Lisp_Object Vchar_coding_system_table;
 495 Lisp_Object Qchar_coding_system;
 496
 497 /* Return `safe-chars' property of coding system CODING.  Don't check
 498    validity of CODING.

        The property is looked up in the property list kept in slot 3 of
        the coding spec vector, which is the Qcoding_system property of
        CODING->symbol.  If the property value is a char-table, return
        it; otherwise return t.  */
 499
 500 Lisp_Object
 501 coding_safe_chars (coding)
 502      struct coding_system *coding;
 503 {
 504   Lisp_Object coding_spec, plist, safe_chars;
 505
 506   coding_spec = Fget (coding->symbol, Qcoding_system);
 507   plist = XVECTOR (coding_spec)->contents[3];
 508   safe_chars = Fplist_get (plist, Qsafe_chars);
 509   return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt);
 510 }
 511
     /* Nonzero if character C is safe according to SAFE_CHARS: either
        SAFE_CHARS is t, or the entry for C in the char-table SAFE_CHARS
        is non-nil.  SAFE_CHARS is evaluated more than once.  */
 512 #define CODING_SAFE_CHAR_P(safe_chars, c) \
 513   (EQ (safe_chars, Qt) || !NILP (CHAR_TABLE_REF (safe_chars, c)))
 514
 515 \f
 516 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 517
 518 /* Emacs' internal format for representation of multiple character
 519    sets is a kind of multi-byte encoding, i.e. characters are
 520    represented by variable-length sequences of one-byte codes.
 521
 522    ASCII characters and control characters (e.g. `tab', `newline') are
 523    represented by one-byte sequences which are their ASCII codes, in
 524    the range 0x00 through 0x7F.
 525
 526    8-bit characters of the range 0x80..0x9F are represented by
 527    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
 528    code + 0x20).
 529
 530    8-bit characters of the range 0xA0..0xFF are represented by
 531    one-byte sequences which are their 8-bit code.
 532
 533    The other characters are represented by a sequence of `base
 534    leading-code', optional `extended leading-code', and one or two
 535    `position-code's.  The length of the sequence is determined by the
 536    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
 537    whereas extended leading-code and position-code take the range 0xA0
 538    through 0xFF.  See `charset.h' for more details about leading-code
 539    and position-code.
 540
 541    --- CODE RANGE of Emacs' internal format ---
 542    character set        range
 543    -------------        -----
 544    ascii                0x00..0x7F
 545    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
 546    eight-bit-graphic    0xA0..0xFF
 547    ELSE                 0x81..0x9D + [0xA0..0xFF]+
 548    ---------------------------------------------
 549
 550    As this is the internal character representation, the format is
 551    usually not used externally (i.e. in a file or in a data sent to a
 552    process).  But, it is possible to have a text externally in this
 553    format (i.e. by encoding by the coding system `emacs-mule').
 554
 555    In that case, a sequence of one-byte codes has a slightly different
 556    form.
 557
 558    At first, all characters in eight-bit-control are represented by
 559    one-byte sequences which are their 8-bit code.
 560
 561    Next, character composition data are represented by the byte
 562    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
 563    where,
 564         METHOD is 0xF0 plus one of composition method (enum
 565         composition_method),
 566
 567         BYTES is 0x20 plus a byte length of this composition data,
 568
 569         CHARS is 0x20 plus a number of characters composed by this
 570         data,
 571
 572         COMPONENTs are characters of multibyte form or composition
 573         rules encoded by two bytes of ASCII codes.
 574
 575    In addition, for backward compatibility, the following formats are
 576    also recognized as composition data on decoding.
 577
 578    0x80 MSEQ ...
 579    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
 580
 581    Here,
 582         MSEQ is a multibyte form but in one of these special formats:
 583           ASCII: 0xA0 ASCII_CODE+0x80,
 584           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
 585         RULE is a one byte code of the range 0xA0..0xF0 that
 586         represents a composition rule.
 587   */
 588
     /* Table with one element for each of the 256 possible byte values.
        NOTE(review): presumably it gives the class of each byte in text
        of Emacs' internal format; the enum and the code that fills this
        table are not in this part of the file -- confirm.  */
 589 enum emacs_code_class_type emacs_code_class[256];
 590
 591 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 592    Check if a text is encoded in Emacs' internal format.  If it is,
 593    return CODING_CATEGORY_MASK_EMACS_MULE, else return 0.

        The text between SRC and SRC_END is scanned byte by byte.  The
        result is 0 as soon as an ESC, SI or SO code is seen, or a byte
        in the range 0x81..0x9F does not start a valid multibyte
        sequence.  Otherwise the mask is returned once the source is
        exhausted.  If MULTIBYTEP is nonzero, 8-bit codes of the range
        0x80..0x9F are in multibyte form.  */
 594
 595 static int
 596 detect_coding_emacs_mule (src, src_end, multibytep)
 597       unsigned char *src, *src_end;
 598       int multibytep;
 599 {
 600   unsigned char c;
       /* Nonzero while scanning the components that follow the old
          composition leading code 0x80.  */
 601   int composing = 0;
 602   /* Dummy for ONE_MORE_BYTE_CHECK_MULTIBYTE, which stores into
          coding->result.  */
 603   struct coding_system dummy_coding;
 604   struct coding_system *coding = &dummy_coding;
 605
 606   while (1)
 607     {
           /* Jumps to label_end_of_loop when the source is exhausted.  */
 608       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
 609
 610       if (composing)
 611         {
               /* Map a component byte of the old composition format back
                  to an ordinary byte: a byte below 0xA0 ends the
                  composition, 0xA0 prefixes an ASCII component stored as
                  ASCII_CODE+0x80, and any other byte is a leading code
                  with 0x20 added.  */
 612           if (c < 0xA0)
 613             composing = 0;
 614           else if (c == 0xA0)
 615             {
 616               ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
 617               c &= 0x7F;
 618             }
 619           else
 620             c -= 0x20;
 621         }
 622
 623       if (c < 0x20)
 624         {
               /* These control codes rule out this category.  */
 625           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
 626             return 0;
 627         }
 628       else if (c >= 0x80 && c < 0xA0)
 629         {
 630           if (c == 0x80)
 631             /* Old leading code for a composite character.  */
 632             composing = 1;
 633           else
 634             {
                   /* Check the sequence starting at the byte just read
                      and, if it is valid, skip to the byte after it.  */
 635               unsigned char *src_base = src - 1;
 636               int bytes;
 637
 638               if (!UNIBYTE_STR_AS_MULTIBYTE_P (src_base, src_end - src_base,
 639                                                bytes))
 640                 return 0;
 641               src = src_base + bytes;
 642             }
 643         }
 644     }
      /* Reached from ONE_MORE_BYTE_CHECK_MULTIBYTE at the end of the source.  */
 645  label_end_of_loop:
 646   return CODING_CATEGORY_MASK_EMACS_MULE;
 647 }
 648
 649
 650 /* Record the starting position START and METHOD of one composition.

        This opens a record of four ints at the current end of
        CODING->cmp_data->data and remembers its index in
        CODING->cmp_data_start.  Element 0 is set to -1, element 1 to
        START plus cmp_data->char_offset, and element 3 to METHOD.
        Element 2 is not set here.  */
 651
 652 #define CODING_ADD_COMPOSITION_START(coding, start, method)     \
 653   do {                                                          \
 654     struct composition_data *cmp_data = coding->cmp_data;       \
 655     int *data = cmp_data->data + cmp_data->used;                \
 656     coding->cmp_data_start = cmp_data->used;                    \
 657     data[0] = -1;                                               \
 658     data[1] = cmp_data->char_offset + start;                    \
 659     data[3] = (int) method;                                     \
 660     cmp_data->used += 4;                                        \
 661   } while (0)
 662
 663 /* Record the ending position END of the current composition.

        In the record starting at index CODING->cmp_data_start, element 0
        is set to the number of ints from that index up to
        cmp_data->used, and element 2 to END plus
        cmp_data->char_offset.  */
 664
 665 #define CODING_ADD_COMPOSITION_END(coding, end)                 \
 666   do {                                                          \
 667     struct composition_data *cmp_data = coding->cmp_data;       \
 668     int *data = cmp_data->data + coding->cmp_data_start;        \
 669     data[0] = cmp_data->used - coding->cmp_data_start;          \
 670     data[2] = cmp_data->char_offset + end;                      \
 671   } while (0)
 672
 673 /* Record one COMPONENT (alternate character or composition rule).
        It is appended to CODING->cmp_data->data, and cmp_data->used is
        incremented.  The value of the macro is COMPONENT as stored.  */
 674
 675 #define CODING_ADD_COMPOSITION_COMPONENT(coding, component)     \
 676   (coding->cmp_data->data[coding->cmp_data->used++] = component)
 677
 678
 679 /* Get one byte from the data pointed to by SRC and increment SRC.  If
 680    SRC is not less than SRC_END, return -1 without incrementing SRC.  */
 681
 682 #define SAFE_ONE_MORE_BYTE() (src >= src_end ? -1 : *src++)
 683
 684
 685 /* Decode a character represented as a component of composition
 686    sequence of Emacs 20 style at SRC.  Set C to that character, store
 687    its multibyte form sequence at P, and set P to the end of that
 688    sequence.  If no valid character is found, set C to -1.

        Bytes are taken with SAFE_ONE_MORE_BYTE, so the caller must
        provide `src' and `src_end'.  An ASCII component is represented
        by 0xA0 followed by ASCII_CODE+0x80, so 0x80 is subtracted from
        the second byte to recover the ASCII code.  Any other component
        is a multibyte form whose leading code has 0x20 added to it.  */
 689
 690 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(c, p)                \
 691   do {                                                          \
 692     int bytes;                                                  \
 693                                                                 \
 694     c = SAFE_ONE_MORE_BYTE ();                                  \
 695     if (c < 0)                                                  \
 696       break;                                                    \
 697     if (CHAR_HEAD_P (c))                                        \
 698       c = -1;                                                   \
 699     else if (c == 0xA0)                                         \
 700       {                                                         \
 701         c = SAFE_ONE_MORE_BYTE ();                              \
 702         if (c < 0xA0)                                           \
 703           c = -1;                                               \
 704         else                                                    \
 705           {                                                     \
 706             c -= 0x80;                                          \
 707             *p++ = c;                                           \
 708           }                                                     \
 709       }                                                         \
 710     else if (BASE_LEADING_CODE_P (c - 0x20))                    \
 711       {                                                         \
 712         unsigned char *p0 = p;                                  \
 713                                                                 \
 714         c -= 0x20;                                              \
 715         *p++ = c;                                               \
 716         bytes = BYTES_BY_CHAR_HEAD (c);                         \
 717         while (--bytes)                                         \
 718           {                                                     \
 719             c = SAFE_ONE_MORE_BYTE ();                          \
 720             if (c < 0)                                          \
 721               break;                                            \
 722             *p++ = c;                                           \
 723           }                                                     \
 724         if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes))     \
 725           c = STRING_CHAR (p0, bytes);                          \
 726         else                                                    \
 727           c = -1;                                               \
 728       }                                                         \
 729     else                                                        \
 730       c = -1;                                                   \
 731   } while (0)
 732
 733
 734 /* Decode a composition rule represented as a component of composition
 735    sequence of Emacs 20 style at SRC.  Set C to the rule.  If not
 736    valid rule is found, set C to -1.  */
 737
 738 #define DECODE_EMACS_MULE_COMPOSITION_RULE(c)           \
 739   do {                                                  \
 740     c = SAFE_ONE_MORE_BYTE ();                          \
 741     c -= 0xA0;                                          \
 742     if (c < 0 || c >= 81)                               \
 743       c = -1;                                           \
 744     else                                                \
 745       {                                                 \
 746         gref = c / 9, nref = c % 9;                     \
 747         c = COMPOSITION_ENCODE_RULE (gref, nref);       \
 748       }                                                 \
 749   } while (0)
 750
 751
 752 /* Decode composition sequence encoded by `emacs-mule' at the source
 753    pointed by SRC.  SRC_END is the end of source.  Store information
 754    of the composition in CODING->cmp_data.
 755
 756    For backward compatibility, decode also a composition sequence of
 757    Emacs 20 style.  In that case, the composition sequence contains
 758    characters that should be extracted into a buffer or string.  Store
 759    those characters at *DESTINATION in multibyte form.
 760
 761    If we encounter an invalid byte sequence, return 0.
 762    If we encounter an insufficient source or destination, or
 763    insufficient space in CODING->cmp_data, return 1.
 764    Otherwise, return consumed bytes in the source.
 765
 766 */
 767 static INLINE int
 768 decode_composition_emacs_mule (coding, src, src_end,
 769                                destination, dst_end, dst_bytes)
 770      struct coding_system *coding;
 771      unsigned char *src, *src_end, **destination, *dst_end;
 772      int dst_bytes;
 773 {
 774   unsigned char *dst = *destination;
 775   int method, data_len, nchars;
 776   unsigned char *src_base = src++;
 777   /* Store compoments of composition.  */
 778   int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH];
 779   int ncomponent;
 780   /* Store multibyte form of characters to be composed.  This is for
 781      Emacs 20 style composition sequence.  */
 782   unsigned char buf[MAX_COMPOSITION_COMPONENTS * MAX_MULTIBYTE_LENGTH];
 783   unsigned char *bufp = buf;
 784   int c, i, gref, nref;
 785
 786   if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
 787       >= COMPOSITION_DATA_SIZE)
 788     {
 789       coding->result = CODING_FINISH_INSUFFICIENT_CMP;
 790       return -1;
 791     }
 792
 793   ONE_MORE_BYTE (c);
 794   if (c - 0xF0 >= COMPOSITION_RELATIVE
 795            && c - 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS)
 796     {
 797       int with_rule;
 798
 799       method = c - 0xF0;
 800       with_rule = (method == COMPOSITION_WITH_RULE
 801                    || method == COMPOSITION_WITH_RULE_ALTCHARS);
 802       ONE_MORE_BYTE (c);
 803       data_len = c - 0xA0;
 804       if (data_len < 4
 805           || src_base + data_len > src_end)
 806         return 0;
 807       ONE_MORE_BYTE (c);
 808       nchars = c - 0xA0;
 809       if (c < 1)
 810         return 0;
 811       for (ncomponent = 0; src < src_base + data_len; ncomponent++)
 812         {
 813           if (ncomponent % 2 && with_rule)
 814             {
 815               ONE_MORE_BYTE (gref);
 816               gref -= 32;
 817               ONE_MORE_BYTE (nref);
 818               nref -= 32;
 819               c = COMPOSITION_ENCODE_RULE (gref, nref);
 820             }
 821           else
 822             {
 823               int bytes;
 824               if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
 825                 c = STRING_CHAR (src, bytes);
 826               else
 827                 c = *src, bytes = 1;
 828               src += bytes;
 829             }
 830           component[ncomponent] = c;
 831         }
 832     }
 833   else
 834     {
 835       /* This may be an old Emacs 20 style format.  See the comment at
 836          the section 2 of this file.  */
 837       while (src < src_end && !CHAR_HEAD_P (*src)) src++;
 838       if (src == src_end
 839           && !(coding->mode & CODING_MODE_LAST_BLOCK))
 840         goto label_end_of_loop;
 841
 842       src_end = src;
 843       src = src_base + 1;
 844       if (c < 0xC0)
 845         {
 846           method = COMPOSITION_RELATIVE;
 847           for (ncomponent = 0; ncomponent < MAX_COMPOSITION_COMPONENTS;)
 848             {
 849               DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 850               if (c < 0)
 851                 break;
 852               component[ncomponent++] = c;
 853             }
 854           if (ncomponent < 2)
 855             return 0;
 856           nchars = ncomponent;
 857         }
 858       else if (c == 0xFF)
 859         {
 860           method = COMPOSITION_WITH_RULE;
 861           src++;
 862           DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 863           if (c < 0)
 864             return 0;
 865           component[0] = c;
 866           for (ncomponent = 1;
 867                ncomponent < MAX_COMPOSITION_COMPONENTS * 2 - 1;)
 868             {
 869               DECODE_EMACS_MULE_COMPOSITION_RULE (c);
 870               if (c < 0)
 871                 break;
 872               component[ncomponent++] = c;
 873               DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 874               if (c < 0)
 875                 break;
 876               component[ncomponent++] = c;
 877             }
 878           if (ncomponent < 3)
 879             return 0;
 880           nchars = (ncomponent + 1) / 2;
 881         }
 882       else
 883         return 0;
 884     }
 885
 886   if (buf == bufp || dst + (bufp - buf) <= (dst_bytes ? dst_end : src))
 887     {
 888       CODING_ADD_COMPOSITION_START (coding, coding->produced_char, method);
 889       for (i = 0; i < ncomponent; i++)
 890         CODING_ADD_COMPOSITION_COMPONENT (coding, component[i]);
 891       CODING_ADD_COMPOSITION_END (coding, coding->produced_char + nchars);
 892       if (buf < bufp)
 893         {
 894           unsigned char *p = buf;
 895           EMIT_BYTES (p, bufp);
 896           *destination += bufp - buf;
 897           coding->produced_char += nchars;
 898         }
 899       return (src - src_base);
 900     }
 901  label_end_of_loop:
 902   return -1;
 903 }
 904
 905 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
 906
 907 static void
 908 decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
 909      struct coding_system *coding;
 910      unsigned char *source, *destination;
 911      int src_bytes, dst_bytes;
 912 {
 913   unsigned char *src = source;
 914   unsigned char *src_end = source + src_bytes;
 915   unsigned char *dst = destination;
 916   unsigned char *dst_end = destination + dst_bytes;
 917   /* SRC_BASE remembers the start position in source in each loop.
 918      The loop will be exited when there's not enough source code, or
 919      when there's not enough destination area to produce a
 920      character.  */
 921   unsigned char *src_base;
 922
 923   coding->produced_char = 0;
 924   while ((src_base = src) < src_end)
 925     {
 926       unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p;
 927       int bytes;
 928
 929       if (*src == '\r')
 930         {
 931           int c = *src++;
 932
 933           if (coding->eol_type == CODING_EOL_CR)
 934             c = '\n';
 935           else if (coding->eol_type == CODING_EOL_CRLF)
 936             {
 937               ONE_MORE_BYTE (c);
 938               if (c != '\n')
 939                 {
 940                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
 941                     {
 942                       coding->result = CODING_FINISH_INCONSISTENT_EOL;
 943                       goto label_end_of_loop;
 944                     }
 945                   src--;
 946                   c = '\r';
 947                 }
 948             }
 949           *dst++ = c;
 950           coding->produced_char++;
 951           continue;
 952         }
 953       else if (*src == '\n')
 954         {
 955           if ((coding->eol_type == CODING_EOL_CR
 956                || coding->eol_type == CODING_EOL_CRLF)
 957               && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
 958             {
 959               coding->result = CODING_FINISH_INCONSISTENT_EOL;
 960               goto label_end_of_loop;
 961             }
 962           *dst++ = *src++;
 963           coding->produced_char++;
 964           continue;
 965         }
 966       else if (*src == 0x80)
 967         {
 968           /* Start of composition data.  */
 969           int consumed  = decode_composition_emacs_mule (coding, src, src_end,
 970                                                          &dst, dst_end,
 971                                                          dst_bytes);
 972           if (consumed < 0)
 973             goto label_end_of_loop;
 974           else if (consumed > 0)
 975             {
 976               src += consumed;
 977               continue;
 978             }
 979           bytes = CHAR_STRING (*src, tmp);
 980           p = tmp;
 981           src++;
 982         }
 983       else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
 984         {
 985           p = src;
 986           src += bytes;
 987         }
 988       else
 989         {
 990           bytes = CHAR_STRING (*src, tmp);
 991           p = tmp;
 992           src++;
 993         }
 994       if (dst + bytes >= (dst_bytes ? dst_end : src))
 995         {
 996           coding->result = CODING_FINISH_INSUFFICIENT_DST;
 997           break;
 998         }
 999       while (bytes--) *dst++ = *p++;
1000       coding->produced_char++;
1001     }
1002  label_end_of_loop:
1003   coding->consumed = coding->consumed_char = src_base - source;
1004   coding->produced = dst - destination;
1005 }
1006
1007
/* Encode the composition bunch at DATA into the special byte sequence
   that starts with 0x80:

     0x80 METHOD COMPONENTS-BYTES COMPOSED-CHARS COMPONENT ...

   and append it at DST.  For the two rule-based methods the
   components alternate between a character and a two-byte rule
   (0x20 + GREF, 0x20 + NREF); otherwise they are characters only.
   If the destination is too small, set CODING->result to
   CODING_FINISH_INSUFFICIENT_DST and jump to label_end_of_loop.
   Afterwards advance CODING->cmp_data_start past the bunch, and move
   on to CODING->cmp_data->next when the current chunk is used up.

   DST, DST_END, DST_BYTES, SRC and label_end_of_loop are taken from
   the place where this macro is expanded.  */

#define ENCODE_COMPOSITION_EMACS_MULE(coding, data)                     \
  do {                                                                  \
    unsigned char buf[1024];                                            \
    unsigned char *p = buf + 4;                                         \
    unsigned char *p0;                                                  \
    int len = data[0];                                                  \
    int with_rule = (data[3] == COMPOSITION_WITH_RULE                   \
                     || data[3] == COMPOSITION_WITH_RULE_ALTCHARS);     \
    int i;                                                              \
                                                                        \
    /* Header; buf[2] is filled in once the total length is known.  */  \
    buf[0] = 0x80;                                                      \
    buf[1] = 0xF0 + data[3];              /* METHOD */                  \
    buf[3] = 0xA0 + (data[2] - data[1]);  /* COMPOSED-CHARS */          \
                                                                        \
    if (with_rule)                                                      \
      {                                                                 \
        /* CHAR (RULE CHAR)...  */                                      \
        p += CHAR_STRING (data[4], p);                                  \
        i = 5;                                                          \
        while (i < len)                                                 \
          {                                                             \
            int gref, nref;                                             \
                                                                        \
            COMPOSITION_DECODE_RULE (data[i], gref, nref);              \
            *p++ = 0x20 + gref;                                         \
            *p++ = 0x20 + nref;                                         \
            p += CHAR_STRING (data[i + 1], p);                          \
            i += 2;                                                     \
          }                                                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        /* CHAR...  */                                                  \
        i = 4;                                                          \
        while (i < len)                                                 \
          {                                                             \
            p += CHAR_STRING (data[i], p);                              \
            i++;                                                        \
          }                                                             \
      }                                                                 \
    buf[2] = 0xA0 + (p - buf);            /* COMPONENTS-BYTES */        \
                                                                        \
    if (dst + (p - buf) + 4 > (dst_bytes ? dst_end : src))              \
      {                                                                 \
        coding->result = CODING_FINISH_INSUFFICIENT_DST;                \
        goto label_end_of_loop;                                         \
      }                                                                 \
    for (p0 = buf; p0 < p; p0++)                                        \
      *dst++ = *p0;                                                     \
                                                                        \
    coding->cmp_data_start += data[0];                                  \
    if (coding->cmp_data_start == coding->cmp_data->used                \
        && coding->cmp_data->next)                                      \
      {                                                                 \
        coding->cmp_data = coding->cmp_data->next;                      \
        coding->cmp_data_start = 0;                                     \
      }                                                                 \
  } while (0)
1057
1058
1059 static void encode_eol P_ ((struct coding_system *, unsigned char *,
1060                             unsigned char *, int, int));
1061
1062 static void
1063 encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
1064      struct coding_system *coding;
1065      unsigned char *source, *destination;
1066      int src_bytes, dst_bytes;
1067 {
1068   unsigned char *src = source;
1069   unsigned char *src_end = source + src_bytes;
1070   unsigned char *dst = destination;
1071   unsigned char *dst_end = destination + dst_bytes;
1072   unsigned char *src_base;
1073   int c;
1074   int char_offset;
1075   int *data;
1076
1077   Lisp_Object translation_table;
1078
1079   translation_table = Qnil;
1080
1081   /* Optimization for the case that there's no composition.  */
1082   if (!coding->cmp_data || coding->cmp_data->used == 0)
1083     {
1084       encode_eol (coding, source, destination, src_bytes, dst_bytes);
1085       return;
1086     }
1087
1088   char_offset = coding->cmp_data->char_offset;
1089   data = coding->cmp_data->data + coding->cmp_data_start;
1090   while (1)
1091     {
1092       src_base = src;
1093
1094       /* If SRC starts a composition, encode the information about the
1095          composition in advance.  */
1096       if (coding->cmp_data_start < coding->cmp_data->used
1097           && char_offset + coding->consumed_char == data[1])
1098         {
1099           ENCODE_COMPOSITION_EMACS_MULE (coding, data);
1100           char_offset = coding->cmp_data->char_offset;
1101           data = coding->cmp_data->data + coding->cmp_data_start;
1102         }
1103
1104       ONE_MORE_CHAR (c);
1105       if (c == '\n' && (coding->eol_type == CODING_EOL_CRLF
1106                         || coding->eol_type == CODING_EOL_CR))
1107         {
1108           if (coding->eol_type == CODING_EOL_CRLF)
1109             EMIT_TWO_BYTES ('\r', c);
1110           else
1111             EMIT_ONE_BYTE ('\r');
1112         }
1113       else if (SINGLE_BYTE_CHAR_P (c))
1114         EMIT_ONE_BYTE (c);
1115       else
1116         EMIT_BYTES (src_base, src);
1117       coding->consumed_char++;
1118     }
1119  label_end_of_loop:
1120   coding->consumed = src_base - source;
1121   coding->produced = coding->produced_char = dst - destination;
1122   return;
1123 }
1124
1125 \f
1126 /*** 3. ISO2022 handlers ***/
1127
1128 /* The following note describes the coding system ISO2022 briefly.
1129    Since the intention of this note is to help understand the
1130    functions in this file, some parts are NOT ACCURATE or are OVERLY
1131    SIMPLIFIED.  For thorough understanding, please refer to the
1132    original document of ISO2022.  This is equivalent to the standard
1133    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
1134
1135    ISO2022 provides many mechanisms to encode several character sets
1136    in 7-bit and 8-bit environments.  For 7-bit environments, all text
1137    is encoded using bytes less than 128.  This may make the encoded
1138    text a little bit longer, but the text passes more easily through
1139    several types of gateway, some of which strip off the MSB (Most
1140    Significant Bit).
1141
1142    There are two kinds of character sets: control character sets and
1143    graphic character sets.  The former contain control characters such
1144    as `newline' and `escape' to provide control functions (control
1145    functions are also provided by escape sequences).  The latter
1146    contain graphic characters such as 'A' and '-'.  Emacs recognizes
1147    two control character sets and many graphic character sets.
1148
1149    Graphic character sets are classified into one of the following
1150    four classes, according to the number of bytes (DIMENSION) and
1151    number of characters in one dimension (CHARS) of the set:
1152    - DIMENSION1_CHARS94
1153    - DIMENSION1_CHARS96
1154    - DIMENSION2_CHARS94
1155    - DIMENSION2_CHARS96
1156
1157    In addition, each character set is assigned an identification tag,
1158    unique for each set, called the "final character" (denoted as <F>
1159    hereafter).  The <F> of each character set is decided by ECMA(*)
1160    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
1161    (0x30..0x3F are for private use only).
1162
1163    Note (*): ECMA = European Computer Manufacturers Association
1164
1165    Here are examples of graphic character sets [NAME(<F>)]:
1166         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
1167         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
1168         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
1169         o DIMENSION2_CHARS96 -- none for the moment
1170
1171    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
1172         C0 [0x00..0x1F] -- control character plane 0
1173         GL [0x20..0x7F] -- graphic character plane 0
1174         C1 [0x80..0x9F] -- control character plane 1
1175         GR [0xA0..0xFF] -- graphic character plane 1
1176
1177    A control character set is directly designated and invoked to C0 or
1178    C1 by an escape sequence.  The most common case is that:
1179    - ISO646's  control character set is designated/invoked to C0, and
1180    - ISO6429's control character set is designated/invoked to C1,
1181    and usually these designations/invocations are omitted in encoded
1182    text.  In a 7-bit environment, only C0 can be used, and a control
1183    character for C1 is encoded by an appropriate escape sequence to
1184    fit into the environment.  All control characters for C1 are
1185    defined to have corresponding escape sequences.
1186
1187    A graphic character set is at first designated to one of four
1188    graphic registers (G0 through G3), then these graphic registers are
1189    invoked to GL or GR.  These designations and invocations can be
1190    done independently.  The most common case is that G0 is invoked to
1191    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
1192    these invocations and designations are omitted in encoded text.
1193    In a 7-bit environment, only GL can be used.
1194
1195    When a graphic character set of CHARS94 is invoked to GL, codes
1196    0x20 and 0x7F of the GL area work as control characters SPACE and
1197    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
1198    be used.
1199
1200    There are two ways of invocation: locking-shift and single-shift.
1201    With locking-shift, the invocation lasts until the next different
1202    invocation, whereas with single-shift, the invocation affects the
1203    following character only and doesn't affect the locking-shift
1204    state.  Invocations are done by the following control characters or
1205    escape sequences:
1206
1207    ----------------------------------------------------------------------
1208    abbrev  function                  cntrl escape seq   description
1209    ----------------------------------------------------------------------
1210    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
1211    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
1212    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
1213    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
1214    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
1215    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
1216    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
1217    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
1218    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
1219    ----------------------------------------------------------------------
1220    (*) These are not used by any known coding system.
1221
1222    Control characters for these functions are defined by macros
1223    ISO_CODE_XXX in `coding.h'.
1224
1225    Designations are done by the following escape sequences:
1226    ----------------------------------------------------------------------
1227    escape sequence      description
1228    ----------------------------------------------------------------------
1229    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
1230    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
1231    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
1232    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
1233    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
1234    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
1235    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
1236    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
1237    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
1238    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
1239    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
1240    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
1241    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
1242    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
1243    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
1244    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
1245    ----------------------------------------------------------------------
1246
1247    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
1248    of dimension 1, chars 94, and final character <F>, etc...
1249
1250    Note (*): Although these designations are not allowed in ISO2022,
1251    Emacs accepts them on decoding, and produces them on encoding
1252    CHARS96 character sets in a coding system which is characterized as
1253    7-bit environment, non-locking-shift, and non-single-shift.
1254
1255    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
1256    '(' can be omitted.  We refer to this as "short-form" hereafter.
1257
1258    Now you may notice that there are a lot of ways of encoding the
1259    same multilingual text in ISO2022.  Actually, there exist many
1260    coding systems such as Compound Text (used in X11's inter client
1261    communication), ISO-2022-JP (used in Japanese internet), ISO-2022-KR
1262    (used in Korean internet), EUC (Extended UNIX Code, used in Asian
1263    localized platforms), and all of these are variants of ISO2022.
1264
1265    In addition to the above, Emacs handles two more kinds of escape
1266    sequences: ISO6429's direction specification and Emacs' private
1267    sequence for specifying character composition.
1268
1269    ISO6429's direction specification takes the following form:
1270         o CSI ']'      -- end of the current direction
1271         o CSI '0' ']'  -- end of the current direction
1272         o CSI '1' ']'  -- start of left-to-right text
1273         o CSI '2' ']'  -- start of right-to-left text
1274    The control character CSI (0x9B: control sequence introducer) is
1275    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
1276
1277    Character composition specification takes the following form:
1278         o ESC '0' -- start relative composition
1279         o ESC '1' -- end composition
1280         o ESC '2' -- start rule-base composition (*)
1281         o ESC '3' -- start relative composition with alternate chars  (**)
1282         o ESC '4' -- start rule-base composition with alternate chars  (**)
1283   Since these are not standard escape sequences of any ISO standard,
1284   the use of them with these meanings is restricted to Emacs only.
1285
1286   (*) This form is used only in Emacs 20.5 and older versions,
1287   but the newer versions can safely decode it.
1288   (**) This form is used only in Emacs 21.1 and newer versions,
1289   and the older versions can't decode it.
1290
1291   Here's a list of example usages of these composition escape
1292   sequences (categorized by `enum composition_method').
1293
1294   COMPOSITION_RELATIVE:
1295         ESC 0 CHAR [ CHAR ] ESC 1
1296   COMPOSITION_WITH_RULE:
1297         ESC 2 CHAR [ RULE CHAR ] ESC 1
1298   COMPOSITION_WITH_ALTCHARS:
1299         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
1300   COMPOSITION_WITH_RULE_ALTCHARS:
1301         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
1302
1303 enum iso_code_class_type iso_code_class[256];
1304
1305 #define CHARSET_OK(idx, charset, c)                                     \
1306   (coding_system_table[idx]                                             \
1307    && (charset == CHARSET_ASCII                                         \
1308        || (safe_chars = coding_safe_chars (coding_system_table[idx]),   \
1309            CODING_SAFE_CHAR_P (safe_chars, c)))                         \
1310    && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \
1311                                               charset)                  \
1312        != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
1313
1314 #define SHIFT_OUT_OK(idx) \
1315   (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
1316
1317 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1318    Check if a text is encoded in ISO2022.  If it is, return an
1319    integer in which the appropriate flag bits, any of:
1320         CODING_CATEGORY_MASK_ISO_7
1321         CODING_CATEGORY_MASK_ISO_7_TIGHT
1322         CODING_CATEGORY_MASK_ISO_8_1
1323         CODING_CATEGORY_MASK_ISO_8_2
1324         CODING_CATEGORY_MASK_ISO_7_ELSE
1325         CODING_CATEGORY_MASK_ISO_8_ELSE
1326    are set.  If a code which should never appear in ISO2022 is found,
1327    returns 0.  */
1328
1329 static int
1330 detect_coding_iso2022 (src, src_end, multibytep)
1331      unsigned char *src, *src_end;
1332      int multibytep;
1333 {
1334   int mask = CODING_CATEGORY_MASK_ISO;
1335   int mask_found = 0;
1336   int reg[4], shift_out = 0, single_shifting = 0;
1337   int c, c1, charset;
1338   /* Dummy for ONE_MORE_BYTE.  */
1339   struct coding_system dummy_coding;
1340   struct coding_system *coding = &dummy_coding;
1341   Lisp_Object safe_chars;
1342
1343   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
1344   while (mask && src < src_end)
1345     {
1346       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1347       switch (c)
1348         {
1349         case ISO_CODE_ESC:
1350           if (inhibit_iso_escape_detection)
1351             break;
1352           single_shifting = 0;
1353           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1354           if (c >= '(' && c <= '/')
1355             {
1356               /* Designation sequence for a charset of dimension 1.  */
1357               ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
1358               if (c1 < ' ' || c1 >= 0x80
1359                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
1360                 /* Invalid designation sequence.  Just ignore.  */
1361                 break;
1362               reg[(c - '(') % 4] = charset;
1363             }
1364           else if (c == '$')
1365             {
1366               /* Designation sequence for a charset of dimension 2.  */
1367               ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1368               if (c >= '@' && c <= 'B')
1369                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
1370                 reg[0] = charset = iso_charset_table[1][0][c];
1371               else if (c >= '(' && c <= '/')
1372                 {
1373                   ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
1374                   if (c1 < ' ' || c1 >= 0x80
1375                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
1376                     /* Invalid designation sequence.  Just ignore.  */
1377                     break;
1378                   reg[(c - '(') % 4] = charset;
1379                 }
1380               else
1381                 /* Invalid designation sequence.  Just ignore.  */
1382                 break;
1383             }
1384           else if (c == 'N' || c == 'O')
1385             {
1386               /* ESC <Fe> for SS2 or SS3.  */
1387               mask &= CODING_CATEGORY_MASK_ISO_7_ELSE;
1388               break;
1389             }
1390           else if (c >= '0' && c <= '4')
1391             {
1392               /* ESC <Fp> for start/end composition.  */
1393               mask_found |= CODING_CATEGORY_MASK_ISO;
1394               break;
1395             }
1396           else
1397             /* Invalid escape sequence.  Just ignore.  */
1398             break;
1399
1400           /* We found a valid designation sequence for CHARSET.  */
1401           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
1402           c = MAKE_CHAR (charset, 0, 0);
1403           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset, c))
1404             mask_found |= CODING_CATEGORY_MASK_ISO_7;
1405           else
1406             mask &= ~CODING_CATEGORY_MASK_ISO_7;
1407           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset, c))
1408             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
1409           else
1410             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
1411           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset, c))
1412             mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
1413           else
1414             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
1415           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset, c))
1416             mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
1417           else
1418             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
1419           break;
1420
1421         case ISO_CODE_SO:
1422           if (inhibit_iso_escape_detection)
1423             break;
1424           single_shifting = 0;
1425           if (shift_out == 0
1426               && (reg[1] >= 0
1427                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
1428                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
1429             {
1430               /* Locking shift out.  */
1431               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
1432               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
1433             }
1434           break;
1435
1436         case ISO_CODE_SI:
1437           if (inhibit_iso_escape_detection)
1438             break;
1439           single_shifting = 0;
1440           if (shift_out == 1)
1441             {
1442               /* Locking shift in.  */
1443               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
1444               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
1445             }
1446           break;
1447
1448         case ISO_CODE_CSI:
1449           single_shifting = 0;
1450         case ISO_CODE_SS2:
1451         case ISO_CODE_SS3:
1452           {
1453             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
1454
1455             if (inhibit_iso_escape_detection)
1456               break;
1457             if (c != ISO_CODE_CSI)
1458               {
1459                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1460                     & CODING_FLAG_ISO_SINGLE_SHIFT)
1461                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1462                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1463                     & CODING_FLAG_ISO_SINGLE_SHIFT)
1464                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1465                 single_shifting = 1;
1466               }
1467             if (VECTORP (Vlatin_extra_code_table)
1468                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1469               {
1470                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1471                     & CODING_FLAG_ISO_LATIN_EXTRA)
1472                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1473                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1474                     & CODING_FLAG_ISO_LATIN_EXTRA)
1475                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1476               }
1477             mask &= newmask;
1478             mask_found |= newmask;
1479           }
1480           break;
1481
1482         default:
1483           if (c < 0x80)
1484             {
1485               single_shifting = 0;
1486               break;
1487             }
1488           else if (c < 0xA0)
1489             {
1490               single_shifting = 0;
1491               if (VECTORP (Vlatin_extra_code_table)
1492                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1493                 {
1494                   int newmask = 0;
1495
1496                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1497                       & CODING_FLAG_ISO_LATIN_EXTRA)
1498                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1499                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1500                       & CODING_FLAG_ISO_LATIN_EXTRA)
1501                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1502                   mask &= newmask;
1503                   mask_found |= newmask;
1504                 }
1505               else
1506                 return 0;
1507             }
1508           else
1509             {
1510               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
1511                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
1512               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
1513               /* Check the length of succeeding codes of the range
1514                  0xA0..0FF.  If the byte length is odd, we exclude
1515                  CODING_CATEGORY_MASK_ISO_8_2.  We can check this only
1516                  when we are not single shifting.  */
1517               if (!single_shifting
1518                   && mask & CODING_CATEGORY_MASK_ISO_8_2)
1519                 {
1520                   int i = 1;
1521                   while (src < src_end)
1522                     {
1523                       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1524                       if (c < 0xA0)
1525                         break;
1526                       i++;
1527                     }
1528
1529                   if (i & 1 && src < src_end)
1530                     mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
1531                   else
1532                     mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
1533                 }
1534             }
1535           break;
1536         }
1537     }
1538  label_end_of_loop:
1539   return (mask & mask_found);
1540 }
1541
1542 /* Decode a character whose charset is CHARSET, whose 1st position
1543    code is C1, and whose 2nd position code is C2, and yield the decoded
1544    character code.  If the variable `translation_table' (looked up in
1545    the expanding scope) is non-nil, yield the translated code instead.  */
1546
1547 #define DECODE_ISO_CHARACTER(charset, c1, c2)   \
1548   (NILP (translation_table)                     \
1549    ? MAKE_CHAR (charset, c1, c2)                \
1550    : translate_char (translation_table, -1, charset, c1, c2))
1551
1552 /* Set designation state into CODING: designate the charset looked up
        from DIMENSION, CHARS and FINAL_CHAR to graphic register REG.

        The macro uses `coding' and `safe_chars' from the expanding scope
        and jumps to `label_invalid_code' in these cases:
          - FINAL_CHAR is below '0' or is 128 or more;
          - the looked-up charset is negative, or it is neither the
            charset requested for REG nor accepted by CODING_SAFE_CHAR_P
            (REG is then remembered in last_invalid_designation_register);
          - ASCII is designated to register 0 while register 0 is the
            remembered invalid register (see the comment in the body).
        Otherwise the charset (or its reverse charset, when the direction
        bit of coding->mode is set and a reverse charset exists) is stored
        as the designation of REG.  */
1553 #define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
1554   do {                                                                     \
1555     int charset, c;                                                        \
1556                                                                            \
1557     if (final_char < '0' || final_char >= 128)                             \
1558       goto label_invalid_code;                                             \
1559     charset = ISO_CHARSET_TABLE (make_number (dimension),                  \
1560                                  make_number (chars),                      \
1561                                  make_number (final_char));                \
1562     c = MAKE_CHAR (charset, 0, 0);                                         \
1563     if (charset >= 0                                                       \
1564         && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
1565             || CODING_SAFE_CHAR_P (safe_chars, c)))                        \
1566       {                                                                    \
1567         if (coding->spec.iso2022.last_invalid_designation_register == 0    \
1568             && reg == 0                                                    \
1569             && charset == CHARSET_ASCII)                                   \
1570           {                                                                \
1571             /* We should insert this designation sequence as is so         \
1572                that it is surely written back to a file.  */               \
1573             coding->spec.iso2022.last_invalid_designation_register = -1;   \
1574             goto label_invalid_code;                                       \
1575           }                                                                \
1576         coding->spec.iso2022.last_invalid_designation_register = -1;       \
1577         if ((coding->mode & CODING_MODE_DIRECTION)                         \
1578             && CHARSET_REVERSE_CHARSET (charset) >= 0)                     \
1579           charset = CHARSET_REVERSE_CHARSET (charset);                     \
1580         CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;               \
1581       }                                                                    \
1582     else                                                                   \
1583       {                                                                    \
1584         coding->spec.iso2022.last_invalid_designation_register = reg;      \
1585         goto label_invalid_code;                                           \
1586       }                                                                    \
1587   } while (0)
1588
1589 /* Allocate a memory block for storing information about compositions
1590    and make it the current block of CODING (coding->cmp_data).  The new
        block records CHAR_OFFSET, starts empty (`used' is 0), and is
        chained after the previously current block, if there is one.
        coding->cmp_data_start is reset to 0.  */
1591
1592 void
1593 coding_allocate_composition_data (coding, char_offset)
1594      struct coding_system *coding;
1595      int char_offset;
1596 {
1597   struct composition_data *cmp_data
1598     = (struct composition_data *) xmalloc (sizeof *cmp_data);
1599
1600   cmp_data->char_offset = char_offset;
1601   cmp_data->used = 0;
1602   cmp_data->prev = coding->cmp_data;
1603   cmp_data->next = NULL;
1604   if (coding->cmp_data)
1605     coding->cmp_data->next = cmp_data;
1606   coding->cmp_data = cmp_data;
1607   coding->cmp_data_start = 0;
1608 }
1609
1610 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
        C1 is the byte that followed ESC.
1611    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
1612    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
1613    ESC 3 : altchar composition :  ESC 3 ALT ... ESC 0 CHAR ... ESC 1
1614    ESC 4 : alt&rule composition : ESC 4 ALT RULE .. ALT ESC 0 CHAR ... ESC 1

        If composition is disabled in CODING, ESC and C1 (masked to 7
        bits) are written to DST and coding->produced_char grows by 2.
        If no composition is in progress, one is started with the method
        selected by C1; when coding->cmp_data is missing or too full, the
        macro instead sets coding->result to
        CODING_FINISH_INSUFFICIENT_CMP and jumps to `label_end_of_loop'.
        If a composition using alternate characters is already in
        progress, its method is switched to COMPOSITION_RELATIVE.
1615   */
1616
1617 #define DECODE_COMPOSITION_START(c1)                                       \
1618   do {                                                                     \
1619     if (coding->composing == COMPOSITION_DISABLED)                         \
1620       {                                                                    \
1621         *dst++ = ISO_CODE_ESC;                                             \
1622         *dst++ = c1 & 0x7f;                                                \
1623         coding->produced_char += 2;                                        \
1624       }                                                                    \
1625     else if (!COMPOSING_P (coding))                                        \
1626       {                                                                    \
1627         /* This is surely the start of a composition.  We must be sure     \
1628            that coding->cmp_data has enough space to store the             \
1629            information about the composition.  If not, terminate the       \
1630            current decoding loop, allocate one more memory block for       \
1631            coding->cmp_data in the caller, then start the decoding         \
1632            loop again.  We can't allocate memory here directly because     \
1633            it may cause buffer/string relocation.  */                      \
1634         if (!coding->cmp_data                                              \
1635             || (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \
1636                 >= COMPOSITION_DATA_SIZE))                                 \
1637           {                                                                \
1638             coding->result = CODING_FINISH_INSUFFICIENT_CMP;               \
1639             goto label_end_of_loop;                                        \
1640           }                                                                \
1641         coding->composing = (c1 == '0' ? COMPOSITION_RELATIVE              \
1642                              : c1 == '2' ? COMPOSITION_WITH_RULE           \
1643                              : c1 == '3' ? COMPOSITION_WITH_ALTCHARS       \
1644                              : COMPOSITION_WITH_RULE_ALTCHARS);            \
1645         CODING_ADD_COMPOSITION_START (coding, coding->produced_char,       \
1646                                       coding->composing);                  \
1647         coding->composition_rule_follows = 0;                              \
1648       }                                                                    \
1649     else                                                                   \
1650       {                                                                    \
1651         /* We are already handling a composition.  If the method is        \
1652            the following two, the codes following the current escape       \
1653            sequence are actual characters stored in a buffer.  */          \
1654         if (coding->composing == COMPOSITION_WITH_ALTCHARS                 \
1655             || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)        \
1656           {                                                                \
1657             coding->composing = COMPOSITION_RELATIVE;                      \
1658             coding->composition_rule_follows = 0;                          \
1659           }                                                                \
1660       }                                                                    \
1661   } while (0)
1662
1663 /* Handle composition end sequence ESC 1.  C1 is the byte that followed
        ESC.  If composition is disabled in CODING, ESC and C1 are written
        to DST and coding->produced_char grows by 2.  Otherwise the end of
        the composition is recorded at coding->produced_char and
        coding->composing is reset to COMPOSITION_NO.  */
1664
1665 #define DECODE_COMPOSITION_END(c1)                                      \
1666   do {                                                                  \
1667     if (coding->composing == COMPOSITION_DISABLED)                      \
1668       {                                                                 \
1669         *dst++ = ISO_CODE_ESC;                                          \
1670         *dst++ = c1;                                                    \
1671         coding->produced_char += 2;                                     \
1672       }                                                                 \
1673     else                                                                \
1674       {                                                                 \
1675         CODING_ADD_COMPOSITION_END (coding, coding->produced_char);     \
1676         coding->composing = COMPOSITION_NO;                             \
1677       }                                                                 \
1678   } while (0)
1679
1680 /* Decode a composition rule from the byte C1 (and maybe one more byte
1681    from SRC, read into `c2' of the expanding scope) and store one
1682    encoded composition rule in coding->cmp_data.  C1 must be an lvalue:
        32 is subtracted from it in place.  An adjusted C1 below 81 is the
        one-byte old format, 81..92 is the two-byte new format, and for
        any larger value rule 0 is stored.  In every case
        coding->composition_rule_follows is cleared.  */
1683
1684 #define DECODE_COMPOSITION_RULE(c1)                                     \
1685   do {                                                                  \
1686     int rule = 0;                                                       \
1687     (c1) -= 32;                                                         \
1688     if (c1 < 81)                /* old format (before ver.21) */        \
1689       {                                                                 \
1690         int gref = (c1) / 9;                                            \
1691         int nref = (c1) % 9;                                            \
1692         if (gref == 4) gref = 10;                                       \
1693         if (nref == 4) nref = 10;                                       \
1694         rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
1695       }                                                                 \
1696     else if (c1 < 93)           /* new format (after ver.21) */         \
1697       {                                                                 \
1698         ONE_MORE_BYTE (c2);                                             \
1699         rule = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);              \
1700       }                                                                 \
1701     CODING_ADD_COMPOSITION_COMPONENT (coding, rule);                    \
1702     coding->composition_rule_follows = 0;                               \
1703   } while (0)
1704
1705
1706 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".

        Decode the SRC_BYTES bytes at SOURCE as ISO2022 text, using and
        updating the designation and invocation state held in CODING, and
        produce the result at DESTINATION (DESTINATION + DST_BYTES is
        taken as the end of the output area).

        On return, coding->result holds the finish status,
        coding->consumed and coding->consumed_char hold the offset of the
        first source byte not yet handled, coding->produced holds the
        number of bytes stored at DESTINATION, and coding->errors has
        been incremented once for each invalid code met.  */
1707
1708 static void
1709 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1710      struct coding_system *coding;
1711      unsigned char *source, *destination;
1712      int src_bytes, dst_bytes;
1713 {
1714   unsigned char *src = source;
1715   unsigned char *src_end = source + src_bytes;
1716   unsigned char *dst = destination;
1717   unsigned char *dst_end = destination + dst_bytes;
1718   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1719   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1720   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1721   /* SRC_BASE remembers the start position in source in each loop.
1722      The loop will be exited when there's not enough source code
1723      (within macro ONE_MORE_BYTE), or when there's not enough
1724      destination area to produce a character (within macro
1725      EMIT_CHAR).  */
1726   unsigned char *src_base;
1727   int c, charset;
1728   Lisp_Object translation_table;
1729   Lisp_Object safe_chars;
1730
1731   safe_chars = coding_safe_chars (coding);
1732
1733   if (NILP (Venable_character_translation))
1734     translation_table = Qnil;
1735   else
1736     {
1737       translation_table = coding->translation_table_for_decode;
1738       if (NILP (translation_table))
1739         translation_table = Vstandard_translation_table_for_decode;
1740     }
1741
1742   coding->result = CODING_FINISH_NORMAL;
1743
1744   while (1)
1745     {
1746       int c1, c2;
1747
1748       src_base = src;
1749       ONE_MORE_BYTE (c1);
1750
1751       /* We produce no character or one character.  */
1752       switch (iso_code_class [c1])
1753         {
1754         case ISO_0x20_or_0x7F:
1755           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1756             {
1757               DECODE_COMPOSITION_RULE (c1);
1758               continue;
1759             }
1760           if (charset0 < 0 || CHARSET_CHARS (charset0) == 94)
1761             {
1762               /* This is SPACE or DEL.  */
1763               charset = CHARSET_ASCII;
1764               break;
1765             }
1766           /* This is a graphic character, we fall down ...  */
1767
1768         case ISO_graphic_plane_0:
1769           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1770             {
1771               DECODE_COMPOSITION_RULE (c1);
1772               continue;
1773             }
1774           charset = charset0;
1775           break;
1776
1777         case ISO_0xA0_or_0xFF:
1778           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1779               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1780             goto label_invalid_code;
1781           /* This is a graphic character, we fall down ... */
1782
1783         case ISO_graphic_plane_1:
1784           if (charset1 < 0)
1785             goto label_invalid_code;
1786           charset = charset1;
1787           break;
1788
1789         case ISO_control_0:
1790           if (COMPOSING_P (coding))
1791             DECODE_COMPOSITION_END ('1');
1792
1793           /* All ISO2022 control characters in this class have the
1794              same representation in Emacs internal format.  */
1795           if (c1 == '\n'
1796               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1797               && (coding->eol_type == CODING_EOL_CR
1798                   || coding->eol_type == CODING_EOL_CRLF))
1799             {
1800               coding->result = CODING_FINISH_INCONSISTENT_EOL;
1801               goto label_end_of_loop;
1802             }
1803           charset = CHARSET_ASCII;
1804           break;
1805
1806         case ISO_control_1:
1807           if (COMPOSING_P (coding))
1808             DECODE_COMPOSITION_END ('1');
1809           goto label_invalid_code;
1810
1811         case ISO_carriage_return:
1812           if (COMPOSING_P (coding))
1813             DECODE_COMPOSITION_END ('1');
1814
1815           if (coding->eol_type == CODING_EOL_CR)
1816             c1 = '\n';
1817           else if (coding->eol_type == CODING_EOL_CRLF)
1818             {
1819               ONE_MORE_BYTE (c1);
1820               if (c1 != ISO_CODE_LF)
1821                 {
1822                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1823                     {
1824                       coding->result = CODING_FINISH_INCONSISTENT_EOL;
1825                       goto label_end_of_loop;
1826                     }
1827                   src--;
1828                   c1 = '\r';
1829                 }
1830             }
1831           charset = CHARSET_ASCII;
1832           break;
1833
1834         case ISO_shift_out:
1835           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1836               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1837             goto label_invalid_code;
1838           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1839           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1840           continue;
1841
1842         case ISO_shift_in:
1843           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1844             goto label_invalid_code;
1845           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1846           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1847           continue;
1848
1849         case ISO_single_shift_2_7:
1850         case ISO_single_shift_2:
1851           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1852             goto label_invalid_code;
1853           /* SS2 is handled as an escape sequence of ESC 'N' */
1854           c1 = 'N';
1855           goto label_escape_sequence;
1856
1857         case ISO_single_shift_3:
1858           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1859             goto label_invalid_code;
1860           /* SS3 is handled as an escape sequence of ESC 'O' */
1861           c1 = 'O';
1862           goto label_escape_sequence;
1863
1864         case ISO_control_sequence_introducer:
1865           /* CSI is handled as an escape sequence of ESC '[' ...  */
1866           c1 = '[';
1867           goto label_escape_sequence;
1868
1869         case ISO_escape:
1870           ONE_MORE_BYTE (c1);
1871         label_escape_sequence:
1872           /* Escape sequences handled by Emacs are invocation,
1873              designation, direction specification, and character
1874              composition specification.  */
1875           switch (c1)
1876             {
1877             case '&':           /* revision of following character set */
1878               ONE_MORE_BYTE (c1);
1879               if (!(c1 >= '@' && c1 <= '~'))
1880                 goto label_invalid_code;
1881               ONE_MORE_BYTE (c1);
1882               if (c1 != ISO_CODE_ESC)
1883                 goto label_invalid_code;
1884               ONE_MORE_BYTE (c1);
1885               goto label_escape_sequence;
1886
1887             case '$':           /* designation of 2-byte character set */
1888               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1889                 goto label_invalid_code;
1890               ONE_MORE_BYTE (c1);
1891               if (c1 >= '@' && c1 <= 'B')
1892                 {       /* designation of JISX0208.1978, GB2312.1980,
1893                            or JISX0208.1980 */
1894                   DECODE_DESIGNATION (0, 2, 94, c1);
1895                 }
1896               else if (c1 >= 0x28 && c1 <= 0x2B)
1897                 {       /* designation of DIMENSION2_CHARS94 character set */
1898                   ONE_MORE_BYTE (c2);
1899                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1900                 }
1901               else if (c1 >= 0x2C && c1 <= 0x2F)
1902                 {       /* designation of DIMENSION2_CHARS96 character set */
1903                   ONE_MORE_BYTE (c2);
1904                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1905                 }
1906               else
1907                 goto label_invalid_code;
1908               /* We must update these variables now.  */
1909               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1910               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1911               continue;
1912
1913             case 'n':           /* invocation of locking-shift-2 */
1914               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1915                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1916                 goto label_invalid_code;
1917               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1918               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1919               continue;
1920
1921             case 'o':           /* invocation of locking-shift-3 */
1922               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1923                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1924                 goto label_invalid_code;
1925               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
1926               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1927               continue;
1928
1929             case 'N':           /* invocation of single-shift-2 */
1930               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1931                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1932                 goto label_invalid_code;
1933               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
1934               ONE_MORE_BYTE (c1);
1935               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1936                 goto label_invalid_code;
1937               break;
1938
1939             case 'O':           /* invocation of single-shift-3 */
1940               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1941                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1942                 goto label_invalid_code;
1943               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
1944               ONE_MORE_BYTE (c1);
1945               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1946                 goto label_invalid_code;
1947               break;
1948
1949             case '0': case '2': case '3': case '4': /* start composition */
1950               DECODE_COMPOSITION_START (c1);
1951               continue;
1952
1953             case '1':           /* end composition */
1954               DECODE_COMPOSITION_END (c1);
1955               continue;
1956
1957             case '[':           /* specification of direction */
1958               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
1959                 goto label_invalid_code;
1960               /* For the moment, nested direction is not supported.
1961                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
1962                  left-to-right, and nonzero means right-to-left.  */
1963               ONE_MORE_BYTE (c1);
1964               switch (c1)
1965                 {
1966                 case ']':       /* end of the current direction */
1967                   coding->mode &= ~CODING_MODE_DIRECTION;
1968                   /* Falls through.  NOTE(review): there is no `break' here,
                          so after ESC [ ] one more byte is read and must be
                          `]' as well; possibly unintended -- confirm.  */
1969                 case '0':       /* end of the current direction */
1970                 case '1':       /* start of left-to-right direction */
1971                   ONE_MORE_BYTE (c1);
1972                   if (c1 == ']')
1973                     coding->mode &= ~CODING_MODE_DIRECTION;
1974                   else
1975                     goto label_invalid_code;
1976                   break;
1977
1978                 case '2':       /* start of right-to-left direction */
1979                   ONE_MORE_BYTE (c1);
1980                   if (c1 == ']')
1981                     coding->mode |= CODING_MODE_DIRECTION;
1982                   else
1983                     goto label_invalid_code;
1984                   break;
1985
1986                 default:
1987                   goto label_invalid_code;
1988                 }
1989               continue;
1990
1991             default:
1992               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1993                 goto label_invalid_code;
1994               if (c1 >= 0x28 && c1 <= 0x2B)
1995                 {       /* designation of DIMENSION1_CHARS94 character set */
1996                   ONE_MORE_BYTE (c2);
1997                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
1998                 }
1999               else if (c1 >= 0x2C && c1 <= 0x2F)
2000                 {       /* designation of DIMENSION1_CHARS96 character set */
2001                   ONE_MORE_BYTE (c2);
2002                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
2003                 }
2004               else
2005                 goto label_invalid_code;
2006               /* We must update these variables now.  */
2007               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
2008               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
2009               continue;
2010             }
2011         }
2012
2013       /* Now we know CHARSET and 1st position code C1 of a character.
2014          Produce a multibyte sequence for that character while getting
2015          2nd position code C2 if necessary.  NOTE(review): for a
              dimension-1 charset C2 is passed on without having been set
              in this iteration; presumably it is ignored -- confirm.  */
2016       if (CHARSET_DIMENSION (charset) == 2)
2017         {
2018           ONE_MORE_BYTE (c2);
2019           if (c1 < 0x80 ? c2 < 0x20 || c2 >= 0x80 : c2 < 0xA0)
2020             /* C2 is not in a valid range.  */
2021             goto label_invalid_code;
2022         }
2023       c = DECODE_ISO_CHARACTER (charset, c1, c2);
2024       EMIT_CHAR (c);
2025       continue;
2026       /* Invalid code: count the error, end any composition in progress,
             then go back to the first byte of this iteration and pass that
             single byte to EMIT_CHAR.  Decoding resumes right after it.  */
2027     label_invalid_code:
2028       coding->errors++;
2029       if (COMPOSING_P (coding))
2030         DECODE_COMPOSITION_END ('1');
2031       src = src_base;
2032       c = *src++;
2033       EMIT_CHAR (c);
2034     }
2035   /* Only the input before SRC_BASE (the start of the iteration that was
         cut short) is reported as consumed.  */
2036  label_end_of_loop:
2037   coding->consumed = coding->consumed_char = src_base - source;
2038   coding->produced = dst - destination;
2039   return;
2040 }
2041
2042
2043 /* ISO2022 encoding stuff.  */
2044
2045 /*
2046    For encoding, it is not enough to say just "ISO2022"; we have to
2047    specify more details.  In Emacs, each ISO2022 coding system
2048    variant has the following specifications:
2049         1. Initial designation to G0 thru G3.
2050         2. Allows short-form designation?
2051         3. ASCII should be designated to G0 before control characters?
2052         4. ASCII should be designated to G0 at end of line?
2053         5. 7-bit environment or 8-bit environment?
2054         6. Use locking-shift?
2055         7. Use single-shift?
2056    And the following two are only for Japanese:
2057         8. Use ASCII in place of JISX0201-1976-Roman?
2058         9. Use JISX0208-1983 in place of JISX0208-1978?
2059    These specifications are encoded in `coding->flags' as flag bits
2060    defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
2061    details.
2062 */
2063
/* Emit at DST the escape sequence designating CHARSET to graphic
   register REG, advance DST past it, and record CHARSET as the
   current designation of REG in CODING.

   The sequence consists of:
     - ESC & <'@' + revision>, only when the revision number of
       CHARSET is less than 255;
     - ESC;
     - `$', when the dimension of CHARSET is not 1;
     - an intermediate character picked by REG from "()*+" for a
       94-character set or from ",-./" otherwise;
     - the <final-char> of CHARSET.
   The intermediate character is left out (short form) only for a
   multi-dimensional 94-character set designated to register 0 whose
   <final-char> is '@', 'A', or 'B', and only when CODING has
   CODING_FLAG_ISO_SHORT_FORM set.

   The arguments are evaluated more than once.  The variable DST is
   taken from the place where the macro is expanded.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    const char *desig_inter94 = "()*+";                                 \
    const char *desig_inter96 = ",-./";                                 \
    unsigned char desig_final = CHARSET_ISO_FINAL_CHAR (charset);       \
    int desig_rev = CODING_SPEC_ISO_REVISION_NUMBER (coding, charset);  \
    int desig_short_form_ok                                             \
      = ((coding->flags & CODING_FLAG_ISO_SHORT_FORM)                   \
         && reg == 0                                                    \
         && desig_final >= '@' && desig_final <= 'B');                  \
                                                                        \
    if (desig_rev < 255)                                                \
      {                                                                 \
        dst[0] = ISO_CODE_ESC;                                          \
        dst[1] = '&';                                                   \
        dst[2] = '@' + desig_rev;                                       \
        dst += 3;                                                       \
      }                                                                 \
    *dst++ = ISO_CODE_ESC;                                              \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      *dst++ = '$';                                                     \
    if (CHARSET_CHARS (charset) != 94)                                  \
      *dst++ = (unsigned char) (desig_inter96[reg]);                    \
    else if (CHARSET_DIMENSION (charset) == 1                           \
             || ! desig_short_form_ok)                                  \
      *dst++ = (unsigned char) (desig_inter94[reg]);                    \
    *dst++ = desig_final;                                               \
    CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;                \
  } while (0)
2106
/* The following two macros emit the ISO2022 single-shift functions
   (single-shift-2 and single-shift-3) at DST and advance DST.  When
   CODING has CODING_FLAG_ISO_SEVEN_BITS set, the escape sequence
   ESC N (resp. ESC O) is produced; otherwise the single control code
   ISO_CODE_SS2 (resp. ISO_CODE_SS3) is produced.  In both cases the
   single-shifting state of CODING is set to 1.  The variables CODING
   and DST are taken from the place where the macro is expanded.  */

#define ENCODE_SINGLE_SHIFT_2                                   \
  do {                                                          \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))         \
      *dst++ = ISO_CODE_SS2;                                    \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'N';                                           \
      }                                                         \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)

#define ENCODE_SINGLE_SHIFT_3                                   \
  do {                                                          \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))         \
      *dst++ = ISO_CODE_SS3;                                    \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'O';                                           \
      }                                                         \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)
2128
/* The following four macros emit the ISO2022 locking-shift functions
   at DST and advance DST:
     ENCODE_SHIFT_IN         ISO_CODE_SI   register 0
     ENCODE_SHIFT_OUT        ISO_CODE_SO   register 1
     ENCODE_LOCKING_SHIFT_2  ESC n         register 2
     ENCODE_LOCKING_SHIFT_3  ESC o         register 3
   Each one also records in CODING that the register listed above is
   now invoked to graphic plane 0.  The variables CODING and DST are
   taken from the place where the macro is expanded.  */

#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;         \
    *dst++ = ISO_CODE_SI;                               \
  } while (0)

#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;         \
    *dst++ = ISO_CODE_SO;                               \
  } while (0)

#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;         \
    dst[0] = ISO_CODE_ESC;                              \
    dst[1] = 'n';                                       \
    dst += 2;                                           \
  } while (0)

#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;         \
    dst[0] = ISO_CODE_ESC;                              \
    dst[1] = 'o';                                       \
    dst += 2;                                           \
  } while (0)
2156
/* Emit at DST the single byte encoding a DIMENSION1 character of
   charset CHARSET with position-code C1, and advance DST.

   The byte is chosen as follows, first match wins:
     - CODING is in single-shifting state: C1 with the high bit
       cleared (seven-bit flag set) or set (otherwise); the
       single-shifting state is then cleared;
     - CHARSET is the charset of graphic plane 0: C1 with the high
       bit cleared;
     - CHARSET is the charset of graphic plane 1: C1 with the high
       bit set.
   When none applies, encode_invocation_designation is called to emit
   the needed designation and/or invocation, and the selection is
   tried again.  The variables CODING and DST are taken from the place
   where the macro is expanded.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        *dst++ = ((coding->flags & CODING_FLAG_ISO_SEVEN_BITS)          \
                  ? (c1 & 0x7F)                                         \
                  : (c1 | 0x80));                                       \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))           \
      {                                                                 \
        *dst++ = c1 & 0x7F;                                             \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))           \
      {                                                                 \
        *dst++ = c1 | 0x80;                                             \
        break;                                                          \
      }                                                                 \
    /* CHARSET is not invoked to any graphic plane yet.  Invoke it      \
       (designating it to a graphic register first if needed), then     \
       go around the loop once more to produce the character.  */       \
    dst = encode_invocation_designation (charset, coding, dst);         \
  } while (1)
2189
/* Emit at DST the two bytes encoding a DIMENSION2 character of
   charset CHARSET with position-codes C1 and C2, and advance DST.

   The bytes are chosen as follows, first match wins:
     - CODING is in single-shifting state: C1 and C2 with the high
       bit cleared (seven-bit flag set) or set (otherwise); the
       single-shifting state is then cleared;
     - CHARSET is the charset of graphic plane 0: C1 and C2 with the
       high bit cleared;
     - CHARSET is the charset of graphic plane 1: C1 and C2 with the
       high bit set.
   When none applies, encode_invocation_designation is called to emit
   the needed designation and/or invocation, and the selection is
   tried again.  The variables CODING and DST are taken from the place
   where the macro is expanded.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
          {                                                             \
            *dst++ = c1 & 0x7F;                                         \
            *dst++ = c2 & 0x7F;                                         \
          }                                                             \
        else                                                            \
          {                                                             \
            *dst++ = c1 | 0x80;                                         \
            *dst++ = c2 | 0x80;                                         \
          }                                                             \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))           \
      {                                                                 \
        *dst++ = c1 & 0x7F;                                             \
        *dst++ = c2 & 0x7F;                                             \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))           \
      {                                                                 \
        *dst++ = c1 | 0x80;                                             \
        *dst++ = c2 | 0x80;                                             \
        break;                                                          \
      }                                                                 \
    /* CHARSET is not invoked to any graphic plane yet.  Invoke it      \
       (designating it to a graphic register first if needed), then     \
       go around the loop once more to produce the character.  */       \
    dst = encode_invocation_designation (charset, coding, dst);         \
  } while (1)
2222
/* Encode character C at DST and advance DST.

   C is split into its charset and position-codes with SPLIT_CHAR.
   If the charset is not defined, the position-codes are copied to
   DST as they are (the second one only when it is not negative).
   Otherwise the character goes through
   ENCODE_ISO_CHARACTER_DIMENSION1 or ENCODE_ISO_CHARACTER_DIMENSION2
   according to the dimension of the charset, after these two
   substitutions:
     - ASCII is replaced by charset_latin_jisx0201 when CODING has
       CODING_FLAG_ISO_USE_ROMAN set;
     - charset_jisx0208 is replaced by charset_jisx0208_1978 when
       CODING has CODING_FLAG_ISO_USE_OLDJIS set.
   The variables CODING and DST are taken from the place where the
   macro is expanded.  */

#define ENCODE_ISO_CHARACTER(c)                                         \
  do {                                                                  \
    int iso_charset, iso_pos1, iso_pos2;                                \
                                                                        \
    SPLIT_CHAR (c, iso_charset, iso_pos1, iso_pos2);                    \
    if (! CHARSET_DEFINED_P (iso_charset))                              \
      {                                                                 \
        *dst++ = iso_pos1;                                              \
        if (iso_pos2 >= 0)                                              \
          *dst++ = iso_pos2;                                            \
      }                                                                 \
    else if (CHARSET_DIMENSION (iso_charset) == 1)                      \
      {                                                                 \
        if ((coding->flags & CODING_FLAG_ISO_USE_ROMAN)                 \
            && iso_charset == CHARSET_ASCII)                            \
          iso_charset = charset_latin_jisx0201;                         \
        ENCODE_ISO_CHARACTER_DIMENSION1 (iso_charset, iso_pos1);        \
      }                                                                 \
    else                                                                \
      {                                                                 \
        if ((coding->flags & CODING_FLAG_ISO_USE_OLDJIS)                \
            && iso_charset == charset_jisx0208)                         \
          iso_charset = charset_jisx0208_1978;                          \
        ENCODE_ISO_CHARACTER_DIMENSION2 (iso_charset, iso_pos1,         \
                                         iso_pos2);                     \
      }                                                                 \
  } while (0)
2252
2253
/* Instead of encoding character C, encode the substitute character
   CODING_INHIBIT_CHARACTER_SUBSTITUTION (a `?') in its place: once,
   or twice when the width of C's charset is greater than 1.  */

#define ENCODE_UNSAFE_CHARACTER(c)                                      \
  do {                                                                  \
    ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION);       \
    if (CHARSET_WIDTH (CHAR_CHARSET (c)) <= 1)                          \
      break;                                                            \
    /* C is a wide character; produce a second substitute.  */          \
    ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION);       \
  } while (0)
2262
2263
2264 /* Produce designation and invocation codes at a place pointed by DST
2265    to use CHARSET.  The element `spec.iso2022' of *CODING is updated.
2266    Return new DST.  */
2267
2268 unsigned char *
2269 encode_invocation_designation (charset, coding, dst)
2270      int charset;
2271      struct coding_system *coding;
2272      unsigned char *dst;
2273 {
2274   int reg;                      /* graphic register number */
2275
2276   /* At first, check designations.  */
2277   for (reg = 0; reg < 4; reg++)
2278     if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
2279       break;
2280
2281   if (reg >= 4)
2282     {
2283       /* CHARSET is not yet designated to any graphic registers.  */
2284       /* At first check the requested designation.  */
2285       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
2286       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
2287         /* Since CHARSET requests no special designation, designate it
2288            to graphic register 0.  */
2289         reg = 0;
2290
2291       ENCODE_DESIGNATION (charset, reg, coding);
2292     }
2293
2294   if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
2295       && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
2296     {
2297       /* Since the graphic register REG is not invoked to any graphic
2298          planes, invoke it to graphic plane 0.  */
2299       switch (reg)
2300         {
2301         case 0:                 /* graphic register 0 */
2302           ENCODE_SHIFT_IN;
2303           break;
2304
2305         case 1:                 /* graphic register 1 */
2306           ENCODE_SHIFT_OUT;
2307           break;
2308
2309         case 2:                 /* graphic register 2 */
2310           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2311             ENCODE_SINGLE_SHIFT_2;
2312           else
2313             ENCODE_LOCKING_SHIFT_2;
2314           break;
2315
2316         case 3:                 /* graphic register 3 */
2317           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2318             ENCODE_SINGLE_SHIFT_3;
2319           else
2320             ENCODE_LOCKING_SHIFT_3;
2321           break;
2322         }
2323     }
2324
2325   return dst;
2326 }
2327
/* Produce 2-byte codes for encoded composition rule RULE at DST and
   advance DST.  RULE is decoded by COMPOSITION_DECODE_RULE into two
   reference values; the first byte is 32 + 81 + the first value, and
   the second byte is 32 + the second value.  */

#define ENCODE_COMPOSITION_RULE(rule)                                   \
  do {                                                                  \
    int rule_gref, rule_nref;                                           \
                                                                        \
    COMPOSITION_DECODE_RULE (rule, rule_gref, rule_nref);               \
    dst[0] = 32 + 81 + rule_gref;                                       \
    dst[1] = 32 + rule_nref;                                            \
    dst += 2;                                                           \
  } while (0)
2337
/* Produce codes for indicating the start of a composition sequence
   at DST and advance DST.  DATA points to an array of integers which
   specify information about the composition; see the comment in
   coding.h for the format of DATA.

   The composition method DATA[3] is stored in CODING->composing.
   For COMPOSITION_RELATIVE the sequence is ESC 0.  For the other
   methods it is ESC 3 (COMPOSITION_WITH_ALTCHARS) or ESC 4, and in
   addition CODING->cmp_data_index is set to CODING->cmp_data_start
   + 4 and CODING->composition_rule_follows is cleared.  */

#define ENCODE_COMPOSITION_START(coding, data)                          \
  do {                                                                  \
    coding->composing = data[3];                                        \
    *dst++ = ISO_CODE_ESC;                                              \
    if (coding->composing != COMPOSITION_RELATIVE)                      \
      {                                                                 \
        if (coding->composing == COMPOSITION_WITH_ALTCHARS)             \
          *dst++ = '3';                                                 \
        else                                                            \
          *dst++ = '4';                                                 \
        coding->composition_rule_follows = 0;                           \
        coding->cmp_data_index = coding->cmp_data_start + 4;            \
      }                                                                 \
    else                                                                \
      *dst++ = '0';                                                     \
  } while (0)
2357
/* Produce codes for indicating the end of the current composition
   (ESC 1) at DST and advance DST.  CODING->cmp_data_start is advanced
   by DATA[0] and CODING->composing is reset to COMPOSITION_NO.  If
   that brings cmp_data_start exactly to the `used' count of the
   current composition data block and a next block exists, CODING
   switches to the next block and cmp_data_start restarts at 0.  */

#define ENCODE_COMPOSITION_END(coding, data)                            \
  do {                                                                  \
    dst[0] = ISO_CODE_ESC;                                              \
    dst[1] = '1';                                                       \
    dst += 2;                                                           \
    coding->cmp_data_start += data[0];                                  \
    coding->composing = COMPOSITION_NO;                                 \
    if (coding->cmp_data->next                                          \
        && coding->cmp_data_start == coding->cmp_data->used)            \
      {                                                                 \
        coding->cmp_data_start = 0;                                     \
        coding->cmp_data = coding->cmp_data->next;                      \
      }                                                                 \
  } while (0)
2373
/* Produce composition start sequence ESC 0 at DST, advance DST, and
   set CODING->composing to COMPOSITION_RELATIVE.  Here, this sequence
   doesn't mean the start of a new composition but means that we have
   just produced components (alternate chars and composition rules) of
   the composition and the actual text follows in SRC.  */

#define ENCODE_COMPOSITION_FAKE_START(coding)                           \
  do {                                                                  \
    coding->composing = COMPOSITION_RELATIVE;                           \
    dst[0] = ISO_CODE_ESC;                                              \
    dst[1] = '0';                                                       \
    dst += 2;                                                           \
  } while (0)
2385
/* The following three macros produce codes for indicating direction
   of text at DST and advance DST.  The variables CODING and DST are
   taken from the place where the macro is expanded.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER produces the control sequence
   introducer: ESC [ when CODING has CODING_FLAG_ISO_SEVEN_BITS set
   (tested as a flag bit, like the single-shift macros do, so that
   other bits in `flags' do not matter), ISO_CODE_CSI otherwise.

   ENCODE_DIRECTION_R2L and ENCODE_DIRECTION_L2R produce the
   introducer followed by `2' `]' and `0' `]' respectively.  They are
   statement-like macros: use them as `ENCODE_DIRECTION_R2L;'.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
  do {                                                  \
    if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
      {                                                 \
        *dst++ = ISO_CODE_ESC;                          \
        *dst++ = '[';                                   \
      }                                                 \
    else                                                \
      *dst++ = ISO_CODE_CSI;                            \
  } while (0)

#define ENCODE_DIRECTION_R2L                            \
  do {                                                  \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;                 \
    *dst++ = '2';                                       \
    *dst++ = ']';                                       \
  } while (0)

#define ENCODE_DIRECTION_L2R                            \
  do {                                                  \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;                 \
    *dst++ = '0';                                       \
    *dst++ = ']';                                       \
  } while (0)
2401
/* Produce codes for designation and invocation to reset the graphic
   planes and registers to initial state.  A shift-in is produced
   unless graphic register 0 is the one invoked to graphic plane 0.
   Then every graphic register that has an initial designation (a
   non-negative value) different from its current designation gets
   its initial charset designated again.  The variables CODING and
   DST are taken from the place where the macro is expanded.  */
#define ENCODE_RESET_PLANE_AND_REGISTER                                     \
  do {                                                                      \
    int reset_reg = 0;                                                      \
                                                                            \
    if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                        \
      ENCODE_SHIFT_IN;                                                      \
    while (reset_reg < 4)                                                   \
      {                                                                     \
        int reset_initial                                                   \
          = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reset_reg);        \
                                                                            \
        if (reset_initial >= 0                                              \
            && (CODING_SPEC_ISO_DESIGNATION (coding, reset_reg)             \
                != reset_initial))                                          \
          ENCODE_DESIGNATION (reset_initial, reset_reg, coding);            \
        reset_reg++;                                                        \
      }                                                                     \
  } while (0)
2416
2417 /* Produce designation sequences of charsets in the line started from
2418    SRC to a place pointed by DST, and return updated DST.
2419
2420    If the current block ends before any end-of-line, we may fail to
2421    find all the necessary designations.  */
2422
2423 static unsigned char *
2424 encode_designation_at_bol (coding, translation_table, src, src_end, dst)
2425      struct coding_system *coding;
2426      Lisp_Object translation_table;
2427      unsigned char *src, *src_end, *dst;
2428 {
2429   int charset, c, found = 0, reg;
2430   /* Table of charsets to be designated to each graphic register.  */
2431   int r[4];
2432
2433   for (reg = 0; reg < 4; reg++)
2434     r[reg] = -1;
2435
2436   while (found < 4)
2437     {
2438       ONE_MORE_CHAR (c);
2439       if (c == '\n')
2440         break;
2441
2442       charset = CHAR_CHARSET (c);
2443       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
2444       if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
2445         {
2446           found++;
2447           r[reg] = charset;
2448         }
2449     }
2450
2451  label_end_of_loop:
2452   if (found)
2453     {
2454       for (reg = 0; reg < 4; reg++)
2455         if (r[reg] >= 0
2456             && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
2457           ENCODE_DESIGNATION (r[reg], reg, coding);
2458     }
2459
2460   return dst;
2461 }
2462
2463 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
2464
2465 static void
2466 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
2467      struct coding_system *coding;
2468      unsigned char *source, *destination;
2469      int src_bytes, dst_bytes;
2470 {
2471   unsigned char *src = source;
2472   unsigned char *src_end = source + src_bytes;
2473   unsigned char *dst = destination;
2474   unsigned char *dst_end = destination + dst_bytes;
2475   /* Since the maximum bytes produced by each loop is 20, we subtract 19
2476      from DST_END to assure overflow checking is necessary only at the
2477      head of loop.  */
2478   unsigned char *adjusted_dst_end = dst_end - 19;
2479   /* SRC_BASE remembers the start position in source in each loop.
2480      The loop will be exited when there's not enough source text to
2481      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2482      there's not enough destination area to produce encoded codes
2483      (within macro EMIT_BYTES).  */
2484   unsigned char *src_base;
2485   int c;
2486   Lisp_Object translation_table;
2487   Lisp_Object safe_chars;
2488
2489   safe_chars = coding_safe_chars (coding);
2490
2491   if (NILP (Venable_character_translation))
2492     translation_table = Qnil;
2493   else
2494     {
2495       translation_table = coding->translation_table_for_encode;
2496       if (NILP (translation_table))
2497         translation_table = Vstandard_translation_table_for_encode;
2498     }
2499
2500   coding->consumed_char = 0;
2501   coding->errors = 0;
2502   while (1)
2503     {
2504       src_base = src;
2505
2506       if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19)))
2507         {
2508           coding->result = CODING_FINISH_INSUFFICIENT_DST;
2509           break;
2510         }
2511
2512       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
2513           && CODING_SPEC_ISO_BOL (coding))
2514         {
2515           /* We have to produce designation sequences if any now.  */
2516           dst = encode_designation_at_bol (coding, translation_table,
2517                                            src, src_end, dst);
2518           CODING_SPEC_ISO_BOL (coding) = 0;
2519         }
2520
2521       /* Check composition start and end.  */
2522       if (coding->composing != COMPOSITION_DISABLED
2523           && coding->cmp_data_start < coding->cmp_data->used)
2524         {
2525           struct composition_data *cmp_data = coding->cmp_data;
2526           int *data = cmp_data->data + coding->cmp_data_start;
2527           int this_pos = cmp_data->char_offset + coding->consumed_char;
2528
2529           if (coding->composing == COMPOSITION_RELATIVE)
2530             {
2531               if (this_pos == data[2])
2532                 {
2533                   ENCODE_COMPOSITION_END (coding, data);
2534                   cmp_data = coding->cmp_data;
2535                   data = cmp_data->data + coding->cmp_data_start;
2536                 }
2537             }
2538           else if (COMPOSING_P (coding))
2539             {
2540               /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHAR  */
2541               if (coding->cmp_data_index == coding->cmp_data_start + data[0])
2542                 /* We have consumed components of the composition.
2543                    What follows in SRC is the compositions's base
2544                    text.  */
2545                 ENCODE_COMPOSITION_FAKE_START (coding);
2546               else
2547                 {
2548                   int c = cmp_data->data[coding->cmp_data_index++];
2549                   if (coding->composition_rule_follows)
2550                     {
2551                       ENCODE_COMPOSITION_RULE (c);
2552                       coding->composition_rule_follows = 0;
2553                     }
2554                   else
2555                     {
2556                       if (coding->flags & CODING_FLAG_ISO_SAFE
2557                           && ! CODING_SAFE_CHAR_P (safe_chars, c))
2558                         ENCODE_UNSAFE_CHARACTER (c);
2559                       else
2560                         ENCODE_ISO_CHARACTER (c);
2561                       if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
2562                         coding->composition_rule_follows = 1;
2563                     }
2564                   continue;
2565                 }
2566             }
2567           if (!COMPOSING_P (coding))
2568             {
2569               if (this_pos == data[1])
2570                 {
2571                   ENCODE_COMPOSITION_START (coding, data);
2572                   continue;
2573                 }
2574             }
2575         }
2576
2577       ONE_MORE_CHAR (c);
2578
2579       /* Now encode the character C.  */
2580       if (c < 0x20 || c == 0x7F)
2581         {
2582           if (c == '\r')
2583             {
2584               if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2585                 {
2586                   if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2587                     ENCODE_RESET_PLANE_AND_REGISTER;
2588                   *dst++ = c;
2589                   continue;
2590                 }
2591               /* fall down to treat '\r' as '\n' ...  */
2592               c = '\n';
2593             }
2594           if (c == '\n')
2595             {
2596               if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
2597                 ENCODE_RESET_PLANE_AND_REGISTER;
2598               if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
2599                 bcopy (coding->spec.iso2022.initial_designation,
2600                        coding->spec.iso2022.current_designation,
2601                        sizeof coding->spec.iso2022.initial_designation);
2602               if (coding->eol_type == CODING_EOL_LF
2603                   || coding->eol_type == CODING_EOL_UNDECIDED)
2604                 *dst++ = ISO_CODE_LF;
2605               else if (coding->eol_type == CODING_EOL_CRLF)
2606                 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
2607               else
2608                 *dst++ = ISO_CODE_CR;
2609               CODING_SPEC_ISO_BOL (coding) = 1;
2610             }
2611           else
2612             {
2613               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2614                 ENCODE_RESET_PLANE_AND_REGISTER;
2615               *dst++ = c;
2616             }
2617         }
2618       else if (ASCII_BYTE_P (c))
2619         ENCODE_ISO_CHARACTER (c);
2620       else if (SINGLE_BYTE_CHAR_P (c))
2621         {
2622           *dst++ = c;
2623           coding->errors++;
2624         }
2625       else if (coding->flags & CODING_FLAG_ISO_SAFE
2626                && ! CODING_SAFE_CHAR_P (safe_chars, c))
2627         ENCODE_UNSAFE_CHARACTER (c);
2628       else
2629         ENCODE_ISO_CHARACTER (c);
2630
2631       coding->consumed_char++;
2632     }
2633
2634  label_end_of_loop:
2635   coding->consumed = src_base - source;
2636   coding->produced = coding->produced_char = dst - destination;
2637 }
2638
2639 \f
2640 /*** 4. SJIS and BIG5 handlers ***/
2641
2642 /* Although SJIS and BIG5 are not ISO coding systems, they are used
2643    quite widely.  So, for the moment, Emacs supports them in the bare
2644    C code.  But, in the future, they may be supported only by CCL.  */
2645
2646 /* SJIS is a coding system encoding three character sets: ASCII, right
2647    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
2648    as is.  A character of charset katakana-jisx0201 is encoded by
2649    "position-code + 0x80".  A character of charset japanese-jisx0208
2650    is encoded in 2-byte but two position-codes are divided and shifted
2651    so that it fits in the range below.
2652
2653    --- CODE RANGE of SJIS ---
2654    (character set)      (range)
2655    ASCII                0x00 .. 0x7F
2656    KATAKANA-JISX0201    0xA0 .. 0xDF
2657    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
2658             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
2659    -------------------------------
2660
2661 */
2662
2663 /* BIG5 is a coding system encoding two character sets: ASCII and
2664    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
2665    character set and is encoded in two bytes.
2666
2667    --- CODE RANGE of BIG5 ---
2668    (character set)      (range)
2669    ASCII                0x00 .. 0x7F
2670    Big5 (1st byte)      0xA1 .. 0xFE
2671         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
2672    --------------------------
2673
2674    Since the number of characters in Big5 is larger than maximum
2675    characters in Emacs' charset (96x96), it can't be handled as one
2676    charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
2677    and `charset-big5-2'.  Both are DIMENSION2 and CHARS94.  The former
2678    contains frequently used characters and the latter contains less
2679    frequently used characters.  */
2680
/* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
   are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
   C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
   format.  CHARSET is `charset_big5_1' or `charset_big5_2'.

   Both macros go through a linear index that counts Big5 codes from
   (0xA1, 0x40), skipping the unused 2nd-byte range 0x7F .. 0xA0.
   Codes whose 1st byte is less than 0xC9 belong to `charset_big5_1';
   the others belong to `charset_big5_2', whose index restarts at 0
   from 1st byte 0xC9.  The arguments are evaluated more than once.  */

/* Number of Big5 characters which have the same code in 1st byte.  */
#define BIG5_SAME_ROW ((0xFF - 0xA1) + (0x7F - 0x40))

#define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
  do {                                                                  \
    unsigned int big5_index                                             \
      = (((b1) - 0xA1) * BIG5_SAME_ROW                                  \
         + (b2) - ((b2) < 0x7F ? 0x40 : 0x62));                         \
                                                                        \
    if ((b1) >= 0xC9)                                                   \
      {                                                                 \
        big5_index -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                    \
        charset = charset_big5_2;                                       \
      }                                                                 \
    else                                                                \
      charset = charset_big5_1;                                         \
    c1 = big5_index / (0xFF - 0xA1) + 0x21;                             \
    c2 = big5_index % (0xFF - 0xA1) + 0x21;                             \
  } while (0)

#define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
  do {                                                                  \
    unsigned int big5_index                                             \
      = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);                  \
                                                                        \
    if ((charset) == charset_big5_2)                                    \
      big5_index += BIG5_SAME_ROW * (0xC9 - 0xA1);                      \
    b1 = big5_index / BIG5_SAME_ROW + 0xA1;                             \
    b2 = big5_index % BIG5_SAME_ROW;                                    \
    /* Map the in-row offset back around the 0x7F .. 0xA0 gap.  */      \
    if (b2 < 0x3F)                                                      \
      b2 += 0x40;                                                       \
    else                                                                \
      b2 += 0x62;                                                       \
  } while (0)
2713
2714 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2715    Check if a text is encoded in SJIS.  If it is, return
2716    CODING_CATEGORY_MASK_SJIS, else return 0.  */
2717
2718 static int
2719 detect_coding_sjis (src, src_end, multibytep)
2720      unsigned char *src, *src_end;
2721      int multibytep;
2722 {
2723   int c;
2724   /* Dummy for ONE_MORE_BYTE.  */
2725   struct coding_system dummy_coding;
2726   struct coding_system *coding = &dummy_coding;
2727
2728   while (1)
2729     {
2730       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2731       if (c >= 0x81)
2732         {
2733           if (c <= 0x9F || (c >= 0xE0 && c <= 0xEF))
2734             {
2735               ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2736               if (c < 0x40 || c == 0x7F || c > 0xFC)
2737                 return 0;
2738             }
2739           else if (c > 0xDF)
2740             return 0;
2741         }
2742     }
2743  label_end_of_loop:
2744   return CODING_CATEGORY_MASK_SJIS;
2745 }
2746
2747 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2748    Check if a text is encoded in BIG5.  If it is, return
2749    CODING_CATEGORY_MASK_BIG5, else return 0.  */
2750
2751 static int
2752 detect_coding_big5 (src, src_end, multibytep)
2753      unsigned char *src, *src_end;
2754      int multibytep;
2755 {
2756   int c;
2757   /* Dummy for ONE_MORE_BYTE.  */
2758   struct coding_system dummy_coding;
2759   struct coding_system *coding = &dummy_coding;
2760
2761   while (1)
2762     {
2763       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2764       if (c >= 0xA1)
2765         {
2766           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2767           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
2768             return 0;
2769         }
2770     }
2771  label_end_of_loop:
2772   return CODING_CATEGORY_MASK_BIG5;
2773 }
2774
2775 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2776    Check if a text is encoded in UTF-8.  If it is, return
2777    CODING_CATEGORY_MASK_UTF_8, else return 0.  */
2778
/* Nonzero if the bits of C selected by MASK are equal to BITS.  */
#define UTF_8_OCTET_MATCH_P(c, mask, bits) (((c) & (mask)) == (bits))

/* Classify the octet C by its high-order bits:
     0xxxxxxx          a character of one octet,
     10xxxxxx          an octet following a leading octet,
     110xxxxx, 1110xxxx, 11110xxx, 111110xx, 1111110x
                       the leading octet of a sequence of two,
                       three, four, five, or six octets.  */
#define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
#define UTF_8_EXTRA_OCTET_P(c)     UTF_8_OCTET_MATCH_P (c, 0xC0, 0x80)
#define UTF_8_2_OCTET_LEADING_P(c) UTF_8_OCTET_MATCH_P (c, 0xE0, 0xC0)
#define UTF_8_3_OCTET_LEADING_P(c) UTF_8_OCTET_MATCH_P (c, 0xF0, 0xE0)
#define UTF_8_4_OCTET_LEADING_P(c) UTF_8_OCTET_MATCH_P (c, 0xF8, 0xF0)
#define UTF_8_5_OCTET_LEADING_P(c) UTF_8_OCTET_MATCH_P (c, 0xFC, 0xF8)
#define UTF_8_6_OCTET_LEADING_P(c) UTF_8_OCTET_MATCH_P (c, 0xFE, 0xFC)
2786
2787 static int
2788 detect_coding_utf_8 (src, src_end, multibytep)
2789      unsigned char *src, *src_end;
2790      int multibytep;
2791 {
2792   unsigned char c;
2793   int seq_maybe_bytes;
2794   /* Dummy for ONE_MORE_BYTE.  */
2795   struct coding_system dummy_coding;
2796   struct coding_system *coding = &dummy_coding;
2797
2798   while (1)
2799     {
2800       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2801       if (UTF_8_1_OCTET_P (c))
2802         continue;
2803       else if (UTF_8_2_OCTET_LEADING_P (c))
2804         seq_maybe_bytes = 1;
2805       else if (UTF_8_3_OCTET_LEADING_P (c))
2806         seq_maybe_bytes = 2;
2807       else if (UTF_8_4_OCTET_LEADING_P (c))
2808         seq_maybe_bytes = 3;
2809       else if (UTF_8_5_OCTET_LEADING_P (c))
2810         seq_maybe_bytes = 4;
2811       else if (UTF_8_6_OCTET_LEADING_P (c))
2812         seq_maybe_bytes = 5;
2813       else
2814         return 0;
2815
2816       do
2817         {
2818           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2819           if (!UTF_8_EXTRA_OCTET_P (c))
2820             return 0;
2821           seq_maybe_bytes--;
2822         }
2823       while (seq_maybe_bytes > 0);
2824     }
2825
2826  label_end_of_loop:
2827   return CODING_CATEGORY_MASK_UTF_8;
2828 }
2829
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
   Check if a text is encoded in UTF-16 by looking at the byte order
   mark in its first two bytes.  If they are 0xFE 0xFF, return
   CODING_CATEGORY_MASK_UTF_16_BE; if they are 0xFF 0xFE, return
   CODING_CATEGORY_MASK_UTF_16_LE; else return 0.  */
2835
/* Nonzero if the 16-bit value VAL is 0xFFFE or 0xFFFF.  */
#define UTF_16_INVALID_P(val)   \
  (((val) == 0xFFFE)            \
   || ((val) == 0xFFFF))

/* Nonzero if the 16-bit value VAL is a high surrogate, i.e. is in
   the range 0xD800..0xDBFF.  The upper six bits tell the two kinds
   of surrogates apart, hence the mask 0xFC00.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* Nonzero if the 16-bit value VAL is a low surrogate, i.e. is in
   the range 0xDC00..0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)
2845
2846 static int
2847 detect_coding_utf_16 (src, src_end, multibytep)
2848      unsigned char *src, *src_end;
2849      int multibytep;
2850 {
2851   unsigned char c1, c2;
2852   /* Dummy for TWO_MORE_BYTES.  */
2853   struct coding_system dummy_coding;
2854   struct coding_system *coding = &dummy_coding;
2855
2856   ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
2857   ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep);
2858
2859   if ((c1 == 0xFF) && (c2 == 0xFE))
2860     return CODING_CATEGORY_MASK_UTF_16_LE;
2861   else if ((c1 == 0xFE) && (c2 == 0xFF))
2862     return CODING_CATEGORY_MASK_UTF_16_BE;
2863
2864  label_end_of_loop:
2865   return 0;
2866 }
2867
2868 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2869    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
2870
2871 static void
2872 decode_coding_sjis_big5 (coding, source, destination,
2873                          src_bytes, dst_bytes, sjis_p)
2874      struct coding_system *coding;
2875      unsigned char *source, *destination;
2876      int src_bytes, dst_bytes;
2877      int sjis_p;
2878 {
2879   unsigned char *src = source;
2880   unsigned char *src_end = source + src_bytes;
2881   unsigned char *dst = destination;
2882   unsigned char *dst_end = destination + dst_bytes;
2883   /* SRC_BASE remembers the start position in source in each loop.
2884      The loop will be exited when there's not enough source code
2885      (within macro ONE_MORE_BYTE), or when there's not enough
2886      destination area to produce a character (within macro
2887      EMIT_CHAR).  */
2888   unsigned char *src_base;
2889   Lisp_Object translation_table;
2890
2891   if (NILP (Venable_character_translation))
2892     translation_table = Qnil;
2893   else
2894     {
2895       translation_table = coding->translation_table_for_decode;
2896       if (NILP (translation_table))
2897         translation_table = Vstandard_translation_table_for_decode;
2898     }
2899
2900   coding->produced_char = 0;
2901   while (1)
2902     {
2903       int c, charset, c1, c2;
2904
2905       src_base = src;
2906       ONE_MORE_BYTE (c1);
2907
2908       if (c1 < 0x80)
2909         {
2910           charset = CHARSET_ASCII;
2911           if (c1 < 0x20)
2912             {
2913               if (c1 == '\r')
2914                 {
2915                   if (coding->eol_type == CODING_EOL_CRLF)
2916                     {
2917                       ONE_MORE_BYTE (c2);
2918                       if (c2 == '\n')
2919                         c1 = c2;
2920                       else if (coding->mode
2921                                & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2922                         {
2923                           coding->result = CODING_FINISH_INCONSISTENT_EOL;
2924                           goto label_end_of_loop;
2925                         }
2926                       else
2927                         /* To process C2 again, SRC is subtracted by 1.  */
2928                         src--;
2929                     }
2930                   else if (coding->eol_type == CODING_EOL_CR)
2931                     c1 = '\n';
2932                 }
2933               else if (c1 == '\n'
2934                        && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2935                        && (coding->eol_type == CODING_EOL_CR
2936                            || coding->eol_type == CODING_EOL_CRLF))
2937                 {
2938                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
2939                   goto label_end_of_loop;
2940                 }
2941             }
2942         }
2943       else
2944         {
2945           if (sjis_p)
2946             {
2947               if (c1 >= 0xF0)
2948                 goto label_invalid_code;
2949               if (c1 < 0xA0 || c1 >= 0xE0)
2950                 {
2951                   /* SJIS -> JISX0208 */
2952                   ONE_MORE_BYTE (c2);
2953                   if (c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
2954                     goto label_invalid_code;
2955                   DECODE_SJIS (c1, c2, c1, c2);
2956                   charset = charset_jisx0208;
2957                 }
2958               else
2959                 /* SJIS -> JISX0201-Kana */
2960                 charset = charset_katakana_jisx0201;
2961             }
2962           else
2963             {
2964               /* BIG5 -> Big5 */
2965               if (c1 < 0xA1 || c1 > 0xFE)
2966                 goto label_invalid_code;
2967               ONE_MORE_BYTE (c2);
2968               if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE)
2969                 goto label_invalid_code;
2970               DECODE_BIG5 (c1, c2, charset, c1, c2);
2971             }
2972         }
2973
2974       c = DECODE_ISO_CHARACTER (charset, c1, c2);
2975       EMIT_CHAR (c);
2976       continue;
2977
2978     label_invalid_code:
2979       coding->errors++;
2980       src = src_base;
2981       c = *src++;
2982       EMIT_CHAR (c);
2983     }
2984
2985  label_end_of_loop:
2986   coding->consumed = coding->consumed_char = src_base - source;
2987   coding->produced = dst - destination;
2988   return;
2989 }
2990
2991 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
2992    This function can encode charsets `ascii', `katakana-jisx0201',
2993    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
2994    are sure that all these charsets are registered as official charset
2995    (i.e. do not have extended leading-codes).  Characters of other
2996    charsets are produced without any encoding.  If SJIS_P is 1, encode
2997    SJIS text, else encode BIG5 text.  */
2998
2999 static void
3000 encode_coding_sjis_big5 (coding, source, destination,
3001                          src_bytes, dst_bytes, sjis_p)
3002      struct coding_system *coding;
3003      unsigned char *source, *destination;
3004      int src_bytes, dst_bytes;
3005      int sjis_p;
3006 {
3007   unsigned char *src = source;
3008   unsigned char *src_end = source + src_bytes;
3009   unsigned char *dst = destination;
3010   unsigned char *dst_end = destination + dst_bytes;
3011   /* SRC_BASE remembers the start position in source in each loop.
3012      The loop will be exited when there's not enough source text to
3013      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
3014      there's not enough destination area to produce encoded codes
3015      (within macro EMIT_BYTES).  */
3016   unsigned char *src_base;
3017   Lisp_Object translation_table;
3018
3019   if (NILP (Venable_character_translation))
3020     translation_table = Qnil;
3021   else
3022     {
3023       translation_table = coding->translation_table_for_encode;
3024       if (NILP (translation_table))
3025         translation_table = Vstandard_translation_table_for_encode;
3026     }
3027
3028   while (1)
3029     {
3030       int c, charset, c1, c2;
3031
3032       src_base = src;
3033       ONE_MORE_CHAR (c);
3034
3035       /* Now encode the character C.  */
3036       if (SINGLE_BYTE_CHAR_P (c))
3037         {
3038           switch (c)
3039             {
3040             case '\r':
3041               if (!coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
3042                 {
3043                   EMIT_ONE_BYTE (c);
3044                   break;
3045                 }
3046               c = '\n';
3047             case '\n':
3048               if (coding->eol_type == CODING_EOL_CRLF)
3049                 {
3050                   EMIT_TWO_BYTES ('\r', c);
3051                   break;
3052                 }
3053               else if (coding->eol_type == CODING_EOL_CR)
3054                 c = '\r';
3055             default:
3056               EMIT_ONE_BYTE (c);
3057             }
3058         }
3059       else
3060         {
3061           SPLIT_CHAR (c, charset, c1, c2);
3062           if (sjis_p)
3063             {
3064               if (charset == charset_jisx0208
3065                   || charset == charset_jisx0208_1978)
3066                 {
3067                   ENCODE_SJIS (c1, c2, c1, c2);
3068                   EMIT_TWO_BYTES (c1, c2);
3069                 }
3070               else if (charset == charset_katakana_jisx0201)
3071                 EMIT_ONE_BYTE (c1 | 0x80);
3072               else if (charset == charset_latin_jisx0201)
3073                 EMIT_ONE_BYTE (c1);
3074               else
3075                 /* There's no way other than producing the internal
3076                    codes as is.  */
3077                 EMIT_BYTES (src_base, src);
3078             }
3079           else
3080             {
3081               if (charset == charset_big5_1 || charset == charset_big5_2)
3082                 {
3083                   ENCODE_BIG5 (charset, c1, c2, c1, c2);
3084                   EMIT_TWO_BYTES (c1, c2);
3085                 }
3086               else
3087                 /* There's no way other than producing the internal
3088                    codes as is.  */
3089                 EMIT_BYTES (src_base, src);
3090             }
3091         }
3092       coding->consumed_char++;
3093     }
3094
3095  label_end_of_loop:
3096   coding->consumed = src_base - source;
3097   coding->produced = coding->produced_char = dst - destination;
3098 }
3099
3100 \f
3101 /*** 5. CCL handlers ***/
3102
3103 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3104    Check if a text is encoded in a coding system of which
3105    encoder/decoder are written in CCL program.  If it is, return
3106    CODING_CATEGORY_MASK_CCL, else return 0.  */
3107
3108 static int
3109 detect_coding_ccl (src, src_end, multibytep)
3110      unsigned char *src, *src_end;
3111      int multibytep;
3112 {
3113   unsigned char *valid;
3114   int c;
3115   /* Dummy for ONE_MORE_BYTE.  */
3116   struct coding_system dummy_coding;
3117   struct coding_system *coding = &dummy_coding;
3118
3119   /* No coding system is assigned to coding-category-ccl.  */
3120   if (!coding_system_table[CODING_CATEGORY_IDX_CCL])
3121     return 0;
3122
3123   valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
3124   while (1)
3125     {
3126       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
3127       if (! valid[c])
3128         return 0;
3129     }
3130  label_end_of_loop:
3131   return CODING_CATEGORY_MASK_CCL;
3132 }
3133
3134 \f
3135 /*** 6. End-of-line handlers ***/
3136
3137 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3138
3139 static void
3140 decode_eol (coding, source, destination, src_bytes, dst_bytes)
3141      struct coding_system *coding;
3142      unsigned char *source, *destination;
3143      int src_bytes, dst_bytes;
3144 {
3145   unsigned char *src = source;
3146   unsigned char *dst = destination;
3147   unsigned char *src_end = src + src_bytes;
3148   unsigned char *dst_end = dst + dst_bytes;
3149   Lisp_Object translation_table;
3150   /* SRC_BASE remembers the start position in source in each loop.
3151      The loop will be exited when there's not enough source code
3152      (within macro ONE_MORE_BYTE), or when there's not enough
3153      destination area to produce a character (within macro
3154      EMIT_CHAR).  */
3155   unsigned char *src_base;
3156   int c;
3157
3158   translation_table = Qnil;
3159   switch (coding->eol_type)
3160     {
3161     case CODING_EOL_CRLF:
3162       while (1)
3163         {
3164           src_base = src;
3165           ONE_MORE_BYTE (c);
3166           if (c == '\r')
3167             {
3168               ONE_MORE_BYTE (c);
3169               if (c != '\n')
3170                 {
3171                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
3172                     {
3173                       coding->result = CODING_FINISH_INCONSISTENT_EOL;
3174                       goto label_end_of_loop;
3175                     }
3176                   src--;
3177                   c = '\r';
3178                 }
3179             }
3180           else if (c == '\n'
3181                    && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
3182             {
3183               coding->result = CODING_FINISH_INCONSISTENT_EOL;
3184               goto label_end_of_loop;
3185             }
3186           EMIT_CHAR (c);
3187         }
3188       break;
3189
3190     case CODING_EOL_CR:
3191       while (1)
3192         {
3193           src_base = src;
3194           ONE_MORE_BYTE (c);
3195           if (c == '\n')
3196             {
3197               if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
3198                 {
3199                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
3200                   goto label_end_of_loop;
3201                 }
3202             }
3203           else if (c == '\r')
3204             c = '\n';
3205           EMIT_CHAR (c);
3206         }
3207       break;
3208
3209     default:                    /* no need for EOL handling */
3210       while (1)
3211         {
3212           src_base = src;
3213           ONE_MORE_BYTE (c);
3214           EMIT_CHAR (c);
3215         }
3216     }
3217
3218  label_end_of_loop:
3219   coding->consumed = coding->consumed_char = src_base - source;
3220   coding->produced = dst - destination;
3221   return;
3222 }
3223
3224 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
3225    format of end-of-line according to `coding->eol_type'.  It also
3226    convert multibyte form 8-bit characers to unibyte if
3227    CODING->src_multibyte is nonzero.  If `coding->mode &
3228    CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code '\r' in source text
3229    also means end-of-line.  */
3230
3231 static void
3232 encode_eol (coding, source, destination, src_bytes, dst_bytes)
3233      struct coding_system *coding;
3234      unsigned char *source, *destination;
3235      int src_bytes, dst_bytes;
3236 {
3237   unsigned char *src = source;
3238   unsigned char *dst = destination;
3239   unsigned char *src_end = src + src_bytes;
3240   unsigned char *dst_end = dst + dst_bytes;
3241   Lisp_Object translation_table;
3242   /* SRC_BASE remembers the start position in source in each loop.
3243      The loop will be exited when there's not enough source text to
3244      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
3245      there's not enough destination area to produce encoded codes
3246      (within macro EMIT_BYTES).  */
3247   unsigned char *src_base;
3248   int c;
3249   int selective_display = coding->mode & CODING_MODE_SELECTIVE_DISPLAY;
3250
3251   translation_table = Qnil;
3252   if (coding->src_multibyte
3253       && *(src_end - 1) == LEADING_CODE_8_BIT_CONTROL)
3254     {
3255       src_end--;
3256       src_bytes--;
3257       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
3258     }
3259
3260   if (coding->eol_type == CODING_EOL_CRLF)
3261     {
3262       while (src < src_end)
3263         {
3264           src_base = src;
3265           c = *src++;
3266           if (c >= 0x20)
3267             EMIT_ONE_BYTE (c);
3268           else if (c == '\n' || (c == '\r' && selective_display))
3269             EMIT_TWO_BYTES ('\r', '\n');
3270           else
3271             EMIT_ONE_BYTE (c);
3272         }
3273       src_base = src;
3274     label_end_of_loop:
3275       ;
3276     }
3277   else
3278     {
3279       if (!dst_bytes || src_bytes <= dst_bytes)
3280         {
3281           safe_bcopy (src, dst, src_bytes);
3282           src_base = src_end;
3283           dst += src_bytes;
3284         }
3285       else
3286         {
3287           if (coding->src_multibyte
3288               && *(src + dst_bytes - 1) == LEADING_CODE_8_BIT_CONTROL)
3289             dst_bytes--;
3290           safe_bcopy (src, dst, dst_bytes);
3291           src_base = src + dst_bytes;
3292           dst = destination + dst_bytes;
3293           coding->result = CODING_FINISH_INSUFFICIENT_DST;
3294         }
3295       if (coding->eol_type == CODING_EOL_CR)
3296         {
3297           for (src = destination; src < dst; src++)
3298             if (*src == '\n') *src = '\r';
3299         }
3300       else if (selective_display)
3301         {
3302           for (src = destination; src < dst; src++)
3303             if (*src == '\r') *src = '\n';
3304         }
3305     }
3306   if (coding->src_multibyte)
3307     dst = destination + str_as_unibyte (destination, dst - destination);
3308
3309   coding->consumed = src_base - source;
3310   coding->produced = dst - destination;
3311   coding->produced_char = coding->produced;
3312 }
3313
3314 \f
3315 /*** 7. C library functions ***/
3316
3317 /* In Emacs Lisp, a coding system is represented by a Lisp symbol which
3318    has a property `coding-system'.  The value of this property is a
3319    vector of length 5 (called the coding-vector).  Among elements of
3320    this vector, the first (element[0]) and the fifth (element[4])
3321    carry important information for decoding/encoding.  Before
3322    decoding/encoding, this information should be set in fields of a
3323    structure of type `coding_system'.
3324
3325    The value of the property `coding-system' can be a symbol of another
3326    subsidiary coding-system.  In that case, Emacs gets coding-vector
3327    from that symbol.
3328
3329    `element[0]' contains information to be set in `coding->type'.  The
3330    value and its meaning is as follows:
3331
3332    0 -- coding_type_emacs_mule
3333    1 -- coding_type_sjis
3334    2 -- coding_type_iso2022
3335    3 -- coding_type_big5
3336    4 -- coding_type_ccl encoder/decoder written in CCL
3337    nil -- coding_type_no_conversion
3338    t -- coding_type_undecided (automatic conversion on decoding,
3339                                no-conversion on encoding)
3340
3341    `element[4]' contains information to be set in `coding->flags' and
3342    `coding->spec'.  The meaning varies by `coding->type'.
3343
3344    If `coding->type' is `coding_type_iso2022', element[4] is a vector
3345    of length 32 (of which the first 13 sub-elements are used now).
3346    Meanings of these sub-elements are:
3347
3348    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
3349         If the value is an integer of valid charset, the charset is
3350         assumed to be designated to graphic register N initially.
3351
3352         If the value is minus, it is a minus value of charset which
3353         reserves graphic register N, which means that the charset is
3354         not designated initially but should be designated to graphic
3355         register N just before encoding a character in that charset.
3356
3357         If the value is nil, graphic register N is never used on
3358         encoding.
3359
3360    sub-element[N] where N is 4 through 11: to be set in `coding->flags'
3361         Each value takes t or nil.  See the section ISO2022 of
3362         `coding.h' for more information.
3363
3364    If `coding->type' is `coding_type_big5', element[4] is t to denote
3365    BIG5-ETen or nil to denote BIG5-HKU.
3366
3367    If `coding->type' takes the other value, element[4] is ignored.
3368
3369    Emacs Lisp's coding systems also carry information about format of
3370    end-of-line in a value of property `eol-type'.  If the value is
3371    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
3372    means CODING_EOL_CR.  If it is not integer, it should be a vector
3373    of subsidiary coding systems of which property `eol-type' has one
3374    of the above values.
3375
3376 */
3377
3378 /* Extract information for decoding/encoding from CODING_SYSTEM_SYMBOL
3379    and set it in CODING.  If CODING_SYSTEM_SYMBOL is invalid, CODING
3380    is setup so that no conversion is necessary and return -1, else
3381    return 0.  */
3382
3383 int
3384 setup_coding_system (coding_system, coding)
3385      Lisp_Object coding_system;
3386      struct coding_system *coding;
3387 {
3388   Lisp_Object coding_spec, coding_type, eol_type, plist;
3389   Lisp_Object val;
3390
3391   /* At first, zero clear all members.  */
3392   bzero (coding, sizeof (struct coding_system));
3393
3394   /* Initialize some fields required for all kinds of coding systems.  */
3395   coding->symbol = coding_system;
3396   coding->heading_ascii = -1;
3397   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
3398   coding->composing = COMPOSITION_DISABLED;
3399   coding->cmp_data = NULL;
3400
3401   if (NILP (coding_system))
3402     goto label_invalid_coding_system;
3403
3404   coding_spec = Fget (coding_system, Qcoding_system);
3405
3406   if (!VECTORP (coding_spec)
3407       || XVECTOR (coding_spec)->size != 5
3408       || !CONSP (XVECTOR (coding_spec)->contents[3]))
3409     goto label_invalid_coding_system;
3410
3411   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
3412   if (VECTORP (eol_type))
3413     {
3414       coding->eol_type = CODING_EOL_UNDECIDED;
3415       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
3416     }
3417   else if (XFASTINT (eol_type) == 1)
3418     {
3419       coding->eol_type = CODING_EOL_CRLF;
3420       coding->common_flags
3421         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3422     }
3423   else if (XFASTINT (eol_type) == 2)
3424     {
3425       coding->eol_type = CODING_EOL_CR;
3426       coding->common_flags
3427         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3428     }
3429   else
3430     coding->eol_type = CODING_EOL_LF;
3431
3432   coding_type = XVECTOR (coding_spec)->contents[0];
3433   /* Try short cut.  */
3434   if (SYMBOLP (coding_type))
3435     {
3436       if (EQ (coding_type, Qt))
3437         {
3438           coding->type = coding_type_undecided;
3439           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
3440         }
3441       else
3442         coding->type = coding_type_no_conversion;
3443       /* Initialize this member.  Any thing other than
3444          CODING_CATEGORY_IDX_UTF_16_BE and
3445          CODING_CATEGORY_IDX_UTF_16_LE are ok because they have
3446          special treatment in detect_eol.  */
3447       coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
3448
3449       return 0;
3450     }
3451
3452   /* Get values of coding system properties:
3453      `post-read-conversion', `pre-write-conversion',
3454      `translation-table-for-decode', `translation-table-for-encode'.  */
3455   plist = XVECTOR (coding_spec)->contents[3];
  /* Pre & post conversion functions should be disabled if
     inhibit_pre_post_conversion is nonzero.  This is the case that a
     code conversion function is called while those functions are
     running.  */
3459   if (! inhibit_pre_post_conversion)
3460     {
3461       coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
3462       coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
3463     }
3464   val = Fplist_get (plist, Qtranslation_table_for_decode);
3465   if (SYMBOLP (val))
3466     val = Fget (val, Qtranslation_table_for_decode);
3467   coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
3468   val = Fplist_get (plist, Qtranslation_table_for_encode);
3469   if (SYMBOLP (val))
3470     val = Fget (val, Qtranslation_table_for_encode);
3471   coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
3472   val = Fplist_get (plist, Qcoding_category);
3473   if (!NILP (val))
3474     {
3475       val = Fget (val, Qcoding_category_index);
3476       if (INTEGERP (val))
3477         coding->category_idx = XINT (val);
3478       else
3479         goto label_invalid_coding_system;
3480     }
3481   else
3482     goto label_invalid_coding_system;
3483
3484   /* If the coding system has non-nil `composition' property, enable
3485      composition handling.  */
3486   val = Fplist_get (plist, Qcomposition);
3487   if (!NILP (val))
3488     coding->composing = COMPOSITION_NO;
3489
3490   switch (XFASTINT (coding_type))
3491     {
3492     case 0:
3493       coding->type = coding_type_emacs_mule;
3494       coding->common_flags
3495         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3496       coding->composing = COMPOSITION_NO;
3497       if (!NILP (coding->post_read_conversion))
3498         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
3499       if (!NILP (coding->pre_write_conversion))
3500         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
3501       break;
3502
3503     case 1:
3504       coding->type = coding_type_sjis;
3505       coding->common_flags
3506         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3507       break;
3508
3509     case 2:
3510       coding->type = coding_type_iso2022;
3511       coding->common_flags
3512         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3513       {
3514         Lisp_Object val, temp;
3515         Lisp_Object *flags;
3516         int i, charset, reg_bits = 0;
3517
3518         val = XVECTOR (coding_spec)->contents[4];
3519
3520         if (!VECTORP (val) || XVECTOR (val)->size != 32)
3521           goto label_invalid_coding_system;
3522
3523         flags = XVECTOR (val)->contents;
3524         coding->flags
3525           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
3526              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
3527              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
3528              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
3529              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
3530              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
3531              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
3532              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
3533              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
3534              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
3535              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
3536              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
3537              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
3538              );
3539
3540         /* Invoke graphic register 0 to plane 0.  */
3541         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
3542         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
3543         CODING_SPEC_ISO_INVOCATION (coding, 1)
3544           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
3545         /* Not single shifting at first.  */
3546         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
3547         /* Beginning of buffer should also be regarded as bol. */
3548         CODING_SPEC_ISO_BOL (coding) = 1;
3549
3550         for (charset = 0; charset <= MAX_CHARSET; charset++)
3551           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
3552         val = Vcharset_revision_alist;
3553         while (CONSP (val))
3554           {
3555             charset = get_charset_id (Fcar_safe (XCAR (val)));
3556             if (charset >= 0
3557                 && (temp = Fcdr_safe (XCAR (val)), INTEGERP (temp))
3558                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
3559               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
3560             val = XCDR (val);
3561           }
3562
3563         /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
3564            FLAGS[REG] can be one of below:
3565                 integer CHARSET: CHARSET occupies register I,
3566                 t: designate nothing to REG initially, but can be used
3567                   by any charsets,
3568                 list of integer, nil, or t: designate the first
3569                   element (if integer) to REG initially, the remaining
3570                   elements (if integer) is designated to REG on request,
3571                   if an element is t, REG can be used by any charsets,
3572                 nil: REG is never used.  */
3573         for (charset = 0; charset <= MAX_CHARSET; charset++)
3574           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3575             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
3576         for (i = 0; i < 4; i++)
3577           {
3578             if (INTEGERP (flags[i])
3579                 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
3580                 || (charset = get_charset_id (flags[i])) >= 0)
3581               {
3582                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3583                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
3584               }
3585             else if (EQ (flags[i], Qt))
3586               {
3587                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3588                 reg_bits |= 1 << i;
3589                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3590               }
3591             else if (CONSP (flags[i]))
3592               {
3593                 Lisp_Object tail;
3594                 tail = flags[i];
3595
3596                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3597                 if (INTEGERP (XCAR (tail))
3598                     && (charset = XINT (XCAR (tail)),
3599                         CHARSET_VALID_P (charset))
3600                     || (charset = get_charset_id (XCAR (tail))) >= 0)
3601                   {
3602                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3603                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
3604                   }
3605                 else
3606                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3607                 tail = XCDR (tail);
3608                 while (CONSP (tail))
3609                   {
3610                     if (INTEGERP (XCAR (tail))
3611                         && (charset = XINT (XCAR (tail)),
3612                             CHARSET_VALID_P (charset))
3613                         || (charset = get_charset_id (XCAR (tail))) >= 0)
3614                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3615                         = i;
3616                     else if (EQ (XCAR (tail), Qt))
3617                       reg_bits |= 1 << i;
3618                     tail = XCDR (tail);
3619                   }
3620               }
3621             else
3622               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3623
3624             CODING_SPEC_ISO_DESIGNATION (coding, i)
3625               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
3626           }
3627
3628         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
3629           {
3630             /* REG 1 can be used only by locking shift in 7-bit env.  */
3631             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
3632               reg_bits &= ~2;
3633             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3634               /* Without any shifting, only REG 0 and 1 can be used.  */
3635               reg_bits &= 3;
3636           }
3637
3638         if (reg_bits)
3639           for (charset = 0; charset <= MAX_CHARSET; charset++)
3640             {
3641               if (CHARSET_VALID_P (charset)
3642                   && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3643                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
3644                 {
3645                   /* There exist some default graphic registers to be
3646                      used by CHARSET.  */
3647
3648                   /* We had better avoid designating a charset of
3649                      CHARS96 to REG 0 as far as possible.  */
3650                   if (CHARSET_CHARS (charset) == 96)
3651                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3652                       = (reg_bits & 2
3653                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3654                   else
3655                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3656                       = (reg_bits & 1
3657                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3658                 }
3659             }
3660       }
3661       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3662       coding->spec.iso2022.last_invalid_designation_register = -1;
3663       break;
3664
3665     case 3:
3666       coding->type = coding_type_big5;
3667       coding->common_flags
3668         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3669       coding->flags
3670         = (NILP (XVECTOR (coding_spec)->contents[4])
3671            ? CODING_FLAG_BIG5_HKU
3672            : CODING_FLAG_BIG5_ETEN);
3673       break;
3674
3675     case 4:
3676       coding->type = coding_type_ccl;
3677       coding->common_flags
3678         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3679       {
3680         val = XVECTOR (coding_spec)->contents[4];
3681         if (! CONSP (val)
3682             || setup_ccl_program (&(coding->spec.ccl.decoder),
3683                                   XCAR (val)) < 0
3684             || setup_ccl_program (&(coding->spec.ccl.encoder),
3685                                   XCDR (val)) < 0)
3686           goto label_invalid_coding_system;
3687
3688         bzero (coding->spec.ccl.valid_codes, 256);
3689         val = Fplist_get (plist, Qvalid_codes);
3690         if (CONSP (val))
3691           {
3692             Lisp_Object this;
3693
3694             for (; CONSP (val); val = XCDR (val))
3695               {
3696                 this = XCAR (val);
3697                 if (INTEGERP (this)
3698                     && XINT (this) >= 0 && XINT (this) < 256)
3699                   coding->spec.ccl.valid_codes[XINT (this)] = 1;
3700                 else if (CONSP (this)
3701                          && INTEGERP (XCAR (this))
3702                          && INTEGERP (XCDR (this)))
3703                   {
3704                     int start = XINT (XCAR (this));
3705                     int end = XINT (XCDR (this));
3706
3707                     if (start >= 0 && start <= end && end < 256)
3708                       while (start <= end)
3709                         coding->spec.ccl.valid_codes[start++] = 1;
3710                   }
3711               }
3712           }
3713       }
3714       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3715       coding->spec.ccl.cr_carryover = 0;
3716       coding->spec.ccl.eight_bit_carryover[0] = 0;
3717       break;
3718
3719     case 5:
3720       coding->type = coding_type_raw_text;
3721       break;
3722
3723     default:
3724       goto label_invalid_coding_system;
3725     }
3726   return 0;
3727
3728  label_invalid_coding_system:
3729   coding->type = coding_type_no_conversion;
3730   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3731   coding->common_flags = 0;
3732   coding->eol_type = CODING_EOL_LF;
3733   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3734   return -1;
3735 }
3736
3737 /* Free memory blocks allocated for storing composition information.  */
3738
3739 void
3740 coding_free_composition_data (coding)
3741      struct coding_system *coding;
3742 {
3743   struct composition_data *cmp_data = coding->cmp_data, *next;
3744
3745   if (!cmp_data)
3746     return;
3747   /* Memory blocks are chained.  At first, rewind to the first, then,
3748      free blocks one by one.  */
3749   while (cmp_data->prev)
3750     cmp_data = cmp_data->prev;
3751   while (cmp_data)
3752     {
3753       next = cmp_data->next;
3754       xfree (cmp_data);
3755       cmp_data = next;
3756     }
3757   coding->cmp_data = NULL;
3758 }
3759
3760 /* Set `char_offset' member of all memory blocks pointed by
3761    coding->cmp_data to POS.  */
3762
3763 void
3764 coding_adjust_composition_offset (coding, pos)
3765      struct coding_system *coding;
3766      int pos;
3767 {
3768   struct composition_data *cmp_data;
3769
3770   for (cmp_data = coding->cmp_data; cmp_data; cmp_data = cmp_data->next)
3771     cmp_data->char_offset = pos;
3772 }
3773
3774 /* Setup raw-text or one of its subsidiaries in the structure
3775    coding_system CODING according to the already setup value eol_type
3776    in CODING.  CODING should be setup for some coding system in
3777    advance.  */
3778
3779 void
3780 setup_raw_text_coding_system (coding)
3781      struct coding_system *coding;
3782 {
3783   if (coding->type != coding_type_raw_text)
3784     {
3785       coding->symbol = Qraw_text;
3786       coding->type = coding_type_raw_text;
3787       if (coding->eol_type != CODING_EOL_UNDECIDED)
3788         {
3789           Lisp_Object subsidiaries;
3790           subsidiaries = Fget (Qraw_text, Qeol_type);
3791
3792           if (VECTORP (subsidiaries)
3793               && XVECTOR (subsidiaries)->size == 3)
3794             coding->symbol
3795               = XVECTOR (subsidiaries)->contents[coding->eol_type];
3796         }
3797       setup_coding_system (coding->symbol, coding);
3798     }
3799   return;
3800 }
3801
3802 /* Emacs has a mechanism to automatically detect a coding system if it
3803    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
3804    it's impossible to distinguish some coding systems accurately
3805    because they use the same range of codes.  So, at first, coding
3806    systems are grouped into the following categories:
3807
3808    o coding-category-emacs-mule
3809
3810         The category for a coding system which has the same code range
3811         as Emacs' internal format.  Assigned the coding-system (Lisp
3812         symbol) `emacs-mule' by default.
3813
3814    o coding-category-sjis
3815
3816         The category for a coding system which has the same code range
3817         as SJIS.  Assigned the coding-system (Lisp
3818         symbol) `japanese-shift-jis' by default.
3819
3820    o coding-category-iso-7
3821
3822         The category for a coding system which has the same code range
3823         as ISO2022 of 7-bit environment.  This doesn't use any locking
3824         shift and single shift functions.  This can encode/decode all
3825         charsets.  Assigned the coding-system (Lisp symbol)
3826         `iso-2022-7bit' by default.
3827
3828    o coding-category-iso-7-tight
3829
3830         Same as coding-category-iso-7 except that this can
3831         encode/decode only the specified charsets.
3832
3833    o coding-category-iso-8-1
3834
3835         The category for a coding system which has the same code range
3836         as ISO2022 of 8-bit environment and graphic plane 1 used only
3837         for DIMENSION1 charset.  This doesn't use any locking shift
3838         and single shift functions.  Assigned the coding-system (Lisp
3839         symbol) `iso-latin-1' by default.
3840
3841    o coding-category-iso-8-2
3842
3843         The category for a coding system which has the same code range
3844         as ISO2022 of 8-bit environment and graphic plane 1 used only
3845         for DIMENSION2 charset.  This doesn't use any locking shift
3846         and single shift functions.  Assigned the coding-system (Lisp
3847         symbol) `japanese-iso-8bit' by default.
3848
3849    o coding-category-iso-7-else
3850
3851         The category for a coding system which has the same code range
3852         as ISO2022 of 7-bit environment but uses locking shift or
3853         single shift functions.  Assigned the coding-system (Lisp
3854         symbol) `iso-2022-7bit-lock' by default.
3855
3856    o coding-category-iso-8-else
3857
3858         The category for a coding system which has the same code range
3859         as ISO2022 of 8-bit environment but uses locking shift or
3860         single shift functions.  Assigned the coding-system (Lisp
3861         symbol) `iso-2022-8bit-ss2' by default.
3862
3863    o coding-category-big5
3864
3865         The category for a coding system which has the same code range
3866         as BIG5.  Assigned the coding-system (Lisp symbol)
3867         `cn-big5' by default.
3868
3869    o coding-category-utf-8
3870
3871         The category for a coding system which has the same code range
3872         as UTF-8 (cf. RFC2279).  Assigned the coding-system (Lisp
3873         symbol) `utf-8' by default.
3874
3875    o coding-category-utf-16-be
3876
3877         The category for a coding system in which a text has an
3878         Unicode signature (cf. Unicode Standard) in the order of BIG
3879         endian at the head.  Assigned the coding-system (Lisp symbol)
3880         `utf-16-be' by default.
3881
3882    o coding-category-utf-16-le
3883
3884         The category for a coding system in which a text has an
3885         Unicode signature (cf. Unicode Standard) in the order of
3886         LITTLE endian at the head.  Assigned the coding-system (Lisp
3887         symbol) `utf-16-le' by default.
3888
3889    o coding-category-ccl
3890
3891         The category for a coding system of which encoder/decoder is
3892         written in CCL programs.  The default value is nil, i.e., no
3893         coding system is assigned.
3894
3895    o coding-category-binary
3896
3897         The category for a coding system not categorized in any of the
3898         above.  Assigned the coding-system (Lisp symbol)
3899         `no-conversion' by default.
3900
3901    Each of them is a Lisp symbol and the value is an actual
3902    `coding-system' (this is also a Lisp symbol) assigned by a user.
3903    What Emacs does actually is to detect a category of coding system.
3904    Then, it uses a `coding-system' assigned to it.  If Emacs can't
3905    decide a single possible category, it selects a category of the
3906    highest priority.  Priorities of categories are also specified by a
3907    user in a Lisp variable `coding-category-list'.
3908
3909 */
3910
/* Table indexed by byte value.  detect_coding_mask skips over leading
   bytes whose entry is nonzero, and rewrites the entries for
   ISO_CODE_SO, ISO_CODE_SI and ISO_CODE_ESC while it runs.  The other
   entries are presumably filled in by initialization code outside
   this section.  */
static int ascii_skip_code[256];
3913
3914 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
3915    If it detects possible coding systems, return an integer in which
3916    appropriate flag bits are set.  Flag bits are defined by macros
3917    CODING_CATEGORY_MASK_XXX in `coding.h'.  If PRIORITIES is non-NULL,
3918    it should point the table `coding_priorities'.  In that case, only
3919    the flag bit for a coding system of the highest priority is set in
3920    the returned value.  If MULTIBYTEP is nonzero, 8-bit codes of the
3921    range 0x80..0x9F are in multibyte form.
3922
3923    How many ASCII characters are at the head is returned as *SKIP.  */
3924
3925 static int
3926 detect_coding_mask (source, src_bytes, priorities, skip, multibytep)
3927      unsigned char *source;
3928      int src_bytes, *priorities, *skip;
3929      int multibytep;
3930 {
3931   register unsigned char c;
3932   unsigned char *src = source, *src_end = source + src_bytes;
3933   unsigned int mask, utf16_examined_p, iso2022_examined_p;
3934   int i;
3935
3936   /* At first, skip all ASCII characters and control characters except
3937      for three ISO2022 specific control characters.  */
3938   ascii_skip_code[ISO_CODE_SO] = 0;
3939   ascii_skip_code[ISO_CODE_SI] = 0;
3940   ascii_skip_code[ISO_CODE_ESC] = 0;
3941
3942  label_loop_detect_coding:
3943   while (src < src_end && ascii_skip_code[*src]) src++;
3944   *skip = src - source;
3945
3946   if (src >= src_end)
3947     /* We found nothing other than ASCII.  There's nothing to do.  */
3948     return 0;
3949
3950   c = *src;
3951   /* The text seems to be encoded in some multilingual coding system.
3952      Now, try to find in which coding system the text is encoded.  */
3953   if (c < 0x80)
3954     {
3955       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
3956       /* C is an ISO2022 specific control code of C0.  */
3957       mask = detect_coding_iso2022 (src, src_end, multibytep);
3958       if (mask == 0)
3959         {
3960           /* No valid ISO2022 code follows C.  Try again.  */
3961           src++;
3962           if (c == ISO_CODE_ESC)
3963             ascii_skip_code[ISO_CODE_ESC] = 1;
3964           else
3965             ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
3966           goto label_loop_detect_coding;
3967         }
3968       if (priorities)
3969         {
3970           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3971             {
3972               if (mask & priorities[i])
3973                 return priorities[i];
3974             }
3975           return CODING_CATEGORY_MASK_RAW_TEXT;
3976         }
3977     }
3978   else
3979     {
3980       int try;
3981
3982       if (multibytep && c == LEADING_CODE_8_BIT_CONTROL)
3983         c = *src++ - 0x20;
3984
3985       if (c < 0xA0)
3986         {
3987           /* C is the first byte of SJIS character code,
3988              or a leading-code of Emacs' internal format (emacs-mule),
3989              or the first byte of UTF-16.  */
3990           try = (CODING_CATEGORY_MASK_SJIS
3991                   | CODING_CATEGORY_MASK_EMACS_MULE
3992                   | CODING_CATEGORY_MASK_UTF_16_BE
3993                   | CODING_CATEGORY_MASK_UTF_16_LE);
3994
3995           /* Or, if C is a special latin extra code,
3996              or is an ISO2022 specific control code of C1 (SS2 or SS3),
3997              or is an ISO2022 control-sequence-introducer (CSI),
3998              we should also consider the possibility of ISO2022 codings.  */
3999           if ((VECTORP (Vlatin_extra_code_table)
4000                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
4001               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
4002               || (c == ISO_CODE_CSI
4003                   && (src < src_end
4004                       && (*src == ']'
4005                           || ((*src == '0' || *src == '1' || *src == '2')
4006                               && src + 1 < src_end
4007                               && src[1] == ']')))))
4008             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
4009                      | CODING_CATEGORY_MASK_ISO_8BIT);
4010         }
4011       else
4012         /* C is a character of ISO2022 in graphic plane right,
4013            or a SJIS's 1-byte character code (i.e. JISX0201),
4014            or the first byte of BIG5's 2-byte code,
4015            or the first byte of UTF-8/16.  */
4016         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
4017                 | CODING_CATEGORY_MASK_ISO_8BIT
4018                 | CODING_CATEGORY_MASK_SJIS
4019                 | CODING_CATEGORY_MASK_BIG5
4020                 | CODING_CATEGORY_MASK_UTF_8
4021                 | CODING_CATEGORY_MASK_UTF_16_BE
4022                 | CODING_CATEGORY_MASK_UTF_16_LE);
4023
4024       /* Or, we may have to consider the possibility of CCL.  */
4025       if (coding_system_table[CODING_CATEGORY_IDX_CCL]
4026           && (coding_system_table[CODING_CATEGORY_IDX_CCL]
4027               ->spec.ccl.valid_codes)[c])
4028         try |= CODING_CATEGORY_MASK_CCL;
4029
4030       mask = 0;
4031       utf16_examined_p = iso2022_examined_p = 0;
4032       if (priorities)
4033         {
4034           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
4035             {
4036               if (!iso2022_examined_p
4037                   && (priorities[i] & try & CODING_CATEGORY_MASK_ISO))
4038                 {
4039                   mask |= detect_coding_iso2022 (src, src_end);
4040                   iso2022_examined_p = 1;
4041                 }
4042               else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
4043                 mask |= detect_coding_sjis (src, src_end, multibytep);
4044               else if (priorities[i] & try & CODING_CATEGORY_MASK_UTF_8)
4045                 mask |= detect_coding_utf_8 (src, src_end, multibytep);
4046               else if (!utf16_examined_p
4047                        && (priorities[i] & try &
4048                            CODING_CATEGORY_MASK_UTF_16_BE_LE))
4049                 {
4050                   mask |= detect_coding_utf_16 (src, src_end, multibytep);
4051                   utf16_examined_p = 1;
4052                 }
4053               else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
4054                 mask |= detect_coding_big5 (src, src_end, multibytep);
4055               else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
4056                 mask |= detect_coding_emacs_mule (src, src_end, multibytep);
4057               else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
4058                 mask |= detect_coding_ccl (src, src_end, multibytep);
4059               else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
4060                 mask |= CODING_CATEGORY_MASK_RAW_TEXT;
4061               else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
4062                 mask |= CODING_CATEGORY_MASK_BINARY;
4063               if (mask & priorities[i])
4064                 return priorities[i];
4065             }
4066           return CODING_CATEGORY_MASK_RAW_TEXT;
4067         }
4068       if (try & CODING_CATEGORY_MASK_ISO)
4069         mask |= detect_coding_iso2022 (src, src_end, multibytep);
4070       if (try & CODING_CATEGORY_MASK_SJIS)
4071         mask |= detect_coding_sjis (src, src_end, multibytep);
4072       if (try & CODING_CATEGORY_MASK_BIG5)
4073         mask |= detect_coding_big5 (src, src_end, multibytep);
4074       if (try & CODING_CATEGORY_MASK_UTF_8)
4075         mask |= detect_coding_utf_8 (src, src_end, multibytep);
4076       if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE)
4077         mask |= detect_coding_utf_16 (src, src_end, multibytep);
4078       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
4079         mask |= detect_coding_emacs_mule (src, src_end, multibytep);
4080       if (try & CODING_CATEGORY_MASK_CCL)
4081         mask |= detect_coding_ccl (src, src_end, multibytep);
4082     }
4083   return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
4084 }
4085
4086 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
4087    The information of the detected coding system is set in CODING.  */
4088
4089 void
4090 detect_coding (coding, src, src_bytes)
4091      struct coding_system *coding;
4092      unsigned char *src;
4093      int src_bytes;
4094 {
4095   unsigned int idx;
4096   int skip, mask;
4097   Lisp_Object val;
4098
4099   val = Vcoding_category_list;
4100   mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip,
4101                              coding->src_multibyte);
4102   coding->heading_ascii = skip;
4103
4104   if (!mask) return;
4105
4106   /* We found a single coding system of the highest priority in MASK.  */
4107   idx = 0;
4108   while (mask && ! (mask & 1)) mask >>= 1, idx++;
4109   if (! mask)
4110     idx = CODING_CATEGORY_IDX_RAW_TEXT;
4111
4112   val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[idx])->value;
4113
4114   if (coding->eol_type != CODING_EOL_UNDECIDED)
4115     {
4116       Lisp_Object tmp;
4117
4118       tmp = Fget (val, Qeol_type);
4119       if (VECTORP (tmp))
4120         val = XVECTOR (tmp)->contents[coding->eol_type];
4121     }
4122
4123   /* Setup this new coding system while preserving some slots.  */
4124   {
4125     int src_multibyte = coding->src_multibyte;
4126     int dst_multibyte = coding->dst_multibyte;
4127
4128     setup_coding_system (val, coding);
4129     coding->src_multibyte = src_multibyte;
4130     coding->dst_multibyte = dst_multibyte;
4131     coding->heading_ascii = skip;
4132   }
4133 }
4134
4135 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
4136    SOURCE is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
4137    CODING_EOL_CR, and CODING_EOL_UNDECIDED.
4138
4139    How many non-eol characters are at the head is returned as *SKIP.  */
4140
4141 #define MAX_EOL_CHECK_COUNT 3
4142
4143 static int
4144 detect_eol_type (source, src_bytes, skip)
4145      unsigned char *source;
4146      int src_bytes, *skip;
4147 {
4148   unsigned char *src = source, *src_end = src + src_bytes;
4149   unsigned char c;
4150   int total = 0;                /* How many end-of-lines are found so far.  */
4151   int eol_type = CODING_EOL_UNDECIDED;
4152   int this_eol_type;
4153
4154   *skip = 0;
4155
4156   while (src < src_end && total < MAX_EOL_CHECK_COUNT)
4157     {
4158       c = *src++;
4159       if (c == '\n' || c == '\r')
4160         {
4161           if (*skip == 0)
4162             *skip = src - 1 - source;
4163           total++;
4164           if (c == '\n')
4165             this_eol_type = CODING_EOL_LF;
4166           else if (src >= src_end || *src != '\n')
4167             this_eol_type = CODING_EOL_CR;
4168           else
4169             this_eol_type = CODING_EOL_CRLF, src++;
4170
4171           if (eol_type == CODING_EOL_UNDECIDED)
4172             /* This is the first end-of-line.  */
4173             eol_type = this_eol_type;
4174           else if (eol_type != this_eol_type)
4175             {
4176               /* The found type is different from what found before.  */
4177               eol_type = CODING_EOL_INCONSISTENT;
4178               break;
4179             }
4180         }
4181     }
4182
4183   if (*skip == 0)
4184     *skip = src_end - source;
4185   return eol_type;
4186 }
4187
4188 /* Like detect_eol_type, but detect EOL type in 2-octet
4189    big-endian/little-endian format for coding systems utf-16-be and
4190    utf-16-le.  */
4191
4192 static int
4193 detect_eol_type_in_2_octet_form (source, src_bytes, skip, big_endian_p)
4194      unsigned char *source;
4195      int src_bytes, *skip, big_endian_p;
4196 {
4197   unsigned char *src = source, *src_end = src + src_bytes;
4198   unsigned int c1, c2;
4199   int total = 0;                /* How many end-of-lines are found so far.  */
4200   int eol_type = CODING_EOL_UNDECIDED;
4201   int this_eol_type;
4202   int msb, lsb;
4203
4204   if (big_endian_p)
4205     msb = 0, lsb = 1;
4206   else
4207     msb = 1, lsb = 0;
4208
4209   *skip = 0;
4210
4211   while ((src + 1) < src_end && total < MAX_EOL_CHECK_COUNT)
4212     {
4213       c1 = (src[msb] << 8) | (src[lsb]);
4214       src += 2;
4215
4216       if (c1 == '\n' || c1 == '\r')
4217         {
4218           if (*skip == 0)
4219             *skip = src - 2 - source;
4220           total++;
4221           if (c1 == '\n')
4222             {
4223               this_eol_type = CODING_EOL_LF;
4224             }
4225           else
4226             {
4227               if ((src + 1) >= src_end)
4228                 {
4229                   this_eol_type = CODING_EOL_CR;
4230                 }
4231               else
4232                 {
4233                   c2 = (src[msb] << 8) | (src[lsb]);
4234                   if (c2 == '\n')
4235                     this_eol_type = CODING_EOL_CRLF, src += 2;
4236                   else
4237                     this_eol_type = CODING_EOL_CR;
4238                 }
4239             }
4240
4241           if (eol_type == CODING_EOL_UNDECIDED)
4242             /* This is the first end-of-line.  */
4243             eol_type = this_eol_type;
4244           else if (eol_type != this_eol_type)
4245             {
4246               /* The found type is different from what found before.  */
4247               eol_type = CODING_EOL_INCONSISTENT;
4248               break;
4249             }
4250         }
4251     }
4252
4253   if (*skip == 0)
4254     *skip = src_end - source;
4255   return eol_type;
4256 }
4257
4258 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
4259    is encoded.  If it detects an appropriate format of end-of-line, it
4260    sets the information in *CODING.  */
4261
4262 void
4263 detect_eol (coding, src, src_bytes)
4264      struct coding_system *coding;
4265      unsigned char *src;
4266      int src_bytes;
4267 {
4268   Lisp_Object val;
4269   int skip;
4270   int eol_type;
4271
4272   switch (coding->category_idx)
4273     {
4274     case CODING_CATEGORY_IDX_UTF_16_BE:
4275       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 1);
4276       break;
4277     case CODING_CATEGORY_IDX_UTF_16_LE:
4278       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 0);
4279       break;
4280     default:
4281       eol_type = detect_eol_type (src, src_bytes, &skip);
4282       break;
4283     }
4284
4285   if (coding->heading_ascii > skip)
4286     coding->heading_ascii = skip;
4287   else
4288     skip = coding->heading_ascii;
4289
4290   if (eol_type == CODING_EOL_UNDECIDED)
4291     return;
4292   if (eol_type == CODING_EOL_INCONSISTENT)
4293     {
4294 #if 0
4295       /* This code is suppressed until we find a better way to
4296          distinguish raw text file and binary file.  */
4297
4298       /* If we have already detected that the coding is raw-text, the
4299          coding should actually be no-conversion.  */
4300       if (coding->type == coding_type_raw_text)
4301         {
4302           setup_coding_system (Qno_conversion, coding);
4303           return;
4304         }
4305       /* Else, let's decode only text code anyway.  */
4306 #endif /* 0 */
4307       eol_type = CODING_EOL_LF;
4308     }
4309
4310   val = Fget (coding->symbol, Qeol_type);
4311   if (VECTORP (val) && XVECTOR (val)->size == 3)
4312     {
4313       int src_multibyte = coding->src_multibyte;
4314       int dst_multibyte = coding->dst_multibyte;
4315
4316       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
4317       coding->src_multibyte = src_multibyte;
4318       coding->dst_multibyte = dst_multibyte;
4319       coding->heading_ascii = skip;
4320     }
4321 }
4322
/* Slack added to every conversion buffer size estimate.  */
#define CONVERSION_BUFFER_EXTRA_ROOM 256

/* Factor by which decoding may expand text encoded in CODING (a
   pointer to struct coding_system): 3 for ISO2022, the decoder's own
   buf_magnification for CCL, and 2 otherwise.  CODING is
   parenthesized so that any pointer expression may be passed; note
   that it may be evaluated more than once.  */
#define DECODING_BUFFER_MAG(coding)                             \
  ((coding)->type == coding_type_iso2022                        \
   ? 3                                                          \
   : ((coding)->type == coding_type_ccl                         \
      ? (coding)->spec.ccl.decoder.buf_magnification            \
      : 2))
4331
4332 /* Return maximum size (bytes) of a buffer enough for decoding
4333    SRC_BYTES of text encoded in CODING.  */
4334
4335 int
4336 decoding_buffer_size (coding, src_bytes)
4337      struct coding_system *coding;
4338      int src_bytes;
4339 {
4340   return (src_bytes * DECODING_BUFFER_MAG (coding)
4341           + CONVERSION_BUFFER_EXTRA_ROOM);
4342 }
4343
4344 /* Return maximum size (bytes) of a buffer enough for encoding
4345    SRC_BYTES of text to CODING.  */
4346
4347 int
4348 encoding_buffer_size (coding, src_bytes)
4349      struct coding_system *coding;
4350      int src_bytes;
4351 {
4352   int magnification;
4353
4354   if (coding->type == coding_type_ccl)
4355     magnification = coding->spec.ccl.encoder.buf_magnification;
4356   else if (CODING_REQUIRE_ENCODING (coding))
4357     magnification = 3;
4358   else
4359     magnification = 1;
4360
4361   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
4362 }
4363
/* Scratch buffer used while converting text.  Set up by
   allocate_conversion_buffer, grown by extend_conversion_buffer and
   released by free_conversion_buffer.  */
struct conversion_buffer
{
  /* Number of bytes of storage at DATA.  */
  int size;
  /* 1 if DATA was obtained from alloca, 0 if it is heap memory.  */
  int on_stack;
  /* Start of the storage.  */
  unsigned char *data;
};
4371
/* Don't use alloca for allocating memory space larger than this, lest
   we overflow their stack.  The value is parenthesized so that it
   behaves as a single operand wherever it is used.  */
#define MAX_ALLOCA (16*1024)

/* Allocate LEN bytes of memory for BUF (struct conversion_buffer, not
   a pointer to one).  Requests smaller than MAX_ALLOCA come from
   alloca, so this must be expanded in the function that owns the
   buffer; larger requests come from xmalloc.  BUF.on_stack records
   which one was used.  LEN is evaluated more than once, so it must
   not have side effects.  */
#define allocate_conversion_buffer(buf, len)            \
  do {                                                  \
    if ((len) < MAX_ALLOCA)                             \
      {                                                 \
        (buf).data = (unsigned char *) alloca (len);    \
        (buf).on_stack = 1;                             \
      }                                                 \
    else                                                \
      {                                                 \
        (buf).data = (unsigned char *) xmalloc (len);   \
        (buf).on_stack = 0;                             \
      }                                                 \
    (buf).size = (len);                                 \
  } while (0)
4391
4392 /* Double the allocated memory for *BUF.  */
4393 static void
4394 extend_conversion_buffer (buf)
4395      struct conversion_buffer *buf;
4396 {
4397   if (buf->on_stack)
4398     {
4399       unsigned char *save = buf->data;
4400       buf->data = (unsigned char *) xmalloc (buf->size * 2);
4401       bcopy (save, buf->data, buf->size);
4402       buf->on_stack = 0;
4403     }
4404   else
4405     {
4406       buf->data = (unsigned char *) xrealloc (buf->data, buf->size * 2);
4407     }
4408   buf->size *= 2;
4409 }
4410
4411 /* Free the allocated memory for BUF if it is not on stack.  */
4412 static void
4413 free_conversion_buffer (buf)
4414      struct conversion_buffer *buf;
4415 {
4416   if (!buf->on_stack)
4417     xfree (buf->data);
4418 }
4419
4420 int
4421 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
4422      struct coding_system *coding;
4423      unsigned char *source, *destination;
4424      int src_bytes, dst_bytes, encodep;
4425 {
4426   struct ccl_program *ccl
4427     = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
4428   unsigned char *dst = destination;
4429
4430   ccl->suppress_error = coding->suppress_error;
4431   ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4432   if (encodep)
4433     {
4434       /* On encoding, EOL format is converted within ccl_driver.  For
4435          that, setup proper information in the structure CCL.  */
4436       ccl->eol_type = coding->eol_type;
4437       if (ccl->eol_type ==CODING_EOL_UNDECIDED)
4438         ccl->eol_type = CODING_EOL_LF;
4439       ccl->cr_consumed = coding->spec.ccl.cr_carryover;
4440     }
4441   ccl->multibyte = coding->src_multibyte;
4442   if (coding->spec.ccl.eight_bit_carryover[0] != 0)
4443     {
4444       /* Move carryover bytes to DESTINATION.  */
4445       unsigned char *p = coding->spec.ccl.eight_bit_carryover;
4446       while (*p)
4447         *dst++ = *p++;
4448       coding->spec.ccl.eight_bit_carryover[0] = 0;
4449       if (dst_bytes)
4450         dst_bytes -= dst - destination;
4451     }
4452
4453   coding->produced = (ccl_driver (ccl, source, dst, src_bytes, dst_bytes,
4454                                   &(coding->consumed))
4455                       + dst - destination);
4456
4457   if (encodep)
4458     {
4459       coding->produced_char = coding->produced;
4460       coding->spec.ccl.cr_carryover = ccl->cr_consumed;
4461     }
4462   else
4463     {
4464       /* On decoding, the destination should always multibyte.  But,
4465          CCL program might have been generated an invalid multibyte
4466          sequence.  Here we make such a sequence valid as
4467          multibyte.  */
4468       int bytes
4469         = dst_bytes ? dst_bytes : source + coding->consumed - destination;
4470
4471       if ((coding->consumed < src_bytes
4472            || !ccl->last_block)
4473           && coding->produced >= 1
4474           && destination[coding->produced - 1] >= 0x80)
4475         {
4476           /* We should not convert the tailing 8-bit codes to
4477              multibyte form even if they doesn't form a valid
4478              multibyte sequence.  They may form a valid sequence in
4479              the next call.  */
4480           int carryover = 0;
4481
4482           if (destination[coding->produced - 1] < 0xA0)
4483             carryover = 1;
4484           else if (coding->produced >= 2)
4485             {
4486               if (destination[coding->produced - 2] >= 0x80)
4487                 {
4488                   if (destination[coding->produced - 2] < 0xA0)
4489                     carryover = 2;
4490                   else if (coding->produced >= 3
4491                            && destination[coding->produced - 3] >= 0x80
4492                            && destination[coding->produced - 3] < 0xA0)
4493                     carryover = 3;
4494                 }
4495             }
4496           if (carryover > 0)
4497             {
4498               BCOPY_SHORT (destination + coding->produced - carryover,
4499                            coding->spec.ccl.eight_bit_carryover,
4500                            carryover);
4501               coding->spec.ccl.eight_bit_carryover[carryover] = 0;
4502               coding->produced -= carryover;
4503             }
4504         }
4505       coding->produced = str_as_multibyte (destination, bytes,
4506                                            coding->produced,
4507                                            &(coding->produced_char));
4508     }
4509
4510   switch (ccl->status)
4511     {
4512     case CCL_STAT_SUSPEND_BY_SRC:
4513       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
4514       break;
4515     case CCL_STAT_SUSPEND_BY_DST:
4516       coding->result = CODING_FINISH_INSUFFICIENT_DST;
4517       break;
4518     case CCL_STAT_QUIT:
4519     case CCL_STAT_INVALID_CMD:
4520       coding->result = CODING_FINISH_INTERRUPT;
4521       break;
4522     default:
4523       coding->result = CODING_FINISH_NORMAL;
4524       break;
4525     }
4526   return coding->result;
4527 }
4528
4529 /* Decode EOL format of the text at PTR of BYTES length destructively
4530    according to CODING->eol_type.  This is called after the CCL
4531    program produced a decoded text at PTR.  If we do CRLF->LF
4532    conversion, update CODING->produced and CODING->produced_char.  */
4533
4534 static void
4535 decode_eol_post_ccl (coding, ptr, bytes)
4536      struct coding_system *coding;
4537      unsigned char *ptr;
4538      int bytes;
4539 {
4540   Lisp_Object val, saved_coding_symbol;
4541   unsigned char *pend = ptr + bytes;
4542   int dummy;
4543
4544   /* Remember the current coding system symbol.  We set it back when
4545      an inconsistent EOL is found so that `last-coding-system-used' is
4546      set to the coding system that doesn't specify EOL conversion.  */
4547   saved_coding_symbol = coding->symbol;
4548
4549   coding->spec.ccl.cr_carryover = 0;
4550   if (coding->eol_type == CODING_EOL_UNDECIDED)
4551     {
4552       /* Here, to avoid the call of setup_coding_system, we directly
4553          call detect_eol_type.  */
4554       coding->eol_type = detect_eol_type (ptr, bytes, &dummy);
4555       if (coding->eol_type == CODING_EOL_INCONSISTENT)
4556         coding->eol_type = CODING_EOL_LF;
4557       if (coding->eol_type != CODING_EOL_UNDECIDED)
4558         {
4559           val = Fget (coding->symbol, Qeol_type);
4560           if (VECTORP (val) && XVECTOR (val)->size == 3)
4561             coding->symbol = XVECTOR (val)->contents[coding->eol_type];
4562         }
4563       coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4564     }
4565
4566   if (coding->eol_type == CODING_EOL_LF
4567       || coding->eol_type == CODING_EOL_UNDECIDED)
4568     {
4569       /* We have nothing to do.  */
4570       ptr = pend;
4571     }
4572   else if (coding->eol_type == CODING_EOL_CRLF)
4573     {
4574       unsigned char *pstart = ptr, *p = ptr;
4575
4576       if (! (coding->mode & CODING_MODE_LAST_BLOCK)
4577           && *(pend - 1) == '\r')
4578         {
4579           /* If the last character is CR, we can't handle it here
4580              because LF will be in the not-yet-decoded source text.
4581              Recorded that the CR is not yet processed.  */
4582           coding->spec.ccl.cr_carryover = 1;
4583           coding->produced--;
4584           coding->produced_char--;
4585           pend--;
4586         }
4587       while (ptr < pend)
4588         {
4589           if (*ptr == '\r')
4590             {
4591               if (ptr + 1 < pend && *(ptr + 1) == '\n')
4592                 {
4593                   *p++ = '\n';
4594                   ptr += 2;
4595                 }
4596               else
4597                 {
4598                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4599                     goto undo_eol_conversion;
4600                   *p++ = *ptr++;
4601                 }
4602             }
4603           else if (*ptr == '\n'
4604                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4605             goto undo_eol_conversion;
4606           else
4607             *p++ = *ptr++;
4608           continue;
4609
4610         undo_eol_conversion:
4611           /* We have faced with inconsistent EOL format at PTR.
4612              Convert all LFs before PTR back to CRLFs.  */
4613           for (p--, ptr--; p >= pstart; p--)
4614             {
4615               if (*p == '\n')
4616                 *ptr-- = '\n', *ptr-- = '\r';
4617               else
4618                 *ptr-- = *p;
4619             }
4620           /*  If carryover is recorded, cancel it because we don't
4621               convert CRLF anymore.  */
4622           if (coding->spec.ccl.cr_carryover)
4623             {
4624               coding->spec.ccl.cr_carryover = 0;
4625               coding->produced++;
4626               coding->produced_char++;
4627               pend++;
4628             }
4629           p = ptr = pend;
4630           coding->eol_type = CODING_EOL_LF;
4631           coding->symbol = saved_coding_symbol;
4632         }
4633       if (p < pend)
4634         {
4635           /* As each two-byte sequence CRLF was converted to LF, (PEND
4636              - P) is the number of deleted characters.  */
4637           coding->produced -= pend - p;
4638           coding->produced_char -= pend - p;
4639         }
4640     }
4641   else                  /* i.e. coding->eol_type == CODING_EOL_CR */
4642     {
4643       unsigned char *p = ptr;
4644
4645       for (; ptr < pend; ptr++)
4646         {
4647           if (*ptr == '\r')
4648             *ptr = '\n';
4649           else if (*ptr == '\n'
4650                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4651             {
4652               for (; p < ptr; p++)
4653                 {
4654                   if (*p == '\n')
4655                     *p = '\r';
4656                 }
4657               ptr = pend;
4658               coding->eol_type = CODING_EOL_LF;
4659               coding->symbol = saved_coding_symbol;
4660             }
4661         }
4662     }
4663 }
4664
4665 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
4666    decoding, it may detect coding system and format of end-of-line if
4667    those are not yet decided.  The source should be unibyte, the
4668    result is multibyte if CODING->dst_multibyte is nonzero, else
4669    unibyte.  */
4670
4671 int
4672 decode_coding (coding, source, destination, src_bytes, dst_bytes)
4673      struct coding_system *coding;
4674      unsigned char *source, *destination;
4675      int src_bytes, dst_bytes;
4676 {
4677   if (coding->type == coding_type_undecided)
4678     detect_coding (coding, source, src_bytes);
4679
4680   if (coding->eol_type == CODING_EOL_UNDECIDED
4681       && coding->type != coding_type_ccl)
4682     {
4683       detect_eol (coding, source, src_bytes);
4684       /* We had better recover the original eol format if we
4685          encounter an inconsitent eol format while decoding.  */
4686       coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4687     }
4688
4689   coding->produced = coding->produced_char = 0;
4690   coding->consumed = coding->consumed_char = 0;
4691   coding->errors = 0;
4692   coding->result = CODING_FINISH_NORMAL;
4693
4694   switch (coding->type)
4695     {
4696     case coding_type_sjis:
4697       decode_coding_sjis_big5 (coding, source, destination,
4698                                src_bytes, dst_bytes, 1);
4699       break;
4700
4701     case coding_type_iso2022:
4702       decode_coding_iso2022 (coding, source, destination,
4703                              src_bytes, dst_bytes);
4704       break;
4705
4706     case coding_type_big5:
4707       decode_coding_sjis_big5 (coding, source, destination,
4708                                src_bytes, dst_bytes, 0);
4709       break;
4710
4711     case coding_type_emacs_mule:
4712       decode_coding_emacs_mule (coding, source, destination,
4713                                 src_bytes, dst_bytes);
4714       break;
4715
4716     case coding_type_ccl:
4717       if (coding->spec.ccl.cr_carryover)
4718         {
4719           /* Set the CR which is not processed by the previous call of
4720              decode_eol_post_ccl in DESTINATION.  */
4721           *destination = '\r';
4722           coding->produced++;
4723           coding->produced_char++;
4724           dst_bytes--;
4725         }
4726       ccl_coding_driver (coding, source,
4727                          destination + coding->spec.ccl.cr_carryover,
4728                          src_bytes, dst_bytes, 0);
4729       if (coding->eol_type != CODING_EOL_LF)
4730         decode_eol_post_ccl (coding, destination, coding->produced);
4731       break;
4732
4733     default:
4734       decode_eol (coding, source, destination, src_bytes, dst_bytes);
4735     }
4736
4737   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
4738       && coding->mode & CODING_MODE_LAST_BLOCK
4739       && coding->consumed == src_bytes)
4740     coding->result = CODING_FINISH_NORMAL;
4741
4742   if (coding->mode & CODING_MODE_LAST_BLOCK
4743       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4744     {
4745       unsigned char *src = source + coding->consumed;
4746       unsigned char *dst = destination + coding->produced;
4747
4748       src_bytes -= coding->consumed;
4749       coding->errors++;
4750       if (COMPOSING_P (coding))
4751         DECODE_COMPOSITION_END ('1');
4752       while (src_bytes--)
4753         {
4754           int c = *src++;
4755           dst += CHAR_STRING (c, dst);
4756           coding->produced_char++;
4757         }
4758       coding->consumed = coding->consumed_char = src - source;
4759       coding->produced = dst - destination;
4760       coding->result = CODING_FINISH_NORMAL;
4761     }
4762
4763   if (!coding->dst_multibyte)
4764     {
4765       coding->produced = str_as_unibyte (destination, coding->produced);
4766       coding->produced_char = coding->produced;
4767     }
4768
4769   return coding->result;
4770 }
4771
4772 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  The
4773    multibyteness of the source is CODING->src_multibyte, the
4774    multibyteness of the result is always unibyte.  */
4775
4776 int
4777 encode_coding (coding, source, destination, src_bytes, dst_bytes)
4778      struct coding_system *coding;
4779      unsigned char *source, *destination;
4780      int src_bytes, dst_bytes;
4781 {
4782   coding->produced = coding->produced_char = 0;
4783   coding->consumed = coding->consumed_char = 0;
4784   coding->errors = 0;
4785   coding->result = CODING_FINISH_NORMAL;
4786
4787   switch (coding->type)
4788     {
4789     case coding_type_sjis:
4790       encode_coding_sjis_big5 (coding, source, destination,
4791                                src_bytes, dst_bytes, 1);
4792       break;
4793
4794     case coding_type_iso2022:
4795       encode_coding_iso2022 (coding, source, destination,
4796                              src_bytes, dst_bytes);
4797       break;
4798
4799     case coding_type_big5:
4800       encode_coding_sjis_big5 (coding, source, destination,
4801                                src_bytes, dst_bytes, 0);
4802       break;
4803
4804     case coding_type_emacs_mule:
4805       encode_coding_emacs_mule (coding, source, destination,
4806                                 src_bytes, dst_bytes);
4807       break;
4808
4809     case coding_type_ccl:
4810       ccl_coding_driver (coding, source, destination,
4811                          src_bytes, dst_bytes, 1);
4812       break;
4813
4814     default:
4815       encode_eol (coding, source, destination, src_bytes, dst_bytes);
4816     }
4817
4818   if (coding->mode & CODING_MODE_LAST_BLOCK
4819       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4820     {
4821       unsigned char *src = source + coding->consumed;
4822       unsigned char *src_end = src + src_bytes;
4823       unsigned char *dst = destination + coding->produced;
4824
4825       if (coding->type == coding_type_iso2022)
4826         ENCODE_RESET_PLANE_AND_REGISTER;
4827       if (COMPOSING_P (coding))
4828         *dst++ = ISO_CODE_ESC, *dst++ = '1';
4829       if (coding->consumed < src_bytes)
4830         {
4831           int len = src_bytes - coding->consumed;
4832
4833           BCOPY_SHORT (source + coding->consumed, dst, len);
4834           if (coding->src_multibyte)
4835             len = str_as_unibyte (dst, len);
4836           dst += len;
4837           coding->consumed = src_bytes;
4838         }
4839       coding->produced = coding->produced_char = dst - destination;
4840       coding->result = CODING_FINISH_NORMAL;
4841     }
4842
4843   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
4844       && coding->consumed == src_bytes)
4845     coding->result = CODING_FINISH_NORMAL;
4846
4847   return coding->result;
4848 }
4849
4850 /* Scan text in the region between *BEG and *END (byte positions),
4851    skip characters which we don't have to decode by coding system
4852    CODING at the head and tail, then set *BEG and *END to the region
4853    of the text we actually have to convert.  The caller should move
4854    the gap out of the region in advance if the region is from a
4855    buffer.
4856
4857    If STR is not NULL, *BEG and *END are indices into STR.  */
4858
4859 static void
4860 shrink_decoding_region (beg, end, coding, str)
4861      int *beg, *end;
4862      struct coding_system *coding;
4863      unsigned char *str;
4864 {
4865   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
4866   int eol_conversion;
4867   Lisp_Object translation_table;
4868
4869   if (coding->type == coding_type_ccl
4870       || coding->type == coding_type_undecided
4871       || coding->eol_type != CODING_EOL_LF
4872       || !NILP (coding->post_read_conversion)
4873       || coding->composing != COMPOSITION_DISABLED)
4874     {
4875       /* We can't skip any data.  */
4876       return;
4877     }
4878   if (coding->type == coding_type_no_conversion
4879       || coding->type == coding_type_raw_text
4880       || coding->type == coding_type_emacs_mule)
4881     {
4882       /* We need no conversion, but don't have to skip any data here.
4883          Decoding routine handles them effectively anyway.  */
4884       return;
4885     }
4886
4887   translation_table = coding->translation_table_for_decode;
4888   if (NILP (translation_table) && !NILP (Venable_character_translation))
4889     translation_table = Vstandard_translation_table_for_decode;
4890   if (CHAR_TABLE_P (translation_table))
4891     {
4892       int i;
4893       for (i = 0; i < 128; i++)
4894         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
4895           break;
4896       if (i < 128)
4897         /* Some ASCII character should be translated.  We give up
4898            shrinking.  */
4899         return;
4900     }
4901
4902   if (coding->heading_ascii >= 0)
4903     /* Detection routine has already found how much we can skip at the
4904        head.  */
4905     *beg += coding->heading_ascii;
4906
4907   if (str)
4908     {
4909       begp_orig = begp = str + *beg;
4910       endp_orig = endp = str + *end;
4911     }
4912   else
4913     {
4914       begp_orig = begp = BYTE_POS_ADDR (*beg);
4915       endp_orig = endp = begp + *end - *beg;
4916     }
4917
4918   eol_conversion = (coding->eol_type == CODING_EOL_CR
4919                     || coding->eol_type == CODING_EOL_CRLF);
4920
4921   switch (coding->type)
4922     {
4923     case coding_type_sjis:
4924     case coding_type_big5:
4925       /* We can skip all ASCII characters at the head.  */
4926       if (coding->heading_ascii < 0)
4927         {
4928           if (eol_conversion)
4929             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
4930           else
4931             while (begp < endp && *begp < 0x80) begp++;
4932         }
4933       /* We can skip all ASCII characters at the tail except for the
4934          second byte of SJIS or BIG5 code.  */
4935       if (eol_conversion)
4936         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
4937       else
4938         while (begp < endp && endp[-1] < 0x80) endp--;
4939       /* Do not consider LF as ascii if preceded by CR, since that
4940          confuses eol decoding. */
4941       if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4942         endp++;
4943       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
4944         endp++;
4945       break;
4946
4947     case coding_type_iso2022:
4948       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
4949         /* We can't skip any data.  */
4950         break;
4951       if (coding->heading_ascii < 0)
4952         {
4953           /* We can skip all ASCII characters at the head except for a
4954              few control codes.  */
4955           while (begp < endp && (c = *begp) < 0x80
4956                  && c != ISO_CODE_CR && c != ISO_CODE_SO
4957                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
4958                  && (!eol_conversion || c != ISO_CODE_LF))
4959             begp++;
4960         }
4961       switch (coding->category_idx)
4962         {
4963         case CODING_CATEGORY_IDX_ISO_8_1:
4964         case CODING_CATEGORY_IDX_ISO_8_2:
4965           /* We can skip all ASCII characters at the tail.  */
4966           if (eol_conversion)
4967             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
4968           else
4969             while (begp < endp && endp[-1] < 0x80) endp--;
4970           /* Do not consider LF as ascii if preceded by CR, since that
4971              confuses eol decoding. */
4972           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4973             endp++;
4974           break;
4975
4976         case CODING_CATEGORY_IDX_ISO_7:
4977         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
4978           {
4979             /* We can skip all charactes at the tail except for 8-bit
4980                codes and ESC and the following 2-byte at the tail.  */
4981             unsigned char *eight_bit = NULL;
4982
4983             if (eol_conversion)
4984               while (begp < endp
4985                      && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
4986                 {
4987                   if (!eight_bit && c & 0x80) eight_bit = endp;
4988                   endp--;
4989                 }
4990             else
4991               while (begp < endp
4992                      && (c = endp[-1]) != ISO_CODE_ESC)
4993                 {
4994                   if (!eight_bit && c & 0x80) eight_bit = endp;
4995                   endp--;
4996                 }
4997             /* Do not consider LF as ascii if preceded by CR, since that
4998                confuses eol decoding. */
4999             if (begp < endp && endp < endp_orig
5000                 && endp[-1] == '\r' && endp[0] == '\n')
5001               endp++;
5002             if (begp < endp && endp[-1] == ISO_CODE_ESC)
5003               {
5004                 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
5005                   /* This is an ASCII designation sequence.  We can
5006                      surely skip the tail.  But, if we have
5007                      encountered an 8-bit code, skip only the codes
5008                      after that.  */
5009                   endp = eight_bit ? eight_bit : endp + 2;
5010                 else
5011                   /* Hmmm, we can't skip the tail.  */
5012                   endp = endp_orig;
5013               }
5014             else if (eight_bit)
5015               endp = eight_bit;
5016           }
5017         }
5018       break;
5019
5020     default:
5021       abort ();
5022     }
5023   *beg += begp - begp_orig;
5024   *end += endp - endp_orig;
5025   return;
5026 }
5027
5028 /* Like shrink_decoding_region but for encoding.  */
5029
5030 static void
5031 shrink_encoding_region (beg, end, coding, str)
5032      int *beg, *end;
5033      struct coding_system *coding;
5034      unsigned char *str;
5035 {
5036   unsigned char *begp_orig, *begp, *endp_orig, *endp;
5037   int eol_conversion;
5038   Lisp_Object translation_table;
5039
5040   if (coding->type == coding_type_ccl
5041       || coding->eol_type == CODING_EOL_CRLF
5042       || coding->eol_type == CODING_EOL_CR
5043       || coding->cmp_data && coding->cmp_data->used > 0)
5044     {
5045       /* We can't skip any data.  */
5046       return;
5047     }
5048   if (coding->type == coding_type_no_conversion
5049       || coding->type == coding_type_raw_text
5050       || coding->type == coding_type_emacs_mule
5051       || coding->type == coding_type_undecided)
5052     {
5053       /* We need no conversion, but don't have to skip any data here.
5054          Encoding routine handles them effectively anyway.  */
5055       return;
5056     }
5057
5058   translation_table = coding->translation_table_for_encode;
5059   if (NILP (translation_table) && !NILP (Venable_character_translation))
5060     translation_table = Vstandard_translation_table_for_encode;
5061   if (CHAR_TABLE_P (translation_table))
5062     {
5063       int i;
5064       for (i = 0; i < 128; i++)
5065         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
5066           break;
5067       if (i < 128)
5068         /* Some ASCII character should be tranlsated.  We give up
5069            shrinking.  */
5070         return;
5071     }
5072
5073   if (str)
5074     {
5075       begp_orig = begp = str + *beg;
5076       endp_orig = endp = str + *end;
5077     }
5078   else
5079     {
5080       begp_orig = begp = BYTE_POS_ADDR (*beg);
5081       endp_orig = endp = begp + *end - *beg;
5082     }
5083
5084   eol_conversion = (coding->eol_type == CODING_EOL_CR
5085                     || coding->eol_type == CODING_EOL_CRLF);
5086
5087   /* Here, we don't have to check coding->pre_write_conversion because
5088      the caller is expected to have handled it already.  */
5089   switch (coding->type)
5090     {
5091     case coding_type_iso2022:
5092       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
5093         /* We can't skip any data.  */
5094         break;
5095       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
5096         {
5097           unsigned char *bol = begp;
5098           while (begp < endp && *begp < 0x80)
5099             {
5100               begp++;
5101               if (begp[-1] == '\n')
5102                 bol = begp;
5103             }
5104           begp = bol;
5105           goto label_skip_tail;
5106         }
5107       /* fall down ... */
5108
5109     case coding_type_sjis:
5110     case coding_type_big5:
5111       /* We can skip all ASCII characters at the head and tail.  */
5112       if (eol_conversion)
5113         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
5114       else
5115         while (begp < endp && *begp < 0x80) begp++;
5116     label_skip_tail:
5117       if (eol_conversion)
5118         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
5119       else
5120         while (begp < endp && *(endp - 1) < 0x80) endp--;
5121       break;
5122
5123     default:
5124       abort ();
5125     }
5126
5127   *beg += begp - begp_orig;
5128   *end += endp - endp_orig;
5129   return;
5130 }
5131
/* Shrinking a conversion region has some overhead of its own, so it
   is attempted only for regions longer than this many bytes.  */
static int shrink_conversion_region_threshhold = 1024;

/* Shrink the region between *BEG and *END for conversion by CODING,
   using shrink_encoding_region if ENCODEP is nonzero and
   shrink_decoding_region otherwise.  Nothing is done when the region
   is not longer than shrink_conversion_region_threshhold.  */
#define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep)        \
  do {                                                                  \
    if (*(end) - *(beg) <= shrink_conversion_region_threshhold)         \
      break;                                                            \
    if (encodep)                                                        \
      shrink_encoding_region (beg, end, coding, str);                   \
    else                                                                \
      shrink_decoding_region (beg, end, coding, str);                   \
  } while (0)
5145
5146 static Lisp_Object
5147 code_convert_region_unwind (dummy)
5148      Lisp_Object dummy;
5149 {
5150   inhibit_pre_post_conversion = 0;
5151   return Qnil;
5152 }
5153
5154 /* Store information about all compositions in the range FROM and TO
5155    of OBJ in memory blocks pointed by CODING->cmp_data.  OBJ is a
5156    buffer or a string, defaults to the current buffer.  */
5157
5158 void
5159 coding_save_composition (coding, from, to, obj)
5160      struct coding_system *coding;
5161      int from, to;
5162      Lisp_Object obj;
5163 {
5164   Lisp_Object prop;
5165   int start, end;
5166
5167   if (coding->composing == COMPOSITION_DISABLED)
5168     return;
5169   if (!coding->cmp_data)
5170     coding_allocate_composition_data (coding, from);
5171   if (!find_composition (from, to, &start, &end, &prop, obj)
5172       || end > to)
5173     return;
5174   if (start < from
5175       && (!find_composition (end, to, &start, &end, &prop, obj)
5176           || end > to))
5177     return;
5178   coding->composing = COMPOSITION_NO;
5179   do
5180     {
5181       if (COMPOSITION_VALID_P (start, end, prop))
5182         {
5183           enum composition_method method = COMPOSITION_METHOD (prop);
5184           if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
5185               >= COMPOSITION_DATA_SIZE)
5186             coding_allocate_composition_data (coding, from);
5187           /* For relative composition, we remember start and end
5188              positions, for the other compositions, we also remember
5189              components.  */
5190           CODING_ADD_COMPOSITION_START (coding, start - from, method);
5191           if (method != COMPOSITION_RELATIVE)
5192             {
5193               /* We must store a*/
5194               Lisp_Object val, ch;
5195
5196               val = COMPOSITION_COMPONENTS (prop);
5197               if (CONSP (val))
5198                 while (CONSP (val))
5199                   {
5200                     ch = XCAR (val), val = XCDR (val);
5201                     CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
5202                   }
5203               else if (VECTORP (val) || STRINGP (val))
5204                 {
5205                   int len = (VECTORP (val)
5206                              ? XVECTOR (val)->size : XSTRING (val)->size);
5207                   int i;
5208                   for (i = 0; i < len; i++)
5209                     {
5210                       ch = (STRINGP (val)
5211                             ? Faref (val, make_number (i))
5212                             : XVECTOR (val)->contents[i]);
5213                       CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
5214                     }
5215                 }
5216               else              /* INTEGERP (val) */
5217                 CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (val));
5218             }
5219           CODING_ADD_COMPOSITION_END (coding, end - from);
5220         }
5221       start = end;
5222     }
5223   while (start < to
5224          && find_composition (start, to, &start, &end, &prop, obj)
5225          && end <= to);
5226
5227   /* Make coding->cmp_data point to the first memory block.  */
5228   while (coding->cmp_data->prev)
5229     coding->cmp_data = coding->cmp_data->prev;
5230   coding->cmp_data_start = 0;
5231 }
5232
5233 /* Reflect the saved information about compositions to OBJ.
5234    CODING->cmp_data points to a memory block for the information.  OBJ
5235    is a buffer or a string, defaults to the current buffer.
     
        The memory blocks are walked from the first one (found by
        following the `prev' links) along the `next' links.  Each record
        in a block is read as:
          data[0]     length of the record in ints (4 plus the number
                      of components),
          data[1..2]  the two positions handed to compose_text,
          data[3]     the composition method,
          data[4..]   the components (not read for COMPOSITION_RELATIVE).
        A record whose length is not positive ends the scan of its
        block, and so does a record that is too short to hold its
        header or that extends beyond the used part of the block.  A
        record with more components than the local argument array can
        hold is skipped.  */
5236
5237 void
5238 coding_restore_composition (coding, obj)
5239      struct coding_system *coding;
5240      Lisp_Object obj;
5241 {
5242   struct composition_data *cmp_data = coding->cmp_data;
5243
5244   if (!cmp_data)
5245     return;
5246
5247   while (cmp_data->prev)
5248     cmp_data = cmp_data->prev;
5249
5250   while (cmp_data)
5251     {
5252       int i;
5253
5254       for (i = 0; i < cmp_data->used && cmp_data->data[i] > 0;
5255            i += cmp_data->data[i])
5256         {
5257           int *data = cmp_data->data + i;
5258           enum composition_method method;
5259           Lisp_Object components;
5260
               /* Do not trust the record length blindly: the header
                  occupies four ints, and the whole record must lie
                  within the used part of this block.  Otherwise the
                  reads of data[1] .. data[3] and of the components
                  below would go past the recorded data.  */
               if (data[0] < 4 || data[0] > cmp_data->used - i)
                 break;
               method = (enum composition_method) data[3];
     
5261           if (method == COMPOSITION_RELATIVE)
5262             components = Qnil;
5263           else
5264             {
5265               int len = data[0] - 4, j;
5266               Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5267
                   /* ARGS is a fixed-size array; a record claiming more
                      components than it can hold would overflow it, so
                      skip such a record.  */
                   if (len > MAX_COMPOSITION_COMPONENTS * 2 - 1)
                     continue;
     
5268               for (j = 0; j < len; j++)
5269                 args[j] = make_number (data[4 + j]);
5270               components = (method == COMPOSITION_WITH_ALTCHARS
5271                             ? Fstring (len, args) : Fvector (len, args));
5272             }
5273           compose_text (data[1], data[2], components, Qnil, obj);
5274         }
5275       cmp_data = cmp_data->next;
5276     }
5277 }
5278
5279 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
5280    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
5281    coding system CODING, and return the status code of code conversion
5282    (currently always 0; this value has no meaning).
5283
5284    How many characters (and bytes) are converted to how many
5285    characters (and bytes) are recorded in members of the structure
5286    CODING.
5287
5288    If REPLACE is nonzero, we do various things as if the original text
5289    is deleted and a new text is inserted.  See the comments in
5290    replace_range (insdel.c) to know what we are doing.
5291
5292    If REPLACE is zero, it is assumed that the source text is unibyte.
5293    Otherwise, it is assumed that the source text is multibyte.
     
        The conversion is done in place inside the gap of the current
        buffer: the source text is temporarily made part of the gap, the
        converted text is produced at the start of the gap, and the gap is
        enlarged whenever the room between the two runs short.
     
        Composition data set up for CODING in this function is released
        before returning, including on the early return that is taken
        when the whole region can be skipped.  */
5294
5295 int
5296 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
5297      int from, from_byte, to, to_byte, encodep, replace;
5298      struct coding_system *coding;
5299 {
5300   int len = to - from, len_byte = to_byte - from_byte;
5301   int require, inserted, inserted_byte;
5302   int head_skip, tail_skip, total_skip = 0;
5303   Lisp_Object saved_coding_symbol;
5304   int first = 1;
5305   unsigned char *src, *dst;
5306   Lisp_Object deletion;
5307   int orig_point = PT, orig_len = len;
5308   int prev_Z;
5309   int multibyte_p = !NILP (current_buffer->enable_multibyte_characters);
5310
5311   deletion = Qnil;
5312   saved_coding_symbol = coding->symbol;
5313
       /* If point is strictly inside the region, move it to FROM for
          the duration of the conversion.  */
5314   if (from < PT && PT < to)
5315     {
5316       TEMP_SET_PT_BOTH (from, from_byte);
5317       orig_point = from;
5318     }
5319
5320   if (replace)
5321     {
5322       int saved_from = from;
5323       int saved_inhibit_modification_hooks;
5324
5325       prepare_to_modify_buffer (from, to, &from);
           /* prepare_to_modify_buffer may have changed FROM; recompute
              the positions that depend on it.  */
5326       if (saved_from != from)
5327         {
5328           to = from + len;
5329           from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
5330           len_byte = to_byte - from_byte;
5331         }
5332
5333       /* The code conversion routine can not preserve text properties
5334          for now.  So, we must remove all text properties in the
5335          region.  Here, we must suppress all modification hooks.  */
5336       saved_inhibit_modification_hooks = inhibit_modification_hooks;
5337       inhibit_modification_hooks = 1;
5338       Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
5339       inhibit_modification_hooks = saved_inhibit_modification_hooks;
5340     }
5341
5342   if (! encodep && CODING_REQUIRE_DETECTION (coding))
5343     {
5344       /* We must detect encoding of text and eol format.  */
5345
5346       if (from < GPT && to > GPT)
5347         move_gap_both (from, from_byte);
5348       if (coding->type == coding_type_undecided)
5349         {
5350           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
5351           if (coding->type == coding_type_undecided)
5352             {
5353               /* It seems that the text contains only ASCII, but we
5354                  should not leave it undecided because the deeper
5355                  decoding routine (decode_coding) tries to detect the
5356                  encodings again in vain.  */
5357               coding->type = coding_type_emacs_mule;
5358               coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
5359             }
5360         }
5361       if (coding->eol_type == CODING_EOL_UNDECIDED
5362           && coding->type != coding_type_ccl)
5363         {
5364           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
5365           if (coding->eol_type == CODING_EOL_UNDECIDED)
5366             coding->eol_type = CODING_EOL_LF;
5367           /* We had better recover the original eol format if we
5368              encounter an inconsistent eol format while decoding.  */
5369           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
5370         }
5371     }
5372
5373   /* Now we convert the text.  */
5374
5375   /* For encoding, we must process pre-write-conversion in advance.  */
5376   if (! inhibit_pre_post_conversion
5377       && encodep
5378       && SYMBOLP (coding->pre_write_conversion)
5379       && ! NILP (Ffboundp (coding->pre_write_conversion)))
5380     {
5381       /* The function in pre-write-conversion may put a new text in a
5382          new buffer.  */
5383       struct buffer *prev = current_buffer;
5384       Lisp_Object new;
5385       int count = specpdl_ptr - specpdl;
5386
5387       record_unwind_protect (code_convert_region_unwind, Qnil);
5388       /* We should not call any more pre-write/post-read-conversion
5389          functions while this pre-write-conversion is running.  */
5390       inhibit_pre_post_conversion = 1;
5391       call2 (coding->pre_write_conversion,
5392              make_number (from), make_number (to));
5393       inhibit_pre_post_conversion = 0;
5394       /* Discard the unwind protect.  */
5395       specpdl_ptr--;
5396
5397       if (current_buffer != prev)
5398         {
               /* The function left us in another buffer: replace the
                  region in the original buffer with the accessible
                  text of that buffer, kill it, and adjust the saved
                  point and the region positions to the new length.  */
5399           len = ZV - BEGV;
5400           new = Fcurrent_buffer ();
5401           set_buffer_internal_1 (prev);
5402           del_range_2 (from, from_byte, to, to_byte, 0);
5403           TEMP_SET_PT_BOTH (from, from_byte);
5404           insert_from_buffer (XBUFFER (new), 1, len, 0);
5405           Fkill_buffer (new);
5406           if (orig_point >= to)
5407             orig_point += len - orig_len;
5408           else if (orig_point > from)
5409             orig_point = from;
5410           orig_len = len;
5411           to = from + len;
5412           from_byte = CHAR_TO_BYTE (from);
5413           to_byte = CHAR_TO_BYTE (to);
5414           len_byte = to_byte - from_byte;
5415           TEMP_SET_PT_BOTH (from, from_byte);
5416         }
5417     }
5418
       /* Remember the original text; it is handed to
          adjust_after_replace below.  */
5419   if (replace)
5420     deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
5421
5422   if (coding->composing != COMPOSITION_DISABLED)
5423     {
5424       if (encodep)
5425         coding_save_composition (coding, from, to, Fcurrent_buffer ());
5426       else
5427         coding_allocate_composition_data (coding, from);
5428     }
5429
5430   /* Try to skip the leading and trailing ASCII characters.  */
5431   if (coding->type != coding_type_ccl)
5432     {
5433       int from_byte_orig = from_byte, to_byte_orig = to_byte;
5434
5435       if (from < GPT && GPT < to)
5436         move_gap_both (from, from_byte);
5437       SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
5438       if (from_byte == to_byte
5439           && (encodep || NILP (coding->post_read_conversion))
5440           && ! CODING_REQUIRE_FLUSHING (coding))
5441         {
5442           coding->produced = len_byte;
5443           coding->produced_char = len;
5444           if (!replace)
5445             /* We must record and adjust for this new text now.  */
5446             adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
               /* Nothing is left to convert, so control never reaches
                  the release of the composition data at the end of
                  this function; release it here instead.  */
               coding_free_composition_data (coding);
5447           return 0;
5448         }
5449
5450       head_skip = from_byte - from_byte_orig;
5451       tail_skip = to_byte_orig - to_byte;
5452       total_skip = head_skip + tail_skip;
5453       from += head_skip;
5454       to -= tail_skip;
5455       len -= total_skip; len_byte -= total_skip;
5456     }
5457
5458   /* For conversion, we must put the gap before the text in addition to
5459      making the gap larger for efficient decoding.  The required gap
5460      size starts from 2000 which is the magic number used in make_gap.
5461      But, after one batch of conversion, it will be incremented if we
5462      find that it is not enough.  */
5463   require = 2000;
5464
5465   if (GAP_SIZE  < require)
5466     make_gap (require - GAP_SIZE);
5467   move_gap_both (from, from_byte);
5468
5469   inserted = inserted_byte = 0;
5470
       /* Take the source text out of the buffer contents by making it
          part of the gap.  */
5471   GAP_SIZE += len_byte;
5472   ZV -= len;
5473   Z -= len;
5474   ZV_BYTE -= len_byte;
5475   Z_BYTE -= len_byte;
5476
5477   if (GPT - BEG < BEG_UNCHANGED)
5478     BEG_UNCHANGED = GPT - BEG;
5479   if (Z - GPT < END_UNCHANGED)
5480     END_UNCHANGED = Z - GPT;
5481
5482   if (!encodep && coding->src_multibyte)
5483     {
5484       /* Decoding routines expect that the source text is unibyte.
5485          We must convert 8-bit characters of multibyte form to
5486          unibyte.  */
5487       int len_byte_orig = len_byte;
5488       len_byte = str_as_unibyte (GAP_END_ADDR - len_byte, len_byte);
           /* Keep the (now shorter) source text adjacent to the end of
              the gap.  */
5489       if (len_byte < len_byte_orig)
5490         safe_bcopy (GAP_END_ADDR - len_byte_orig, GAP_END_ADDR - len_byte,
5491                     len_byte);
5492       coding->src_multibyte = 0;
5493     }
5494
5495   for (;;)
5496     {
5497       int result;
5498
5499       /* The buffer memory is now:
5500          +--------+converted-text+---------+-------original-text-------+---+
5501          |<-from->|<--inserted-->|---------|<--------len_byte--------->|---|
5502                   |<---------------------- GAP ----------------------->|  */
5503       src = GAP_END_ADDR - len_byte;
5504       dst = GPT_ADDR + inserted_byte;
5505
5506       if (encodep)
5507         result = encode_coding (coding, src, dst, len_byte, 0);
5508       else
5509         result = decode_coding (coding, src, dst, len_byte, 0);
5510
5511       /* The buffer memory is now:
5512          +--------+-------converted-text----+--+------original-text----+---+
5513          |<-from->|<-inserted->|<-produced->|--|<-(len_byte-consumed)->|---|
5514                   |<---------------------- GAP ----------------------->|  */
5515
5516       inserted += coding->produced_char;
5517       inserted_byte += coding->produced;
5518       len_byte -= coding->consumed;
5519
5520       if (result == CODING_FINISH_INSUFFICIENT_CMP)
5521         {
5522           coding_allocate_composition_data (coding, from + inserted);
5523           continue;
5524         }
5525
5526       src += coding->consumed;
5527       dst += coding->produced;
5528
5529       if (result == CODING_FINISH_NORMAL)
5530         {
5531           src += len_byte;
5532           break;
5533         }
5534       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
5535         {
5536           unsigned char *pend = dst, *p = pend - inserted_byte;
5537           Lisp_Object eol_type;
5538
5539           /* Encode LFs back to the original eol format (CR or CRLF).  */
5540           if (coding->eol_type == CODING_EOL_CR)
5541             {
5542               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
5543             }
5544           else
5545             {
5546               int count = 0;
5547
5548               while (p < pend) if (*p++ == '\n') count++;
5549               if (src - dst < count)
5550                 {
5551                   /* We don't have sufficient room for encoding LFs
5552                      back to CRLF.  We must record converted and
5553                      not-yet-converted text back to the buffer
5554                      content, enlarge the gap, then record them out of
5555                      the buffer contents again.  */
5556                   int add = len_byte + inserted_byte;
5557
5558                   GAP_SIZE -= add;
5559                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5560                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
5561                   make_gap (count - GAP_SIZE);
5562                   GAP_SIZE += add;
5563                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5564                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5565                   /* Don't forget to update SRC, DST, and PEND.  */
5566                   src = GAP_END_ADDR - len_byte;
5567                   dst = GPT_ADDR + inserted_byte;
5568                   pend = dst;
5569                 }
5570               inserted += count;
5571               inserted_byte += count;
5572               coding->produced += count;
                   /* Move the converted text up by COUNT bytes, working
                      backward, and put a CR before each LF.  */
5573               p = dst = pend + count;
5574               while (count)
5575                 {
5576                   *--p = *--pend;
5577                   if (*p == '\n') count--, *--p = '\r';
5578                 }
5579             }
5580
5581           /* Suppress eol-format conversion in the further conversion.  */
5582           coding->eol_type = CODING_EOL_LF;
5583
5584           /* Set the coding system symbol to that for Unix-like EOL.  */
5585           eol_type = Fget (saved_coding_symbol, Qeol_type);
5586           if (VECTORP (eol_type)
5587               && XVECTOR (eol_type)->size == 3
5588               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
5589             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
5590           else
5591             coding->symbol = saved_coding_symbol;
5592
5593           continue;
5594         }
5595       if (len_byte <= 0)
5596         {
               /* All source bytes are consumed.  Except for a CCL
                  coding system that has not yet been run with
                  CODING_MODE_LAST_BLOCK set, we are done.  */
5597           if (coding->type != coding_type_ccl
5598               || coding->mode & CODING_MODE_LAST_BLOCK)
5599             break;
5600           coding->mode |= CODING_MODE_LAST_BLOCK;
5601           continue;
5602         }
5603       if (result == CODING_FINISH_INSUFFICIENT_SRC)
5604         {
5605           /* The source text ends in invalid codes.  Let's just
5606              make them valid buffer contents, and finish conversion.  */
5607           inserted += len_byte;
5608           inserted_byte += len_byte;
5609           while (len_byte--)
5610             *dst++ = *src++;
5611           break;
5612         }
5613       if (result == CODING_FINISH_INTERRUPT)
5614         {
5615           /* The conversion procedure was interrupted by a user.  */
5616           break;
5617         }
5618       /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST  */
5619       if (coding->consumed < 1)
5620         {
5621           /* It's quite strange to require more memory without
5622              consuming any bytes.  Perhaps CCL program bug.  */
5623           break;
5624         }
5625       if (first)
5626         {
5627           /* We have just done the first batch of conversion which was
5628              stopped because of insufficient gap.  Let's reconsider the
5629              required gap size (i.e. SRC - DST) now.
5630
5631              We have converted ORIG bytes (== coding->consumed) into
5632              NEW bytes (coding->produced).  To convert the remaining
5633              LEN bytes, we may need REQUIRE bytes of gap, where:
5634                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
5635                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
5636              Here, we are sure that NEW >= ORIG.  */
5637           float ratio = coding->produced - coding->consumed;
5638           ratio /= coding->consumed;
5639           require = len_byte * ratio;
5640           first = 0;
5641         }
5642       if ((src - dst) < (require + 2000))
5643         {
5644           /* See the comment above the previous call of make_gap.  */
5645           int add = len_byte + inserted_byte;
5646
5647           GAP_SIZE -= add;
5648           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5649           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5650           make_gap (require + 2000);
5651           GAP_SIZE += add;
5652           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5653           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5654         }
5655     }
5656   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
5657
5658   if (encodep && coding->dst_multibyte)
5659     {
5660       /* The output is unibyte.  We must convert 8-bit characters to
5661          multibyte form.  */
5662       if (inserted_byte * 2 > GAP_SIZE)
5663         {
5664           GAP_SIZE -= inserted_byte;
5665           ZV += inserted_byte; Z += inserted_byte;
5666           ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte;
5667           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5668           make_gap (inserted_byte - GAP_SIZE);
5669           GAP_SIZE += inserted_byte;
5670           ZV -= inserted_byte; Z -= inserted_byte;
5671           ZV_BYTE -= inserted_byte; Z_BYTE -= inserted_byte;
5672           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5673         }
5674       inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte);
5675     }
5676
5677   /* If we have shrunk the conversion area, adjust it now.  */
5678   if (total_skip > 0)
5679     {
5680       if (tail_skip > 0)
5681         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
5682       inserted += total_skip; inserted_byte += total_skip;
5683       GAP_SIZE += total_skip;
5684       GPT -= head_skip; GPT_BYTE -= head_skip;
5685       ZV -= total_skip; ZV_BYTE -= total_skip;
5686       Z -= total_skip; Z_BYTE -= total_skip;
5687       from -= head_skip; from_byte -= head_skip;
5688       to += tail_skip; to_byte += tail_skip;
5689     }
5690
       /* INSERTED is recomputed from the change of Z across
          adjust_after_replace.  */
5691   prev_Z = Z;
5692   adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
5693   inserted = Z - prev_Z;
5694
5695   if (!encodep && coding->cmp_data && coding->cmp_data->used)
5696     coding_restore_composition (coding, Fcurrent_buffer ());
5697   coding_free_composition_data (coding);
5698
5699   if (! inhibit_pre_post_conversion
5700       && ! encodep && ! NILP (coding->post_read_conversion))
5701     {
5702       Lisp_Object val;
5703       int count = specpdl_ptr - specpdl;
5704
5705       if (from != PT)
5706         TEMP_SET_PT_BOTH (from, from_byte);
5707       prev_Z = Z;
5708       record_unwind_protect (code_convert_region_unwind, Qnil);
5709       /* We should not call any more pre-write/post-read-conversion
5710          functions while this post-read-conversion is running.  */
5711       inhibit_pre_post_conversion = 1;
5712       val = call1 (coding->post_read_conversion, make_number (inserted));
5713       inhibit_pre_post_conversion = 0;
5714       /* Discard the unwind protect.  */
5715       specpdl_ptr--;
5716       CHECK_NUMBER (val, 0);
           /* Account for any change of the buffer size made by the
              function.  */
5717       inserted += Z - prev_Z;
5718     }
5719
       /* Restore point, shifted by the change in length if it was at or
          after the end of the original region.  */
5720   if (orig_point >= from)
5721     {
5722       if (orig_point >= from + orig_len)
5723         orig_point += inserted - orig_len;
5724       else
5725         orig_point = from;
5726       TEMP_SET_PT (orig_point);
5727     }
5728
5729   if (replace)
5730     {
5731       signal_after_change (from, to - from, inserted);
5732       update_compositions (from, from + inserted, CHECK_BORDER);
5733     }
5734
       /* Record the result of the whole conversion in CODING.  */
5735   {
5736     coding->consumed = to_byte - from_byte;
5737     coding->consumed_char = to - from;
5738     coding->produced = inserted_byte;
5739     coding->produced_char = inserted;
5740   }
5741
5742   return 0;
5743 }
5744
     /* Run a conversion function of CODING on the contents of STR and
        return the resulting text as a string.
     
        If ENCODEP is nonzero, CODING->pre_write_conversion is called with
        the start and end positions of the text; otherwise
        CODING->post_read_conversion is called with the length of the text,
        with point at the beginning.
     
        The work is done in the buffer " *code-converting-work*", whose
        multibyteness is set to that of STR.  The buffer that was current on
        entry is made current again by the unwind-protect recorded here.  */
5745 Lisp_Object
5746 run_pre_post_conversion_on_str (str, coding, encodep)
5747      Lisp_Object str;
5748      struct coding_system *coding;
5749      int encodep;
5750 {
5751   int count = specpdl_ptr - specpdl;
5752   struct gcpro gcpro1;
5753   int multibyte = STRING_MULTIBYTE (str);
5754
5755   record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
5756   record_unwind_protect (code_convert_region_unwind, Qnil);
5757   GCPRO1 (str);
5758   temp_output_buffer_setup (" *code-converting-work*");
5759   set_buffer_internal (XBUFFER (Vstandard_output));
5760   /* We must insert the contents of STR as is without
5761      unibyte<->multibyte conversion.  For that, we adjust the
5762      multibyteness of the working buffer to that of STR.  */
5763   Ferase_buffer ();
5764   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
5765   insert_from_string (str, 0, 0,
5766                       XSTRING (str)->size, STRING_BYTES (XSTRING (str)), 0);
5767   UNGCPRO;
       /* Do not let the function called below trigger further
          pre-write/post-read conversions.  */
5768   inhibit_pre_post_conversion = 1;
5769   if (encodep)
5770     call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
5771   else
5772     {
5773       TEMP_SET_PT_BOTH (BEG, BEG_BYTE);
5774       call1 (coding->post_read_conversion, make_number (Z - BEG));
5775     }
5776   inhibit_pre_post_conversion = 0;
       /* The whole text of the work buffer is the result.  */
5777   str = make_buffer_string (BEG, Z, 1);
5778   return unbind_to (count, str);
5779 }
5780
     /* Decode the string STR by coding system CODING and return the
        decoded text as a string.
     
        If it turns out that no decoding is required, the value is the
        string itself when NOCOPY is nonzero or when STR has been passed
        through Fstring_as_unibyte or Fstring_as_multibyte here, and a copy
        made by Fcopy_sequence otherwise.
     
        Otherwise the text is decoded into a conversion buffer that is
        extended as needed, and a new string is built from the skipped
        leading ASCII bytes, the decoded bytes, and the skipped trailing
        ASCII bytes.  In that case, if CODING->post_read_conversion is a
        symbol with a function definition, the new string is also passed
        through run_pre_post_conversion_on_str.
     
        The numbers of bytes and characters consumed and produced are
        recorded in CODING.  */
5781 Lisp_Object
5782 decode_coding_string (str, coding, nocopy)
5783      Lisp_Object str;
5784      struct coding_system *coding;
5785      int nocopy;
5786 {
5787   int len;
5788   struct conversion_buffer buf;
5789   int from, to_byte;
5790   struct gcpro gcpro1;
5791   Lisp_Object saved_coding_symbol;
5792   int result;
5793   int require_decoding;
5794   int shrinked_bytes = 0;
5795   Lisp_Object newstr;
5796   int consumed, consumed_char, produced, produced_char;
5797
5798   from = 0;
5799   to_byte = STRING_BYTES (XSTRING (str));
5800
5801   saved_coding_symbol = coding->symbol;
5802   coding->src_multibyte = STRING_MULTIBYTE (str);
5803   coding->dst_multibyte = 1;
5804   if (CODING_REQUIRE_DETECTION (coding))
5805     {
5806       /* See the comments in code_convert_region.  */
5807       if (coding->type == coding_type_undecided)
5808         {
5809           detect_coding (coding, XSTRING (str)->data, to_byte);
5810           if (coding->type == coding_type_undecided)
5811             coding->type = coding_type_emacs_mule;
5812         }
5813       if (coding->eol_type == CODING_EOL_UNDECIDED
5814           && coding->type != coding_type_ccl)
5815         {
5816           saved_coding_symbol = coding->symbol;
5817           detect_eol (coding, XSTRING (str)->data, to_byte);
5818           if (coding->eol_type == CODING_EOL_UNDECIDED)
5819             coding->eol_type = CODING_EOL_LF;
5820           /* We had better recover the original eol format if we
5821              encounter an inconsistent eol format while decoding.  */
5822           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
5823         }
5824     }
5825
       /* For these two types the result is a unibyte string.  */
5826   if (coding->type == coding_type_no_conversion
5827       || coding->type == coding_type_raw_text)
5828     coding->dst_multibyte = 0;
5829
5830   require_decoding = CODING_REQUIRE_DECODING (coding);
5831
5832   if (STRING_MULTIBYTE (str))
5833     {
5834       /* Decoding routines expect the source text to be unibyte.  */
5835       str = Fstring_as_unibyte (str);
5836       to_byte = STRING_BYTES (XSTRING (str));
5837       nocopy = 1;
5838       coding->src_multibyte = 0;
5839     }
5840
5841   /* Try to skip the leading and trailing ASCII characters.  */
5842   if (require_decoding && coding->type != coding_type_ccl)
5843     {
5844       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
5845                                 0);
5846       if (from == to_byte)
5847         require_decoding = 0;
           /* Number of bytes skipped at the head plus at the tail.  */
5848       shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
5849     }
5850
5851   if (!require_decoding)
5852     {
5853       coding->consumed = STRING_BYTES (XSTRING (str));
5854       coding->consumed_char = XSTRING (str)->size;
5855       if (coding->dst_multibyte)
5856         {
5857           str = Fstring_as_multibyte (str);
5858           nocopy = 1;
5859         }
5860       coding->produced = STRING_BYTES (XSTRING (str));
5861       coding->produced_char = XSTRING (str)->size;
5862       return (nocopy ? str : Fcopy_sequence (str));
5863     }
5864
5865   if (coding->composing != COMPOSITION_DISABLED)
5866     coding_allocate_composition_data (coding, from);
5867   len = decoding_buffer_size (coding, to_byte - from);
5868   allocate_conversion_buffer (buf, len);
5869
       /* Decode in batches, accumulating the totals, until the decoder
          reports normal completion or stops for lack of source without
          consuming anything.  */
5870   consumed = consumed_char = produced = produced_char = 0;
5871   while (1)
5872     {
5873       result = decode_coding (coding, XSTRING (str)->data + from + consumed,
5874                               buf.data + produced, to_byte - from - consumed,
5875                               buf.size - produced);
5876       consumed += coding->consumed;
5877       consumed_char += coding->consumed_char;
5878       produced += coding->produced;
5879       produced_char += coding->produced_char;
5880       if (result == CODING_FINISH_NORMAL
5881           || (result == CODING_FINISH_INSUFFICIENT_SRC
5882               && coding->consumed == 0))
5883         break;
5884       if (result == CODING_FINISH_INSUFFICIENT_CMP)
5885         coding_allocate_composition_data (coding, from + produced_char);
5886       else if (result == CODING_FINISH_INSUFFICIENT_DST)
5887         extend_conversion_buffer (&buf);
5888       else if (result == CODING_FINISH_INCONSISTENT_EOL)
5889         {
5890           Lisp_Object eol_type;
5891
5892           /* Recover the original EOL format.  */
5893           if (coding->eol_type == CODING_EOL_CR)
5894             {
5895               unsigned char *p;
5896               for (p = buf.data; p < buf.data + produced; p++)
5897                 if (*p == '\n') *p = '\r';
5898             }
5899           else if (coding->eol_type == CODING_EOL_CRLF)
5900             {
5901               int num_eol = 0;
5902               unsigned char *p0, *p1;
5903               for (p0 = buf.data, p1 = p0 + produced; p0 < p1; p0++)
5904                 if (*p0 == '\n') num_eol++;
                   /* NOTE(review): the buffer is extended only once here;
                      this presumably relies on a single extension giving
                      room for NUM_EOL more bytes -- confirm against
                      extend_conversion_buffer.  */
5905               if (produced + num_eol >= buf.size)
5906                 extend_conversion_buffer (&buf);
                   /* Copy the decoded bytes backward to their new places,
                      putting a CR before each LF.  */
5907               for (p0 = buf.data + produced, p1 = p0 + num_eol; p0 > buf.data;)
5908                 {
5909                   *--p1 = *--p0;
5910                   if (*p0 == '\n') *--p1 = '\r';
5911                 }
5912               produced += num_eol;
5913               produced_char += num_eol;
5914             }
5915           /* Suppress eol-format conversion in the further conversion.  */
5916           coding->eol_type = CODING_EOL_LF;
5917
5918           /* Set the coding system symbol to that for Unix-like EOL.  */
5919           eol_type = Fget (saved_coding_symbol, Qeol_type);
5920           if (VECTORP (eol_type)
5921               && XVECTOR (eol_type)->size == 3
5922               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
5923             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
5924           else
5925             coding->symbol = saved_coding_symbol;
5926
5927
5928         }
5929     }
5930
5931   coding->consumed = consumed;
5932   coding->consumed_char = consumed_char;
5933   coding->produced = produced;
5934   coding->produced_char = produced_char;
5935
       /* Build the result: skipped head, decoded text, skipped tail.  */
5936   if (coding->dst_multibyte)
5937     newstr = make_uninit_multibyte_string (produced_char + shrinked_bytes,
5938                                            produced + shrinked_bytes);
5939   else
5940     newstr = make_uninit_string (produced + shrinked_bytes);
5941   if (from > 0)
5942     bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
5943   bcopy (buf.data, XSTRING (newstr)->data + from, produced);
5944   if (shrinked_bytes > from)
5945     bcopy (XSTRING (str)->data + to_byte,
5946            XSTRING (newstr)->data + from + produced,
5947            shrinked_bytes - from);
5948   free_conversion_buffer (&buf);
5949
5950   if (coding->cmp_data && coding->cmp_data->used)
5951     coding_restore_composition (coding, newstr);
5952   coding_free_composition_data (coding);
5953
5954   if (SYMBOLP (coding->post_read_conversion)
5955       && !NILP (Ffboundp (coding->post_read_conversion)))
5956     newstr = run_pre_post_conversion_on_str (newstr, coding, 0);
5957
5958   return newstr;
5959 }
5960
     /* Encode the string STR by coding system CODING and return the
        encoded text as a string.
     
        If CODING->pre_write_conversion is a symbol with a function
        definition, STR is first passed through
        run_pre_post_conversion_on_str.
     
        If CODING requires no encoding, or if the whole text can be
        skipped as ASCII, the value is the string itself when NOCOPY is
        nonzero (in the former case also when STR has been passed through
        Fstring_as_unibyte here), and a copy made by Fcopy_sequence
        otherwise.  In the other cases the text is encoded into a
        conversion buffer that is extended as needed, and a new string is
        built from the skipped leading bytes, the encoded bytes, and the
        skipped trailing bytes.
     
        Composition data saved for CODING here is released on every path
        that follows the saving.  */
5961 Lisp_Object
5962 encode_coding_string (str, coding, nocopy)
5963      Lisp_Object str;
5964      struct coding_system *coding;
5965      int nocopy;
5966 {
5967   int len;
5968   struct conversion_buffer buf;
5969   int from, to, to_byte;
5970   int result;
5971   int shrinked_bytes = 0;
5972   Lisp_Object newstr;
5973   int consumed, consumed_char, produced, produced_char;
5974
5975   if (SYMBOLP (coding->pre_write_conversion)
5976       && !NILP (Ffboundp (coding->pre_write_conversion)))
5977     str = run_pre_post_conversion_on_str (str, coding, 1);
5978
5979   from = 0;
5980   to = XSTRING (str)->size;
5981   to_byte = STRING_BYTES (XSTRING (str));
5982
5983   /* Encoding routines determine the multibyteness of the source text
5984      by coding->src_multibyte.  */
5985   coding->src_multibyte = STRING_MULTIBYTE (str);
5986   coding->dst_multibyte = 0;
5987   if (! CODING_REQUIRE_ENCODING (coding))
5988     {
5989       coding->consumed = STRING_BYTES (XSTRING (str));
5990       coding->consumed_char = XSTRING (str)->size;
5991       if (STRING_MULTIBYTE (str))
5992         {
5993           str = Fstring_as_unibyte (str);
5994           nocopy = 1;
5995         }
5996       coding->produced = STRING_BYTES (XSTRING (str));
5997       coding->produced_char = XSTRING (str)->size;
5998       return (nocopy ? str : Fcopy_sequence (str));
5999     }
6000
6001   if (coding->composing != COMPOSITION_DISABLED)
6002     coding_save_composition (coding, from, to, str);
6003
6004   /* Try to skip the leading and trailing ASCII characters.  */
6005   if (coding->type != coding_type_ccl)
6006     {
6007       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
6008                                 1);
6009       if (from == to_byte)
             {
               /* Nothing is left to encode.  The composition data saved
                  above would otherwise never be released, because the
                  release at the end of this function is not reached.  */
               coding_free_composition_data (coding);
6010           return (nocopy ? str : Fcopy_sequence (str));
             }
           /* Number of bytes skipped at the head plus at the tail.  */
6011       shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
6012     }
6013
6014   len = encoding_buffer_size (coding, to_byte - from);
6015   allocate_conversion_buffer (buf, len);
6016
       /* Encode in batches, accumulating the totals, until the encoder
          reports normal completion or stops for lack of source without
          consuming anything.  */
6017   consumed = consumed_char = produced = produced_char = 0;
6018   while (1)
6019     {
6020       result = encode_coding (coding, XSTRING (str)->data + from + consumed,
6021                               buf.data + produced, to_byte - from - consumed,
6022                               buf.size - produced);
6023       consumed += coding->consumed;
6024       consumed_char += coding->consumed_char;
6025       produced += coding->produced;
6026       produced_char += coding->produced_char;
6027       if (result == CODING_FINISH_NORMAL
6028           || (result == CODING_FINISH_INSUFFICIENT_SRC
6029               && coding->consumed == 0))
6030         break;
6031       /* Now result should be CODING_FINISH_INSUFFICIENT_DST.  */
6032       extend_conversion_buffer (&buf);
6033     }
6034
6035   coding->consumed = consumed;
6036   coding->consumed_char = consumed_char;
6037   coding->produced = produced;
6038   coding->produced_char = produced_char;
6039
       /* Build the result: skipped head, encoded text, skipped tail.  */
6040   newstr = make_uninit_string (produced + shrinked_bytes);
6041   if (from > 0)
6042     bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
6043   bcopy (buf.data, XSTRING (newstr)->data + from, produced);
6044   if (shrinked_bytes > from)
6045     bcopy (XSTRING (str)->data + to_byte,
6046            XSTRING (newstr)->data + from + produced,
6047            shrinked_bytes - from);
6048
6049   free_conversion_buffer (&buf);
6050   coding_free_composition_data (coding);
6051
6052   return newstr;
6053 }
6054
6055 \f
6056 #ifdef emacs
6057 /*** 8. Emacs Lisp library functions ***/
6058
6059 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6060   "Return t if OBJECT is nil or a coding-system.\n\
6061 See the documentation of `make-coding-system' for information\n\
6062 about coding-system objects.")
6063   (obj)
6064      Lisp_Object obj;
6065 {
6066   if (NILP (obj))
6067     return Qt;
6068   if (!SYMBOLP (obj))
6069     return Qnil;
6070   /* Get coding-spec vector for OBJ.  */
6071   obj = Fget (obj, Qcoding_system);
6072   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
6073           ? Qt : Qnil);
6074 }
6075
6076 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6077        Sread_non_nil_coding_system, 1, 1, 0,
6078   "Read a coding system from the minibuffer, prompting with string PROMPT.")
6079   (prompt)
6080      Lisp_Object prompt;
6081 {
6082   Lisp_Object val;
6083   do
6084     {
6085       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6086                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
6087     }
6088   while (XSTRING (val)->size == 0);
6089   return (Fintern (val, Qnil));
6090 }
6091
6092 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
6093   "Read a coding system from the minibuffer, prompting with string PROMPT.\n\
6094 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.")
6095   (prompt, default_coding_system)
6096      Lisp_Object prompt, default_coding_system;
6097 {
6098   Lisp_Object val;
6099   if (SYMBOLP (default_coding_system))
6100     XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
6101   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6102                           Qt, Qnil, Qcoding_system_history,
6103                           default_coding_system, Qnil);
6104   return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
6105 }
6106
6107 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6108        1, 1, 0,
6109   "Check validity of CODING-SYSTEM.\n\
6110 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.\n\
6111 It is valid if it is a symbol with a non-nil `coding-system' property.\n\
6112 The value of property should be a vector of length 5.")
6113   (coding_system)
6114      Lisp_Object coding_system;
6115 {
6116   CHECK_SYMBOL (coding_system, 0);
6117   if (!NILP (Fcoding_system_p (coding_system)))
6118     return coding_system;
6119   while (1)
6120     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
6121 }
6122 \f
6123 Lisp_Object
6124 detect_coding_system (src, src_bytes, highest, multibytep)
6125      unsigned char *src;
6126      int src_bytes, highest;
6127      int multibytep;
6128 {
6129   int coding_mask, eol_type;
6130   Lisp_Object val, tmp;
6131   int dummy;
6132
6133   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy, multibytep);
6134   eol_type  = detect_eol_type (src, src_bytes, &dummy);
6135   if (eol_type == CODING_EOL_INCONSISTENT)
6136     eol_type = CODING_EOL_UNDECIDED;
6137
6138   if (!coding_mask)
6139     {
6140       val = Qundecided;
6141       if (eol_type != CODING_EOL_UNDECIDED)
6142         {
6143           Lisp_Object val2;
6144           val2 = Fget (Qundecided, Qeol_type);
6145           if (VECTORP (val2))
6146             val = XVECTOR (val2)->contents[eol_type];
6147         }
6148       return (highest ? val : Fcons (val, Qnil));
6149     }
6150
6151   /* At first, gather possible coding systems in VAL.  */
6152   val = Qnil;
6153   for (tmp = Vcoding_category_list; CONSP (tmp); tmp = XCDR (tmp))
6154     {
6155       Lisp_Object category_val, category_index;
6156
6157       category_index = Fget (XCAR (tmp), Qcoding_category_index);
6158       category_val = Fsymbol_value (XCAR (tmp));
6159       if (!NILP (category_val)
6160           && NATNUMP (category_index)
6161           && (coding_mask & (1 << XFASTINT (category_index))))
6162         {
6163           val = Fcons (category_val, val);
6164           if (highest)
6165             break;
6166         }
6167     }
6168   if (!highest)
6169     val = Fnreverse (val);
6170
6171   /* Then, replace the elements with subsidiary coding systems.  */
6172   for (tmp = val; CONSP (tmp); tmp = XCDR (tmp))
6173     {
6174       if (eol_type != CODING_EOL_UNDECIDED
6175           && eol_type != CODING_EOL_INCONSISTENT)
6176         {
6177           Lisp_Object eol;
6178           eol = Fget (XCAR (tmp), Qeol_type);
6179           if (VECTORP (eol))
6180             XCAR (tmp) = XVECTOR (eol)->contents[eol_type];
6181         }
6182     }
6183   return (highest ? XCAR (val) : val);
6184 }
6185
6186 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6187        2, 3, 0,
6188   "Detect coding system of the text in the region between START and END.\n\
6189 Return a list of possible coding systems ordered by priority.\n\
6190 \n\
6191 If only ASCII characters are found, it returns a list of single element\n\
6192 `undecided' or its subsidiary coding system according to a detected\n\
6193 end-of-line format.\n\
6194 \n\
6195 If optional argument HIGHEST is non-nil, return the coding system of\n\
6196 highest priority.")
6197   (start, end, highest)
6198      Lisp_Object start, end, highest;
6199 {
6200   int from, to;
6201   int from_byte, to_byte;
6202
6203   CHECK_NUMBER_COERCE_MARKER (start, 0);
6204   CHECK_NUMBER_COERCE_MARKER (end, 1);
6205
6206   validate_region (&start, &end);
6207   from = XINT (start), to = XINT (end);
6208   from_byte = CHAR_TO_BYTE (from);
6209   to_byte = CHAR_TO_BYTE (to);
6210
6211   if (from < GPT && to >= GPT)
6212     move_gap_both (to, to_byte);
6213
6214   return detect_coding_system (BYTE_POS_ADDR (from_byte),
6215                                to_byte - from_byte,
6216                                !NILP (highest),
6217                                !NILP (current_buffer
6218                                       ->enable_multibyte_characters));
6219 }
6220
6221 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6222        1, 2, 0,
6223   "Detect coding system of the text in STRING.\n\
6224 Return a list of possible coding systems ordered by priority.\n\
6225 \n\
6226 If only ASCII characters are found, it returns a list of single element\n\
6227 `undecided' or its subsidiary coding system according to a detected\n\
6228 end-of-line format.\n\
6229 \n\
6230 If optional argument HIGHEST is non-nil, return the coding system of\n\
6231 highest priority.")
6232   (string, highest)
6233      Lisp_Object string, highest;
6234 {
6235   CHECK_STRING (string, 0);
6236
6237   return detect_coding_system (XSTRING (string)->data,
6238                                STRING_BYTES (XSTRING (string)),
6239                                !NILP (highest),
6240                                STRING_MULTIBYTE (string));
6241 }
6242
6243 /* Return an intersection of lists L1 and L2.  */
6244
6245 static Lisp_Object
6246 intersection (l1, l2)
6247      Lisp_Object l1, l2;
6248 {
6249   Lisp_Object val;
6250
6251   for (val = Qnil; CONSP (l1); l1 = XCDR (l1))
6252     {
6253       if (!NILP (Fmemq (XCAR (l1), l2)))
6254         val = Fcons (XCAR (l1), val);
6255     }
6256   return val;
6257 }
6258
6259
6260 /*  Subroutine for Fsafe_coding_systems_region_internal.
6261
6262     Return a list of coding systems that safely encode the multibyte
6263     text between P and PEND.  SAFE_CODINGS, if non-nil, is a list of
6264     possible coding systems.  If it is nil, it means that we have not
6265     yet found any coding systems.
6266
6267     WORK_TABLE is a copy of the char-table Vchar_coding_system_table.  An
6268     element of WORK_TABLE is set to t once the element is looked up.
6269
6270     If a non-ASCII single byte char is found, set
6271     *single_byte_char_found to 1.  */
6272
6273 static Lisp_Object
6274 find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
6275      unsigned char *p, *pend;
6276      Lisp_Object safe_codings, work_table;
6277      int *single_byte_char_found;
6278 {
6279   int c, len, idx;
6280   Lisp_Object val;
6281
6282   while (p < pend)
6283     {
6284       c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
6285       p += len;
6286       if (ASCII_BYTE_P (c))
6287         /* We can ignore ASCII characters here.  */
6288         continue;
6289       if (SINGLE_BYTE_CHAR_P (c))
6290         *single_byte_char_found = 1;
6291       if (NILP (safe_codings))
6292         continue;
6293       /* Check the safe coding systems for C.  */
6294       val = char_table_ref_and_index (work_table, c, &idx);
6295       if (EQ (val, Qt))
6296         /* This element was already checked.  Ignore it.  */
6297         continue;
6298       /* Remember that we checked this element.  */
6299       CHAR_TABLE_SET (work_table, make_number (idx), Qt);
6300
6301       /* If there are some safe coding systems for C and we have
6302          already found the other set of coding systems for the
6303          different characters, get the intersection of them.  */
6304       if (!EQ (safe_codings, Qt) && !NILP (val))
6305         val = intersection (safe_codings, val);
6306       safe_codings = val;
6307     }
6308   return safe_codings;
6309 }
6310
6311
6312 /* Return a list of coding systems that safely encode the text between
6313    START and END.  If the text contains only ASCII or is unibyte,
6314    return t.  */
6315
6316 DEFUN ("find-coding-systems-region-internal",
6317        Ffind_coding_systems_region_internal,
6318        Sfind_coding_systems_region_internal, 2, 2, 0,
6319   "Internal use only.")
6320   (start, end)
6321      Lisp_Object start, end;
6322 {
6323   Lisp_Object work_table, safe_codings;
6324   int non_ascii_p = 0;
6325   int single_byte_char_found = 0;
6326   unsigned char *p1, *p1end, *p2, *p2end, *p;
6327
6328   if (STRINGP (start))
6329     {
6330       if (!STRING_MULTIBYTE (start))
6331         return Qt;
6332       p1 = XSTRING (start)->data, p1end = p1 + STRING_BYTES (XSTRING (start));
6333       p2 = p2end = p1end;
6334       if (XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
6335         non_ascii_p = 1;
6336     }
6337   else
6338     {
6339       int from, to, stop;
6340
6341       CHECK_NUMBER_COERCE_MARKER (start, 0);
6342       CHECK_NUMBER_COERCE_MARKER (end, 1);
6343       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6344         args_out_of_range (start, end);
6345       if (NILP (current_buffer->enable_multibyte_characters))
6346         return Qt;
6347       from = CHAR_TO_BYTE (XINT (start));
6348       to = CHAR_TO_BYTE (XINT (end));
6349       stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to;
6350       p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from);
6351       if (stop == to)
6352         p2 = p2end = p1end;
6353       else
6354         p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop);
6355       if (XINT (end) - XINT (start) != to - from)
6356         non_ascii_p = 1;
6357     }
6358
6359   if (!non_ascii_p)
6360     {
6361       /* We are sure that the text contains no multibyte character.
6362          Check if it contains eight-bit-graphic.  */
6363       p = p1;
6364       for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++);
6365       if (p == p1end)
6366         {
6367           for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);
6368           if (p == p2end)
6369             return Qt;
6370         }
6371     }
6372
6373   /* The text contains non-ASCII characters.  */
6374   work_table = Fcopy_sequence (Vchar_coding_system_table);
6375   safe_codings = find_safe_codings (p1, p1end, Qt, work_table,
6376                                     &single_byte_char_found);
6377   if (p2 < p2end)
6378     safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
6379                                       &single_byte_char_found);
6380
6381   if (!single_byte_char_found)
6382     {
6383       /* Append generic coding systems.  */
6384       Lisp_Object args[2];
6385       args[0] = safe_codings;
6386       args[1] = Fchar_table_extra_slot (Vchar_coding_system_table,
6387                                         make_number (0));
6388       safe_codings = Fappend (2, args);
6389     }
6390   else
6391     safe_codings = Fcons (Qraw_text,
6392                           Fcons (Qemacs_mule,
6393                                  Fcons (Qno_conversion, safe_codings)));
6394   return safe_codings;
6395 }
6396
6397
6398 Lisp_Object
6399 code_convert_region1 (start, end, coding_system, encodep)
6400      Lisp_Object start, end, coding_system;
6401      int encodep;
6402 {
6403   struct coding_system coding;
6404   int from, to;
6405
6406   CHECK_NUMBER_COERCE_MARKER (start, 0);
6407   CHECK_NUMBER_COERCE_MARKER (end, 1);
6408   CHECK_SYMBOL (coding_system, 2);
6409
6410   validate_region (&start, &end);
6411   from = XFASTINT (start);
6412   to = XFASTINT (end);
6413
6414   if (NILP (coding_system))
6415     return make_number (to - from);
6416
6417   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6418     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
6419
6420   coding.mode |= CODING_MODE_LAST_BLOCK;
6421   coding.src_multibyte = coding.dst_multibyte
6422     = !NILP (current_buffer->enable_multibyte_characters);
6423   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
6424                        &coding, encodep, 1);
6425   Vlast_coding_system_used = coding.symbol;
6426   return make_number (coding.produced_char);
6427 }
6428
6429 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
6430        3, 3, "r\nzCoding system: ",
6431   "Decode the current region from the specified coding system.\n\
6432 When called from a program, takes three arguments:\n\
6433 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
6434 This function sets `last-coding-system-used' to the precise coding system\n\
6435 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
6436 not fully specified.)\n\
6437 It returns the length of the decoded text.")
6438   (start, end, coding_system)
6439      Lisp_Object start, end, coding_system;
6440 {
6441   return code_convert_region1 (start, end, coding_system, 0);
6442 }
6443
6444 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
6445        3, 3, "r\nzCoding system: ",
6446   "Encode the current region into the specified coding system.\n\
6447 When called from a program, takes three arguments:\n\
6448 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
6449 This function sets `last-coding-system-used' to the precise coding system\n\
6450 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
6451 not fully specified.)\n\
6452 It returns the length of the encoded text.")
6453   (start, end, coding_system)
6454      Lisp_Object start, end, coding_system;
6455 {
6456   return code_convert_region1 (start, end, coding_system, 1);
6457 }
6458
6459 Lisp_Object
6460 code_convert_string1 (string, coding_system, nocopy, encodep)
6461      Lisp_Object string, coding_system, nocopy;
6462      int encodep;
6463 {
6464   struct coding_system coding;
6465
6466   CHECK_STRING (string, 0);
6467   CHECK_SYMBOL (coding_system, 1);
6468
6469   if (NILP (coding_system))
6470     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
6471
6472   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6473     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
6474
6475   coding.mode |= CODING_MODE_LAST_BLOCK;
6476   string = (encodep
6477             ? encode_coding_string (string, &coding, !NILP (nocopy))
6478             : decode_coding_string (string, &coding, !NILP (nocopy)));
6479   Vlast_coding_system_used = coding.symbol;
6480
6481   return string;
6482 }
6483
6484 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
6485        2, 3, 0,
6486   "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
6487 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
6488 if the decoding operation is trivial.\n\
6489 This function sets `last-coding-system-used' to the precise coding system\n\
6490 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
6491 not fully specified.)")
6492   (string, coding_system, nocopy)
6493      Lisp_Object string, coding_system, nocopy;
6494 {
6495   return code_convert_string1 (string, coding_system, nocopy, 0);
6496 }
6497
6498 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
6499        2, 3, 0,
6500   "Encode STRING to CODING-SYSTEM, and return the result.\n\
6501 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
6502 if the encoding operation is trivial.\n\
6503 This function sets `last-coding-system-used' to the precise coding system\n\
6504 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
6505 not fully specified.)")
6506   (string, coding_system, nocopy)
6507      Lisp_Object string, coding_system, nocopy;
6508 {
6509   return code_convert_string1 (string, coding_system, nocopy, 1);
6510 }
6511
6512 /* Encode or decode STRING according to CODING_SYSTEM.
6513    Do not set Vlast_coding_system_used.
6514
6515    This function is called only from macros DECODE_FILE and
6516    ENCODE_FILE, thus we ignore character composition.  */
6517
6518 Lisp_Object
6519 code_convert_string_norecord (string, coding_system, encodep)
6520      Lisp_Object string, coding_system;
6521      int encodep;
6522 {
6523   struct coding_system coding;
6524
6525   CHECK_STRING (string, 0);
6526   CHECK_SYMBOL (coding_system, 1);
6527
6528   if (NILP (coding_system))
6529     return string;
6530
6531   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6532     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
6533
6534   coding.composing = COMPOSITION_DISABLED;
6535   coding.mode |= CODING_MODE_LAST_BLOCK;
6536   return (encodep
6537           ? encode_coding_string (string, &coding, 1)
6538           : decode_coding_string (string, &coding, 1));
6539 }
6540 \f
6541 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
6542   "Decode a Japanese character which has CODE in shift_jis encoding.\n\
6543 Return the corresponding character.")
6544   (code)
6545      Lisp_Object code;
6546 {
6547   unsigned char c1, c2, s1, s2;
6548   Lisp_Object val;
6549
6550   CHECK_NUMBER (code, 0);
6551   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
6552   if (s1 == 0)
6553     {
6554       if (s2 < 0x80)
6555         XSETFASTINT (val, s2);
6556       else if (s2 >= 0xA0 || s2 <= 0xDF)
6557         XSETFASTINT (val, MAKE_CHAR (charset_katakana_jisx0201, s2, 0));
6558       else
6559         error ("Invalid Shift JIS code: %x", XFASTINT (code));
6560     }
6561   else
6562     {
6563       if ((s1 < 0x80 || s1 > 0x9F && s1 < 0xE0 || s1 > 0xEF)
6564           || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC))
6565         error ("Invalid Shift JIS code: %x", XFASTINT (code));
6566       DECODE_SJIS (s1, s2, c1, c2);
6567       XSETFASTINT (val, MAKE_CHAR (charset_jisx0208, c1, c2));
6568     }
6569   return val;
6570 }
6571
6572 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
6573   "Encode a Japanese character CHAR to shift_jis encoding.\n\
6574 Return the corresponding code in SJIS.")
6575   (ch)
6576      Lisp_Object ch;
6577 {
6578   int charset, c1, c2, s1, s2;
6579   Lisp_Object val;
6580
6581   CHECK_NUMBER (ch, 0);
6582   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
6583   if (charset == CHARSET_ASCII)
6584     {
6585       val = ch;
6586     }
6587   else if (charset == charset_jisx0208
6588            && c1 > 0x20 && c1 < 0x7F && c2 > 0x20 && c2 < 0x7F)
6589     {
6590       ENCODE_SJIS (c1, c2, s1, s2);
6591       XSETFASTINT (val, (s1 << 8) | s2);
6592     }
6593   else if (charset == charset_katakana_jisx0201
6594            && c1 > 0x20 && c2 < 0xE0)
6595     {
6596       XSETFASTINT (val, c1 | 0x80);
6597     }
6598   else
6599     error ("Can't encode to shift_jis: %d", XFASTINT (ch));
6600   return val;
6601 }
6602
6603 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
6604   "Decode a Big5 character which has CODE in BIG5 coding system.\n\
6605 Return the corresponding character.")
6606   (code)
6607      Lisp_Object code;
6608 {
6609   int charset;
6610   unsigned char b1, b2, c1, c2;
6611   Lisp_Object val;
6612
6613   CHECK_NUMBER (code, 0);
6614   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
6615   if (b1 == 0)
6616     {
6617       if (b2 >= 0x80)
6618         error ("Invalid BIG5 code: %x", XFASTINT (code));
6619       val = code;
6620     }
6621   else
6622     {
6623       if ((b1 < 0xA1 || b1 > 0xFE)
6624           || (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE))
6625         error ("Invalid BIG5 code: %x", XFASTINT (code));
6626       DECODE_BIG5 (b1, b2, charset, c1, c2);
6627       XSETFASTINT (val, MAKE_CHAR (charset, c1, c2));
6628     }
6629   return val;
6630 }
6631
6632 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
6633   "Encode the Big5 character CHAR to BIG5 coding system.\n\
6634 Return the corresponding character code in Big5.")
6635   (ch)
6636      Lisp_Object ch;
6637 {
6638   int charset, c1, c2, b1, b2;
6639   Lisp_Object val;
6640
6641   CHECK_NUMBER (ch, 0);
6642   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
6643   if (charset == CHARSET_ASCII)
6644     {
6645       val = ch;
6646     }
6647   else if ((charset == charset_big5_1
6648             && (XFASTINT (ch) >= 0x250a1 && XFASTINT (ch) <= 0x271ec))
6649            || (charset == charset_big5_2
6650                && XFASTINT (ch) >= 0x290a1 && XFASTINT (ch) <= 0x2bdb2))
6651     {
6652       ENCODE_BIG5 (charset, c1, c2, b1, b2);
6653       XSETFASTINT (val, (b1 << 8) | b2);
6654     }
6655   else
6656     error ("Can't encode to Big5: %d", XFASTINT (ch));
6657   return val;
6658 }
6659 \f
6660 DEFUN ("set-terminal-coding-system-internal",
6661        Fset_terminal_coding_system_internal,
6662        Sset_terminal_coding_system_internal, 1, 1, 0, "")
6663   (coding_system)
6664      Lisp_Object coding_system;
6665 {
6666   CHECK_SYMBOL (coding_system, 0);
6667   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
6668   /* We had better not send unsafe characters to terminal.  */
6669   terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
6670   /* Characer composition should be disabled.  */
6671   terminal_coding.composing = COMPOSITION_DISABLED;
6672   /* Error notification should be suppressed.  */
6673   terminal_coding.suppress_error = 1;
6674   terminal_coding.src_multibyte = 1;
6675   terminal_coding.dst_multibyte = 0;
6676   return Qnil;
6677 }
6678
6679 DEFUN ("set-safe-terminal-coding-system-internal",
6680        Fset_safe_terminal_coding_system_internal,
6681        Sset_safe_terminal_coding_system_internal, 1, 1, 0, "")
6682   (coding_system)
6683      Lisp_Object coding_system;
6684 {
6685   CHECK_SYMBOL (coding_system, 0);
6686   setup_coding_system (Fcheck_coding_system (coding_system),
6687                        &safe_terminal_coding);
6688   /* Characer composition should be disabled.  */
6689   safe_terminal_coding.composing = COMPOSITION_DISABLED;
6690   /* Error notification should be suppressed.  */
6691   terminal_coding.suppress_error = 1;
6692   safe_terminal_coding.src_multibyte = 1;
6693   safe_terminal_coding.dst_multibyte = 0;
6694   return Qnil;
6695 }
6696
6697 DEFUN ("terminal-coding-system",
6698        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
6699   "Return coding system specified for terminal output.")
6700   ()
6701 {
6702   return terminal_coding.symbol;
6703 }
6704
6705 DEFUN ("set-keyboard-coding-system-internal",
6706        Fset_keyboard_coding_system_internal,
6707        Sset_keyboard_coding_system_internal, 1, 1, 0, "")
6708   (coding_system)
6709      Lisp_Object coding_system;
6710 {
6711   CHECK_SYMBOL (coding_system, 0);
6712   setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
6713   /* Characer composition should be disabled.  */
6714   keyboard_coding.composing = COMPOSITION_DISABLED;
6715   return Qnil;
6716 }
6717
6718 DEFUN ("keyboard-coding-system",
6719        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
6720   "Return coding system specified for decoding keyboard input.")
6721   ()
6722 {
6723   return keyboard_coding.symbol;
6724 }
6725
6726 \f
6727 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
6728        Sfind_operation_coding_system,  1, MANY, 0,
6729   "Choose a coding system for an operation based on the target name.\n\
6730 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).\n\
6731 DECODING-SYSTEM is the coding system to use for decoding\n\
6732 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
6733 for encoding (in case OPERATION does encoding).\n\
6734 \n\
6735 The first argument OPERATION specifies an I/O primitive:\n\
6736   For file I/O, `insert-file-contents' or `write-region'.\n\
6737   For process I/O, `call-process', `call-process-region', or `start-process'.\n\
6738   For network I/O, `open-network-stream'.\n\
6739 \n\
6740 The remaining arguments should be the same arguments that were passed\n\
6741 to the primitive.  Depending on which primitive, one of those arguments\n\
6742 is selected as the TARGET.  For example, if OPERATION does file I/O,\n\
6743 whichever argument specifies the file name is TARGET.\n\
6744 \n\
6745 TARGET has a meaning which depends on OPERATION:\n\
6746   For file I/O, TARGET is a file name.\n\
6747   For process I/O, TARGET is a process name.\n\
6748   For network I/O, TARGET is a service name or a port number\n\
6749 \n\
6750 This function looks up what specified for TARGET in,\n\
6751 `file-coding-system-alist', `process-coding-system-alist',\n\
6752 or `network-coding-system-alist' depending on OPERATION.\n\
6753 They may specify a coding system, a cons of coding systems,\n\
6754 or a function symbol to call.\n\
6755 In the last case, we call the function with one argument,\n\
6756 which is a list of all the arguments given to this function.")
6757   (nargs, args)
6758      int nargs;
6759      Lisp_Object *args;
6760 {
6761   Lisp_Object operation, target_idx, target, val;
6762   register Lisp_Object chain;
6763
6764   if (nargs < 2)
6765     error ("Too few arguments");
6766   operation = args[0];
6767   if (!SYMBOLP (operation)
6768       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
6769     error ("Invalid first arguement");
6770   if (nargs < 1 + XINT (target_idx))
6771     error ("Too few arguments for operation: %s",
6772            XSYMBOL (operation)->name->data);
6773   target = args[XINT (target_idx) + 1];
6774   if (!(STRINGP (target)
6775         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
6776     error ("Invalid %dth argument", XINT (target_idx) + 1);
6777
6778   chain = ((EQ (operation, Qinsert_file_contents)
6779             || EQ (operation, Qwrite_region))
6780            ? Vfile_coding_system_alist
6781            : (EQ (operation, Qopen_network_stream)
6782               ? Vnetwork_coding_system_alist
6783               : Vprocess_coding_system_alist));
6784   if (NILP (chain))
6785     return Qnil;
6786
6787   for (; CONSP (chain); chain = XCDR (chain))
6788     {
6789       Lisp_Object elt;
6790       elt = XCAR (chain);
6791
6792       if (CONSP (elt)
6793           && ((STRINGP (target)
6794                && STRINGP (XCAR (elt))
6795                && fast_string_match (XCAR (elt), target) >= 0)
6796               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
6797         {
6798           val = XCDR (elt);
6799           /* Here, if VAL is both a valid coding system and a valid
6800              function symbol, we return VAL as a coding system.  */
6801           if (CONSP (val))
6802             return val;
6803           if (! SYMBOLP (val))
6804             return Qnil;
6805           if (! NILP (Fcoding_system_p (val)))
6806             return Fcons (val, val);
6807           if (! NILP (Ffboundp (val)))
6808             {
6809               val = call1 (val, Flist (nargs, args));
6810               if (CONSP (val))
6811                 return val;
6812               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
6813                 return Fcons (val, val);
6814             }
6815           return Qnil;
6816         }
6817     }
6818   return Qnil;
6819 }
6820
6821 DEFUN ("update-coding-systems-internal",  Fupdate_coding_systems_internal,
6822        Supdate_coding_systems_internal, 0, 0, 0,
6823   "Update internal database for ISO2022 and CCL based coding systems.\n\
6824 When values of any coding categories are changed, you must\n\
6825 call this function")
6826   ()
6827 {
6828   int i;
6829
6830   for (i = CODING_CATEGORY_IDX_EMACS_MULE; i < CODING_CATEGORY_IDX_MAX; i++)
6831     {
6832       Lisp_Object val;
6833
6834       val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value;
6835       if (!NILP (val))
6836         {
6837           if (! coding_system_table[i])
6838             coding_system_table[i] = ((struct coding_system *)
6839                                       xmalloc (sizeof (struct coding_system)));
6840           setup_coding_system (val, coding_system_table[i]);
6841         }
6842       else if (coding_system_table[i])
6843         {
6844           xfree (coding_system_table[i]);
6845           coding_system_table[i] = NULL;
6846         }
6847     }
6848
6849   return Qnil;
6850 }
6851
6852 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
6853        Sset_coding_priority_internal, 0, 0, 0,
6854   "Update internal database for the current value of `coding-category-list'.\n\
6855 This function is internal use only.")
6856   ()
6857 {
6858   int i = 0, idx;
6859   Lisp_Object val;
6860
6861   val = Vcoding_category_list;
6862
6863   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
6864     {
6865       if (! SYMBOLP (XCAR (val)))
6866         break;
6867       idx = XFASTINT (Fget (XCAR (val), Qcoding_category_index));
6868       if (idx >= CODING_CATEGORY_IDX_MAX)
6869         break;
6870       coding_priorities[i++] = (1 << idx);
6871       val = XCDR (val);
6872     }
6873   /* If coding-category-list is valid and contains all coding
6874      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
6875      the following code saves Emacs from crashing.  */
6876   while (i < CODING_CATEGORY_IDX_MAX)
6877     coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
6878
6879   return Qnil;
6880 }
6881
6882 #endif /* emacs */
6883
6884 \f
6885 /*** 9. Post-amble ***/
6886
6887 void
6888 init_coding_once ()
6889 {
6890   int i;
6891
6892   /* Emacs' internal format specific initialize routine.  */
6893   for (i = 0; i <= 0x20; i++)
6894     emacs_code_class[i] = EMACS_control_code;
6895   emacs_code_class[0x0A] = EMACS_linefeed_code;
6896   emacs_code_class[0x0D] = EMACS_carriage_return_code;
6897   for (i = 0x21 ; i < 0x7F; i++)
6898     emacs_code_class[i] = EMACS_ascii_code;
6899   emacs_code_class[0x7F] = EMACS_control_code;
6900   for (i = 0x80; i < 0xFF; i++)
6901     emacs_code_class[i] = EMACS_invalid_code;
6902   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
6903   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
6904   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
6905   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
6906
6907   /* ISO2022 specific initialize routine.  */
6908   for (i = 0; i < 0x20; i++)
6909     iso_code_class[i] = ISO_control_0;
6910   for (i = 0x21; i < 0x7F; i++)
6911     iso_code_class[i] = ISO_graphic_plane_0;
6912   for (i = 0x80; i < 0xA0; i++)
6913     iso_code_class[i] = ISO_control_1;
6914   for (i = 0xA1; i < 0xFF; i++)
6915     iso_code_class[i] = ISO_graphic_plane_1;
6916   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
6917   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
6918   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
6919   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
6920   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
6921   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
6922   iso_code_class[ISO_CODE_ESC] = ISO_escape;
6923   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
6924   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
6925   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
6926
6927   setup_coding_system (Qnil, &keyboard_coding);
6928   setup_coding_system (Qnil, &terminal_coding);
6929   setup_coding_system (Qnil, &safe_terminal_coding);
6930   setup_coding_system (Qnil, &default_buffer_file_coding);
6931
6932   bzero (coding_system_table, sizeof coding_system_table);
6933
6934   bzero (ascii_skip_code, sizeof ascii_skip_code);
6935   for (i = 0; i < 128; i++)
6936     ascii_skip_code[i] = 1;
6937
6938 #if defined (MSDOS) || defined (WINDOWSNT)
6939   system_eol_type = CODING_EOL_CRLF;
6940 #else
6941   system_eol_type = CODING_EOL_LF;
6942 #endif
6943
6944   inhibit_pre_post_conversion = 0;
6945 }
6946
6947 #ifdef emacs
6948
6949 void
6950 syms_of_coding ()
6951 {
6952   Qtarget_idx = intern ("target-idx");
6953   staticpro (&Qtarget_idx);
6954
6955   Qcoding_system_history = intern ("coding-system-history");
6956   staticpro (&Qcoding_system_history);
6957   Fset (Qcoding_system_history, Qnil);
6958
6959   /* Target FILENAME is the first argument.  */
6960   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
6961   /* Target FILENAME is the third argument.  */
6962   Fput (Qwrite_region, Qtarget_idx, make_number (2));
6963
6964   Qcall_process = intern ("call-process");
6965   staticpro (&Qcall_process);
6966   /* Target PROGRAM is the first argument.  */
6967   Fput (Qcall_process, Qtarget_idx, make_number (0));
6968
6969   Qcall_process_region = intern ("call-process-region");
6970   staticpro (&Qcall_process_region);
6971   /* Target PROGRAM is the third argument.  */
6972   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
6973
6974   Qstart_process = intern ("start-process");
6975   staticpro (&Qstart_process);
6976   /* Target PROGRAM is the third argument.  */
6977   Fput (Qstart_process, Qtarget_idx, make_number (2));
6978
6979   Qopen_network_stream = intern ("open-network-stream");
6980   staticpro (&Qopen_network_stream);
6981   /* Target SERVICE is the fourth argument.  */
6982   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
6983
6984   Qcoding_system = intern ("coding-system");
6985   staticpro (&Qcoding_system);
6986
6987   Qeol_type = intern ("eol-type");
6988   staticpro (&Qeol_type);
6989
6990   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
6991   staticpro (&Qbuffer_file_coding_system);
6992
6993   Qpost_read_conversion = intern ("post-read-conversion");
6994   staticpro (&Qpost_read_conversion);
6995
6996   Qpre_write_conversion = intern ("pre-write-conversion");
6997   staticpro (&Qpre_write_conversion);
6998
6999   Qno_conversion = intern ("no-conversion");
7000   staticpro (&Qno_conversion);
7001
7002   Qundecided = intern ("undecided");
7003   staticpro (&Qundecided);
7004
7005   Qcoding_system_p = intern ("coding-system-p");
7006   staticpro (&Qcoding_system_p);
7007
7008   Qcoding_system_error = intern ("coding-system-error");
7009   staticpro (&Qcoding_system_error);
7010
7011   Fput (Qcoding_system_error, Qerror_conditions,
7012         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
7013   Fput (Qcoding_system_error, Qerror_message,
7014         build_string ("Invalid coding system"));
7015
7016   Qcoding_category = intern ("coding-category");
7017   staticpro (&Qcoding_category);
7018   Qcoding_category_index = intern ("coding-category-index");
7019   staticpro (&Qcoding_category_index);
7020
7021   Vcoding_category_table
7022     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
7023   staticpro (&Vcoding_category_table);
7024   {
7025     int i;
7026     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
7027       {
7028         XVECTOR (Vcoding_category_table)->contents[i]
7029           = intern (coding_category_name[i]);
7030         Fput (XVECTOR (Vcoding_category_table)->contents[i],
7031               Qcoding_category_index, make_number (i));
7032       }
7033   }
7034
7035   Qtranslation_table = intern ("translation-table");
7036   staticpro (&Qtranslation_table);
7037   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
7038
7039   Qtranslation_table_id = intern ("translation-table-id");
7040   staticpro (&Qtranslation_table_id);
7041
7042   Qtranslation_table_for_decode = intern ("translation-table-for-decode");
7043   staticpro (&Qtranslation_table_for_decode);
7044
7045   Qtranslation_table_for_encode = intern ("translation-table-for-encode");
7046   staticpro (&Qtranslation_table_for_encode);
7047
7048   Qsafe_chars = intern ("safe-chars");
7049   staticpro (&Qsafe_chars);
7050
7051   Qchar_coding_system = intern ("char-coding-system");
7052   staticpro (&Qchar_coding_system);
7053
7054   /* Intern this now in case it isn't already done.
7055      Setting this variable twice is harmless.
7056      But don't staticpro it here--that is done in alloc.c.  */
7057   Qchar_table_extra_slots = intern ("char-table-extra-slots");
7058   Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
7059   Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (1));
7060
7061   Qvalid_codes = intern ("valid-codes");
7062   staticpro (&Qvalid_codes);
7063
7064   Qemacs_mule = intern ("emacs-mule");
7065   staticpro (&Qemacs_mule);
7066
7067   Qraw_text = intern ("raw-text");
7068   staticpro (&Qraw_text);
7069
7070   defsubr (&Scoding_system_p);
7071   defsubr (&Sread_coding_system);
7072   defsubr (&Sread_non_nil_coding_system);
7073   defsubr (&Scheck_coding_system);
7074   defsubr (&Sdetect_coding_region);
7075   defsubr (&Sdetect_coding_string);
7076   defsubr (&Sfind_coding_systems_region_internal);
7077   defsubr (&Sdecode_coding_region);
7078   defsubr (&Sencode_coding_region);
7079   defsubr (&Sdecode_coding_string);
7080   defsubr (&Sencode_coding_string);
7081   defsubr (&Sdecode_sjis_char);
7082   defsubr (&Sencode_sjis_char);
7083   defsubr (&Sdecode_big5_char);
7084   defsubr (&Sencode_big5_char);
7085   defsubr (&Sset_terminal_coding_system_internal);
7086   defsubr (&Sset_safe_terminal_coding_system_internal);
7087   defsubr (&Sterminal_coding_system);
7088   defsubr (&Sset_keyboard_coding_system_internal);
7089   defsubr (&Skeyboard_coding_system);
7090   defsubr (&Sfind_operation_coding_system);
7091   defsubr (&Supdate_coding_systems_internal);
7092   defsubr (&Sset_coding_priority_internal);
7093
7094   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
7095     "List of coding systems.\n\
7096 \n\
7097 Do not alter the value of this variable manually.  This variable should be\n\
7098 updated by the functions `make-coding-system' and\n\
7099 `define-coding-system-alias'.");
7100   Vcoding_system_list = Qnil;
7101
7102   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
7103     "Alist of coding system names.\n\
7104 Each element is one element list of coding system name.\n\
7105 This variable is given to `completing-read' as TABLE argument.\n\
7106 \n\
7107 Do not alter the value of this variable manually.  This variable should be\n\
7108 updated by the functions `make-coding-system' and\n\
7109 `define-coding-system-alias'.");
7110   Vcoding_system_alist = Qnil;
7111
7112   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
7113     "List of coding-categories (symbols) ordered by priority.");
7114   {
7115     int i;
7116
7117     Vcoding_category_list = Qnil;
7118     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
7119       Vcoding_category_list
7120         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
7121                  Vcoding_category_list);
7122   }
7123
7124   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
7125     "Specify the coding system for read operations.\n\
7126 It is useful to bind this variable with `let', but do not set it globally.\n\
7127 If the value is a coding system, it is used for decoding on read operation.\n\
7128 If not, an appropriate element is used from one of the coding system alists:\n\
7129 There are three such tables, `file-coding-system-alist',\n\
7130 `process-coding-system-alist', and `network-coding-system-alist'.");
7131   Vcoding_system_for_read = Qnil;
7132
7133   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
7134     "Specify the coding system for write operations.\n\
7135 Programs bind this variable with `let', but you should not set it globally.\n\
7136 If the value is a coding system, it is used for encoding of output,\n\
7137 when writing it to a file and when sending it to a file or subprocess.\n\
7138 \n\
7139 If this does not specify a coding system, an appropriate element\n\
7140 is used from one of the coding system alists:\n\
7141 There are three such tables, `file-coding-system-alist',\n\
7142 `process-coding-system-alist', and `network-coding-system-alist'.\n\
7143 For output to files, if the above procedure does not specify a coding system,\n\
7144 the value of `buffer-file-coding-system' is used.");
7145   Vcoding_system_for_write = Qnil;
7146
7147   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
7148     "Coding system used in the latest file or process I/O.");
7149   Vlast_coding_system_used = Qnil;
7150
7151   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
7152     "*Non-nil means always inhibit code conversion of end-of-line format.\n\
7153 See info node `Coding Systems' and info node `Text and Binary' concerning\n\
7154 such conversion.");
7155   inhibit_eol_conversion = 0;
7156
7157   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
7158     "Non-nil means process buffer inherits coding system of process output.\n\
7159 Bind it to t if the process output is to be treated as if it were a file\n\
7160 read from some filesystem.");
7161   inherit_process_coding_system = 0;
7162
7163   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
7164     "Alist to decide a coding system to use for a file I/O operation.\n\
7165 The format is ((PATTERN . VAL) ...),\n\
7166 where PATTERN is a regular expression matching a file name,\n\
7167 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
7168 If VAL is a coding system, it is used for both decoding and encoding\n\
7169 the file contents.\n\
7170 If VAL is a cons of coding systems, the car part is used for decoding,\n\
7171 and the cdr part is used for encoding.\n\
7172 If VAL is a function symbol, the function must return a coding system\n\
7173 or a cons of coding systems which are used as above.\n\
7174 \n\
7175 See also the function `find-operation-coding-system'\n\
7176 and the variable `auto-coding-alist'.");
7177   Vfile_coding_system_alist = Qnil;
7178
7179   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
7180     "Alist to decide a coding system to use for a process I/O operation.\n\
7181 The format is ((PATTERN . VAL) ...),\n\
7182 where PATTERN is a regular expression matching a program name,\n\
7183 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
7184 If VAL is a coding system, it is used for both decoding what received\n\
7185 from the program and encoding what sent to the program.\n\
7186 If VAL is a cons of coding systems, the car part is used for decoding,\n\
7187 and the cdr part is used for encoding.\n\
7188 If VAL is a function symbol, the function must return a coding system\n\
7189 or a cons of coding systems which are used as above.\n\
7190 \n\
7191 See also the function `find-operation-coding-system'.");
7192   Vprocess_coding_system_alist = Qnil;
7193
7194   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
7195     "Alist to decide a coding system to use for a network I/O operation.\n\
7196 The format is ((PATTERN . VAL) ...),\n\
7197 where PATTERN is a regular expression matching a network service name\n\
7198 or is a port number to connect to,\n\
7199 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
7200 If VAL is a coding system, it is used for both decoding what received\n\
7201 from the network stream and encoding what sent to the network stream.\n\
7202 If VAL is a cons of coding systems, the car part is used for decoding,\n\
7203 and the cdr part is used for encoding.\n\
7204 If VAL is a function symbol, the function must return a coding system\n\
7205 or a cons of coding systems which are used as above.\n\
7206 \n\
7207 See also the function `find-operation-coding-system'.");
7208   Vnetwork_coding_system_alist = Qnil;
7209
7210   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
7211     "Coding system to use with system messages.");
7212   Vlocale_coding_system = Qnil;
7213
7214   /* The eol mnemonics are reset in startup.el system-dependently.  */
7215   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
7216     "*String displayed in mode line for UNIX-like (LF) end-of-line format.");
7217   eol_mnemonic_unix = build_string (":");
7218
7219   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
7220     "*String displayed in mode line for DOS-like (CRLF) end-of-line format.");
7221   eol_mnemonic_dos = build_string ("\\");
7222
7223   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
7224     "*String displayed in mode line for MAC-like (CR) end-of-line format.");
7225   eol_mnemonic_mac = build_string ("/");
7226
7227   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
7228     "*String displayed in mode line when end-of-line format is not yet determined.");
7229   eol_mnemonic_undecided = build_string (":");
7230
7231   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
7232     "*Non-nil enables character translation while encoding and decoding.");
7233   Venable_character_translation = Qt;
7234
7235   DEFVAR_LISP ("standard-translation-table-for-decode",
7236     &Vstandard_translation_table_for_decode,
7237     "Table for translating characters while decoding.");
7238   Vstandard_translation_table_for_decode = Qnil;
7239
7240   DEFVAR_LISP ("standard-translation-table-for-encode",
7241     &Vstandard_translation_table_for_encode,
7242     "Table for translationg characters while encoding.");
7243   Vstandard_translation_table_for_encode = Qnil;
7244
7245   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
7246     "Alist of charsets vs revision numbers.\n\
7247 While encoding, if a charset (car part of an element) is found,\n\
7248 designate it with the escape sequence identifing revision (cdr part of the element).");
7249   Vcharset_revision_alist = Qnil;
7250
7251   DEFVAR_LISP ("default-process-coding-system",
7252                &Vdefault_process_coding_system,
7253     "Cons of coding systems used for process I/O by default.\n\
7254 The car part is used for decoding a process output,\n\
7255 the cdr part is used for encoding a text to be sent to a process.");
7256   Vdefault_process_coding_system = Qnil;
7257
7258   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
7259     "Table of extra Latin codes in the range 128..159 (inclusive).\n\
7260 This is a vector of length 256.\n\
7261 If Nth element is non-nil, the existence of code N in a file\n\
7262 \(or output of subprocess) doesn't prevent it to be detected as\n\
7263 a coding system of ISO 2022 variant which has a flag\n\
7264 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file\n\
7265 or reading output of a subprocess.\n\
7266 Only 128th through 159th elements has a meaning.");
7267   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
7268
7269   DEFVAR_LISP ("select-safe-coding-system-function",
7270                &Vselect_safe_coding_system_function,
7271     "Function to call to select safe coding system for encoding a text.\n\
7272 \n\
7273 If set, this function is called to force a user to select a proper\n\
7274 coding system which can encode the text in the case that a default\n\
7275 coding system used in each operation can't encode the text.\n\
7276 \n\
7277 The default value is `select-safe-coding-system' (which see).");
7278   Vselect_safe_coding_system_function = Qnil;
7279
7280   DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table,
7281     "Char-table containing safe coding systems of each characters.\n\
7282 Each element doesn't include such generic coding systems that can\n\
7283 encode any characters.   They are in the first extra slot.");
7284   Vchar_coding_system_table = Fmake_char_table (Qchar_coding_system, Qnil);
7285
7286   DEFVAR_BOOL ("inhibit-iso-escape-detection",
7287                &inhibit_iso_escape_detection,
7288     "If non-nil, Emacs ignores ISO2022's escape sequence on code detection.\n\
7289 \n\
7290 By default, on reading a file, Emacs tries to detect how the text is\n\
7291 encoded.  This code detection is sensitive to escape sequences.  If\n\
7292 the sequence is valid as ISO2022, the code is determined as one of\n\
7293 the ISO2022 encodings, and the file is decoded by the corresponding\n\
7294 coding system (e.g. `iso-2022-7bit').\n\
7295 \n\
7296 However, there may be a case that you want to read escape sequences in\n\
7297 a file as is.  In such a case, you can set this variable to non-nil.\n\
7298 Then, as the code detection ignores any escape sequences, no file is\n\
7299 detected as encoded in some ISO2022 encoding.  The result is that all\n\
7300 escape sequences become visible in a buffer.\n\
7301 \n\
7302 The default value is nil, and it is strongly recommended not to change\n\
7303 it.  That is because many Emacs Lisp source files that contain\n\
7304 non-ASCII characters are encoded by the coding system `iso-2022-7bit'\n\
7305 in Emacs's distribution, and they won't be decoded correctly on\n\
7306 reading if you suppress escape sequence detection.\n\
7307 \n\
7308 The other way to read escape sequences in a file without decoding is\n\
7309 to explicitly specify some coding system that doesn't use ISO2022's\n\
7310 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].");
7311   inhibit_iso_escape_detection = 0;
7312 }
7313
7314 char *
7315 emacs_strerror (error_number)
7316      int error_number;
7317 {
7318   char *str;
7319
7320   synchronize_system_messages_locale ();
7321   str = strerror (error_number);
7322
7323   if (! NILP (Vlocale_coding_system))
7324     {
7325       Lisp_Object dec = code_convert_string_norecord (build_string (str),
7326                                                       Vlocale_coding_system,
7327                                                       0);
7328       str = (char *) XSTRING (dec)->data;
7329     }
7330
7331   return str;
7332 }
7333
7334 #endif /* emacs */
7335