src/coding.c

   1 /* Coding system handler (conversion, detection, and etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /*** TABLE OF CONTENTS ***
  23
  24   1. Preamble
  25   2. Emacs' internal format (emacs-mule) handlers
  26   3. ISO2022 handlers
  27   4. Shift-JIS and BIG5 handlers
  28   5. End-of-line handlers
  29   6. C library functions
  30   7. Emacs Lisp library functions
  31   8. Post-amble
  32
  33 */
  34
  35 /*** GENERAL NOTE on CODING SYSTEM ***
  36
  37   Coding system is an encoding mechanism of one or more character
  38   sets.  Here's a list of coding systems which Emacs can handle.  When
  39   we say "decode", it means converting some other coding system to
  40   Emacs' internal format (emacs-mule), and when we say "encode",
  41   it means converting the coding system emacs-mule to some other
  42   coding system.
  43
  44   0. Emacs' internal format (emacs-mule)
  45
  46   Emacs itself holds a multi-lingual character in a buffer and a string
  47   in a special format.  Details are described in section 2.
  48
  49   1. ISO2022
  50
  51   The most famous coding system for multiple character sets.  X's
  52   Compound Text, various EUCs (Extended Unix Code), and coding
  53   systems used in Internet communication such as ISO-2022-JP are
  54   all variants of ISO2022.  Details are described in section 3.
  55
  56   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  57
  58   A coding system to encode character sets: ASCII, JISX0201, and
  59   JISX0208.  Widely used for PC's in Japan.  Details are described in
  60   section 4.
  61
  62   3. BIG5
  63
  64   A coding system to encode character sets: ASCII and Big5.  Widely
  65   used by Chinese (mainly in Taiwan and Hong Kong).  Details are
  66   described in section 4.  In this file, when we write "BIG5"
  67   (all uppercase), we mean the coding system, and when we write
  68   "Big5" (capitalized), we mean the character set.
  69
  70   4. Raw text
  71
  72   A coding system for a text containing random 8-bit code.  Emacs does
  73   no code conversion on such a text except for end-of-line format.
  74
  75   5. Other
  76
  77   If a user wants to read/write a text encoded in a coding system not
  78   listed above, he can supply a decoder and an encoder for it in CCL
  79   (Code Conversion Language) programs.  Emacs executes the CCL program
  80   while reading/writing.
  81
  82   Emacs represents a coding system by a Lisp symbol that has a property
  83   `coding-system'.  But, before actually using the coding system, the
  84   information about it is set in a structure of type `struct
  85   coding_system' for rapid processing.  See section 6 for more details.
  86
  87 */
  88
  89 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  90
  91   How end-of-line of a text is encoded depends on a system.  For
  92   instance, Unix's format is just one byte of `line-feed' code,
  93   whereas DOS's format is two-byte sequence of `carriage-return' and
  94   `line-feed' codes.  MacOS's format is usually one byte of
  95   `carriage-return'.
  96
  97   Since text characters encoding and end-of-line encoding are
  98   independent, any coding system described above can take
  99   any format of end-of-line.  So, Emacs has information of format of
 100   end-of-line in each coding-system.  See section 6 for more details.
 101
 102 */
 103
 104 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 105
 106   These functions check if a text between SRC and SRC_END is encoded
 107   in the coding system category XXX.  Each returns an integer value in
 108   which appropriate flag bits for the category XXX are set.  The flag
 109   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 110   template of these functions.  */
 111 #if 0
 112 int
 113 detect_coding_emacs_mule (src, src_end)
 114      unsigned char *src, *src_end;
 115 {
 116   ...
 117 }
 118 #endif
 119
 120 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 121
 122   These functions decode SRC_BYTES length text at SOURCE encoded in
 123   CODING to Emacs' internal format (emacs-mule).  The resulting text
 124   goes to a place pointed to by DESTINATION, the length of which
 125   should not exceed DST_BYTES.  These functions set the information of
 126   original and decoded texts in the members produced, produced_char,
 127   consumed, and consumed_char of the structure *CODING.
 128
 129   The return value is an integer (CODING_FINISH_XXX) indicating how
 130   the decoding finished.
 131
 132   DST_BYTES zero means that source area and destination area are
 133   overlapped, which means that we can produce a decoded text until it
 134   reaches at the head of not-yet-decoded source text.
 135
 136   Below is a template of these functions.  */
 137 #if 0
 138 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 139      struct coding_system *coding;
 140      unsigned char *source, *destination;
 141      int src_bytes, dst_bytes;
 142 {
 143   ...
 144 }
 145 #endif
 146
 147 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 148
 149   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 150   internal format (emacs-mule) to CODING.  The resulting text goes to
 151   a place pointed to by DESTINATION, the length of which should not
 152   exceed DST_BYTES.  These functions set the information of
 153   original and encoded texts in the members produced, produced_char,
 154   consumed, and consumed_char of the structure *CODING.
 155
 156   The return value is an integer (CODING_FINISH_XXX) indicating how
 157   the encoding finished.
 158
 159   DST_BYTES zero means that source area and destination area are
 160   overlapped, which means that we can produce an encoded text until it
 161   reaches the head of the not-yet-encoded source text.
 162
 163   Below is a template of these functions.  */
 164 #if 0
 165 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 166      struct coding_system *coding;
 167      unsigned char *source, *destination;
 168      int src_bytes, dst_bytes;
 169 {
 170   ...
 171 }
 172 #endif
 173
 174 /*** COMMONLY USED MACROS ***/
 175
 176 /* The following three macros ONE_MORE_BYTE, TWO_MORE_BYTES, and
 177    THREE_MORE_BYTES safely get one, two, and three bytes from the
 178    source text respectively.  If there are not enough bytes in the
 179    source, they jump to `label_end_of_loop' without consuming any.
 180    The caller should set variables `src' and `src_end' in advance.
 181    No pointer beyond `src_end' is ever formed by the checks.  */
 182 #define ONE_MORE_BYTE(c1)       \
 183   do {                          \
 184     if (src < src_end)          \
 185       c1 = *src++;              \
 186     else                        \
 187       goto label_end_of_loop;   \
 188   } while (0)
 189
 190 #define TWO_MORE_BYTES(c1, c2)  \
 191   do {                          \
 192     if (src_end - src > 1)      \
 193       c1 = *src++, c2 = *src++; \
 194     else                        \
 195       goto label_end_of_loop;   \
 196   } while (0)
 197
 198 #define THREE_MORE_BYTES(c1, c2, c3)            \
 199   do {                                          \
 200     if (src_end - src > 2)                      \
 201       c1 = *src++, c2 = *src++, c3 = *src++;    \
 202     else                                        \
 203       goto label_end_of_loop;                   \
 204   } while (0)
 205
 206 /* The following three macros DECODE_CHARACTER_ASCII,
 207    DECODE_CHARACTER_DIMENSION1, and DECODE_CHARACTER_DIMENSION2 put
 208    the multi-byte form of a character of each class at the place
 209    pointed by `dst'.  The caller should set the variable `dst' to
 210    point to an appropriate area and the variable `coding' to point to
 211    the coding-system of the currently decoding text in advance.  */
 212
 213 /* Decode one ASCII character C.  While composing, it is emitted as
 214    0xA0 followed by C | 0x80, and produced_char is left unchanged.  */
 215 #define DECODE_CHARACTER_ASCII(c)                               \
 216   do {                                                          \
 217     if (COMPOSING_P (coding->composing))                        \
 218       *dst++ = 0xA0, *dst++ = (c) | 0x80;                       \
 219     else                                                        \
 220       {                                                         \
 221         *dst++ = (c);                                           \
 222         coding->produced_char++;                                \
 223       }                                                         \
 224   } while (0)
 225
 226 /* Decode one DIMENSION1 character whose charset is CHARSET and whose
 227    position-code is C.  While composing, the base leading-code is
 228    emitted with 0x20 added.  CHARSET is evaluated more than once.  */
 229 #define DECODE_CHARACTER_DIMENSION1(charset, c)                         \
 230   do {                                                                  \
 231     unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset);   \
 232     if (COMPOSING_P (coding->composing))                                \
 233       *dst++ = leading_code + 0x20;                                     \
 234     else                                                                \
 235       {                                                                 \
 236         *dst++ = leading_code;                                          \
 237         coding->produced_char++;                                        \
 238       }                                                                 \
 239     if ((leading_code = CHARSET_LEADING_CODE_EXT (charset)) != 0)       \
 240       *dst++ = leading_code;                                            \
 241     *dst++ = (c) | 0x80;                                                \
 242   } while (0)
 243
 244 /* Decode one DIMENSION2 character whose charset is CHARSET and whose
 245    position-codes are C1 and C2.  C2 | 0x80 is appended to the
 246    DIMENSION1 form built from CHARSET and C1.  */
 247 #define DECODE_CHARACTER_DIMENSION2(charset, c1, c2)    \
 248   do {                                                  \
 249     DECODE_CHARACTER_DIMENSION1 (charset, c1);          \
 250     *dst++ = (c2) | 0x80;                               \
 251   } while (0)
 252
 253 \f
 254 /*** 1. Preamble ***/
 255
 256 #include <stdio.h>
 257
 258 #ifdef emacs
 259
 260 #include <config.h>
 261 #include "lisp.h"
 262 #include "buffer.h"
 263 #include "charset.h"
 264 #include "ccl.h"
 265 #include "coding.h"
 266 #include "window.h"
 267
 268 #else  /* not emacs */
 269
 270 #include "mulelib.h"
 271
 272 #endif /* not emacs */
 273 /* NOTE(review): the Q-prefixed objects below are presumably Lisp symbols.  */
 274 Lisp_Object Qcoding_system, Qeol_type;
 275 Lisp_Object Qbuffer_file_coding_system;
 276 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 277 Lisp_Object Qno_conversion, Qundecided;
 278 Lisp_Object Qcoding_system_history;
 279 Lisp_Object Qsafe_charsets;
 280
 281 extern Lisp_Object Qinsert_file_contents, Qwrite_region;  /* Defined elsewhere.  */
 282 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 283 Lisp_Object Qstart_process, Qopen_network_stream;
 284 Lisp_Object Qtarget_idx;
 285
 286 Lisp_Object Vselect_safe_coding_system_function;
 287
 288 /* Mnemonic character of each format of end-of-line.  */
 289 int eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 290 /* Mnemonic character to indicate that the format of end-of-line is
 291    not yet decided.  */
 292 int eol_mnemonic_undecided;
 293
 294 /* Format of end-of-line decided by system.  This is CODING_EOL_LF on
 295    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
 296 int system_eol_type;
 297
 298 #ifdef emacs
 299 /* The definitions up to the matching #endif exist only when `emacs' is defined.  */
 300 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 301
 302 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 303
 304 /* Coding systems emacs-mule and raw-text are for converting only
 305    end-of-line format.  */
 306 Lisp_Object Qemacs_mule, Qraw_text;
 307
 308 /* Coding-systems are passed between Emacs Lisp programs and C internal
 309    routines by the following three variables.  */
 310 /* Coding-system for reading files and receiving data from process.  */
 311 Lisp_Object Vcoding_system_for_read;
 312 /* Coding-system for writing files and sending data to process.  */
 313 Lisp_Object Vcoding_system_for_write;
 314 /* Coding-system actually used in the latest I/O.  */
 315 Lisp_Object Vlast_coding_system_used;
 316
 317 /* A vector of length 256 which contains information about special
 318    Latin codes (especially for dealing with Microsoft code).  */
 319 Lisp_Object Vlatin_extra_code_table;
 320
 321 /* Flag to inhibit code conversion of end-of-line format.  */
 322 int inhibit_eol_conversion;
 323
 324 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 325 int inherit_process_coding_system;
 326
 327 /* Coding system to be used to encode text for terminal display.  */
 328 struct coding_system terminal_coding;
 329
 330 /* Coding system to be used to encode text for terminal display when
 331    terminal coding system is nil.  */
 332 struct coding_system safe_terminal_coding;
 333
 334 /* Coding system of what is sent from terminal keyboard.  */
 335 struct coding_system keyboard_coding;
 336
 337 Lisp_Object Vfile_coding_system_alist;
 338 Lisp_Object Vprocess_coding_system_alist;
 339 Lisp_Object Vnetwork_coding_system_alist;
 340
 341 #endif /* emacs */
 342
 343 Lisp_Object Qcoding_category, Qcoding_category_index;
 344
 345 /* List of symbols `coding-category-xxx' ordered by priority.  */
 346 Lisp_Object Vcoding_category_list;
 347
 348 /* Table of coding categories (Lisp symbols).  */
 349 Lisp_Object Vcoding_category_table;
 350 /* Table of symbol names for each coding-category.  NOTE(review):
 351    presumably indexed by CODING_CATEGORY_IDX_XXX; confirm the order.  */
 352 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
 353   "coding-category-emacs-mule",
 354   "coding-category-sjis",
 355   "coding-category-iso-7",
 356   "coding-category-iso-7-tight",
 357   "coding-category-iso-8-1",
 358   "coding-category-iso-8-2",
 359   "coding-category-iso-7-else",
 360   "coding-category-iso-8-else",
 361   "coding-category-big5",
 362   "coding-category-raw-text",
 363   "coding-category-binary"
 364 };
 365
 366 /* Table of pointers to coding systems corresponding to each coding
 367    category, indexed by CODING_CATEGORY_IDX_XXX.  */
 368 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
 369
 370 /* Flag to tell if we look up the unification table on character code
 371    conversion.  */
 372 Lisp_Object Venable_character_unification;
 373 /* Standard unification table to look up on decoding (reading).  */
 374 Lisp_Object Vstandard_character_unification_table_for_decode;
 375 /* Standard unification table to look up on encoding (writing).  */
 376 Lisp_Object Vstandard_character_unification_table_for_encode;
 377
 378 Lisp_Object Qcharacter_unification_table;
 379 Lisp_Object Qcharacter_unification_table_for_decode;
 380 Lisp_Object Qcharacter_unification_table_for_encode;
 381
 382 /* Alist of charsets vs revision number.  */
 383 Lisp_Object Vcharset_revision_alist;
 384
 385 /* Default coding systems used for process I/O.  */
 386 Lisp_Object Vdefault_process_coding_system;
 387
 388 \f
 389 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 390
 391 /* Emacs' internal format for encoding multiple character sets is a
 392    kind of multi-byte encoding, i.e. characters are encoded by
 393    variable-length sequences of one-byte codes.  ASCII characters
 394    and control characters (e.g. `tab', `newline') are represented by
 395    one-byte sequences which are their ASCII codes, in the range 0x00
 396    through 0x7F.  The other characters are represented by a sequence
 397    of `base leading-code', optional `extended leading-code', and one
 398    or two `position-code's.  The length of the sequence is determined
 399    by the base leading-code.  Leading-code takes the range 0x80
 400    through 0x9F, whereas extended leading-code and position-code take
 401    the range 0xA0 through 0xFF.  See `charset.h' for more details
 402    about leading-code and position-code.
 403
 404    There's one exception to this rule.  Special leading-code
 405    `leading-code-composition' denotes that the following several
 406    characters should be composed into one character.  Leading-codes of
 407    components (except for ASCII) are added 0x20.  An ASCII character
 408    component is represented by a 2-byte sequence of `0xA0' and
 409    `ASCII-code + 0x80'.  See also the comments in `charset.h' for the
 410    details of composite character.  Hence, we can summarize the code
 411    range as follows:
 412
 413    --- CODE RANGE of Emacs' internal format ---
 414    (character set)      (range)
 415    ASCII                0x00 .. 0x7F
 416    ELSE (1st byte)      0x80 .. 0x9F
 417         (rest bytes)    0xA0 .. 0xFF
 418    ---------------------------------------------
 419
 420   */
 421
 422 enum emacs_code_class_type emacs_code_class[256];  /* Code class of each byte value.  */
 423 /* Go to the next statement only if *SRC is accessible and the code is
 424    0xA0 or greater (SRC is advanced past it).  Jump to
 425    `label_end_of_switch' at SRC_END; return 0 if the code is below 0xA0.  */
 426 #define CHECK_CODE_RANGE_A0_FF  \
 427   do {                          \
 428     if (src >= src_end)         \
 429       goto label_end_of_switch; \
 430     else if (*src++ < 0xA0)     \
 431       return 0;                 \
 432   } while (0)
 433
 434 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 435    Check if the text between SRC and SRC_END is encoded in Emacs'
 436    internal format.  If it is, return CODING_CATEGORY_MASK_EMACS_MULE,
 437    else return 0.  A sequence cut short at SRC_END is not a mismatch.  */
 438 int
 439 detect_coding_emacs_mule (src, src_end)
 440      unsigned char *src, *src_end;
 441 {
 442   unsigned char c;
 443   int composing = 0;            /* Nonzero inside a composition.  */
 444
 445   while (src < src_end)
 446     {
 447       c = *src++;
 448
 449       if (composing)
 450         {
 451           if (c < 0xA0)
 452             composing = 0;      /* Such a byte ends the composition.  */
 453           else
 454             c -= 0x20;          /* Component codes carry an extra 0x20.  */
 455         }
 456
 457       switch (emacs_code_class[c])
 458         {
 459         case EMACS_ascii_code:
 460         case EMACS_linefeed_code:
 461           break;
 462         /* ESC, SI and SO make the text fail detection.  */
 463         case EMACS_control_code:
 464           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
 465             return 0;
 466           break;
 467
 468         case EMACS_invalid_code:
 469           return 0;
 470         /* While composing, c == 0x80 came from 0xA0; check the next byte.  */
 471         case EMACS_leading_code_composition: /* c == 0x80 */
 472           if (composing)
 473             CHECK_CODE_RANGE_A0_FF;
 474           else
 475             composing = 1;
 476           break;
 477
 478         case EMACS_leading_code_4:
 479           CHECK_CODE_RANGE_A0_FF;
 480           /* fall down to check it two more times ...  */
 481
 482         case EMACS_leading_code_3:
 483           CHECK_CODE_RANGE_A0_FF;
 484           /* fall down to check it one more time ...  */
 485
 486         case EMACS_leading_code_2:
 487           CHECK_CODE_RANGE_A0_FF;
 488           break;
 489
 490         default:
 491         label_end_of_switch:    /* CHECK_CODE_RANGE_A0_FF jumps here at SRC_END.  */
 492           break;
 493         }
 494     }
 495   return CODING_CATEGORY_MASK_EMACS_MULE;
 496 }
 497
 498 \f
 499 /*** 3. ISO2022 handlers ***/
 500
 501 /* The following note describes the coding system ISO2022 briefly.
 502    Since the intention of this note is to help in understanding of
 503    the programs in this file, some parts are NOT ACCURATE or OVERLY
 504    SIMPLIFIED.  For the thorough understanding, please refer to the
 505    original document of ISO2022.
 506
 507    ISO2022 provides many mechanisms to encode several character sets
 508    in 7-bit and 8-bit environment.  If one chooses 7-bit environment,
 509    all text is encoded by codes of less than 128.  This may make the
 510    encoded text a little bit longer, but the text gets more stability
 511    to pass through several gateways (some of them strip off the MSB).
 512
 513    There are two kinds of character set: control character set and
 514    graphic character set.  The former contains control characters such
 515    as `newline' and `escape' to provide control functions (control
 516    functions are provided also by escape sequences).  The latter
 517    contains graphic characters such as 'A' and '-'.  Emacs recognizes
 518    two control character sets and many graphic character sets.
 519
 520    Graphic character sets are classified into one of the following
 521    four classes, DIMENSION1_CHARS94, DIMENSION1_CHARS96,
 522    DIMENSION2_CHARS94, DIMENSION2_CHARS96 according to the number of
 523    bytes (DIMENSION) and the number of characters in one dimension
 524    (CHARS) of the set.  In addition, each character set is assigned an
 525    identification tag (called "final character" and denoted as <F>
 526    here after) which is unique in each class.  <F> of each character
 527    set is decided by ECMA(*) when it is registered in ISO.  Code range
 528    of <F> is 0x30..0x7F (0x30..0x3F are for private use only).
 529
 530    Note (*): ECMA = European Computer Manufacturers Association
 531
 532    Here are examples of graphic character set [NAME(<F>)]:
 533         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
 534         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
 535         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
 536         o DIMENSION2_CHARS96 -- none for the moment
 537
 538    A code area (1byte=8bits) is divided into 4 areas, C0, GL, C1, and GR.
 539         C0 [0x00..0x1F] -- control character plane 0
 540         GL [0x20..0x7F] -- graphic character plane 0
 541         C1 [0x80..0x9F] -- control character plane 1
 542         GR [0xA0..0xFF] -- graphic character plane 1
 543
 544    A control character set is directly designated and invoked to C0 or
 545    C1 by an escape sequence.  The most common case is that ISO646's
 546    control character set is designated/invoked to C0 and ISO6429's
 547    control character set is designated/invoked to C1, and usually
 548    these designations/invocations are omitted in a coded text.  With
 549    7-bit environment, only C0 can be used, and a control character for
 550    C1 is encoded by an appropriate escape sequence to fit in the
 551    environment.  Every control character of C1 has a corresponding
 552    escape sequence defined.
 553
 554    A graphic character set is at first designated to one of four
 555    graphic registers (G0 through G3), then these graphic registers are
 556    invoked to GL or GR.  These designations and invocations can be
 557    done independently.  The most common case is that G0 is invoked to
 558    GL, G1 is invoked to GR, and ASCII is designated to G0, and usually
 559    these invocations and designations are omitted in a coded text.
 560    With 7-bit environment, only GL can be used.
 561
 562    When a graphic character set of CHARS94 is invoked to GL, code 0x20
 563    and 0x7F of GL area work as control characters SPACE and DEL
 564    respectively, and code 0xA0 and 0xFF of GR area should not be used.
 565
 566    There are two ways of invocation: locking-shift and single-shift.
 567    With locking-shift, the invocation lasts until the next different
 568    invocation, whereas with single-shift, the invocation works only
 569    for the following character and doesn't affect locking-shift.
 570    Invocations are done by the following control characters or escape
 571    sequences.
 572
 573    ----------------------------------------------------------------------
 574    function             control char    escape sequence description
 575    ----------------------------------------------------------------------
 576    SI  (shift-in)               0x0F    none            invoke G0 to GL
 577    SO  (shift-out)              0x0E    none            invoke G1 to GL
 578    LS2 (locking-shift-2)        none    ESC 'n'         invoke G2 into GL
 579    LS3 (locking-shift-3)        none    ESC 'o'         invoke G3 into GL
 580    SS2 (single-shift-2)         0x8E    ESC 'N'         invoke G2 into GL
 581    SS3 (single-shift-3)         0x8F    ESC 'O'         invoke G3 into GL
 582    ----------------------------------------------------------------------
 583    The first four are for locking-shift.  Control characters for these
 584    functions are defined by macros ISO_CODE_XXX in `coding.h'.
 585
 586    Designations are done by the following escape sequences.
 587    ----------------------------------------------------------------------
 588    escape sequence      description
 589    ----------------------------------------------------------------------
 590    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
 591    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
 592    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
 593    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
 594    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
 595    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
 596    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
 597    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
 598    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
 599    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
 600    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
 601    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
 602    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
 603    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
 604    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
 605    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
 606    ----------------------------------------------------------------------
 607
 608    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
 609    of dimension 1, chars 94, and final character <F>, and etc.
 610
 611    Note (*): Although these designations are not allowed in ISO2022,
 612    Emacs accepts them on decoding, and produces them on encoding
 613    CHARS96 character set in a coding system which is characterized as
 614    7-bit environment, non-locking-shift, and non-single-shift.
 615
 616    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
 617    '(' can be omitted.  We call this as "short-form" here after.
 618
 619    Now you may notice that there are a lot of ways for encoding the
 620    same multilingual text in ISO2022.  Actually, there exists many
 621    coding systems such as Compound Text (used in X's inter client
 622    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
 623    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
 624    localized platforms), and all of these are variants of ISO2022.
 625
 626    In addition to the above, Emacs handles two more kinds of escape
 627    sequences: ISO6429's direction specification and Emacs' private
 628    sequence for specifying character composition.
 629
 630    ISO6429's direction specification takes the following format:
 631         o CSI ']'      -- end of the current direction
 632         o CSI '0' ']'  -- end of the current direction
 633         o CSI '1' ']'  -- start of left-to-right text
 634         o CSI '2' ']'  -- start of right-to-left text
 635    The control character CSI (0x9B: control sequence introducer) is
 636    abbreviated to the escape sequence ESC '[' in 7-bit environment.
 637
 638    Character composition specification takes the following format:
 639         o ESC '0' -- start character composition
 640         o ESC '1' -- end character composition
 641    Since these are not standard escape sequences of any ISO, the use
 642    of them for these meaning is restricted to Emacs only.  */
 643
 644 enum iso_code_class_type iso_code_class[256];
 645 /* Nonzero if category IDX marks CHARSET safe or requests its designation.  */
 646 #define CHARSET_OK(idx, charset)                        \
 647   (coding_system_table[idx]->safe_charsets[charset]     \
 648    || (CODING_SPEC_ISO_REQUESTED_DESIGNATION            \
 649        (coding_system_table[idx], charset)              \
 650        != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
 651 /* Nonzero if category IDX's initial designation for index 1 is >= 0.  */
 652 #define SHIFT_OUT_OK(idx) \
 653   (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
 654
 655 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 656    Check if a text is encoded in ISO2022.  If it is, returns an
 657    integer in which appropriate flag bits any of:
 658         CODING_CATEGORY_MASK_ISO_7
 659         CODING_CATEGORY_MASK_ISO_7_TIGHT
 660         CODING_CATEGORY_MASK_ISO_8_1
 661         CODING_CATEGORY_MASK_ISO_8_2
 662         CODING_CATEGORY_MASK_ISO_7_ELSE
 663         CODING_CATEGORY_MASK_ISO_8_ELSE
 664    are set.  If a code which should never appear in ISO2022 is found,
 665    returns 0.  */
 666
 667 int
 668 detect_coding_iso2022 (src, src_end)
 669      unsigned char *src, *src_end;
 670 {
 671   int mask = CODING_CATEGORY_MASK_ISO;
 672   int mask_found = 0;
 673   int reg[4], shift_out = 0;
 674   int c, c1, i, charset;
 675
 676   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
 677   while (mask && src < src_end)
 678     {
 679       c = *src++;
 680       switch (c)
 681         {
 682         case ISO_CODE_ESC:
 683           if (src >= src_end)
 684             break;
 685           c = *src++;
 686           if (c >= '(' && c <= '/')
 687             {
 688               /* Designation sequence for a charset of dimension 1.  */
 689               if (src >= src_end)
 690                 break;
 691               c1 = *src++;
 692               if (c1 < ' ' || c1 >= 0x80
 693                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
 694                 /* Invalid designation sequence.  Just ignore.  */
 695                 break;
 696               reg[(c - '(') % 4] = charset;
 697             }
 698           else if (c == '$')
 699             {
 700               /* Designation sequence for a charset of dimension 2.  */
 701               if (src >= src_end)
 702                 break;
 703               c = *src++;
 704               if (c >= '@' && c <= 'B')
 705                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
 706                 reg[0] = charset = iso_charset_table[1][0][c];
 707               else if (c >= '(' && c <= '/')
 708                 {
 709                   if (src >= src_end)
 710                     break;
 711                   c1 = *src++;
 712                   if (c1 < ' ' || c1 >= 0x80
 713                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
 714                     /* Invalid designation sequence.  Just ignore.  */
 715                     break;
 716                   reg[(c - '(') % 4] = charset;
 717                 }
 718               else
 719                 /* Invalid designation sequence.  Just ignore.  */
 720                 break;
 721             }
 722           else if (c == 'N' || c == 'n')
 723             {
 724               if (shift_out == 0
 725                   && (reg[1] >= 0
 726                       || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
 727                       || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
 728                 {
 729                   /* Locking shift out.  */
 730                   mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 731                   mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 732                   shift_out = 1;
 733                 }
 734               break;
 735             }
 736           else if (c == 'O' || c == 'o')
 737             {
 738               if (shift_out == 1)
 739                 {
 740                   /* Locking shift in.  */
 741                   mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 742                   mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 743                   shift_out = 0;
 744                 }
 745               break;
 746             }
 747           else if (c == '0' || c == '1' || c == '2')
 748             /* Start/end composition.  Just ignore.  */
 749             break;
 750           else
 751             /* Invalid escape sequence.  Just ignore.  */
 752             break;
 753
 754           /* We found a valid designation sequence for CHARSET.  */
 755           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
 756           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset))
 757             mask_found |= CODING_CATEGORY_MASK_ISO_7;
 758           else
 759             mask &= ~CODING_CATEGORY_MASK_ISO_7;
 760           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset))
 761             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
 762           else
 763             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
 764           if (! CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset))
 765             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
 766           if (! CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset))
 767             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
 768           break;
 769
 770         case ISO_CODE_SO:
 771           if (shift_out == 0
 772               && (reg[1] >= 0
 773                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
 774                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
 775             {
 776               /* Locking shift out.  */
 777               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 778               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 779             }
 780           break;
 781
 782         case ISO_CODE_SI:
 783           if (shift_out == 1)
 784             {
 785               /* Locking shift in.  */
 786               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 787               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 788             }
 789           break;
 790
 791         case ISO_CODE_CSI:
 792         case ISO_CODE_SS2:
 793         case ISO_CODE_SS3:
 794           {
 795             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
 796
 797             if (c != ISO_CODE_CSI)
 798               {
 799                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 800                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 801                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 802                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 803                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 804                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 805               }
 806             if (VECTORP (Vlatin_extra_code_table)
 807                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
 808               {
 809                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 810                     & CODING_FLAG_ISO_LATIN_EXTRA)
 811                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 812                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 813                     & CODING_FLAG_ISO_LATIN_EXTRA)
 814                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 815               }
 816             mask &= newmask;
 817             mask_found |= newmask;
 818           }
 819           break;
 820
 821         default:
 822           if (c < 0x80)
 823             break;
 824           else if (c < 0xA0)
 825             {
 826               if (VECTORP (Vlatin_extra_code_table)
 827                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
 828                 {
 829                   int newmask = 0;
 830
 831                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 832                       & CODING_FLAG_ISO_LATIN_EXTRA)
 833                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 834                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 835                       & CODING_FLAG_ISO_LATIN_EXTRA)
 836                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 837                   mask &= newmask;
 838                   mask_found |= newmask;
 839                 }
 840               else
 841                 return 0;
 842             }
 843           else
 844             {
 845               unsigned char *src_begin = src;
 846
 847               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
 848                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
 849               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
 850               while (src < src_end && *src >= 0xA0)
 851                 src++;
 852               if ((src - src_begin - 1) & 1 && src < src_end)
 853                 mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
 854               else
 855                 mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
 856             }
 857           break;
 858         }
 859     }
 860
 861   return (mask & mask_found);
 862 }
 863
 864 /* Decode a character whose charset is CHARSET and whose 1st position
 865    code is C1.  If the dimension of CHARSET is 2, the 2nd position code
 866    is fetched from SRC into C2; a byte that is neither a plane-0 graphic
 867    code nor 0x20/0x7F is left unread and C2 becomes ' '.  If CHARSET is
 868    negative, we are decoding ill-formed text and just read C1 as is.  */
 869
 870 #define DECODE_ISO_CHARACTER(charset, c1)                               \
 871   do { /* uses coding, src, dst, c2, unification_table of the caller */ \
 872     int c_alt, charset_alt = (charset);                                 \
 873     if (COMPOSING_HEAD_P (coding->composing))                           \
 874       {                                                                 \
 875         *dst++ = LEADING_CODE_COMPOSITION;                              \
 876         if (COMPOSING_WITH_RULE_P (coding->composing))                  \
 877           /* To tell composition rules are embedded.  */                \
 878           *dst++ = 0xFF;                                                \
 879         coding->composing += 2;                                         \
 880       }                                                                 \
 881     if ((charset) >= 0)                                                 \
 882       {                                                                 \
 883         if (CHARSET_DIMENSION (charset) == 2)                           \
 884           {                                                             \
 885             ONE_MORE_BYTE (c2);                                         \
 886             if (iso_code_class[(c2) & 0x7F] != ISO_0x20_or_0x7F         \
 887                 && iso_code_class[(c2) & 0x7F] != ISO_graphic_plane_0)  \
 888               {                                                         \
 889                 src--;         /* unread it: not a 2nd position code */ \
 890                 c2 = ' ';      /* and substitute SPACE */               \
 891               }                                                         \
 892           }                                                             \
 893         if (!NILP (unification_table)                                   \
 894             && ((c_alt = unify_char (unification_table,                 \
 895                                      -1, (charset), c1, c2)) >= 0))     \
 896           SPLIT_CHAR (c_alt, charset_alt, c1, c2);                      \
 897       }                                                                 \
 898     if (charset_alt == CHARSET_ASCII || charset_alt < 0)                \
 899       DECODE_CHARACTER_ASCII (c1);                                      \
 900     else if (CHARSET_DIMENSION (charset_alt) == 1)                      \
 901       DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                    \
 902     else                                                                \
 903       DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);                \
 904     if (COMPOSING_WITH_RULE_P (coding->composing))                      \
 905       /* To tell a composition rule follows.  */                        \
 906       coding->composing = COMPOSING_WITH_RULE_RULE;                     \
 907   } while (0)
 908
 909 /* Designate the charset (DIMENSION, CHARS, FINAL_CHAR) to REG in CODING.  */
 910 #define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
 911   do { /* jumps to label_invalid_code if the charset is rejected */        \
 912     int charset = ISO_CHARSET_TABLE (make_number (dimension),              \
 913                                      make_number (chars),                  \
 914                                      make_number (final_char));            \
 915     if (charset >= 0                                                       \
 916         && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
 917             || coding->safe_charsets[charset]))                            \
 918       {                                                                    \
 919         if (coding->spec.iso2022.last_invalid_designation_register == 0    \
 920             && reg == 0                                                    \
 921             && charset == CHARSET_ASCII)                                   \
 922           {                                                                \
 923             /* We should insert this designation sequence as is so         \
 924                that it is surely written back to a file.  */               \
 925             coding->spec.iso2022.last_invalid_designation_register = -1;   \
 926             goto label_invalid_code;                                       \
 927           }                                                                \
 928         coding->spec.iso2022.last_invalid_designation_register = -1;       \
 929         if ((coding->mode & CODING_MODE_DIRECTION) /* direction bit set */ \
 930             && CHARSET_REVERSE_CHARSET (charset) >= 0)                     \
 931           charset = CHARSET_REVERSE_CHARSET (charset);                     \
 932         CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;               \
 933       }                                                                    \
 934     else /* negative charset, or one neither requested for REG nor safe */ \
 935       {                                                                    \
 936         coding->spec.iso2022.last_invalid_designation_register = reg;      \
 937         goto label_invalid_code;                                           \
 938       }                                                                    \
 939   } while (0)
 940
 941 /* Check if the current composing sequence contains only valid codes.
 942    If it doesn't end before SRC_END, return -1 (or SRC_END - SRC if
 943    CODING is in CODING_MODE_LAST_BLOCK).  Else, if it contains only
 944    valid codes, return 0.  Else return the length of the sequence.  */
 945
 946 int check_composing_code (coding, src, src_end)
 947      struct coding_system *coding;
 948      unsigned char *src, *src_end;
 949 {
 950   unsigned char *src_start = src;
 951   int invalid_code_found = 0;
 952   int charset, c, c1, dim;
 953
 954   while (src < src_end)
 955     {
 956       if (*src++ != ISO_CODE_ESC) continue;     /* only ESC sequences matter */
 957       if (src >= src_end) break;
 958       if ((c = *src++) == '1') /* end of composition */
 959         return (invalid_code_found ? src - src_start : 0);
 960       if (src + 2 >= src_end) break;    /* up to 2 more bytes are read below */
 961       if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
 962         invalid_code_found = 1;         /* CODING accepts no designation */
 963       else
 964         {
 965           dim = 0;                      /* table index: 0 = dimension 1 */
 966           if (c == '$')
 967             {
 968               dim = 1;                  /* table index: 1 = dimension 2 */
 969               c = (*src >= '@' && *src <= 'B') ? '(' : *src++; /* short form */
 970             }
 971           if (c >= '(' && c <= '/')
 972             {
 973               c1 = *src++;              /* the final character */
 974               if ((c1 < ' ' || c1 >= 0x80)
 975                   || (charset = iso_charset_table[dim][c >= ','][c1]) < 0
 976                   || ! coding->safe_charsets[charset]
 977                   || (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
 978                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
 979                 invalid_code_found = 1;
 980             }
 981           else
 982             invalid_code_found = 1;     /* not a designation sequence */
 983         }
 984     }
 985   return ((coding->mode & CODING_MODE_LAST_BLOCK) ? src_end - src_start : -1);
 986 }
 987
 988 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
 989
 990 int                             /* a CODING_FINISH_XXX code */
 991 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
 992      struct coding_system *coding;
 993      unsigned char *source, *destination;
 994      int src_bytes, dst_bytes;  /* if DST_BYTES is 0, DST must stay behind SRC */
 995 {
 996   unsigned char *src = source;
 997   unsigned char *src_end = source + src_bytes;
 998   unsigned char *dst = destination;
 999   unsigned char *dst_end = destination + dst_bytes;
1000   /* Since the maximum bytes produced by each loop is 7, we subtract 6
1001      from DST_END to assure that overflow checking is necessary only
1002      at the head of loop.  */
1003   unsigned char *adjusted_dst_end = dst_end - 6;
1004   int charset;
1005   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1006   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1007   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1008   Lisp_Object unification_table
1009     = coding->character_unification_table_for_decode;
1010   int result = CODING_FINISH_NORMAL;
1011
1012   if (!NILP (Venable_character_unification) && NILP (unification_table))
1013     unification_table = Vstandard_character_unification_table_for_decode;
1014
1015   coding->produced_char = 0;
1016   coding->fake_multibyte = 0;
1017   while (src < src_end && (dst_bytes
1018                            ? (dst < adjusted_dst_end)
1019                            : (dst < src - 6)))
1020     {
1021       /* SRC_BASE remembers the start position in source in each loop.
1022          The loop will be exited when there's not enough source text
1023          to analyze long escape sequence or 2-byte code (within macros
1024          ONE_MORE_BYTE or TWO_MORE_BYTES).  In that case, SRC is reset
1025          to SRC_BASE before exiting.  */
1026       unsigned char *src_base = src;
1027       int c1 = *src++, c2;
1028
1029       switch (iso_code_class [c1])
1030         {
1031         case ISO_0x20_or_0x7F:
1032           if (!coding->composing
1033               && (charset0 < 0 || CHARSET_CHARS (charset0) == 94))
1034             {
1035               /* This is SPACE or DEL.  */
1036               *dst++ = c1;
1037               coding->produced_char++;
1038               break;
1039             }
1040           /* This is a graphic character, we fall down ...  */
1041
1042         case ISO_graphic_plane_0:
1043           if (coding->composing == COMPOSING_WITH_RULE_RULE)
1044             {
1045               /* This is a composition rule.  */
1046               *dst++ = c1 | 0x80;
1047               coding->composing = COMPOSING_WITH_RULE_TAIL;
1048             }
1049           else
1050             DECODE_ISO_CHARACTER (charset0, c1);
1051           break;
1052
1053         case ISO_0xA0_or_0xFF:
1054           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1055               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1056             goto label_invalid_code;
1057           /* This is a graphic character, we fall down ... */
1058
1059         case ISO_graphic_plane_1:
1060           if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1061             goto label_invalid_code;
1062           else
1063             DECODE_ISO_CHARACTER (charset1, c1);
1064           break;
1065
1066         case ISO_control_code:
1067           /* All ISO2022 control characters in this class have the
1068              same representation in Emacs internal format.  */
1069           if (c1 == '\n'
1070               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1071               && (coding->eol_type == CODING_EOL_CR
1072                   || coding->eol_type == CODING_EOL_CRLF))
1073             {
1074               result = CODING_FINISH_INCONSISTENT_EOL;
1075               goto label_end_of_loop_2;
1076             }
1077           *dst++ = c1;
1078           coding->produced_char++;
1079           break;
1080
1081         case ISO_carriage_return:
1082           if (coding->eol_type == CODING_EOL_CR)
1083             *dst++ = '\n';
1084           else if (coding->eol_type == CODING_EOL_CRLF)
1085             {
1086               ONE_MORE_BYTE (c1);
1087               if (c1 == ISO_CODE_LF)
1088                 *dst++ = '\n';
1089               else
1090                 {
1091                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1092                     {
1093                       result = CODING_FINISH_INCONSISTENT_EOL;
1094                       goto label_end_of_loop_2;
1095                     }
1096                   src--;        /* a lone CR: re-read the next byte later */
1097                   *dst++ = '\r';
1098                 }
1099             }
1100           else
1101             *dst++ = c1;
1102           coding->produced_char++;
1103           break;
1104
1105         case ISO_shift_out:
1106           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1107               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1108             goto label_invalid_code;
1109           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1110           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1111           break;
1112
1113         case ISO_shift_in:
1114           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1115             goto label_invalid_code;
1116           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1117           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1118           break;
1119
1120         case ISO_single_shift_2_7:
1121         case ISO_single_shift_2:
1122           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1123             goto label_invalid_code;
1124           /* SS2 is handled as an escape sequence of ESC 'N' */
1125           c1 = 'N';
1126           goto label_escape_sequence;
1127
1128         case ISO_single_shift_3:
1129           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1130             goto label_invalid_code;
1131           /* SS3 is handled as an escape sequence of ESC 'O' */
1132           c1 = 'O';
1133           goto label_escape_sequence;
1134
1135         case ISO_control_sequence_introducer:
1136           /* CSI is handled as an escape sequence of ESC '[' ...  */
1137           c1 = '[';
1138           goto label_escape_sequence;
1139
1140         case ISO_escape:
1141           ONE_MORE_BYTE (c1);
1142         label_escape_sequence:
1143           /* Escape sequences handled by Emacs are invocation,
1144              designation, direction specification, and character
1145              composition specification.  */
1146           switch (c1)
1147             {
1148             case '&':           /* revision of following character set */
1149               ONE_MORE_BYTE (c1);
1150               if (!(c1 >= '@' && c1 <= '~'))
1151                 goto label_invalid_code;
1152               ONE_MORE_BYTE (c1);
1153               if (c1 != ISO_CODE_ESC)
1154                 goto label_invalid_code;
1155               ONE_MORE_BYTE (c1);
1156               goto label_escape_sequence;
1157
1158             case '$':           /* designation of 2-byte character set */
1159               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1160                 goto label_invalid_code;
1161               ONE_MORE_BYTE (c1);
1162               if (c1 >= '@' && c1 <= 'B')
1163                 {       /* designation of JISX0208.1978, GB2312.1980,
1164                                    or JISX0208.1980 */
1165                   DECODE_DESIGNATION (0, 2, 94, c1);
1166                 }
1167               else if (c1 >= 0x28 && c1 <= 0x2B)
1168                 {       /* designation of DIMENSION2_CHARS94 character set */
1169                   ONE_MORE_BYTE (c2);
1170                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1171                 }
1172               else if (c1 >= 0x2C && c1 <= 0x2F)
1173                 {       /* designation of DIMENSION2_CHARS96 character set */
1174                   ONE_MORE_BYTE (c2);
1175                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1176                 }
1177               else
1178                 goto label_invalid_code;
1179               break;
1180
1181             case 'n':           /* invocation of locking-shift-2 */
1182               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1183                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1184                 goto label_invalid_code;
1185               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1186               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1187               break;
1188
1189             case 'o':           /* invocation of locking-shift-3 */
1190               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1191                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1192                 goto label_invalid_code;
1193               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
1194               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1195               break;
1196
1197             case 'N':           /* invocation of single-shift-2 */
1198               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1199                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1200                 goto label_invalid_code;
1201               ONE_MORE_BYTE (c1);
1202               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
1203               DECODE_ISO_CHARACTER (charset, c1);
1204               break;
1205
1206             case 'O':           /* invocation of single-shift-3 */
1207               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1208                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1209                 goto label_invalid_code;
1210               ONE_MORE_BYTE (c1);
1211               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
1212               DECODE_ISO_CHARACTER (charset, c1);
1213               break;
1214
1215             case '0': case '2': /* start composing */
1216               /* Before processing composing, we must be sure that all
1217                  characters being composed are supported by CODING.
1218                  If not, we must give up composing and insert the
1219                  bunch of codes for composing as is without decoding.  */
1220               {
1221                 int result1;
1222
1223                 result1 = check_composing_code (coding, src, src_end);
1224                 if (result1 == 0)
1225                   coding->composing = (c1 == '0'
1226                                        ? COMPOSING_NO_RULE_HEAD
1227                                        : COMPOSING_WITH_RULE_HEAD);
1228                 else if (result1 > 0)
1229                   {
1230                     if (result1 + 2 < (dst_bytes ? dst_end : src_base) - dst)
1231                       {
1232                         bcopy (src_base, dst, result1 + 2); /* +2: ESC and C1 */
1233                         src += result1;
1234                         dst += result1 + 2;
1235                         coding->produced_char += result1 + 2;
1236                       }
1237                     else
1238                       {
1239                         result = CODING_FINISH_INSUFFICIENT_DST;
1240                         goto label_end_of_loop_2;
1241                       }
1242                   }
1243                 else
1244                   goto label_end_of_loop;
1245               }
1246               break;
1247
1248             case '1':           /* end composing */
1249               coding->composing = COMPOSING_NO;
1250               coding->produced_char++;
1251               break;
1252
1253             case '[':           /* specification of direction */
1254               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
1255                 goto label_invalid_code;
1256               /* For the moment, nested direction is not supported.
1257                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
1258                  left-to-right, and nonzero means right-to-left.  */
1259               ONE_MORE_BYTE (c1);
1260               switch (c1)
1261                 {
1262                 case ']':       /* end of the current direction */
1263                   coding->mode &= ~CODING_MODE_DIRECTION;
1264                   break;        /* `CSI ]' is complete; read nothing more */
1265                 case '0':       /* end of the current direction */
1266                 case '1':       /* start of left-to-right direction */
1267                   ONE_MORE_BYTE (c1);
1268                   if (c1 == ']')
1269                     coding->mode &= ~CODING_MODE_DIRECTION;
1270                   else
1271                     goto label_invalid_code;
1272                   break;
1273
1274                 case '2':       /* start of right-to-left direction */
1275                   ONE_MORE_BYTE (c1);
1276                   if (c1 == ']')
1277                     coding->mode |= CODING_MODE_DIRECTION;
1278                   else
1279                     goto label_invalid_code;
1280                   break;
1281
1282                 default:
1283                   goto label_invalid_code;
1284                 }
1285               break;
1286
1287             default:
1288               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1289                 goto label_invalid_code;
1290               if (c1 >= 0x28 && c1 <= 0x2B)
1291                 {       /* designation of DIMENSION1_CHARS94 character set */
1292                   ONE_MORE_BYTE (c2);
1293                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
1294                 }
1295               else if (c1 >= 0x2C && c1 <= 0x2F)
1296                 {       /* designation of DIMENSION1_CHARS96 character set */
1297                   ONE_MORE_BYTE (c2);
1298                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
1299                 }
1300               else
1301                 {
1302                   goto label_invalid_code;
1303                 }
1304             }
1305           /* We must update these variables now.  */
1306           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1307           charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1308           break;
1309
1310         label_invalid_code:     /* pass the bytes read in this loop as is */
1311           while (src_base < src)
1312             *dst++ = *src_base++;
1313           coding->fake_multibyte = 1;
1314         }
1315       continue;
1316
1317     label_end_of_loop:          /* not enough source text for this code */
1318       result = CODING_FINISH_INSUFFICIENT_SRC;
1319     label_end_of_loop_2:        /* RESULT is already set by the jumper */
1320       src = src_base;
1321       break;
1322     }
1323
1324   if (src < src_end)
1325     {
1326       if (result == CODING_FINISH_NORMAL)
1327         result = CODING_FINISH_INSUFFICIENT_DST;
1328       else if (result != CODING_FINISH_INCONSISTENT_EOL
1329                && coding->mode & CODING_MODE_LAST_BLOCK)
1330         {
1331           /* This is the last block of the text to be decoded.  We had
1332              better just flush out all remaining codes in the text
1333              although they are not valid characters.  */
1334           src_bytes = src_end - src;
1335           if (dst_bytes && (dst_end - dst < src_bytes))
1336             src_bytes = dst_end - dst;
1337           bcopy (src, dst, src_bytes);
1338           dst += src_bytes;
1339           src += src_bytes;
1340           coding->fake_multibyte = 1;
1341         }
1342     }
1343
1344   coding->consumed = coding->consumed_char = src - source;
1345   coding->produced = dst - destination;
1346   return result;
1347 }
1348
1349 /* ISO2022 encoding stuff.  */
1350
1351 /*
1352    It is not enough to say just "ISO2022" on encoding, we have to
1353    specify more details.  In Emacs, each coding system of ISO2022
1354    variant has the following specifications:
1355         1. Initial designation to G0 thru G3.
1356         2. Allows short-form designation?
1357         3. ASCII should be designated to G0 before control characters?
1358         4. ASCII should be designated to G0 at end of line?
1359         5. 7-bit environment or 8-bit environment?
1360         6. Use locking-shift?
1361         7. Use Single-shift?
1362    And the following two are only for Japanese:
1363         8. Use ASCII in place of JISX0201-1976-Roman?
1364         9. Use JISX0208-1983 in place of JISX0208-1978?
1365    These specifications are encoded in `coding->flags' as flag bits
1366    defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
1367    details.
1368 */
1369
1370 /* Produce codes (escape sequence) for designating CHARSET to graphic
1371    register REG, and record the designation in CODING.  The short form
1372    (no intermediate char) is used only if CODING allows it, REG is 0, and
1373    CHARSET is a 94-char set not of dimension 1 with final '@', 'A', or 'B'.  */
1374
1375 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
1376   do { /* writes through the DST pointer of the expansion site */       \
1377     unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset);        \
1378     char *intermediate_char_94 = "()*+";   /* indexed by REG */         \
1379     char *intermediate_char_96 = ",-./";   /* indexed by REG */         \
1380     int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset);    \
1381     if (revision < 255)                                                 \
1382       {                                                                 \
1383         *dst++ = ISO_CODE_ESC;                                          \
1384         *dst++ = '&';                                                   \
1385         *dst++ = '@' + revision;                                        \
1386       }                                                                 \
1387     *dst++ = ISO_CODE_ESC;                                              \
1388     if (CHARSET_DIMENSION (charset) == 1)                               \
1389       {                                                                 \
1390         if (CHARSET_CHARS (charset) == 94)                              \
1391           *dst++ = (unsigned char) (intermediate_char_94[reg]);         \
1392         else                                                            \
1393           *dst++ = (unsigned char) (intermediate_char_96[reg]);         \
1394       }                                                                 \
1395     else                                                                \
1396       {                                                                 \
1397         *dst++ = '$';                                                   \
1398         if (CHARSET_CHARS (charset) == 94)                              \
1399           {                                                             \
1400             if (! (coding->flags & CODING_FLAG_ISO_SHORT_FORM)          \
1401                 || reg != 0                                             \
1402                 || final_char < '@' || final_char > 'B')                \
1403               *dst++ = (unsigned char) (intermediate_char_94[reg]);     \
1404           }                                                             \
1405         else                                                            \
1406           *dst++ = (unsigned char) (intermediate_char_96[reg]);         \
1407       }                                                                 \
1408     *dst++ = final_char;                                                \
1409     CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;                \
1410   } while (0)
1411
1412 /* The following two macros produce codes (control character or escape
1413    sequence) for ISO2022 single-shift functions (single-shift-2 and
1414    single-shift-3).  */
1415
/* Emit single-shift-2.  A coding system without the seven-bit flag
   gets the one-byte code ISO_CODE_SS2 and has `fake_multibyte' set;
   a seven-bit one gets the escape sequence ESC `N'.  Either way the
   coding system is marked as being in the middle of a single shift.  */
#define ENCODE_SINGLE_SHIFT_2                                   \
  do {                                                          \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))         \
      {                                                         \
        *dst++ = ISO_CODE_SS2;                                  \
        coding->fake_multibyte = 1;                             \
      }                                                         \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'N';                                           \
      }                                                         \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)
1427
/* Emit single-shift-3.  A coding system without the seven-bit flag
   gets the one-byte code ISO_CODE_SS3 and has `fake_multibyte' set;
   a seven-bit one gets the escape sequence ESC `O'.  Either way the
   coding system is marked as being in the middle of a single shift.  */
#define ENCODE_SINGLE_SHIFT_3                                   \
  do {                                                          \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))         \
      {                                                         \
        *dst++ = ISO_CODE_SS3;                                  \
        coding->fake_multibyte = 1;                             \
      }                                                         \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'O';                                           \
      }                                                         \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)
1439
1440 /* The following four macros produce codes (control character or
1441    escape sequence) for ISO2022 locking-shift functions (shift-in,
1442    shift-out, locking-shift-2, and locking-shift-3).  */
1443
/* Shift-in: record graphic register 0 as the one invoked to graphic
   plane 0 and emit ISO_CODE_SI.  */
#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;         \
    *dst++ = ISO_CODE_SI;                               \
  } while (0)
1449
/* Shift-out: record graphic register 1 as the one invoked to graphic
   plane 0 and emit ISO_CODE_SO.  */
#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;         \
    *dst++ = ISO_CODE_SO;                               \
  } while (0)
1455
/* Locking-shift-2: record graphic register 2 as the one invoked to
   graphic plane 0 and emit the escape sequence ESC `n'.  */
#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;         \
    *dst++ = ISO_CODE_ESC;                              \
    *dst++ = 'n';                                       \
  } while (0)
1461
/* Locking-shift-3: record graphic register 3 as the one invoked to
   graphic plane 0 and emit the escape sequence ESC `o'.  */
#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;         \
    *dst++ = ISO_CODE_ESC;                              \
    *dst++ = 'o';                                       \
  } while (0)
1467
1468 /* Produce codes for a DIMENSION1 character whose character set is
1469    CHARSET and whose position-code is C1.  Designation and invocation
1470    sequences are also produced in advance if necessary.  */
1471
1472
/* Write the single position-code C1 of a character of CHARSET at DST.
   The body is a loop: each pass either emits the byte(s) and leaves,
   or asks encode_invocation_designation to make CHARSET reachable and
   tries again.  */
#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        /* A single shift was just produced; this byte completes it     \
           and ends the single-shifting state.  */                      \
        *dst++ = ((coding->flags & CODING_FLAG_ISO_SEVEN_BITS)          \
                  ? (c1 & 0x7F)                                         \
                  : (c1 | 0x80));                                       \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))           \
      {                                                                 \
        /* CHARSET occupies graphic plane 0: high bit clear.  */        \
        *dst++ = c1 & 0x7F;                                             \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))           \
      {                                                                 \
        /* CHARSET occupies graphic plane 1: high bit set.  */          \
        *dst++ = c1 | 0x80;                                             \
        break;                                                          \
      }                                                                 \
    if ((coding->flags & CODING_FLAG_ISO_SAFE)                          \
        && ! coding->safe_charsets[charset])                            \
      {                                                                 \
        /* CHARSET is not among the safe charsets, so the character     \
           is not encoded; one substitution code is written, or two     \
           when CHARSET has width 2.  */                                \
        *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
        if (CHARSET_WIDTH (charset) == 2)                               \
          *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
        break;                                                          \
      }                                                                 \
    /* CHARSET is on neither graphic plane yet.  Produce whatever       \
       designation and invocation it needs, then go round again to      \
       emit the character itself.  */                                   \
    dst = encode_invocation_designation (charset, coding, dst);         \
  } while (1)
1511
1512 /* Produce codes for a DIMENSION2 character whose character set is
1513    CHARSET and whose position-codes are C1 and C2.  Designation and
1514    invocation codes are also produced in advance if necessary.  */
1515
/* Write the two position-codes C1 and C2 of a character of CHARSET at
   DST.  The body is a loop: each pass either emits the bytes and
   leaves, or asks encode_invocation_designation to make CHARSET
   reachable and tries again.  */
#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        /* A single shift was just produced; these bytes complete it    \
           and end the single-shifting state.  */                       \
        if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
          {                                                             \
            *dst++ = c1 & 0x7F;                                         \
            *dst++ = c2 & 0x7F;                                         \
          }                                                             \
        else                                                            \
          {                                                             \
            *dst++ = c1 | 0x80;                                         \
            *dst++ = c2 | 0x80;                                         \
          }                                                             \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))           \
      {                                                                 \
        /* CHARSET occupies graphic plane 0: high bits clear.  */       \
        *dst++ = c1 & 0x7F;                                             \
        *dst++ = c2 & 0x7F;                                             \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))           \
      {                                                                 \
        /* CHARSET occupies graphic plane 1: high bits set.  */         \
        *dst++ = c1 | 0x80;                                             \
        *dst++ = c2 | 0x80;                                             \
        break;                                                          \
      }                                                                 \
    if ((coding->flags & CODING_FLAG_ISO_SAFE)                          \
        && ! coding->safe_charsets[charset])                            \
      {                                                                 \
        /* CHARSET is not among the safe charsets, so the character     \
           is not encoded; one substitution code is written, or two     \
           when CHARSET has width 2.  */                                \
        *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
        if (CHARSET_WIDTH (charset) == 2)                               \
          *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
        break;                                                          \
      }                                                                 \
    /* CHARSET is on neither graphic plane yet.  Produce whatever       \
       designation and invocation it needs, then go round again to      \
       emit the character itself.  */                                   \
    dst = encode_invocation_designation (charset, coding, dst);         \
  } while (1)
1554
/* Encode one character given as CHARSET plus position-codes C1 and C2.
   If a unification table is in effect and maps the character, the
   mapped character's charset and position-codes are used instead (C1
   and C2 are overwritten).  Two flag-controlled substitutions are
   keyed on the CHARSET argument as passed in: ASCII becomes
   charset_latin_jisx0201 under CODING_FLAG_ISO_USE_ROMAN, and
   charset_jisx0208 becomes charset_jisx0208_1978 under
   CODING_FLAG_ISO_USE_OLDJIS.  The character is counted in
   `consumed_char' unless a composition is in progress.  */
#define ENCODE_ISO_CHARACTER(charset, c1, c2)                             \
  do {                                                                    \
    int c_alt, charset_alt;                                               \
    if (!NILP (unification_table)                                         \
        && (c_alt = unify_char (unification_table, -1, charset, c1, c2),  \
            c_alt >= 0))                                                  \
      SPLIT_CHAR (c_alt, charset_alt, c1, c2);                            \
    else                                                                  \
      charset_alt = charset;                                              \
    if (CHARSET_DIMENSION (charset_alt) != 1)                             \
      {                                                                   \
        if ((coding->flags & CODING_FLAG_ISO_USE_OLDJIS)                  \
            && charset == charset_jisx0208)                               \
          charset_alt = charset_jisx0208_1978;                            \
        ENCODE_ISO_CHARACTER_DIMENSION2 (charset_alt, c1, c2);            \
      }                                                                   \
    else                                                                  \
      {                                                                   \
        if ((coding->flags & CODING_FLAG_ISO_USE_ROMAN)                   \
            && charset == CHARSET_ASCII)                                  \
          charset_alt = charset_latin_jisx0201;                           \
        ENCODE_ISO_CHARACTER_DIMENSION1 (charset_alt, c1);                \
      }                                                                   \
    if (! COMPOSING_P (coding->composing))                                \
      coding->consumed_char += 1;                                         \
  } while (0)
1581
1582 /* Produce designation and invocation codes at a place pointed by DST
1583    to use CHARSET.  The element `spec.iso2022' of *CODING is updated.
1584    Return new DST.  */
1585
1586 unsigned char *
1587 encode_invocation_designation (charset, coding, dst)
1588      int charset;
1589      struct coding_system *coding;
1590      unsigned char *dst;
1591 {
1592   int reg;                      /* graphic register number */
1593
1594   /* At first, check designations.  */
1595   for (reg = 0; reg < 4; reg++)
1596     if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
1597       break;
1598
1599   if (reg >= 4)
1600     {
1601       /* CHARSET is not yet designated to any graphic registers.  */
1602       /* At first check the requested designation.  */
1603       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1604       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
1605         /* Since CHARSET requests no special designation, designate it
1606            to graphic register 0.  */
1607         reg = 0;
1608
1609       ENCODE_DESIGNATION (charset, reg, coding);
1610     }
1611
1612   if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
1613       && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
1614     {
1615       /* Since the graphic register REG is not invoked to any graphic
1616          planes, invoke it to graphic plane 0.  */
1617       switch (reg)
1618         {
1619         case 0:                 /* graphic register 0 */
1620           ENCODE_SHIFT_IN;
1621           break;
1622
1623         case 1:                 /* graphic register 1 */
1624           ENCODE_SHIFT_OUT;
1625           break;
1626
1627         case 2:                 /* graphic register 2 */
1628           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1629             ENCODE_SINGLE_SHIFT_2;
1630           else
1631             ENCODE_LOCKING_SHIFT_2;
1632           break;
1633
1634         case 3:                 /* graphic register 3 */
1635           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1636             ENCODE_SINGLE_SHIFT_3;
1637           else
1638             ENCODE_LOCKING_SHIFT_3;
1639           break;
1640         }
1641     }
1642   return dst;
1643 }
1644
1645 /* The following three macros produce codes for indicating composition.  */
/* Start of a composition without rules: ESC `0'.  */
#define ENCODE_COMPOSITION_NO_RULE_START \
  (*dst++ = ISO_CODE_ESC, *dst++ = '0')
/* Start of a composition with rules: ESC `2'.  */
#define ENCODE_COMPOSITION_WITH_RULE_START \
  (*dst++ = ISO_CODE_ESC, *dst++ = '2')
/* End of a composition: ESC `1'.  */
#define ENCODE_COMPOSITION_END \
  (*dst++ = ISO_CODE_ESC, *dst++ = '1')
1649
1650 /* The following three macros produce codes for indicating direction
1651    of text.  */
/* Produce a control sequence introducer at DST: the two-byte escape
   sequence ESC `[' when CODING_FLAG_ISO_SEVEN_BITS is set in the
   coding system's flags, otherwise the single code ISO_CODE_CSI.

   The flag is tested as a bit (`&').  Comparing the whole flag word
   for equality would select ISO_CODE_CSI whenever any other flag is
   also set, which is wrong for a seven-bit coding system.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
  do {                                                  \
    if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
      *dst++ = ISO_CODE_ESC, *dst++ = '[';              \
    else                                                \
      *dst++ = ISO_CODE_CSI;                            \
  } while (0)
1659
/* Produce the right-to-left direction code: a control sequence
   introducer followed by `2' and `]'.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER expands to a `do ... while (0)'
   statement, so it cannot be an operand of the comma operator; the
   three steps are separate statements inside one block instead.  */
#define ENCODE_DIRECTION_R2L                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '2';                               \
    *dst++ = ']';                               \
  } while (0)
1662
/* Produce the left-to-right direction code: a control sequence
   introducer followed by `0' and `]'.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER expands to a `do ... while (0)'
   statement, so it cannot be an operand of the comma operator; the
   three steps are separate statements inside one block instead.  */
#define ENCODE_DIRECTION_L2R                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '0';                               \
    *dst++ = ']';                               \
  } while (0)
1665
1666 /* Produce codes for designation and invocation to reset the graphic
1667    planes and registers to initial state.  */
#define ENCODE_RESET_PLANE_AND_REGISTER                                     \
  do {                                                                      \
    int reset_reg = 0;                                                      \
    /* Bring graphic register 0 back onto graphic plane 0.  */              \
    if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                        \
      ENCODE_SHIFT_IN;                                                      \
    /* Re-designate every register that has an initial designation and      \
       currently holds some other charset.  */                              \
    while (reset_reg < 4)                                                   \
      {                                                                     \
        int reset_charset                                                   \
          = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reset_reg);        \
        if (reset_charset >= 0                                              \
            && (CODING_SPEC_ISO_DESIGNATION (coding, reset_reg)             \
                != reset_charset))                                          \
          ENCODE_DESIGNATION (reset_charset, reset_reg, coding);            \
        reset_reg++;                                                        \
      }                                                                     \
  } while (0)
1680
1681 /* Produce designation sequences of charsets in the line started from
1682    SRC to a place pointed by *DSTP, and update DSTP.
1683
1684    If the current block ends before any end-of-line, we may fail to
1685    find all the necessary designations.  */
1686
1687 void
1688 encode_designation_at_bol (coding, table, src, src_end, dstp)
1689      struct coding_system *coding;
1690      Lisp_Object table;
1691      unsigned char *src, *src_end, **dstp;
1692 {
1693   int charset, c, found = 0, reg;
1694   /* Table of charsets to be designated to each graphic register.  */
1695   int r[4];
1696   unsigned char *dst = *dstp;
1697
1698   for (reg = 0; reg < 4; reg++)
1699     r[reg] = -1;
1700
1701   while (src < src_end && *src != '\n' && found < 4)
1702     {
1703       int bytes = BYTES_BY_CHAR_HEAD (*src);
1704
1705       if (NILP (table))
1706         charset = CHARSET_AT (src);
1707       else
1708         {
1709           int c_alt;
1710           unsigned char c1, c2;
1711
1712           SPLIT_STRING(src, bytes, charset, c1, c2);
1713           if ((c_alt = unify_char (table, -1, charset, c1, c2)) >= 0)
1714             charset = CHAR_CHARSET (c_alt);
1715         }
1716
1717       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1718       if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
1719         {
1720           found++;
1721           r[reg] = charset;
1722         }
1723
1724       src += bytes;
1725     }
1726
1727   if (found)
1728     {
1729       for (reg = 0; reg < 4; reg++)
1730         if (r[reg] >= 0
1731             && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
1732           ENCODE_DESIGNATION (r[reg], reg, coding);
1733       *dstp = dst;
1734     }
1735 }
1736
1737 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".

        Encode the SRC_BYTES bytes at SOURCE into DESTINATION in ISO2022
        form, using and updating the state held in *CODING.  A nonzero
        DST_BYTES bounds the output; when DST_BYTES is zero the loop
        instead keeps DST more than 19 bytes behind SRC.

        On return CODING->consumed is the number of source bytes used,
        CODING->consumed_char has been counted up from zero as characters
        were handled, and CODING->produced and CODING->produced_char are
        both the number of bytes written.  The value is
        CODING_FINISH_NORMAL, CODING_FINISH_INSUFFICIENT_SRC when the loop
        was left through `label_end_of_loop', or
        CODING_FINISH_INSUFFICIENT_DST when the loop stopped normally with
        source text still remaining.  */
1738
1739 int
1740 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1741      struct coding_system *coding;
1742      unsigned char *source, *destination;
1743      int src_bytes, dst_bytes;
1744 {
1745   unsigned char *src = source;
1746   unsigned char *src_end = source + src_bytes;
1747   unsigned char *dst = destination;
1748   unsigned char *dst_end = destination + dst_bytes;
1749   /* Since the maximum bytes produced by each loop is 20, we subtract 19
1750      from DST_END to assure overflow checking is necessary only at the
1751      head of loop.  */
1752   unsigned char *adjusted_dst_end = dst_end - 19;
       /* The table used by ENCODE_ISO_CHARACTER: the coding system's own,
          or (below) the standard one when unification is enabled and the
          coding system has none.  */
1753   Lisp_Object unification_table
1754       = coding->character_unification_table_for_encode;
1755   int result = CODING_FINISH_NORMAL;
1756
1757   if (!NILP (Venable_character_unification) && NILP (unification_table))
1758     unification_table = Vstandard_character_unification_table_for_encode;
1759
1760   coding->consumed_char = 0;
1761   coding->fake_multibyte = 0;
       /* With a destination limit, run while DST is below DST_END - 19.
          Without one (DST_BYTES zero), run while DST is more than 19
          bytes behind SRC -- presumably the in-place case where output
          overwrites already-consumed source; confirm against callers.  */
1762   while (src < src_end && (dst_bytes
1763                            ? (dst < adjusted_dst_end)
1764                            : (dst < src - 19)))
1765     {
1766       /* SRC_BASE remembers the start position in source in each loop.
1767          The loop will be exited when there's not enough source text
1768          to analyze multi-byte codes (within macros ONE_MORE_BYTE,
1769          TWO_MORE_BYTES, and THREE_MORE_BYTES).  In that case, SRC is
1770          reset to SRC_BASE before exiting.  */
1771       unsigned char *src_base = src;
1772       int charset, c1, c2, c3, c4;
1773
1774       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
1775           && CODING_SPEC_ISO_BOL (coding))
1776         {
1777           /* We have to produce designation sequences if any now.  */
1778           encode_designation_at_bol (coding, unification_table,
1779                                      src, src_end, &dst);
1780           CODING_SPEC_ISO_BOL (coding) = 0;
1781         }
1782
1783       c1 = *src++;
1784       /* If we are seeing a component of a composite character, we are
1785          seeing a leading-code encoded irregularly for composition, or
1786          a composition rule if composing with rule.  We must set C1 to
1787          a normal leading-code or an ASCII code.  If we are not seeing
1788          a composite character, we must reset composition,
1789          designation, and invocation states.  */
1790       if (COMPOSING_P (coding->composing))
1791         {
1792           if (c1 < 0xA0)
1793             {
1794               /* We are not in a composite character any longer.  */
1795               coding->composing = COMPOSING_NO;
1796               ENCODE_RESET_PLANE_AND_REGISTER;
1797               ENCODE_COMPOSITION_END;
1798             }
1799           else
1800             {
1801               if (coding->composing == COMPOSING_WITH_RULE_RULE)
1802                 {
                       /* C1 is a composition rule: emit it with the high
                          bit cleared and expect a component next.  It is
                          not counted in `consumed_char'.  */
1803                   *dst++ = c1 & 0x7F;
1804                   coding->composing = COMPOSING_WITH_RULE_HEAD;
1805                   continue;
1806                 }
1807               else if (coding->composing == COMPOSING_WITH_RULE_HEAD)
1808                 coding->composing = COMPOSING_WITH_RULE_RULE;
1809               if (c1 == 0xA0)
1810                 {
1811                   /* This is an ASCII component.  */
1812                   ONE_MORE_BYTE (c1);
1813                   c1 &= 0x7F;
1814                 }
1815               else
1816                 /* This is a leading-code of non ASCII component.  */
1817                 c1 -= 0x20;
1818             }
1819         }
1820
1821       /* Now encode one character.  C1 is a control character, an
1822          ASCII character, or a leading-code of multi-byte character.  */
           /* NOTE(review): the switch has no default label; this assumes
              emacs_code_class only yields the classes listed below.  */
1823       switch (emacs_code_class[c1])
1824         {
1825         case EMACS_ascii_code:
               /* NOTE(review): C2 has not been assigned on this path, yet
                  ENCODE_ISO_CHARACTER passes it to unify_char when a
                  unification table is in effect -- confirm that
                  unify_char ignores it for one-byte charsets.  */
1826           ENCODE_ISO_CHARACTER (CHARSET_ASCII, c1, /* dummy */ c2);
1827           break;
1828
1829         case EMACS_control_code:
1830           if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1831             ENCODE_RESET_PLANE_AND_REGISTER;
1832           *dst++ = c1;
1833           coding->consumed_char++;
1834           break;
1835
1836         case EMACS_carriage_return_code:
1837           if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
1838             {
                   /* Outside selective-display mode a carriage return is
                      handled like any other control code.  */
1839               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1840                 ENCODE_RESET_PLANE_AND_REGISTER;
1841               *dst++ = c1;
1842               coding->consumed_char++;
1843               break;
1844             }
1845           /* fall down to treat '\r' as '\n' ...  */
1846
1847         case EMACS_linefeed_code:
1848           if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
1849             ENCODE_RESET_PLANE_AND_REGISTER;
               /* With INIT_AT_BOL the recorded designations are set back
                  to the initial ones without producing any bytes.  */
1850           if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
1851             bcopy (coding->spec.iso2022.initial_designation,
1852                    coding->spec.iso2022.current_designation,
1853                    sizeof coding->spec.iso2022.initial_designation);
               /* Write the end-of-line in the coding system's format: LF
                  (also when undecided), CR LF, or otherwise CR alone.  */
1854           if (coding->eol_type == CODING_EOL_LF
1855               || coding->eol_type == CODING_EOL_UNDECIDED)
1856             *dst++ = ISO_CODE_LF;
1857           else if (coding->eol_type == CODING_EOL_CRLF)
1858             *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
1859           else
1860             *dst++ = ISO_CODE_CR;
1861           CODING_SPEC_ISO_BOL (coding) = 1;
1862           coding->consumed_char++;
1863           break;
1864
1865         case EMACS_leading_code_2:
1866           ONE_MORE_BYTE (c2);
1867           if (c2 < 0xA0)
1868             {
1869               /* invalid sequence */
1870               *dst++ = c1;
1871               *dst++ = c2;
1872               coding->consumed_char += 2;
1873             }
1874           else
                 /* C1 is the charset and C2 the only position code.  */
1875             ENCODE_ISO_CHARACTER (c1, c2, /* dummy */ c3);
1876           break;
1877
1878         case EMACS_leading_code_3:
1879           TWO_MORE_BYTES (c2, c3);
1880           if (c2 < 0xA0 || c3 < 0xA0)
1881             {
1882               /* invalid sequence */
1883               *dst++ = c1;
1884               *dst++ = c2;
1885               *dst++ = c3;
1886               coding->consumed_char += 3;
1887             }
               /* Below LEADING_CODE_PRIVATE_11, C1 is the charset and C2,
                  C3 are two position codes.  Otherwise the charset is
                  taken from C2 and C3 is the only position code.  */
1888           else if (c1 < LEADING_CODE_PRIVATE_11)
1889             ENCODE_ISO_CHARACTER (c1, c2, c3);
1890           else
1891             ENCODE_ISO_CHARACTER (c2, c3, /* dummy */ c4);
1892           break;
1893
1894         case EMACS_leading_code_4:
1895           THREE_MORE_BYTES (c2, c3, c4);
1896           if (c2 < 0xA0 || c3 < 0xA0 || c4 < 0xA0)
1897             {
1898               /* invalid sequence */
1899               *dst++ = c1;
1900               *dst++ = c2;
1901               *dst++ = c3;
1902               *dst++ = c4;
1903               coding->consumed_char += 4;
1904             }
1905           else
                 /* The charset is taken from C2; C3 and C4 are the
                    position codes.  */
1906             ENCODE_ISO_CHARACTER (c2, c3, c4);
1907           break;
1908
1909         case EMACS_leading_code_composition:
1910           ONE_MORE_BYTE (c2);
1911           if (c2 < 0xA0)
1912             {
1913               /* invalid sequence */
1914               *dst++ = c1;
1915               *dst++ = c2;
1916               coding->consumed_char += 2;
1917             }
1918           else if (c2 == 0xFF)
1919             {
                   /* A second byte of 0xFF starts a composition with
                      rules.  */
1920               ENCODE_RESET_PLANE_AND_REGISTER;
1921               coding->composing = COMPOSING_WITH_RULE_HEAD;
1922               ENCODE_COMPOSITION_WITH_RULE_START;
1923               coding->consumed_char++;
1924             }
1925           else
1926             {
                   /* Any other second byte starts a composition without
                      rules.  */
1927               ENCODE_RESET_PLANE_AND_REGISTER;
1928               /* Rewind one byte because it is a character code of
1929                  composition elements.  */
1930               src--;
1931               coding->composing = COMPOSING_NO_RULE_HEAD;
1932               ENCODE_COMPOSITION_NO_RULE_START;
1933               coding->consumed_char++;
1934             }
1935           break;
1936
1937         case EMACS_invalid_code:
               /* Pass the byte through unchanged.  */
1938           *dst++ = c1;
1939           coding->consumed_char++;
1940           break;
1941         }
1942       continue;
           /* Nothing in this function jumps here explicitly; per the
              comment on SRC_BASE above, the ONE_MORE_BYTE,
              TWO_MORE_BYTES and THREE_MORE_BYTES macros (defined
              elsewhere) presumably do -- confirm.  The partly read
              character is given back by rewinding SRC.  */
1943     label_end_of_loop:
1944       result = CODING_FINISH_INSUFFICIENT_SRC;
1945       src = src_base;
1946       break;
1947     }
1948
       /* Source left over without having run out of source mid-character
          means the loop stopped for lack of destination room.  */
1949   if (src < src_end && result == CODING_FINISH_NORMAL)
1950     result = CODING_FINISH_INSUFFICIENT_DST;
1951
1952   /* If this is the last block of the text to be encoded, we must
1953      reset graphic planes and registers to the initial state, and
1954      flush out the carryover if any.  */
       /* NOTE(review): only the reset is done here; no code in this
          function flushes a carryover.  The reset also runs whatever
          RESULT is, so its output relies on the 19 bytes held back by
          the loop condition.  */
1955   if (coding->mode & CODING_MODE_LAST_BLOCK)
1956     ENCODE_RESET_PLANE_AND_REGISTER;
1957
1958   coding->consumed = src - source;
1959   coding->produced = coding->produced_char = dst - destination;
1960   return result;
1961 }
1962
1963 \f
1964 /*** 4. SJIS and BIG5 handlers ***/
1965
1966 /* Although SJIS and BIG5 are not ISO's coding system, they are used
1967    quite widely.  So, for the moment, Emacs supports them in the bare
1968    C code.  But, in the future, they may be supported only by CCL.  */
1969
1970 /* SJIS is a coding system encoding three character sets: ASCII, right
1971    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
1972    as is.  A character of charset katakana-jisx0201 is encoded by
1973    "position-code + 0x80".  A character of charset japanese-jisx0208
1974    is encoded in two bytes, but the two position-codes are divided and
1975    shifted so that they fit in the ranges below.
1976
1977    --- CODE RANGE of SJIS ---
1978    (character set)      (range)
1979    ASCII                0x00 .. 0x7F
1980    KATAKANA-JISX0201    0xA0 .. 0xDF
1981    JISX0208 (1st byte)  0x80 .. 0x9F and 0xE0 .. 0xFF
1982             (2nd byte)  0x40 .. 0xFF
1983    -------------------------------
1984
1985 */
1986
1987 /* BIG5 is a coding system encoding two character sets: ASCII and
1988    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
1989    character set and is encoded in two bytes.
1990
1991    --- CODE RANGE of BIG5 ---
1992    (character set)      (range)
1993    ASCII                0x00 .. 0x7F
1994    Big5 (1st byte)      0xA1 .. 0xFE
1995         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
1996    --------------------------
1997
1998    Since the number of characters in Big5 is larger than maximum
1999    characters in Emacs' charset (96x96), it can't be handled as one
2000    charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
2001    and `charset-big5-2'.  Both are DIMENSION2 and CHARS94.  The former
2002    contains frequently used characters and the latter contains less
2003    frequently used characters.  */
2004
2005 /* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
2006    are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
2007    C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
2008    format.  CHARSET is `charset_big5_1' or `charset_big5_2'.  */
2009
/* How many Big5 characters share one 1st byte: the span 0xA1 up to
   0xFF plus the span 0x40 up to 0x7F.  */
#define BIG5_SAME_ROW ((0xFF - 0xA1) + (0x7F - 0x40))
2012
/* Decode the BIG5 byte pair B1, B2 into CHARSET, C1 and C2.

   The pair is first turned into a linear index with BIG5_SAME_ROW
   codes per 1st byte; a 2nd byte below 0x7F is offset by 0x40 and any
   other by 0x62.  A 1st byte below 0xC9 selects charset_big5_1;
   otherwise charset_big5_2 is selected and the index is rebased to the
   start of row 0xC9.  The index is then split into two position-codes
   of (0xFF - 0xA1) values each, starting at 0x21.

   B1 and B2 may be arbitrary expressions (each argument is
   parenthesized); B2 is evaluated twice.  CHARSET, C1 and C2 must be
   assignable.  */
#define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = (((b1) - 0xA1) * BIG5_SAME_ROW                                  \
         + (b2) - ((b2) < 0x7F ? 0x40 : 0x62));                         \
    if ((b1) < 0xC9)                                                    \
      (charset) = charset_big5_1;                                       \
    else                                                                \
      {                                                                 \
        (charset) = charset_big5_2;                                     \
        temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                          \
      }                                                                 \
    (c1) = temp / (0xFF - 0xA1) + 0x21;                                 \
    (c2) = temp % (0xFF - 0xA1) + 0x21;                                 \
  } while (0)
2027
/* Encode position-codes C1, C2 of CHARSET (`charset_big5_1' or
   `charset_big5_2') into the BIG5 byte pair B1, B2.

   C1 and C2 are combined into a linear index of (0xFF - 0xA1) values
   per C1 step, counted from 0x21; for charset_big5_2 the index is
   moved up to the start of row 0xC9.  The index is then split with
   BIG5_SAME_ROW codes per 1st byte; a 2nd-byte offset below 0x3F is
   biased by 0x40 and any other by 0x62.

   CHARSET, C1 and C2 may be arbitrary expressions (each argument is
   parenthesized).  B1 and B2 must be assignable; B2 is evaluated more
   than once.  */
#define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);                  \
    if ((charset) == charset_big5_2)                                    \
      temp += BIG5_SAME_ROW * (0xC9 - 0xA1);                            \
    (b1) = temp / BIG5_SAME_ROW + 0xA1;                                 \
    (b2) = temp % BIG5_SAME_ROW;                                        \
    (b2) += (b2) < 0x3F ? 0x40 : 0x62;                                  \
  } while (0)
2037
/* Decode one character given as CHARSET plus position-codes C1 and C2.
   If a unification table is in effect and maps the character, the
   mapped character's charset and position-codes are used instead (C1
   and C2 are overwritten).  The result is handed to the ASCII,
   DIMENSION1 or DIMENSION2 decoding macro; a negative charset is
   treated like ASCII.  */
#define DECODE_SJIS_BIG5_CHARACTER(charset, c1, c2)                     \
  do {                                                                  \
    int c_alt, charset_alt = (charset);                                 \
    if (!NILP (unification_table))                                      \
      {                                                                 \
        c_alt = unify_char (unification_table, -1, (charset), c1, c2);  \
        if (c_alt >= 0)                                                 \
          SPLIT_CHAR (c_alt, charset_alt, c1, c2);                      \
      }                                                                 \
    if (charset_alt < 0 || charset_alt == CHARSET_ASCII)                \
      DECODE_CHARACTER_ASCII (c1);                                      \
    else if (CHARSET_DIMENSION (charset_alt) != 1)                      \
      DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);                \
    else                                                                \
      DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                    \
  } while (0)
2052
/* Encode the Emacs character (CHARSET, C1, C2) into SJIS (if the
   variable `sjis_p' is nonzero) or BIG5 and store the bytes at DST,
   advancing DST.  C1 and C2 must be lvalues; they are modified.

   When `unification_table' is non-nil and unify_char returns a
   non-negative code, that code is split into a charset and position
   codes which replace the arguments.  Then:

     - an ASCII character is stored as the single byte C1;
     - a one-dimensional character is stored as C1 alone for
       charset_katakana_jisx0201 under SJIS, otherwise as the charset
       byte followed by C1;
     - a two-dimensional character is converted with ENCODE_SJIS
       (charset_jisx0208 under SJIS) or ENCODE_BIG5 (charset_big5_1 or
       charset_big5_2 under BIG5); any other charset is stored as the
       charset byte followed by C1 and C2 with the high bits cleared.

   At most 3 bytes are stored.  `coding->fake_multibyte' is set on the
   branches below that assign it, and `coding->consumed_char' is
   incremented once per character.

   The expansion is a single statement without a trailing semicolon,
   so the caller supplies the `;' and the macro can be used safely in
   an unbraced `if ... else'.  */

#define ENCODE_SJIS_BIG5_CHARACTER(charset, c1, c2)                       \
  do {                                                                    \
    int c_alt, charset_alt;                                               \
    if (!NILP (unification_table)                                         \
        && ((c_alt = unify_char (unification_table,                       \
                                 -1, (charset), c1, c2))                  \
            >= 0))                                                        \
      SPLIT_CHAR (c_alt, charset_alt, c1, c2);                            \
    else                                                                  \
      charset_alt = (charset);                                            \
    if (charset_alt == charset_ascii)                                     \
      *dst++ = c1;                                                        \
    else if (CHARSET_DIMENSION (charset_alt) == 1)                        \
      {                                                                   \
        if (sjis_p && charset_alt == charset_katakana_jisx0201)           \
          *dst++ = c1;                                                    \
        else                                                              \
          {                                                               \
            *dst++ = charset_alt, *dst++ = c1;                            \
            coding->fake_multibyte = 1;                                   \
          }                                                               \
      }                                                                   \
    else                                                                  \
      {                                                                   \
        c1 &= 0x7F, c2 &= 0x7F;                                           \
        if (sjis_p && charset_alt == charset_jisx0208)                    \
          {                                                               \
            unsigned char s1, s2;                                         \
                                                                          \
            ENCODE_SJIS (c1, c2, s1, s2);                                 \
            *dst++ = s1, *dst++ = s2;                                     \
            coding->fake_multibyte = 1;                                   \
          }                                                               \
        else if (!sjis_p                                                  \
                 && (charset_alt == charset_big5_1                        \
                     || charset_alt == charset_big5_2))                   \
          {                                                               \
            unsigned char b1, b2;                                         \
                                                                          \
            ENCODE_BIG5 (charset_alt, c1, c2, b1, b2);                    \
            *dst++ = b1, *dst++ = b2;                                     \
          }                                                               \
        else                                                              \
          {                                                               \
            /* Not encodable here: pass through as three bytes.  */      \
            *dst++ = charset_alt, *dst++ = c1, *dst++ = c2;               \
            coding->fake_multibyte = 1;                                   \
          }                                                               \
      }                                                                   \
    coding->consumed_char++;                                              \
  } while (0)
2102
2103 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2104    Check if a text is encoded in SJIS.  If it is, return
2105    CODING_CATEGORY_MASK_SJIS, else return 0.  */
2106
2107 int
2108 detect_coding_sjis (src, src_end)
2109      unsigned char *src, *src_end;
2110 {
2111   unsigned char c;
2112
2113   while (src < src_end)
2114     {
2115       c = *src++;
2116       if ((c >= 0x80 && c < 0xA0) || c >= 0xE0)
2117         {
2118           if (src < src_end && *src++ < 0x40)
2119             return 0;
2120         }
2121     }
2122   return CODING_CATEGORY_MASK_SJIS;
2123 }
2124
2125 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2126    Check if a text is encoded in BIG5.  If it is, return
2127    CODING_CATEGORY_MASK_BIG5, else return 0.  */
2128
2129 int
2130 detect_coding_big5 (src, src_end)
2131      unsigned char *src, *src_end;
2132 {
2133   unsigned char c;
2134
2135   while (src < src_end)
2136     {
2137       c = *src++;
2138       if (c >= 0xA1)
2139         {
2140           if (src >= src_end)
2141             break;
2142           c = *src++;
2143           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
2144             return 0;
2145         }
2146     }
2147   return CODING_CATEGORY_MASK_BIG5;
2148 }
2149
2150 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2151    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
2152
2153 int
2154 decode_coding_sjis_big5 (coding, source, destination,
2155                          src_bytes, dst_bytes, sjis_p)
2156      struct coding_system *coding;
2157      unsigned char *source, *destination;
2158      int src_bytes, dst_bytes;
2159      int sjis_p;
2160 {
2161   unsigned char *src = source;
2162   unsigned char *src_end = source + src_bytes;
2163   unsigned char *dst = destination;
2164   unsigned char *dst_end = destination + dst_bytes;
2165   /* Since the maximum bytes produced by each loop is 4, we subtract 3
2166      from DST_END to assure overflow checking is necessary only at the
2167      head of loop.  */
2168   unsigned char *adjusted_dst_end = dst_end - 3;
2169   Lisp_Object unification_table
2170       = coding->character_unification_table_for_decode;
2171   int result = CODING_FINISH_NORMAL;
2172
2173   if (!NILP (Venable_character_unification) && NILP (unification_table))
2174     unification_table = Vstandard_character_unification_table_for_decode;
2175
2176   coding->produced_char = 0;
2177   coding->fake_multibyte = 0;
2178   while (src < src_end && (dst_bytes
2179                            ? (dst < adjusted_dst_end)
2180                            : (dst < src - 3)))
2181     {
2182       /* SRC_BASE remembers the start position in source in each loop.
2183          The loop will be exited when there's not enough source text
2184          to analyze two-byte character (within macro ONE_MORE_BYTE).
2185          In that case, SRC is reset to SRC_BASE before exiting.  */
2186       unsigned char *src_base = src;
2187       unsigned char c1 = *src++, c2, c3, c4;
2188
2189       if (c1 < 0x20)
2190         {
2191           if (c1 == '\r')
2192             {
2193               if (coding->eol_type == CODING_EOL_CRLF)
2194                 {
2195                   ONE_MORE_BYTE (c2);
2196                   if (c2 == '\n')
2197                     *dst++ = c2;
2198                   else if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2199                     {
2200                       result = CODING_FINISH_INCONSISTENT_EOL;
2201                       goto label_end_of_loop_2;
2202                     }
2203                   else
2204                     /* To process C2 again, SRC is subtracted by 1.  */
2205                     *dst++ = c1, src--;
2206                 }
2207               else if (coding->eol_type == CODING_EOL_CR)
2208                 *dst++ = '\n';
2209               else
2210                 *dst++ = c1;
2211             }
2212           else if (c1 == '\n'
2213                    && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2214                    && (coding->eol_type == CODING_EOL_CR
2215                        || coding->eol_type == CODING_EOL_CRLF))
2216             {
2217               result = CODING_FINISH_INCONSISTENT_EOL;
2218               goto label_end_of_loop_2;
2219             }
2220           else
2221             *dst++ = c1;
2222           coding->produced_char++;
2223         }
2224       else if (c1 < 0x80)
2225         DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2226       else if (c1 < 0xA0)
2227         {
2228           /* SJIS -> JISX0208 */
2229           if (sjis_p)
2230             {
2231               ONE_MORE_BYTE (c2);
2232               if (c2 >= 0x40)
2233                 {
2234                   DECODE_SJIS (c1, c2, c3, c4);
2235                   DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
2236                 }
2237               else
2238                 goto label_invalid_code_2;
2239             }
2240           else
2241             goto label_invalid_code_1;
2242         }
2243       else if (c1 < 0xE0)
2244         {
2245           /* SJIS -> JISX0201-Kana, BIG5 -> Big5 */
2246           if (sjis_p)
2247             DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1,
2248                                         /* dummy */ c2);
2249           else
2250             {
2251               int charset;
2252
2253               ONE_MORE_BYTE (c2);
2254               if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
2255                 {
2256                   DECODE_BIG5 (c1, c2, charset, c3, c4);
2257                   DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
2258                 }
2259               else
2260                 goto label_invalid_code_2;
2261             }
2262         }
2263       else                      /* C1 >= 0xE0 */
2264         {
2265           /* SJIS -> JISX0208, BIG5 -> Big5 */
2266           if (sjis_p)
2267             {
2268               ONE_MORE_BYTE (c2);
2269               if (c2 >= 0x40)
2270                 {
2271                   DECODE_SJIS (c1, c2, c3, c4);
2272                   DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
2273                 }
2274               else
2275                 goto label_invalid_code_2;
2276             }
2277           else
2278             {
2279               int charset;
2280
2281               ONE_MORE_BYTE (c2);
2282               if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
2283                 {
2284                   DECODE_BIG5 (c1, c2, charset, c3, c4);
2285                   DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
2286                 }
2287               else
2288                 goto label_invalid_code_2;
2289             }
2290         }
2291       continue;
2292
2293     label_invalid_code_1:
2294       *dst++ = c1;
2295       coding->produced_char++;
2296       coding->fake_multibyte = 1;
2297       continue;
2298
2299     label_invalid_code_2:
2300       *dst++ = c1; *dst++= c2;
2301       coding->produced_char += 2;
2302       coding->fake_multibyte = 1;
2303       continue;
2304
2305     label_end_of_loop:
2306       result = CODING_FINISH_INSUFFICIENT_SRC;
2307     label_end_of_loop_2:
2308       src = src_base;
2309       break;
2310     }
2311
2312   if (src < src_end)
2313     {
2314       if (result == CODING_FINISH_NORMAL)
2315         result = CODING_FINISH_INSUFFICIENT_DST;
2316       else if (result != CODING_FINISH_INCONSISTENT_EOL
2317                && coding->mode & CODING_MODE_LAST_BLOCK)
2318         {
2319           src_bytes = src_end - src;
2320           if (dst_bytes && (dst_end - dst < src_bytes))
2321             src_bytes = dst_end - dst;
2322           bcopy (dst, src, src_bytes);
2323           src += src_bytes;
2324           dst += src_bytes;
2325           coding->fake_multibyte = 1;
2326         }
2327     }
2328
2329   coding->consumed = coding->consumed_char = src - source;
2330   coding->produced = dst - destination;
2331   return result;
2332 }
2333
2334 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
2335    This function can encode `charset_ascii', `charset_katakana_jisx0201',
2336    `charset_jisx0208', `charset_big5_1', and `charset_big5-2'.  We are
2337    sure that all these charsets are registered as official charset
2338    (i.e. do not have extended leading-codes).  Characters of other
2339    charsets are produced without any encoding.  If SJIS_P is 1, encode
2340    SJIS text, else encode BIG5 text.  */
2341
2342 int
2343 encode_coding_sjis_big5 (coding, source, destination,
2344                          src_bytes, dst_bytes, sjis_p)
2345      struct coding_system *coding;
2346      unsigned char *source, *destination;
2347      int src_bytes, dst_bytes;
2348      int sjis_p;
2349 {
2350   unsigned char *src = source;
2351   unsigned char *src_end = source + src_bytes;
2352   unsigned char *dst = destination;
2353   unsigned char *dst_end = destination + dst_bytes;
2354   /* Since the maximum bytes produced by each loop is 2, we subtract 1
2355      from DST_END to assure overflow checking is necessary only at the
2356      head of loop.  */
2357   unsigned char *adjusted_dst_end = dst_end - 1;
2358   Lisp_Object unification_table
2359       = coding->character_unification_table_for_encode;
2360   int result = CODING_FINISH_NORMAL;
2361
2362   if (!NILP (Venable_character_unification) && NILP (unification_table))
2363     unification_table = Vstandard_character_unification_table_for_encode;
2364
2365   coding->consumed_char = 0;
2366   coding->fake_multibyte = 0;
2367   while (src < src_end && (dst_bytes
2368                            ? (dst < adjusted_dst_end)
2369                            : (dst < src - 1)))
2370     {
2371       /* SRC_BASE remembers the start position in source in each loop.
2372          The loop will be exited when there's not enough source text
2373          to analyze multi-byte codes (within macros ONE_MORE_BYTE and
2374          TWO_MORE_BYTES).  In that case, SRC is reset to SRC_BASE
2375          before exiting.  */
2376       unsigned char *src_base = src;
2377       unsigned char c1 = *src++, c2, c3, c4;
2378
2379       if (coding->composing)
2380         {
2381           if (c1 == 0xA0)
2382             {
2383               ONE_MORE_BYTE (c1);
2384               c1 &= 0x7F;
2385             }
2386           else if (c1 >= 0xA0)
2387             c1 -= 0x20;
2388           else
2389             coding->composing = 0;
2390         }
2391
2392       switch (emacs_code_class[c1])
2393         {
2394         case EMACS_ascii_code:
2395           ENCODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2396           break;
2397
2398         case EMACS_control_code:
2399           *dst++ = c1;
2400           coding->consumed_char++;
2401           break;
2402
2403         case EMACS_carriage_return_code:
2404           if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2405             {
2406               *dst++ = c1;
2407               coding->consumed_char++;
2408               break;
2409             }
2410           /* fall down to treat '\r' as '\n' ...  */
2411
2412         case EMACS_linefeed_code:
2413           if (coding->eol_type == CODING_EOL_LF
2414               || coding->eol_type == CODING_EOL_UNDECIDED)
2415             *dst++ = '\n';
2416           else if (coding->eol_type == CODING_EOL_CRLF)
2417             *dst++ = '\r', *dst++ = '\n';
2418           else
2419             *dst++ = '\r';
2420           coding->consumed_char++;
2421           break;
2422
2423         case EMACS_leading_code_2:
2424           ONE_MORE_BYTE (c2);
2425           ENCODE_SJIS_BIG5_CHARACTER (c1, c2, /* dummy */ c3);
2426           break;
2427
2428         case EMACS_leading_code_3:
2429           TWO_MORE_BYTES (c2, c3);
2430           ENCODE_SJIS_BIG5_CHARACTER (c1, c2, c3);
2431           break;
2432
2433         case EMACS_leading_code_4:
2434           THREE_MORE_BYTES (c2, c3, c4);
2435           ENCODE_SJIS_BIG5_CHARACTER (c2, c3, c4);
2436           break;
2437
2438         case EMACS_leading_code_composition:
2439           coding->composing = 1;
2440           break;
2441
2442         default:                /* i.e. case EMACS_invalid_code: */
2443           *dst++ = c1;
2444           coding->consumed_char++;
2445         }
2446       continue;
2447
2448     label_end_of_loop:
2449       result = CODING_FINISH_INSUFFICIENT_SRC;
2450       src = src_base;
2451       break;
2452     }
2453
2454   if (result == CODING_FINISH_NORMAL
2455       && src < src_end)
2456     result = CODING_FINISH_INSUFFICIENT_DST;
2457   coding->consumed = src - source;
2458   coding->produced = coding->produced_char = dst - destination;
2459   return result;
2460 }
2461
2462 \f
2463 /*** 5. End-of-line handlers ***/
2464
2465 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2466    This function is called only when `coding->eol_type' is
2467    CODING_EOL_CRLF or CODING_EOL_CR.  */
2468
2469 int
2470 decode_eol (coding, source, destination, src_bytes, dst_bytes)
2471      struct coding_system *coding;
2472      unsigned char *source, *destination;
2473      int src_bytes, dst_bytes;
2474 {
2475   unsigned char *src = source;
2476   unsigned char *src_end = source + src_bytes;
2477   unsigned char *dst = destination;
2478   unsigned char *dst_end = destination + dst_bytes;
2479   unsigned char c;
2480   int result = CODING_FINISH_NORMAL;
2481
2482   coding->fake_multibyte = 0;
2483
2484   if (src_bytes <= 0)
2485     return result;
2486
2487   switch (coding->eol_type)
2488     {
2489     case CODING_EOL_CRLF:
2490       {
2491         /* Since the maximum bytes produced by each loop is 2, we
2492            subtract 1 from DST_END to assure overflow checking is
2493            necessary only at the head of loop.  */
2494         unsigned char *adjusted_dst_end = dst_end - 1;
2495
2496         while (src < src_end && (dst_bytes
2497                                  ? (dst < adjusted_dst_end)
2498                                  : (dst < src - 1)))
2499           {
2500             unsigned char *src_base = src;
2501
2502             c = *src++;
2503             if (c == '\r')
2504               {
2505                 ONE_MORE_BYTE (c);
2506                 if (c != '\n')
2507                   {
2508                     if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2509                       {
2510                         result = CODING_FINISH_INCONSISTENT_EOL;
2511                         goto label_end_of_loop_2;
2512                       }
2513                     *dst++ = '\r';
2514                     if (BASE_LEADING_CODE_P (c))
2515                       coding->fake_multibyte = 1;
2516                   }
2517                 *dst++ = c;
2518               }
2519             else if (c == '\n'
2520                      && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
2521               {
2522                 result = CODING_FINISH_INCONSISTENT_EOL;
2523                 goto label_end_of_loop_2;
2524               }
2525             else
2526               {
2527                 *dst++ = c;
2528                 if (BASE_LEADING_CODE_P (c))
2529                   coding->fake_multibyte = 1;
2530               }
2531             continue;
2532
2533           label_end_of_loop:
2534             result = CODING_FINISH_INSUFFICIENT_SRC;
2535           label_end_of_loop_2:
2536             src = src_base;
2537             break;
2538           }
2539         if (result == CODING_FINISH_NORMAL
2540             && src < src_end)
2541           result = CODING_FINISH_INSUFFICIENT_DST;
2542       }
2543       break;
2544
2545     case CODING_EOL_CR:
2546       if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2547         {
2548           while (src < src_end)
2549             {
2550               if ((c = *src++) == '\n')
2551                 break;
2552               if (BASE_LEADING_CODE_P (c))
2553                 coding->fake_multibyte = 1;
2554             }
2555           if (*--src == '\n')
2556             {
2557               src_bytes = src - source;
2558               result = CODING_FINISH_INCONSISTENT_EOL;
2559             }
2560         }
2561       if (dst_bytes && src_bytes > dst_bytes)
2562         {
2563           result = CODING_FINISH_INSUFFICIENT_DST;
2564           src_bytes = dst_bytes;
2565         }
2566       if (dst_bytes)
2567         bcopy (source, destination, src_bytes);
2568       else
2569         safe_bcopy (source, destination, src_bytes);
2570       src = source + src_bytes;
2571       while (src_bytes--) if (*dst++ == '\r') dst[-1] = '\n';
2572       break;
2573
2574     default:                    /* i.e. case: CODING_EOL_LF */
2575       if (dst_bytes && src_bytes > dst_bytes)
2576         {
2577           result = CODING_FINISH_INSUFFICIENT_DST;
2578           src_bytes = dst_bytes;
2579         }
2580       if (dst_bytes)
2581         bcopy (source, destination, src_bytes);
2582       else
2583         safe_bcopy (source, destination, src_bytes);
2584       src += src_bytes;
2585       dst += dst_bytes;
2586       coding->fake_multibyte = 1;
2587       break;
2588     }
2589
2590   coding->consumed = coding->consumed_char = src - source;
2591   coding->produced = coding->produced_char = dst - destination;
2592   return result;
2593 }
2594
2595 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
2596    format of end-of-line according to `coding->eol_type'.  If
2597    `coding->mode & CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code
2598    '\r' in source text also means end-of-line.  */
2599
2600 int
2601 encode_eol (coding, source, destination, src_bytes, dst_bytes)
2602      struct coding_system *coding;
2603      unsigned char *source, *destination;
2604      int src_bytes, dst_bytes;
2605 {
2606   unsigned char *src = source;
2607   unsigned char *dst = destination;
2608   int result = CODING_FINISH_NORMAL;
2609
2610   coding->fake_multibyte = 0;
2611
2612   if (coding->eol_type == CODING_EOL_CRLF)
2613     {
2614       unsigned char c;
2615       unsigned char *src_end = source + src_bytes;
2616       unsigned char *dst_end = destination + dst_bytes;
2617       /* Since the maximum bytes produced by each loop is 2, we
2618          subtract 1 from DST_END to assure overflow checking is
2619          necessary only at the head of loop.  */
2620       unsigned char *adjusted_dst_end = dst_end - 1;
2621
2622       while (src < src_end && (dst_bytes
2623                                ? (dst < adjusted_dst_end)
2624                                : (dst < src - 1)))
2625         {
2626           c = *src++;
2627           if (c == '\n'
2628               || (c == '\r' && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)))
2629             *dst++ = '\r', *dst++ = '\n';
2630           else
2631             {
2632               *dst++ = c;
2633               if (BASE_LEADING_CODE_P (c))
2634                 coding->fake_multibyte = 1;
2635             }
2636         }
2637       if (src < src_end)
2638         result = CODING_FINISH_INSUFFICIENT_DST;
2639     }
2640   else
2641     {
2642       unsigned char c;
2643
2644       if (dst_bytes && src_bytes > dst_bytes)
2645         {
2646           src_bytes = dst_bytes;
2647           result = CODING_FINISH_INSUFFICIENT_DST;
2648         }
2649       if (dst_bytes)
2650         bcopy (source, destination, src_bytes);
2651       else
2652         {
2653           safe_bcopy (source, destination, src_bytes);
2654           dst_bytes = src_bytes;
2655         }
2656       if (coding->eol_type == CODING_EOL_CRLF)
2657         {
2658           while (src_bytes--)
2659             {
2660               if ((c = *dst++) == '\n')
2661                 dst[-1] = '\r';
2662               else if (BASE_LEADING_CODE_P (c))
2663                   coding->fake_multibyte = 1;
2664             }
2665         }
2666       else
2667         {
2668           if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
2669             {
2670               while (src_bytes--)
2671                 if (*dst++ == '\r') dst[-1] = '\n';
2672             }
2673           coding->fake_multibyte = 1;
2674         }
2675       src = source + dst_bytes;
2676       dst = destination + dst_bytes;
2677     }
2678
2679   coding->consumed = coding->consumed_char = src - source;
2680   coding->produced = coding->produced_char = dst - destination;
2681   return result;
2682 }
2683
2684 \f
2685 /*** 6. C library functions ***/
2686
/* In Emacs Lisp, a coding system is represented by a Lisp symbol
   which has a property `coding-system'.  The value of this property
   is a vector of length 5 (called a coding-vector).  Among the
   elements of this vector, the first (element[0]) and the fifth
   (element[4]) carry important information for decoding/encoding.
   Before decoding/encoding, this information should be set in the
   fields of a structure of type `coding_system'.
2694
2695    A value of property `coding-system' can be a symbol of another
2696    subsidiary coding-system.  In that case, Emacs gets coding-vector
2697    from that symbol.
2698
2699    `element[0]' contains information to be set in `coding->type'.  The
2700    value and its meaning is as follows:
2701
2702    0 -- coding_type_emacs_mule
2703    1 -- coding_type_sjis
2704    2 -- coding_type_iso2022
2705    3 -- coding_type_big5
2706    4 -- coding_type_ccl encoder/decoder written in CCL
2707    nil -- coding_type_no_conversion
2708    t -- coding_type_undecided (automatic conversion on decoding,
2709                                no-conversion on encoding)
2710
2711    `element[4]' contains information to be set in `coding->flags' and
2712    `coding->spec'.  The meaning varies by `coding->type'.
2713
2714    If `coding->type' is `coding_type_iso2022', element[4] is a vector
2715    of length 32 (of which the first 13 sub-elements are used now).
2716    Meanings of these sub-elements are:
2717
2718    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
2719         If the value is an integer of valid charset, the charset is
2720         assumed to be designated to graphic register N initially.
2721
        If the value is negative, it is the negated value of a charset
        which reserves graphic register N.  This means that the charset
        is not designated initially but should be designated to graphic
        register N just before encoding a character in that charset.
2726
2727         If the value is nil, graphic register N is never used on
2728         encoding.
2729
2730    sub-element[N] where N is 4 through 11: to be set in `coding->flags'
2731         Each value takes t or nil.  See the section ISO2022 of
2732         `coding.h' for more information.
2733
2734    If `coding->type' is `coding_type_big5', element[4] is t to denote
2735    BIG5-ETen or nil to denote BIG5-HKU.
2736
2737    If `coding->type' takes the other value, element[4] is ignored.
2738
2739    Emacs Lisp's coding system also carries information about format of
2740    end-of-line in a value of property `eol-type'.  If the value is
2741    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
2742    means CODING_EOL_CR.  If it is not integer, it should be a vector
2743    of subsidiary coding systems of which property `eol-type' has one
2744    of above values.
2745
2746 */
2747
2748 /* Extract information for decoding/encoding from CODING_SYSTEM_SYMBOL
2749    and set it in CODING.  If CODING_SYSTEM_SYMBOL is invalid, CODING
2750    is setup so that no conversion is necessary and return -1, else
2751    return 0.  */
2752
2753 int
2754 setup_coding_system (coding_system, coding)
2755      Lisp_Object coding_system;
2756      struct coding_system *coding;
2757 {
2758   Lisp_Object coding_spec, coding_type, eol_type, plist;
2759   Lisp_Object val;
2760   int i;
2761
2762   /* Initialize some fields required for all kinds of coding systems.  */
2763   coding->symbol = coding_system;
2764   coding->common_flags = 0;
2765   coding->mode = 0;
2766   coding->heading_ascii = -1;
2767   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
2768   coding_spec = Fget (coding_system, Qcoding_system);
2769   if (!VECTORP (coding_spec)
2770       || XVECTOR (coding_spec)->size != 5
2771       || !CONSP (XVECTOR (coding_spec)->contents[3]))
2772     goto label_invalid_coding_system;
2773
2774   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
2775   if (VECTORP (eol_type))
2776     {
2777       coding->eol_type = CODING_EOL_UNDECIDED;
2778       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
2779     }
2780   else if (XFASTINT (eol_type) == 1)
2781     {
2782       coding->eol_type = CODING_EOL_CRLF;
2783       coding->common_flags
2784         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2785     }
2786   else if (XFASTINT (eol_type) == 2)
2787     {
2788       coding->eol_type = CODING_EOL_CR;
2789       coding->common_flags
2790         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2791     }
2792   else
2793     coding->eol_type = CODING_EOL_LF;
2794
2795   coding_type = XVECTOR (coding_spec)->contents[0];
2796   /* Try short cut.  */
2797   if (SYMBOLP (coding_type))
2798     {
2799       if (EQ (coding_type, Qt))
2800         {
2801           coding->type = coding_type_undecided;
2802           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
2803         }
2804       else
2805         coding->type = coding_type_no_conversion;
2806       return 0;
2807     }
2808
2809   /* Initialize remaining fields.  */
2810   coding->composing = 0;
2811   coding->character_unification_table_for_decode = Qnil;
2812   coding->character_unification_table_for_encode = Qnil;
2813
2814   /* Get values of coding system properties:
2815      `post-read-conversion', `pre-write-conversion',
2816      `character-unification-table-for-decode',
2817      `character-unification-table-for-encode'.  */
2818   plist = XVECTOR (coding_spec)->contents[3];
2819   coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
2820   coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
2821   val = Fplist_get (plist, Qcharacter_unification_table_for_decode);
2822   if (SYMBOLP (val))
2823     val = Fget (val, Qcharacter_unification_table_for_decode);
2824   coding->character_unification_table_for_decode
2825     = CHAR_TABLE_P (val) ? val : Qnil;
2826   val = Fplist_get (plist, Qcharacter_unification_table_for_encode);
2827   if (SYMBOLP (val))
2828     val = Fget (val, Qcharacter_unification_table_for_encode);
2829   coding->character_unification_table_for_encode
2830     = CHAR_TABLE_P (val) ? val : Qnil;
2831   val = Fplist_get (plist, Qcoding_category);
2832   if (!NILP (val))
2833     {
2834       val = Fget (val, Qcoding_category_index);
2835       if (INTEGERP (val))
2836         coding->category_idx = XINT (val);
2837       else
2838         goto label_invalid_coding_system;
2839     }
2840   else
2841     goto label_invalid_coding_system;
2842
2843   val = Fplist_get (plist, Qsafe_charsets);
2844   if (EQ (val, Qt))
2845     {
2846       for (i = 0; i <= MAX_CHARSET; i++)
2847         coding->safe_charsets[i] = 1;
2848     }
2849   else
2850     {
2851       bzero (coding->safe_charsets, MAX_CHARSET + 1);
2852       while (CONSP (val))
2853         {
2854           if ((i = get_charset_id (XCONS (val)->car)) >= 0)
2855             coding->safe_charsets[i] = 1;
2856           val = XCONS (val)->cdr;
2857         }
2858     }
2859
2860   switch (XFASTINT (coding_type))
2861     {
2862     case 0:
2863       coding->type = coding_type_emacs_mule;
2864       if (!NILP (coding->post_read_conversion))
2865         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
2866       if (!NILP (coding->pre_write_conversion))
2867         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
2868       break;
2869
2870     case 1:
2871       coding->type = coding_type_sjis;
2872       coding->common_flags
2873         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2874       break;
2875
2876     case 2:
2877       coding->type = coding_type_iso2022;
2878       coding->common_flags
2879         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2880       {
2881         Lisp_Object val, temp;
2882         Lisp_Object *flags;
2883         int i, charset, reg_bits = 0;
2884
2885         val = XVECTOR (coding_spec)->contents[4];
2886
2887         if (!VECTORP (val) || XVECTOR (val)->size != 32)
2888           goto label_invalid_coding_system;
2889
2890         flags = XVECTOR (val)->contents;
2891         coding->flags
2892           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
2893              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
2894              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
2895              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
2896              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
2897              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
2898              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
2899              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
2900              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
2901              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
2902              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
2903              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
2904              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
2905              );
2906
2907         /* Invoke graphic register 0 to plane 0.  */
2908         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
2909         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
2910         CODING_SPEC_ISO_INVOCATION (coding, 1)
2911           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
2912         /* Not single shifting at first.  */
2913         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
2914         /* Beginning of buffer should also be regarded as bol. */
2915         CODING_SPEC_ISO_BOL (coding) = 1;
2916
2917         for (charset = 0; charset <= MAX_CHARSET; charset++)
2918           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
2919         val = Vcharset_revision_alist;
2920         while (CONSP (val))
2921           {
2922             charset = get_charset_id (Fcar_safe (XCONS (val)->car));
2923             if (charset >= 0
2924                 && (temp = Fcdr_safe (XCONS (val)->car), INTEGERP (temp))
2925                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
2926               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
2927             val = XCONS (val)->cdr;
2928           }
2929
2930         /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
2931            FLAGS[REG] can be one of below:
2932                 integer CHARSET: CHARSET occupies register I,
2933                 t: designate nothing to REG initially, but can be used
2934                   by any charsets,
2935                 list of integer, nil, or t: designate the first
2936                   element (if integer) to REG initially, the remaining
2937                   elements (if integer) is designated to REG on request,
2938                   if an element is t, REG can be used by any charsets,
2939                 nil: REG is never used.  */
2940         for (charset = 0; charset <= MAX_CHARSET; charset++)
2941           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
2942             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
2943         for (i = 0; i < 4; i++)
2944           {
2945             if (INTEGERP (flags[i])
2946                 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
2947                 || (charset = get_charset_id (flags[i])) >= 0)
2948               {
2949                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2950                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
2951               }
2952             else if (EQ (flags[i], Qt))
2953               {
2954                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2955                 reg_bits |= 1 << i;
2956                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
2957               }
2958             else if (CONSP (flags[i]))
2959               {
2960                 Lisp_Object tail = flags[i];
2961
2962                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
2963                 if (INTEGERP (XCONS (tail)->car)
2964                     && (charset = XINT (XCONS (tail)->car),
2965                         CHARSET_VALID_P (charset))
2966                     || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
2967                   {
2968                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2969                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
2970                   }
2971                 else
2972                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2973                 tail = XCONS (tail)->cdr;
2974                 while (CONSP (tail))
2975                   {
2976                     if (INTEGERP (XCONS (tail)->car)
2977                         && (charset = XINT (XCONS (tail)->car),
2978                             CHARSET_VALID_P (charset))
2979                         || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
2980                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
2981                         = i;
2982                     else if (EQ (XCONS (tail)->car, Qt))
2983                       reg_bits |= 1 << i;
2984                     tail = XCONS (tail)->cdr;
2985                   }
2986               }
2987             else
2988               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2989
2990             CODING_SPEC_ISO_DESIGNATION (coding, i)
2991               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
2992           }
2993
2994         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
2995           {
2996             /* REG 1 can be used only by locking shift in 7-bit env.  */
2997             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
2998               reg_bits &= ~2;
2999             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3000               /* Without any shifting, only REG 0 and 1 can be used.  */
3001               reg_bits &= 3;
3002           }
3003
3004         if (reg_bits)
3005           for (charset = 0; charset <= MAX_CHARSET; charset++)
3006             {
3007               if (CHARSET_VALID_P (charset))
3008                 {
3009                   /* There exist some default graphic registers to be
3010                      used CHARSET.  */
3011
3012                   /* We had better avoid designating a charset of
3013                      CHARS96 to REG 0 as far as possible.  */
3014                   if (CHARSET_CHARS (charset) == 96)
3015                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3016                       = (reg_bits & 2
3017                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3018                   else
3019                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3020                       = (reg_bits & 1
3021                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3022                 }
3023             }
3024       }
3025       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3026       coding->spec.iso2022.last_invalid_designation_register = -1;
3027       break;
3028
3029     case 3:
3030       coding->type = coding_type_big5;
3031       coding->common_flags
3032         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3033       coding->flags
3034         = (NILP (XVECTOR (coding_spec)->contents[4])
3035            ? CODING_FLAG_BIG5_HKU
3036            : CODING_FLAG_BIG5_ETEN);
3037       break;
3038
3039     case 4:
3040       coding->type = coding_type_ccl;
3041       coding->common_flags
3042         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3043       {
3044         Lisp_Object val = XVECTOR (coding_spec)->contents[4];
3045         Lisp_Object decoder, encoder;
3046
3047         if (CONSP  (val)
3048             && SYMBOLP (XCONS (val)->car)
3049             && !NILP (decoder = Fget (XCONS (val)->car, Qccl_program_idx))
3050             && !NILP (decoder = Fcdr (Faref (Vccl_program_table, decoder)))
3051             && SYMBOLP (XCONS (val)->cdr)
3052             && !NILP (encoder = Fget (XCONS (val)->cdr, Qccl_program_idx))
3053             && !NILP (encoder = Fcdr (Faref (Vccl_program_table, encoder))))
3054           {
3055             setup_ccl_program (&(coding->spec.ccl.decoder), decoder);
3056             setup_ccl_program (&(coding->spec.ccl.encoder), encoder);
3057           }
3058         else
3059           goto label_invalid_coding_system;
3060       }
3061       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3062       break;
3063
3064     case 5:
3065       coding->type = coding_type_raw_text;
3066       break;
3067
3068     default:
3069       goto label_invalid_coding_system;
3070     }
3071   return 0;
3072
3073  label_invalid_coding_system:
3074   coding->type = coding_type_no_conversion;
3075   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3076   coding->common_flags = 0;
3077   coding->eol_type = CODING_EOL_LF;
3078   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3079   return -1;
3080 }
3081
3082 /* Emacs has a mechanism to automatically detect a coding system if it
3083    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
3084    it's impossible to distinguish some coding systems accurately
3085    because they use the same range of codes.  So, at first, coding
3086    systems are categorized as follows:
3087
3088    o coding-category-emacs-mule
3089
3090         The category for a coding system which has the same code range
3091         as Emacs' internal format.  Assigned the coding-system (Lisp
3092         symbol) `emacs-mule' by default.
3093
3094    o coding-category-sjis
3095
3096         The category for a coding system which has the same code range
3097         as SJIS.  Assigned the coding-system (Lisp
3098         symbol) `japanese-shift-jis' by default.
3099
3100    o coding-category-iso-7
3101
3102         The category for a coding system which has the same code range
3103         as ISO2022 of 7-bit environment.  This doesn't use any locking
3104         shift and single shift functions.  This can encode/decode all
3105         charsets.  Assigned the coding-system (Lisp symbol)
3106         `iso-2022-7bit' by default.
3107
3108    o coding-category-iso-7-tight
3109
3110         Same as coding-category-iso-7 except that this can
3111         encode/decode only the specified charsets.
3112
3113    o coding-category-iso-8-1
3114
3115         The category for a coding system which has the same code range
3116         as ISO2022 of 8-bit environment and graphic plane 1 used only
3117         for DIMENSION1 charset.  This doesn't use any locking shift
3118         and single shift functions.  Assigned the coding-system (Lisp
3119         symbol) `iso-latin-1' by default.
3120
3121    o coding-category-iso-8-2
3122
3123         The category for a coding system which has the same code range
3124         as ISO2022 of 8-bit environment and graphic plane 1 used only
3125         for DIMENSION2 charset.  This doesn't use any locking shift
3126         and single shift functions.  Assigned the coding-system (Lisp
3127         symbol) `japanese-iso-8bit' by default.
3128
3129    o coding-category-iso-7-else
3130
3131         The category for a coding system which has the same code range
3132         as ISO2022 of 7-bit environment but uses locking shift or
3133         single shift functions.  Assigned the coding-system (Lisp
3134         symbol) `iso-2022-7bit-lock' by default.
3135
3136    o coding-category-iso-8-else
3137
3138         The category for a coding system which has the same code range
3139         as ISO2022 of 8-bit environment but uses locking shift or
3140         single shift functions.  Assigned the coding-system (Lisp
3141         symbol) `iso-2022-8bit-ss2' by default.
3142
3143    o coding-category-big5
3144
3145         The category for a coding system which has the same code range
3146         as BIG5.  Assigned the coding-system (Lisp symbol)
3147         `cn-big5' by default.
3148
3149    o coding-category-binary
3150
3151         The category for a coding system not categorized in any of the
3152         above.  Assigned the coding-system (Lisp symbol)
3153         `no-conversion' by default.
3154
3155    Each of them is a Lisp symbol and the value is an actual
3156    `coding-system's (this is also a Lisp symbol) assigned by a user.
3157    What Emacs does actually is to detect a category of coding system.
3158    Then, it uses a `coding-system' assigned to it.  If Emacs can't
3159    decide only one possible category, it selects a category of the
3160    highest priority.  Priorities of categories are also specified by a
3161    user in a Lisp variable `coding-category-list'.
3162
3163 */
3164
3165 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
3166    If it detects possible coding systems, return an integer in which
3167    appropriate flag bits are set.  Flag bits are defined by macros
3168    CODING_CATEGORY_MASK_XXX in `coding.h'.
3169
3170    How many ASCII characters are at the head is returned as *SKIP.  */
3171
3172 static int
3173 detect_coding_mask (source, src_bytes, priorities, skip)
3174      unsigned char *source;
3175      int src_bytes, *priorities, *skip;
3176 {
3177   register unsigned char c;
3178   unsigned char *src = source, *src_end = source + src_bytes;
3179   unsigned int mask = (CODING_CATEGORY_MASK_ISO_7BIT
3180                        | CODING_CATEGORY_MASK_ISO_SHIFT);
3181   int i;
3182
3183   /* At first, skip all ASCII characters and control characters except
3184      for three ISO2022 specific control characters.  */
3185  label_loop_detect_coding:
3186   while (src < src_end)
3187     {
3188       c = *src;
3189       if (c >= 0x80
3190           || ((mask & CODING_CATEGORY_MASK_ISO_7BIT)
3191               && c == ISO_CODE_ESC)
3192           || ((mask & CODING_CATEGORY_MASK_ISO_SHIFT)
3193               && (c == ISO_CODE_SI || c == ISO_CODE_SO)))
3194         break;
3195       src++;
3196     }
3197   *skip = src - source;
3198
3199   if (src >= src_end)
3200     /* We found nothing other than ASCII.  There's nothing to do.  */
3201     return 0;
3202
3203   /* The text seems to be encoded in some multilingual coding system.
3204      Now, try to find in which coding system the text is encoded.  */
3205   if (c < 0x80)
3206     {
3207       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
3208       /* C is an ISO2022 specific control code of C0.  */
3209       mask = detect_coding_iso2022 (src, src_end);
3210       if (mask == 0)
3211         {
3212           /* No valid ISO2022 code follows C.  Try again.  */
3213           src++;
3214           mask = (c != ISO_CODE_ESC
3215                   ? CODING_CATEGORY_MASK_ISO_7BIT
3216                   : CODING_CATEGORY_MASK_ISO_SHIFT);
3217           goto label_loop_detect_coding;
3218         }
3219       if (priorities)
3220         goto label_return_highest_only;
3221     }
3222   else
3223     {
3224       int try;
3225
3226       if (c < 0xA0)
3227         {
3228           /* C is the first byte of SJIS character code,
3229              or a leading-code of Emacs' internal format (emacs-mule).  */
3230           try = CODING_CATEGORY_MASK_SJIS | CODING_CATEGORY_MASK_EMACS_MULE;
3231
3232           /* Or, if C is a special latin extra code,
3233              or is an ISO2022 specific control code of C1 (SS2 or SS3),
3234              or is an ISO2022 control-sequence-introducer (CSI),
3235              we should also consider the possibility of ISO2022 codings.  */
3236           if ((VECTORP (Vlatin_extra_code_table)
3237                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3238               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
3239               || (c == ISO_CODE_CSI
3240                   && (src < src_end
3241                       && (*src == ']'
3242                           || ((*src == '0' || *src == '1' || *src == '2')
3243                               && src + 1 < src_end
3244                               && src[1] == ']')))))
3245             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
3246                      | CODING_CATEGORY_MASK_ISO_8BIT);
3247         }
3248       else
3249         /* C is a character of ISO2022 in graphic plane right,
3250            or a SJIS's 1-byte character code (i.e. JISX0201),
3251            or the first byte of BIG5's 2-byte code.  */
3252         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
3253                 | CODING_CATEGORY_MASK_ISO_8BIT
3254                 | CODING_CATEGORY_MASK_SJIS
3255                 | CODING_CATEGORY_MASK_BIG5);
3256
3257       mask = 0;
3258       if (priorities)
3259         {
3260           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3261             {
3262               priorities[i] &= try;
3263               if (priorities[i] & CODING_CATEGORY_MASK_ISO)
3264                 mask = detect_coding_iso2022 (src, src_end);
3265               else if (priorities[i] & CODING_CATEGORY_MASK_SJIS)
3266                 mask = detect_coding_sjis (src, src_end);
3267               else if (priorities[i] & CODING_CATEGORY_MASK_BIG5)
3268                 mask = detect_coding_big5 (src, src_end);
3269               else if (priorities[i] & CODING_CATEGORY_MASK_EMACS_MULE)
3270                 mask = detect_coding_emacs_mule (src, src_end);
3271               if (mask)
3272                 goto label_return_highest_only;
3273             }
3274           return CODING_CATEGORY_MASK_RAW_TEXT;
3275         }
3276       if (try & CODING_CATEGORY_MASK_ISO)
3277         mask |= detect_coding_iso2022 (src, src_end);
3278       if (try & CODING_CATEGORY_MASK_SJIS)
3279         mask |= detect_coding_sjis (src, src_end);
3280       if (try & CODING_CATEGORY_MASK_BIG5)
3281         mask |= detect_coding_big5 (src, src_end);
3282       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
3283         mask |= detect_coding_emacs_mule (src, src_end);
3284     }
3285   return (mask | CODING_CATEGORY_MASK_RAW_TEXT);
3286
3287  label_return_highest_only:
3288   for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3289     {
3290       if (mask & priorities[i])
3291         return priorities[i];
3292     }
3293   return CODING_CATEGORY_MASK_RAW_TEXT;
3294 }
3295
3296 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
3297    The information of the detected coding system is set in CODING.  */
3298
3299 void
3300 detect_coding (coding, src, src_bytes)
3301      struct coding_system *coding;
3302      unsigned char *src;
3303      int src_bytes;
3304 {
3305   unsigned int idx;
3306   int skip, mask, i;
3307   int priorities[CODING_CATEGORY_IDX_MAX];
3308   Lisp_Object val = Vcoding_category_list;
3309
3310   i = 0;
3311   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
3312     {
3313       if (! SYMBOLP (XCONS (val)->car))
3314         break;
3315       idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index));
3316       if (idx >= CODING_CATEGORY_IDX_MAX)
3317         break;
3318       priorities[i++] = (1 << idx);
3319       val = XCONS (val)->cdr;
3320     }
3321   /* If coding-category-list is valid and contains all coding
3322      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
3323      the following code saves Emacs from craching.  */
3324   while (i < CODING_CATEGORY_IDX_MAX)
3325     priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
3326
3327   mask = detect_coding_mask (src, src_bytes, priorities, &skip);
3328   coding->heading_ascii = skip;
3329
3330   if (!mask) return;
3331
3332   /* We found a single coding system of the highest priority in MASK.  */
3333   idx = 0;
3334   while (mask && ! (mask & 1)) mask >>= 1, idx++;
3335   if (! mask)
3336     idx = CODING_CATEGORY_IDX_RAW_TEXT;
3337
3338   val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[idx])->value;
3339
3340   if (coding->eol_type != CODING_EOL_UNDECIDED)
3341     {
3342       Lisp_Object tmp = Fget (val, Qeol_type);
3343
3344       if (VECTORP (tmp))
3345         val = XVECTOR (tmp)->contents[coding->eol_type];
3346     }
3347   setup_coding_system (val, coding);
3348   /* Set this again because setup_coding_system reset this member.  */
3349   coding->heading_ascii = skip;
3350 }
3351
3352 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
3353    SOURCE is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
3354    CODING_EOL_CR, and CODING_EOL_UNDECIDED.
3355
3356    How many non-eol characters are at the head is returned as *SKIP.  */
3357
3358 #define MAX_EOL_CHECK_COUNT 3
3359
3360 static int
3361 detect_eol_type (source, src_bytes, skip)
3362      unsigned char *source;
3363      int src_bytes, *skip;
3364 {
3365   unsigned char *src = source, *src_end = src + src_bytes;
3366   unsigned char c;
3367   int total = 0;                /* How many end-of-lines are found so far.  */
3368   int eol_type = CODING_EOL_UNDECIDED;
3369   int this_eol_type;
3370
3371   *skip = 0;
3372
3373   while (src < src_end && total < MAX_EOL_CHECK_COUNT)
3374     {
3375       c = *src++;
3376       if (c == '\n' || c == '\r')
3377         {
3378           if (*skip == 0)
3379             *skip = src - 1 - source;
3380           total++;
3381           if (c == '\n')
3382             this_eol_type = CODING_EOL_LF;
3383           else if (src >= src_end || *src != '\n')
3384             this_eol_type = CODING_EOL_CR;
3385           else
3386             this_eol_type = CODING_EOL_CRLF, src++;
3387
3388           if (eol_type == CODING_EOL_UNDECIDED)
3389             /* This is the first end-of-line.  */
3390             eol_type = this_eol_type;
3391           else if (eol_type != this_eol_type)
3392             {
3393               /* The found type is different from what found before.  */
3394               eol_type = CODING_EOL_INCONSISTENT;
3395               break;
3396             }
3397         }
3398     }
3399
3400   if (*skip == 0)
3401     *skip = src_end - source;
3402   return eol_type;
3403 }
3404
3405 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
3406    is encoded.  If it detects an appropriate format of end-of-line, it
3407    sets the information in *CODING.  */
3408
3409 void
3410 detect_eol (coding, src, src_bytes)
3411      struct coding_system *coding;
3412      unsigned char *src;
3413      int src_bytes;
3414 {
3415   Lisp_Object val;
3416   int skip;
3417   int eol_type = detect_eol_type (src, src_bytes, &skip);
3418
3419   if (coding->heading_ascii > skip)
3420     coding->heading_ascii = skip;
3421   else
3422     skip = coding->heading_ascii;
3423
3424   if (eol_type == CODING_EOL_UNDECIDED)
3425     return;
3426   if (eol_type == CODING_EOL_INCONSISTENT)
3427     {
3428 #if 0
3429       /* This code is suppressed until we find a better way to
3430          distinguish raw text file and binary file.  */
3431
3432       /* If we have already detected that the coding is raw-text, the
3433          coding should actually be no-conversion.  */
3434       if (coding->type == coding_type_raw_text)
3435         {
3436           setup_coding_system (Qno_conversion, coding);
3437           return;
3438         }
3439       /* Else, let's decode only text code anyway.  */
3440 #endif /* 0 */
3441       eol_type = CODING_EOL_LF;
3442     }
3443
3444   val = Fget (coding->symbol, Qeol_type);
3445   if (VECTORP (val) && XVECTOR (val)->size == 3)
3446     {
3447       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
3448       coding->heading_ascii = skip;
3449     }
3450 }
3451
3452 #define CONVERSION_BUFFER_EXTRA_ROOM 256
3453
3454 #define DECODING_BUFFER_MAG(coding)                                          \
3455   (coding->type == coding_type_iso2022                                       \
3456    ? 3                                                                       \
3457    : ((coding->type == coding_type_sjis || coding->type == coding_type_big5) \
3458       ? 2                                                                    \
3459       : (coding->type == coding_type_raw_text                                \
3460          ? 1                                                                 \
3461          : (coding->type == coding_type_ccl                                  \
3462             ? coding->spec.ccl.decoder.buf_magnification                     \
3463             : 2))))
3464
3465 /* Return maximum size (bytes) of a buffer enough for decoding
3466    SRC_BYTES of text encoded in CODING.  */
3467
3468 int
3469 decoding_buffer_size (coding, src_bytes)
3470      struct coding_system *coding;
3471      int src_bytes;
3472 {
3473   return (src_bytes * DECODING_BUFFER_MAG (coding)
3474           + CONVERSION_BUFFER_EXTRA_ROOM);
3475 }
3476
3477 /* Return maximum size (bytes) of a buffer enough for encoding
3478    SRC_BYTES of text to CODING.  */
3479
3480 int
3481 encoding_buffer_size (coding, src_bytes)
3482      struct coding_system *coding;
3483      int src_bytes;
3484 {
3485   int magnification;
3486
3487   if (coding->type == coding_type_ccl)
3488     magnification = coding->spec.ccl.encoder.buf_magnification;
3489   else
3490     magnification = 3;
3491
3492   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
3493 }
3494
#ifndef MINIMUM_CONVERSION_BUFFER_SIZE
#define MINIMUM_CONVERSION_BUFFER_SIZE 1024
#endif

/* The shared work buffer for encoding and decoding, and its current
   size in bytes.  */
char *conversion_buffer;
int conversion_buffer_size;

/* Return a pointer to a SIZE bytes of buffer to be used for encoding
   or decoding.  Sufficient memory is allocated automatically: if the
   current buffer is smaller than SIZE, it is replaced by a new one
   whose size is obtained by repeated doubling.  The contents of the
   old buffer are not copied to the new one.

   NOTE(review): this comment used to say that NULL is returned when
   we run out of memory; whether that can happen depends on what
   xmalloc does on failure -- confirm.  */

char *
get_conversion_buffer (size)
     int size;
{
  if (size > conversion_buffer_size)
    {
      char *buf;
      int real_size = conversion_buffer_size * 2;

      /* Doubling zero never reaches SIZE, so never start the loop
         below from less than the minimum size (the current size is
         zero if the buffer has not been set up yet).  */
      if (real_size < MINIMUM_CONVERSION_BUFFER_SIZE)
        real_size = MINIMUM_CONVERSION_BUFFER_SIZE;
      while (real_size < size) real_size *= 2;
      buf = (char *) xmalloc (real_size);
      xfree (conversion_buffer);
      conversion_buffer = buf;
      conversion_buffer_size = real_size;
    }
  return conversion_buffer;
}
3523
3524 int
3525 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
3526      struct coding_system *coding;
3527      unsigned char *source, *destination;
3528      int src_bytes, dst_bytes, encodep;
3529 {
3530   struct ccl_program *ccl
3531     = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
3532   int result;
3533
3534   coding->produced = ccl_driver (ccl, source, destination,
3535                                  src_bytes, dst_bytes, &(coding->consumed));
3536   if (encodep)
3537     {
3538       coding->produced_char = coding->produced;
3539       coding->consumed_char
3540         = multibyte_chars_in_text (source, coding->consumed);
3541     }
3542   else
3543     {
3544       coding->produced_char
3545         = multibyte_chars_in_text (destination, coding->produced);
3546       coding->consumed_char = coding->consumed;
3547     }
3548   switch (ccl->status)
3549     {
3550     case CCL_STAT_SUSPEND_BY_SRC:
3551       result = CODING_FINISH_INSUFFICIENT_SRC;
3552       break;
3553     case CCL_STAT_SUSPEND_BY_DST:
3554       result = CODING_FINISH_INSUFFICIENT_DST;
3555       break;
3556     default:
3557       result = CODING_FINISH_NORMAL;
3558       break;
3559     }
3560   return result;
3561 }
3562
3563 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
3564    decoding, it may detect coding system and format of end-of-line if
3565    those are not yet decided.  */
3566
3567 int
3568 decode_coding (coding, source, destination, src_bytes, dst_bytes)
3569      struct coding_system *coding;
3570      unsigned char *source, *destination;
3571      int src_bytes, dst_bytes;
3572 {
3573   int result;
3574
3575   if (src_bytes <= 0)
3576     {
3577       coding->produced = coding->produced_char = 0;
3578       coding->consumed = coding->consumed_char = 0;
3579       coding->fake_multibyte = 0;
3580       return CODING_FINISH_NORMAL;
3581     }
3582
3583   if (coding->type == coding_type_undecided)
3584     detect_coding (coding, source, src_bytes);
3585
3586   if (coding->eol_type == CODING_EOL_UNDECIDED)
3587     detect_eol (coding, source, src_bytes);
3588
3589   switch (coding->type)
3590     {
3591     case coding_type_emacs_mule:
3592     case coding_type_undecided:
3593     case coding_type_raw_text:
3594       if (coding->eol_type == CODING_EOL_LF
3595           ||  coding->eol_type == CODING_EOL_UNDECIDED)
3596         goto label_no_conversion;
3597       result = decode_eol (coding, source, destination, src_bytes, dst_bytes);
3598       break;
3599
3600     case coding_type_sjis:
3601       result = decode_coding_sjis_big5 (coding, source, destination,
3602                                         src_bytes, dst_bytes, 1);
3603       break;
3604
3605     case coding_type_iso2022:
3606       result = decode_coding_iso2022 (coding, source, destination,
3607                                       src_bytes, dst_bytes);
3608       break;
3609
3610     case coding_type_big5:
3611       result = decode_coding_sjis_big5 (coding, source, destination,
3612                                         src_bytes, dst_bytes, 0);
3613       break;
3614
3615     case coding_type_ccl:
3616       result = ccl_coding_driver (coding, source, destination,
3617                                   src_bytes, dst_bytes, 0);
3618       break;
3619
3620     default:                    /* i.e. case coding_type_no_conversion: */
3621     label_no_conversion:
3622       if (dst_bytes && src_bytes > dst_bytes)
3623         {
3624           coding->produced = dst_bytes;
3625           result = CODING_FINISH_INSUFFICIENT_DST;
3626         }
3627       else
3628         {
3629           coding->produced = src_bytes;
3630           result = CODING_FINISH_NORMAL;
3631         }
3632       if (dst_bytes)
3633         bcopy (source, destination, coding->produced);
3634       else
3635         safe_bcopy (source, destination, coding->produced);
3636       coding->fake_multibyte = 1;
3637       coding->consumed
3638         = coding->consumed_char = coding->produced_char = coding->produced;
3639       break;
3640     }
3641
3642   return result;
3643 }
3644
3645 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  */
3646
3647 int
3648 encode_coding (coding, source, destination, src_bytes, dst_bytes)
3649      struct coding_system *coding;
3650      unsigned char *source, *destination;
3651      int src_bytes, dst_bytes;
3652 {
3653   int result;
3654
3655   if (src_bytes <= 0)
3656     {
3657       coding->produced = coding->produced_char = 0;
3658       coding->consumed = coding->consumed_char = 0;
3659       coding->fake_multibyte = 0;
3660       return CODING_FINISH_NORMAL;
3661     }
3662
3663   switch (coding->type)
3664     {
3665     case coding_type_emacs_mule:
3666     case coding_type_undecided:
3667     case coding_type_raw_text:
3668       if (coding->eol_type == CODING_EOL_LF
3669           ||  coding->eol_type == CODING_EOL_UNDECIDED)
3670         goto label_no_conversion;
3671       result = encode_eol (coding, source, destination, src_bytes, dst_bytes);
3672       break;
3673
3674     case coding_type_sjis:
3675       result = encode_coding_sjis_big5 (coding, source, destination,
3676                                         src_bytes, dst_bytes, 1);
3677       break;
3678
3679     case coding_type_iso2022:
3680       result = encode_coding_iso2022 (coding, source, destination,
3681                                       src_bytes, dst_bytes);
3682       break;
3683
3684     case coding_type_big5:
3685       result = encode_coding_sjis_big5 (coding, source, destination,
3686                                         src_bytes, dst_bytes, 0);
3687       break;
3688
3689     case coding_type_ccl:
3690       result = ccl_coding_driver (coding, source, destination,
3691                                   src_bytes, dst_bytes, 1);
3692       break;
3693
3694     default:                    /* i.e. case coding_type_no_conversion: */
3695     label_no_conversion:
3696       if (dst_bytes && src_bytes > dst_bytes)
3697         {
3698           coding->produced = dst_bytes;
3699           result = CODING_FINISH_INSUFFICIENT_DST;
3700         }
3701       else
3702         {
3703           coding->produced = src_bytes;
3704           result = CODING_FINISH_NORMAL;
3705         }
3706       if (dst_bytes)
3707         bcopy (source, destination, coding->produced);
3708       else
3709         safe_bcopy (source, destination, coding->produced);
3710       if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
3711         {
3712           unsigned char *p = destination, *pend = p + coding->produced;
3713           while (p < pend)
3714             if (*p++ == '\015') p[-1] = '\n';
3715         }
3716       coding->fake_multibyte = 1;
3717       coding->consumed
3718         = coding->consumed_char = coding->produced_char = coding->produced;
3719       break;
3720     }
3721
3722   return result;
3723 }
3724
3725 /* Scan text in the region between *BEG and *END (byte positions),
3726    skip characters which we don't have to decode by coding system
3727    CODING at the head and tail, then set *BEG and *END to the region
3728    of the text we actually have to convert.  The caller should move
3729    the gap out of the region in advance.
3730
3731    If STR is not NULL, *BEG and *END are indices into STR.  */
3732
3733 static void
3734 shrink_decoding_region (beg, end, coding, str)
3735      int *beg, *end;
3736      struct coding_system *coding;
3737      unsigned char *str;
3738 {
3739   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
3740   int eol_conversion;
3741
3742   if (coding->type == coding_type_ccl
3743       || coding->type == coding_type_undecided
3744       || !NILP (coding->post_read_conversion))
3745     {
3746       /* We can't skip any data.  */
3747       return;
3748     }
3749   else if (coding->type == coding_type_no_conversion)
3750     {
3751       /* We need no conversion, but don't have to skip any data here.
3752          Decoding routine handles them effectively anyway.  */
3753       return;
3754     }
3755
3756   if (coding->heading_ascii >= 0)
3757     /* Detection routine has already found how much we can skip at the
3758        head.  */
3759     *beg += coding->heading_ascii;
3760
3761   if (str)
3762     {
3763       begp_orig = begp = str + *beg;
3764       endp_orig = endp = str + *end;
3765     }
3766   else
3767     {
3768       begp_orig = begp = BYTE_POS_ADDR (*beg);
3769       endp_orig = endp = begp + *end - *beg;
3770     }
3771
3772   eol_conversion = (coding->eol_type != CODING_EOL_LF);
3773
3774   switch (coding->type)
3775     {
3776     case coding_type_emacs_mule:
3777     case coding_type_raw_text:
3778       if (eol_conversion)
3779         {
3780           if (coding->heading_ascii < 0)
3781             while (begp < endp && *begp != '\r' && *begp < 0x80) begp++;
3782           while (begp < endp && endp[-1] != '\r' && endp[-1] < 0x80)
3783             endp--;
3784           /* Do not consider LF as ascii if preceded by CR, since that
3785              confuses eol decoding. */
3786           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3787             endp++;
3788         }
3789       else
3790         begp = endp;
3791       break;
3792
3793     case coding_type_sjis:
3794     case coding_type_big5:
3795       /* We can skip all ASCII characters at the head.  */
3796       if (coding->heading_ascii < 0)
3797         {
3798           if (eol_conversion)
3799             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
3800           else
3801             while (begp < endp && *begp < 0x80) begp++;
3802         }
3803       /* We can skip all ASCII characters at the tail except for the
3804          second byte of SJIS or BIG5 code.  */
3805       if (eol_conversion)
3806         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
3807       else
3808         while (begp < endp && endp[-1] < 0x80) endp--;
3809       /* Do not consider LF as ascii if preceded by CR, since that
3810          confuses eol decoding. */
3811       if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3812         endp++;
3813       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
3814         endp++;
3815       break;
3816
3817     default:            /* i.e. case coding_type_iso2022: */
3818       if (coding->heading_ascii < 0)
3819         {
3820           /* We can skip all ASCII characters at the head except for a
3821              few control codes.  */
3822           while (begp < endp && (c = *begp) < 0x80
3823                  && c != ISO_CODE_CR && c != ISO_CODE_SO
3824                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
3825                  && (!eol_conversion || c != ISO_CODE_LF))
3826             begp++;
3827         }
3828       switch (coding->category_idx)
3829         {
3830         case CODING_CATEGORY_IDX_ISO_8_1:
3831         case CODING_CATEGORY_IDX_ISO_8_2:
3832           /* We can skip all ASCII characters at the tail.  */
3833           if (eol_conversion)
3834             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
3835           else
3836             while (begp < endp && endp[-1] < 0x80) endp--;
3837           /* Do not consider LF as ascii if preceded by CR, since that
3838              confuses eol decoding. */
3839           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3840             endp++;
3841           break;
3842
3843         case CODING_CATEGORY_IDX_ISO_7:
3844         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
3845           /* We can skip all charactes at the tail except for ESC and
3846              the following 2-byte at the tail.  */
3847           if (eol_conversion)
3848             while (begp < endp
3849                    && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC && c != '\r')
3850               endp--;
3851           else
3852             while (begp < endp
3853                    && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC)
3854               endp--;
3855           /* Do not consider LF as ascii if preceded by CR, since that
3856              confuses eol decoding. */
3857           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3858             endp++;
3859           if (begp < endp && endp[-1] == ISO_CODE_ESC)
3860             {
3861               if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
3862                 /* This is an ASCII designation sequence.  We can
3863                     surely skip the tail.  */
3864                 endp += 2;
3865               else
3866                 /* Hmmm, we can't skip the tail.  */
3867                 endp = endp_orig;
3868             }
3869         }
3870     }
3871   *beg += begp - begp_orig;
3872   *end += endp - endp_orig;
3873   return;
3874 }
3875
3876 /* Like shrink_decoding_region but for encoding.  */
3877
3878 static void
3879 shrink_encoding_region (beg, end, coding, str)
3880      int *beg, *end;
3881      struct coding_system *coding;
3882      unsigned char *str;
3883 {
3884   unsigned char *begp_orig, *begp, *endp_orig, *endp;
3885   int eol_conversion;
3886
3887   if (coding->type == coding_type_ccl)
3888     /* We can't skip any data.  */
3889     return;
3890   else if (coding->type == coding_type_no_conversion)
3891     {
3892       /* We need no conversion.  */
3893       *beg = *end;
3894       return;
3895     }
3896
3897   if (str)
3898     {
3899       begp_orig = begp = str + *beg;
3900       endp_orig = endp = str + *end;
3901     }
3902   else
3903     {
3904       begp_orig = begp = BYTE_POS_ADDR (*beg);
3905       endp_orig = endp = begp + *end - *beg;
3906     }
3907
3908   eol_conversion = (coding->eol_type == CODING_EOL_CR
3909                     || coding->eol_type == CODING_EOL_CRLF);
3910
3911   /* Here, we don't have to check coding->pre_write_conversion because
3912      the caller is expected to have handled it already.  */
3913   switch (coding->type)
3914     {
3915     case coding_type_undecided:
3916     case coding_type_emacs_mule:
3917     case coding_type_raw_text:
3918       if (eol_conversion)
3919         {
3920           while (begp < endp && *begp != '\n') begp++;
3921           while (begp < endp && endp[-1] != '\n') endp--;
3922         }
3923       else
3924         begp = endp;
3925       break;
3926
3927     case coding_type_iso2022:
3928       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
3929         {
3930           unsigned char *bol = begp;
3931           while (begp < endp && *begp < 0x80)
3932             {
3933               begp++;
3934               if (begp[-1] == '\n')
3935                 bol = begp;
3936             }
3937           begp = bol;
3938           goto label_skip_tail;
3939         }
3940       /* fall down ... */
3941
3942     default:
3943       /* We can skip all ASCII characters at the head and tail.  */
3944       if (eol_conversion)
3945         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
3946       else
3947         while (begp < endp && *begp < 0x80) begp++;
3948     label_skip_tail:
3949       if (eol_conversion)
3950         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
3951       else
3952         while (begp < endp && *(endp - 1) < 0x80) endp--;
3953       break;
3954     }
3955
3956   *beg += begp - begp_orig;
3957   *end += endp - endp_orig;
3958   return;
3959 }
3960
3961 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
3962    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
3963    coding system CODING, and return the status code of code conversion
3964    (currently, this value has no meaning).
3965
3966    How many characters (and bytes) are converted to how many
3967    characters (and bytes) are recorded in members of the structure
3968    CODING.
3969
3970    If REPLACE is nonzero, we do various things as if the original text
3971    is deleted and a new text is inserted.  See the comments in
3972    replace_range (insdel.c) to know what we are doing.  */
3973
3974 int
3975 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
3976      int from, from_byte, to, to_byte, encodep, replace;
3977      struct coding_system *coding;
3978 {
3979   int len = to - from, len_byte = to_byte - from_byte;
3980   int require, inserted, inserted_byte;
3981   int head_skip, tail_skip, total_skip;
3982   Lisp_Object saved_coding_symbol = Qnil;
3983   int multibyte = !NILP (current_buffer->enable_multibyte_characters);
3984   int first = 1;
3985   int fake_multibyte = 0;
3986   unsigned char *src, *dst;
3987   Lisp_Object deletion = Qnil;
3988
3989   if (from < PT && PT < to)
3990     SET_PT_BOTH (from, from_byte);
3991
3992   if (replace)
3993     {
3994       int saved_from = from;
3995
3996       prepare_to_modify_buffer (from, to, &from);
3997       if (saved_from != from)
3998         {
3999           to = from + len;
4000           if (multibyte)
4001             from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
4002           else
4003             from_byte = from, to_byte = to;
4004           len_byte = to_byte - from_byte;
4005         }
4006     }
4007
4008   if (! encodep && CODING_REQUIRE_DETECTION (coding))
4009     {
4010       /* We must detect encoding of text and eol format.  */
4011
4012       if (from < GPT && to > GPT)
4013         move_gap_both (from, from_byte);
4014       if (coding->type == coding_type_undecided)
4015         {
4016           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
4017           if (coding->type == coding_type_undecided)
4018             /* It seems that the text contains only ASCII, but we
4019                should not left it undecided because the deeper
4020                decoding routine (decode_coding) tries to detect the
4021                encodings again in vain.  */
4022             coding->type = coding_type_emacs_mule;
4023         }
4024       if (coding->eol_type == CODING_EOL_UNDECIDED)
4025         {
4026           saved_coding_symbol = coding->symbol;
4027           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
4028           if (coding->eol_type == CODING_EOL_UNDECIDED)
4029             coding->eol_type = CODING_EOL_LF;
4030           /* We had better recover the original eol format if we
4031              encounter an inconsitent eol format while decoding.  */
4032           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4033         }
4034     }
4035
4036   coding->consumed_char = len, coding->consumed = len_byte;
4037
4038   if (encodep
4039       ? ! CODING_REQUIRE_ENCODING (coding)
4040       : ! CODING_REQUIRE_DECODING (coding))
4041     {
4042       coding->produced = len_byte;
4043       if (multibyte
4044           && ! replace
4045           /* See the comment of the member heading_ascii in coding.h.  */
4046           && coding->heading_ascii < len_byte)
4047         {
4048           /* We still may have to combine byte at the head and the
4049              tail of the text in the region.  */
4050           if (from < GPT && GPT < to)
4051             move_gap_both (to, to_byte);
4052           len = multibyte_chars_in_text (BYTE_POS_ADDR (from_byte), len_byte);
4053           adjust_after_insert (from, from_byte, to, to_byte, len);
4054           coding->produced_char = len;
4055         }
4056       else
4057         {
4058           if (!replace)
4059             adjust_after_insert (from, from_byte, to, to_byte, len_byte);
4060           coding->produced_char = len_byte;
4061         }
4062       return 0;
4063     }
4064
4065   /* Now we convert the text.  */
4066
4067   /* For encoding, we must process pre-write-conversion in advance.  */
4068   if (encodep
4069       && ! NILP (coding->pre_write_conversion)
4070       && SYMBOLP (coding->pre_write_conversion)
4071       && ! NILP (Ffboundp (coding->pre_write_conversion)))
4072     {
4073       /* The function in pre-write-conversion may put a new text in a
4074          new buffer.  */
4075       struct buffer *prev = current_buffer, *new;
4076
4077       call2 (coding->pre_write_conversion,
4078              make_number (from), make_number (to));
4079       if (current_buffer != prev)
4080         {
4081           len = ZV - BEGV;
4082           new = current_buffer;
4083           set_buffer_internal_1 (prev);
4084           del_range_2 (from, from_byte, to, to_byte);
4085           insert_from_buffer (new, BEG, len, 0);
4086           to = from + len;
4087           to_byte = multibyte ? CHAR_TO_BYTE (to) : to;
4088           len_byte = to_byte - from_byte;
4089         }
4090     }
4091
4092   if (replace)
4093     deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
4094
4095   /* Try to skip the heading and tailing ASCIIs.  */
4096   {
4097     int from_byte_orig = from_byte, to_byte_orig = to_byte;
4098
4099     if (from < GPT && GPT < to)
4100       move_gap_both (from, from_byte);
4101     if (encodep)
4102       shrink_encoding_region (&from_byte, &to_byte, coding, NULL);
4103     else
4104       shrink_decoding_region (&from_byte, &to_byte, coding, NULL);
4105     if (from_byte == to_byte)
4106       {
4107         coding->produced = len_byte;
4108         coding->produced_char = multibyte ? len : len_byte;
4109         if (!replace)
4110           /* We must record and adjust for this new text now.  */
4111           adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
4112         return 0;
4113       }
4114
4115     head_skip = from_byte - from_byte_orig;
4116     tail_skip = to_byte_orig - to_byte;
4117     total_skip = head_skip + tail_skip;
4118     from += head_skip;
4119     to -= tail_skip;
4120     len -= total_skip; len_byte -= total_skip;
4121   }
4122
4123   /* For converion, we must put the gap before the text in addition to
4124      making the gap larger for efficient decoding.  The required gap
4125      size starts from 2000 which is the magic number used in make_gap.
4126      But, after one batch of conversion, it will be incremented if we
4127      find that it is not enough .  */
4128   require = 2000;
4129
4130   if (GAP_SIZE  < require)
4131     make_gap (require - GAP_SIZE);
4132   move_gap_both (from, from_byte);
4133
4134   if (GPT - BEG < beg_unchanged)
4135     beg_unchanged = GPT - BEG;
4136   if (Z - GPT < end_unchanged)
4137     end_unchanged = Z - GPT;
4138
4139   inserted = inserted_byte = 0;
4140   src = GAP_END_ADDR, dst = GPT_ADDR;
4141
4142   GAP_SIZE += len_byte;
4143   ZV -= len;
4144   Z -= len;
4145   ZV_BYTE -= len_byte;
4146   Z_BYTE -= len_byte;
4147
4148   for (;;)
4149     {
4150       int result;
4151
4152       /* The buffer memory is changed from:
4153          +--------+converted-text+---------+-------original-text------+---+
4154          |<-from->|<--inserted-->|---------|<-----------len---------->|---|
4155                   |<------------------- GAP_SIZE -------------------->|  */
4156       if (encodep)
4157         result = encode_coding (coding, src, dst, len_byte, 0);
4158       else
4159         result = decode_coding (coding, src, dst, len_byte, 0);
4160       /* to:
4161          +--------+-------converted-text--------+--+---original-text--+---+
4162          |<-from->|<--inserted-->|<--produced-->|--|<-(len-consumed)->|---|
4163                   |<------------------- GAP_SIZE -------------------->|  */
4164       if (coding->fake_multibyte)
4165         fake_multibyte = 1;
4166
4167       if (!encodep && !multibyte)
4168         coding->produced_char = coding->produced;
4169       inserted += coding->produced_char;
4170       inserted_byte += coding->produced;
4171       len_byte -= coding->consumed;
4172       src += coding->consumed;
4173       dst += inserted_byte;
4174
4175       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4176         {
4177           unsigned char *pend = dst, *p = pend - inserted_byte;
4178
4179           /* Encode LFs back to the original eol format (CR or CRLF).  */
4180           if (coding->eol_type == CODING_EOL_CR)
4181             {
4182               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
4183             }
4184           else
4185             {
4186               int count = 0;
4187
4188               while (p < pend) if (*p++ == '\n') count++;
4189               if (src - dst < count)
4190                 {
4191                   /* We don't have sufficient room for putting LFs
4192                      back to CRLF.  We must record converted and
4193                      not-yet-converted text back to the buffer
4194                      content, enlarge the gap, then record them out of
4195                      the buffer contents again.  */
4196                   int add = len_byte + inserted_byte;
4197
4198                   GAP_SIZE -= add;
4199                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4200                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
4201                   make_gap (count - GAP_SIZE);
4202                   GAP_SIZE += add;
4203                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4204                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4205                   /* Don't forget to update SRC, DST, and PEND.  */
4206                   src = GAP_END_ADDR - len_byte;
4207                   dst = GPT_ADDR + inserted_byte;
4208                   pend = dst;
4209                 }
4210               inserted += count;
4211               inserted_byte += count;
4212               coding->produced += count;
4213               p = dst = pend + count;
4214               while (count)
4215                 {
4216                   *--p = *--pend;
4217                   if (*p == '\n') count--, *--p = '\r';
4218                 }
4219             }
4220
4221           /* Suppress eol-format conversion in the further conversion.  */
4222           coding->eol_type = CODING_EOL_LF;
4223
4224           /* Restore the original symbol.  */
4225           coding->symbol = saved_coding_symbol;
4226
4227           continue;
4228         }
4229       if (len_byte <= 0)
4230         break;
4231       if (result == CODING_FINISH_INSUFFICIENT_SRC)
4232         {
4233           /* The source text ends in invalid codes.  Let's just
4234              make them valid buffer contents, and finish conversion.  */
4235           inserted += len_byte;
4236           inserted_byte += len_byte;
4237           while (len_byte--)
4238             *dst++ = *src++;
4239           fake_multibyte = 1;
4240           break;
4241         }
4242       if (first)
4243         {
4244           /* We have just done the first batch of conversion which was
4245              stoped because of insufficient gap.  Let's reconsider the
4246              required gap size (i.e. SRT - DST) now.
4247
4248              We have converted ORIG bytes (== coding->consumed) into
4249              NEW bytes (coding->produced).  To convert the remaining
4250              LEN bytes, we may need REQUIRE bytes of gap, where:
4251                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
4252                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
4253              Here, we are sure that NEW >= ORIG.  */
4254           float ratio = coding->produced - coding->consumed;
4255           ratio /= coding->consumed;
4256           require = len_byte * ratio;
4257           first = 0;
4258         }
4259       if ((src - dst) < (require + 2000))
4260         {
4261           /* See the comment above the previous call of make_gap.  */
4262           int add = len_byte + inserted_byte;
4263
4264           GAP_SIZE -= add;
4265           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4266           GPT += inserted_byte; GPT_BYTE += inserted_byte;
4267           make_gap (require + 2000);
4268           GAP_SIZE += add;
4269           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4270           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4271           /* Don't forget to update SRC, DST.  */
4272           src = GAP_END_ADDR - len_byte;
4273           dst = GPT_ADDR + inserted_byte;
4274         }
4275     }
4276   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
4277
4278   if (multibyte
4279       && (fake_multibyte
4280           || !encodep && (to - from) != (to_byte - from_byte)))
4281     inserted = multibyte_chars_in_text (GPT_ADDR, inserted_byte);
4282
4283   /* If we have shrinked the conversion area, adjust it now.  */
4284   if (total_skip > 0)
4285     {
4286       if (tail_skip > 0)
4287         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
4288       inserted += total_skip; inserted_byte += total_skip;
4289       GAP_SIZE += total_skip;
4290       GPT -= head_skip; GPT_BYTE -= head_skip;
4291       ZV -= total_skip; ZV_BYTE -= total_skip;
4292       Z -= total_skip; Z_BYTE -= total_skip;
4293       from -= head_skip; from_byte -= head_skip;
4294       to += tail_skip; to_byte += tail_skip;
4295     }
4296
4297   adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
4298
4299   if (! encodep && ! NILP (coding->post_read_conversion))
4300     {
4301       Lisp_Object val;
4302       int orig_inserted = inserted, pos = PT;
4303
4304       if (from != pos)
4305         temp_set_point_both (current_buffer, from, from_byte);
4306       val = call1 (coding->post_read_conversion, make_number (inserted));
4307       if (! NILP (val))
4308         {
4309           CHECK_NUMBER (val, 0);
4310           inserted = XFASTINT (val);
4311         }
4312       if (pos >= from + orig_inserted)
4313         temp_set_point (current_buffer, pos + (inserted - orig_inserted));
4314     }
4315
4316   signal_after_change (from, to - from, inserted);
4317
4318   {
4319     coding->consumed = to_byte - from_byte;
4320     coding->consumed_char = to - from;
4321     coding->produced = inserted_byte;
4322     coding->produced_char = inserted;
4323   }
4324
4325   return 0;
4326 }
4327
4328 Lisp_Object
4329 code_convert_string (str, coding, encodep, nocopy)
4330      Lisp_Object str;
4331      struct coding_system *coding;
4332      int encodep, nocopy;
4333 {
4334   int len;
4335   char *buf;
4336   int from = 0, to = XSTRING (str)->size;
4337   int to_byte = STRING_BYTES (XSTRING (str));
4338   struct gcpro gcpro1;
4339   Lisp_Object saved_coding_symbol = Qnil;
4340   int result;
4341
4342   if (encodep && !NILP (coding->pre_write_conversion)
4343       || !encodep && !NILP (coding->post_read_conversion))
4344     {
4345       /* Since we have to call Lisp functions which assume target text
4346          is in a buffer, after setting a temporary buffer, call
4347          code_convert_region.  */
4348       int count = specpdl_ptr - specpdl;
4349       struct buffer *prev = current_buffer;
4350
4351       record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
4352       temp_output_buffer_setup (" *code-converting-work*");
4353       set_buffer_internal (XBUFFER (Vstandard_output));
4354       if (encodep)
4355         insert_from_string (str, 0, 0, to, to_byte, 0);
4356       else
4357         {
4358           /* We must insert the contents of STR as is without
4359              unibyte<->multibyte conversion.  */
4360           current_buffer->enable_multibyte_characters = Qnil;
4361           insert_from_string (str, 0, 0, to_byte, to_byte, 0);
4362           current_buffer->enable_multibyte_characters = Qt;
4363         }
4364       code_convert_region (BEGV, BEGV_BYTE, ZV, ZV_BYTE, coding, encodep, 1);
4365       if (encodep)
4366         /* We must return the buffer contents as unibyte string.  */
4367         current_buffer->enable_multibyte_characters = Qnil;
4368       str = make_buffer_string (BEGV, ZV, 0);
4369       set_buffer_internal (prev);
4370       return unbind_to (count, str);
4371     }
4372
4373   if (! encodep && CODING_REQUIRE_DETECTION (coding))
4374     {
4375       /* See the comments in code_convert_region.  */
4376       if (coding->type == coding_type_undecided)
4377         {
4378           detect_coding (coding, XSTRING (str)->data, to_byte);
4379           if (coding->type == coding_type_undecided)
4380             coding->type = coding_type_emacs_mule;
4381         }
4382       if (coding->eol_type == CODING_EOL_UNDECIDED)
4383         {
4384           saved_coding_symbol = coding->symbol;
4385           detect_eol (coding, XSTRING (str)->data, to_byte);
4386           if (coding->eol_type == CODING_EOL_UNDECIDED)
4387             coding->eol_type = CODING_EOL_LF;
4388           /* We had better recover the original eol format if we
4389              encounter an inconsitent eol format while decoding.  */
4390           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4391         }
4392     }
4393
4394   if (encodep
4395       ? ! CODING_REQUIRE_ENCODING (coding)
4396       : ! CODING_REQUIRE_DECODING (coding))
4397     from = to_byte;
4398   else
4399     {
4400       /* Try to skip the heading and tailing ASCIIs.  */
4401       if (encodep)
4402         shrink_encoding_region (&from, &to_byte, coding, XSTRING (str)->data);
4403       else
4404         shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data);
4405     }
4406   if (from == to_byte)
4407     return (nocopy ? str : Fcopy_sequence (str));
4408
4409   if (encodep)
4410     len = encoding_buffer_size (coding, to_byte - from);
4411   else
4412     len = decoding_buffer_size (coding, to_byte - from);
4413   len += from + STRING_BYTES (XSTRING (str)) - to_byte;
4414   GCPRO1 (str);
4415   buf = get_conversion_buffer (len);
4416   UNGCPRO;
4417
4418   if (from > 0)
4419     bcopy (XSTRING (str)->data, buf, from);
4420   result = (encodep
4421             ? encode_coding (coding, XSTRING (str)->data + from,
4422                              buf + from, to_byte - from, len)
4423             : decode_coding (coding, XSTRING (str)->data + from,
4424                              buf + from, to_byte - from, len));
4425   if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4426     {
4427       /* We simple try to decode the whole string again but without
4428          eol-conversion this time.  */
4429       coding->eol_type = CODING_EOL_LF;
4430       coding->symbol = saved_coding_symbol;
4431       return code_convert_string (str, coding, encodep, nocopy);
4432     }
4433
4434   bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced,
4435          STRING_BYTES (XSTRING (str)) - to_byte);
4436
4437   len = from + STRING_BYTES (XSTRING (str)) - to_byte;
4438   if (encodep)
4439     str = make_unibyte_string (buf, len + coding->produced);
4440   else
4441     str = make_string_from_bytes (buf, len + coding->produced_char,
4442                                   len + coding->produced);
4443   return str;
4444 }
4445
4446 \f
4447 #ifdef emacs
4448 /*** 7. Emacs Lisp library functions ***/
4449
4450 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
4451   "Return t if OBJECT is nil or a coding-system.\n\
4452 See the documentation of `make-coding-system' for information\n\
4453 about coding-system objects.")
4454   (obj)
4455      Lisp_Object obj;
4456 {
4457   if (NILP (obj))
4458     return Qt;
4459   if (!SYMBOLP (obj))
4460     return Qnil;
4461   /* Get coding-spec vector for OBJ.  */
4462   obj = Fget (obj, Qcoding_system);
4463   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
4464           ? Qt : Qnil);
4465 }
4466
4467 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
4468        Sread_non_nil_coding_system, 1, 1, 0,
4469   "Read a coding system from the minibuffer, prompting with string PROMPT.")
4470   (prompt)
4471      Lisp_Object prompt;
4472 {
4473   Lisp_Object val;
4474   do
4475     {
4476       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
4477                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
4478     }
4479   while (XSTRING (val)->size == 0);
4480   return (Fintern (val, Qnil));
4481 }
4482
4483 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
4484   "Read a coding system from the minibuffer, prompting with string PROMPT.\n\
4485 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.")
4486   (prompt, default_coding_system)
4487      Lisp_Object prompt, default_coding_system;
4488 {
4489   Lisp_Object val;
4490   if (SYMBOLP (default_coding_system))
4491     XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
4492   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
4493                           Qt, Qnil, Qcoding_system_history,
4494                           default_coding_system, Qnil);
4495   return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
4496 }
4497
4498 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
4499        1, 1, 0,
4500   "Check validity of CODING-SYSTEM.\n\
4501 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.\n\
4502 It is valid if it is a symbol with a non-nil `coding-system' property.\n\
4503 The value of property should be a vector of length 5.")
4504   (coding_system)
4505      Lisp_Object coding_system;
4506 {
4507   CHECK_SYMBOL (coding_system, 0);
4508   if (!NILP (Fcoding_system_p (coding_system)))
4509     return coding_system;
4510   while (1)
4511     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4512 }
4513 \f
4514 Lisp_Object
4515 detect_coding_system (src, src_bytes, highest)
4516      unsigned char *src;
4517      int src_bytes, highest;
4518 {
4519   int coding_mask, eol_type;
4520   Lisp_Object val, tmp;
4521   int dummy;
4522
4523   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy);
4524   eol_type  = detect_eol_type (src, src_bytes, &dummy);
4525   if (eol_type == CODING_EOL_INCONSISTENT)
4526     eol_type == CODING_EOL_UNDECIDED;
4527
4528   if (!coding_mask)
4529     {
4530       val = Qundecided;
4531       if (eol_type != CODING_EOL_UNDECIDED)
4532         {
4533           Lisp_Object val2;
4534           val2 = Fget (Qundecided, Qeol_type);
4535           if (VECTORP (val2))
4536             val = XVECTOR (val2)->contents[eol_type];
4537         }
4538       return val;
4539     }
4540
4541   /* At first, gather possible coding systems in VAL.  */
4542   val = Qnil;
4543   for (tmp = Vcoding_category_list; !NILP (tmp); tmp = XCONS (tmp)->cdr)
4544     {
4545       int idx
4546         = XFASTINT (Fget (XCONS (tmp)->car, Qcoding_category_index));
4547       if (coding_mask & (1 << idx))
4548         {
4549           val = Fcons (Fsymbol_value (XCONS (tmp)->car), val);
4550           if (highest)
4551             break;
4552         }
4553     }
4554   if (!highest)
4555     val = Fnreverse (val);
4556
4557   /* Then, substitute the elements by subsidiary coding systems.  */
4558   for (tmp = val; !NILP (tmp); tmp = XCONS (tmp)->cdr)
4559     {
4560       if (eol_type != CODING_EOL_UNDECIDED)
4561         {
4562           Lisp_Object eol;
4563           eol = Fget (XCONS (tmp)->car, Qeol_type);
4564           if (VECTORP (eol))
4565             XCONS (tmp)->car = XVECTOR (eol)->contents[eol_type];
4566         }
4567     }
4568   return (highest ? XCONS (val)->car : val);
4569 }
4570
4571 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
4572        2, 3, 0,
4573   "Detect coding system of the text in the region between START and END.\n\
4574 Return a list of possible coding systems ordered by priority.\n\
4575 \n\
4576 If only ASCII characters are found, it returns `undecided'\n\
4577 or its subsidiary coding system according to a detected end-of-line format.\n\
4578 \n\
4579 If optional argument HIGHEST is non-nil, return the coding system of\n\
4580 highest priority.")
4581   (start, end, highest)
4582      Lisp_Object start, end, highest;
4583 {
4584   int from, to;
4585   int from_byte, to_byte;
4586
4587   CHECK_NUMBER_COERCE_MARKER (start, 0);
4588   CHECK_NUMBER_COERCE_MARKER (end, 1);
4589
4590   validate_region (&start, &end);
4591   from = XINT (start), to = XINT (end);
4592   from_byte = CHAR_TO_BYTE (from);
4593   to_byte = CHAR_TO_BYTE (to);
4594
4595   if (from < GPT && to >= GPT)
4596     move_gap_both (to, to_byte);
4597
4598   return detect_coding_system (BYTE_POS_ADDR (from_byte),
4599                                to_byte - from_byte,
4600                                !NILP (highest));
4601 }
4602
4603 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
4604        1, 2, 0,
4605   "Detect coding system of the text in STRING.\n\
4606 Return a list of possible coding systems ordered by priority.\n\
4607 \n\
4608 If only ASCII characters are found, it returns `undecided'\n\
4609 or its subsidiary coding system according to a detected end-of-line format.\n\
4610 \n\
4611 If optional argument HIGHEST is non-nil, return the coding system of\n\
4612 highest priority.")
4613   (string, highest)
4614      Lisp_Object string, highest;
4615 {
4616   CHECK_STRING (string, 0);
4617
4618   return detect_coding_system (XSTRING (string)->data,
4619                                STRING_BYTES (XSTRING (string)),
4620                                !NILP (highest));
4621 }
4622
4623 Lisp_Object
4624 code_convert_region1 (start, end, coding_system, encodep)
4625      Lisp_Object start, end, coding_system;
4626      int encodep;
4627 {
4628   struct coding_system coding;
4629   int from, to, len;
4630
4631   CHECK_NUMBER_COERCE_MARKER (start, 0);
4632   CHECK_NUMBER_COERCE_MARKER (end, 1);
4633   CHECK_SYMBOL (coding_system, 2);
4634
4635   validate_region (&start, &end);
4636   from = XFASTINT (start);
4637   to = XFASTINT (end);
4638
4639   if (NILP (coding_system))
4640     return make_number (to - from);
4641
4642   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4643     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4644
4645   coding.mode |= CODING_MODE_LAST_BLOCK;
4646   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
4647                        &coding, encodep, 1);
4648   return make_number (coding.produced_char);
4649 }
4650
4651 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
4652        3, 3, "r\nzCoding system: ",
4653   "Decode the current region by specified coding system.\n\
4654 When called from a program, takes three arguments:\n\
4655 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
4656 Return length of decoded text.")
4657   (start, end, coding_system)
4658      Lisp_Object start, end, coding_system;
4659 {
4660   return code_convert_region1 (start, end, coding_system, 0);
4661 }
4662
4663 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
4664        3, 3, "r\nzCoding system: ",
4665   "Encode the current region by specified coding system.\n\
4666 When called from a program, takes three arguments:\n\
4667 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
4668 Return length of encoded text.")
4669   (start, end, coding_system)
4670      Lisp_Object start, end, coding_system;
4671 {
4672   return code_convert_region1 (start, end, coding_system, 1);
4673 }
4674
4675 Lisp_Object
4676 code_convert_string1 (string, coding_system, nocopy, encodep)
4677      Lisp_Object string, coding_system, nocopy;
4678      int encodep;
4679 {
4680   struct coding_system coding;
4681
4682   CHECK_STRING (string, 0);
4683   CHECK_SYMBOL (coding_system, 1);
4684
4685   if (NILP (coding_system))
4686     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
4687
4688   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4689     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4690
4691   coding.mode |= CODING_MODE_LAST_BLOCK;
4692   return code_convert_string (string, &coding, encodep, !NILP (nocopy));
4693 }
4694
4695 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
4696        2, 3, 0,
4697   "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
4698 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
4699 if the decoding operation is trivial.")
4700   (string, coding_system, nocopy)
4701      Lisp_Object string, coding_system, nocopy;
4702 {
4703   return code_convert_string1(string, coding_system, nocopy, 0);
4704 }
4705
4706 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
4707        2, 3, 0,
4708   "Encode STRING to CODING-SYSTEM, and return the result.\n\
4709 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
4710 if the encoding operation is trivial.")
4711   (string, coding_system, nocopy)
4712      Lisp_Object string, coding_system, nocopy;
4713 {
4714   return code_convert_string1(string, coding_system, nocopy, 1);
4715 }
4716
4717 \f
4718 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
4719   "Decode a JISX0208 character of shift-jis encoding.\n\
4720 CODE is the character code in SJIS.\n\
4721 Return the corresponding character.")
4722   (code)
4723      Lisp_Object code;
4724 {
4725   unsigned char c1, c2, s1, s2;
4726   Lisp_Object val;
4727
4728   CHECK_NUMBER (code, 0);
4729   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
4730   DECODE_SJIS (s1, s2, c1, c2);
4731   XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset_jisx0208, c1, c2));
4732   return val;
4733 }
4734
4735 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
4736   "Encode a JISX0208 character CHAR to SJIS coding system.\n\
4737 Return the corresponding character code in SJIS.")
4738   (ch)
4739      Lisp_Object ch;
4740 {
4741   int charset, c1, c2, s1, s2;
4742   Lisp_Object val;
4743
4744   CHECK_NUMBER (ch, 0);
4745   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
4746   if (charset == charset_jisx0208)
4747     {
4748       ENCODE_SJIS (c1, c2, s1, s2);
4749       XSETFASTINT (val, (s1 << 8) | s2);
4750     }
4751   else
4752     XSETFASTINT (val, 0);
4753   return val;
4754 }
4755
4756 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
4757   "Decode a Big5 character CODE of BIG5 coding system.\n\
4758 CODE is the character code in BIG5.\n\
4759 Return the corresponding character.")
4760   (code)
4761      Lisp_Object code;
4762 {
4763   int charset;
4764   unsigned char b1, b2, c1, c2;
4765   Lisp_Object val;
4766
4767   CHECK_NUMBER (code, 0);
4768   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
4769   DECODE_BIG5 (b1, b2, charset, c1, c2);
4770   XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset, c1, c2));
4771   return val;
4772 }
4773
4774 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
4775   "Encode the Big5 character CHAR to BIG5 coding system.\n\
4776 Return the corresponding character code in Big5.")
4777   (ch)
4778      Lisp_Object ch;
4779 {
4780   int charset, c1, c2, b1, b2;
4781   Lisp_Object val;
4782
4783   CHECK_NUMBER (ch, 0);
4784   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
4785   if (charset == charset_big5_1 || charset == charset_big5_2)
4786     {
4787       ENCODE_BIG5 (charset, c1, c2, b1, b2);
4788       XSETFASTINT (val, (b1 << 8) | b2);
4789     }
4790   else
4791     XSETFASTINT (val, 0);
4792   return val;
4793 }
4794 \f
4795 DEFUN ("set-terminal-coding-system-internal",
4796        Fset_terminal_coding_system_internal,
4797        Sset_terminal_coding_system_internal, 1, 1, 0, "")
4798   (coding_system)
4799      Lisp_Object coding_system;
4800 {
4801   CHECK_SYMBOL (coding_system, 0);
4802   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
4803   /* We had better not send unsafe characters to terminal.  */
4804   terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
4805
4806   return Qnil;
4807 }
4808
4809 DEFUN ("set-safe-terminal-coding-system-internal",
4810        Fset_safe_terminal_coding_system_internal,
4811        Sset_safe_terminal_coding_system_internal, 1, 1, 0, "")
4812   (coding_system)
4813      Lisp_Object coding_system;
4814 {
4815   CHECK_SYMBOL (coding_system, 0);
4816   setup_coding_system (Fcheck_coding_system (coding_system),
4817                        &safe_terminal_coding);
4818   return Qnil;
4819 }
4820
4821 DEFUN ("terminal-coding-system",
4822        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
4823   "Return coding system specified for terminal output.")
4824   ()
4825 {
4826   return terminal_coding.symbol;
4827 }
4828
4829 DEFUN ("set-keyboard-coding-system-internal",
4830        Fset_keyboard_coding_system_internal,
4831        Sset_keyboard_coding_system_internal, 1, 1, 0, "")
4832   (coding_system)
4833      Lisp_Object coding_system;
4834 {
4835   CHECK_SYMBOL (coding_system, 0);
4836   setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
4837   return Qnil;
4838 }
4839
4840 DEFUN ("keyboard-coding-system",
4841        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
4842   "Return coding system specified for decoding keyboard input.")
4843   ()
4844 {
4845   return keyboard_coding.symbol;
4846 }
4847
4848 \f
4849 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
4850        Sfind_operation_coding_system,  1, MANY, 0,
4851   "Choose a coding system for an operation based on the target name.\n\
4852 The value names a pair of coding systems: (DECODING-SYSTEM ENCODING-SYSTEM).\n\
4853 DECODING-SYSTEM is the coding system to use for decoding\n\
4854 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
4855 for encoding (in case OPERATION does encoding).\n\
4856 \n\
4857 The first argument OPERATION specifies an I/O primitive:\n\
4858   For file I/O, `insert-file-contents' or `write-region'.\n\
4859   For process I/O, `call-process', `call-process-region', or `start-process'.\n\
4860   For network I/O, `open-network-stream'.\n\
4861 \n\
4862 The remaining arguments should be the same arguments that were passed\n\
4863 to the primitive.  Depending on which primitive, one of those arguments\n\
4864 is selected as the TARGET.  For example, if OPERATION does file I/O,\n\
4865 whichever argument specifies the file name is TARGET.\n\
4866 \n\
4867 TARGET has a meaning which depends on OPERATION:\n\
4868   For file I/O, TARGET is a file name.\n\
4869   For process I/O, TARGET is a process name.\n\
4870   For network I/O, TARGET is a service name or a port number\n\
4871 \n\
4872 This function looks up what specified for TARGET in,\n\
4873 `file-coding-system-alist', `process-coding-system-alist',\n\
4874 or `network-coding-system-alist' depending on OPERATION.\n\
4875 They may specify a coding system, a cons of coding systems,\n\
4876 or a function symbol to call.\n\
4877 In the last case, we call the function with one argument,\n\
4878 which is a list of all the arguments given to this function.")
4879   (nargs, args)
4880      int nargs;
4881      Lisp_Object *args;
4882 {
4883   Lisp_Object operation, target_idx, target, val;
4884   register Lisp_Object chain;
4885
4886   if (nargs < 2)
4887     error ("Too few arguments");
4888   operation = args[0];
4889   if (!SYMBOLP (operation)
4890       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
4891     error ("Invalid first arguement");
4892   if (nargs < 1 + XINT (target_idx))
4893     error ("Too few arguments for operation: %s",
4894            XSYMBOL (operation)->name->data);
4895   target = args[XINT (target_idx) + 1];
4896   if (!(STRINGP (target)
4897         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
4898     error ("Invalid %dth argument", XINT (target_idx) + 1);
4899
4900   chain = ((EQ (operation, Qinsert_file_contents)
4901             || EQ (operation, Qwrite_region))
4902            ? Vfile_coding_system_alist
4903            : (EQ (operation, Qopen_network_stream)
4904               ? Vnetwork_coding_system_alist
4905               : Vprocess_coding_system_alist));
4906   if (NILP (chain))
4907     return Qnil;
4908
4909   for (; CONSP (chain); chain = XCONS (chain)->cdr)
4910     {
4911       Lisp_Object elt;
4912       elt = XCONS (chain)->car;
4913
4914       if (CONSP (elt)
4915           && ((STRINGP (target)
4916                && STRINGP (XCONS (elt)->car)
4917                && fast_string_match (XCONS (elt)->car, target) >= 0)
4918               || (INTEGERP (target) && EQ (target, XCONS (elt)->car))))
4919         {
4920           val = XCONS (elt)->cdr;
4921           /* Here, if VAL is both a valid coding system and a valid
4922              function symbol, we return VAL as a coding system.  */
4923           if (CONSP (val))
4924             return val;
4925           if (! SYMBOLP (val))
4926             return Qnil;
4927           if (! NILP (Fcoding_system_p (val)))
4928             return Fcons (val, val);
4929           if (! NILP (Ffboundp (val)))
4930             {
4931               val = call1 (val, Flist (nargs, args));
4932               if (CONSP (val))
4933                 return val;
4934               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
4935                 return Fcons (val, val);
4936             }
4937           return Qnil;
4938         }
4939     }
4940   return Qnil;
4941 }
4942
4943 DEFUN ("update-iso-coding-systems", Fupdate_iso_coding_systems,
4944        Supdate_iso_coding_systems, 0, 0, 0,
4945   "Update internal database for ISO2022 based coding systems.\n\
4946 When values of the following coding categories are changed, you must\n\
4947 call this function:\n\
4948   coding-category-iso-7, coding-category-iso-7-tight,\n\
4949   coding-category-iso-8-1, coding-category-iso-8-2,\n\
4950   coding-category-iso-7-else, coding-category-iso-8-else")
4951   ()
4952 {
4953   int i;
4954
4955   for (i = CODING_CATEGORY_IDX_ISO_7; i <= CODING_CATEGORY_IDX_ISO_8_ELSE;
4956        i++)
4957     {
4958       if (! coding_system_table[i])
4959         coding_system_table[i]
4960           = (struct coding_system *) xmalloc (sizeof (struct coding_system));
4961       setup_coding_system
4962         (XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value,
4963          coding_system_table[i]);
4964     }
4965   return Qnil;
4966 }
4967
4968 #endif /* emacs */
4969
4970 \f
4971 /*** 8. Post-amble ***/
4972
4973 void
4974 init_coding_once ()
4975 {
4976   int i;
4977
4978   /* Emacs' internal format specific initialize routine.  */
4979   for (i = 0; i <= 0x20; i++)
4980     emacs_code_class[i] = EMACS_control_code;
4981   emacs_code_class[0x0A] = EMACS_linefeed_code;
4982   emacs_code_class[0x0D] = EMACS_carriage_return_code;
4983   for (i = 0x21 ; i < 0x7F; i++)
4984     emacs_code_class[i] = EMACS_ascii_code;
4985   emacs_code_class[0x7F] = EMACS_control_code;
4986   emacs_code_class[0x80] = EMACS_leading_code_composition;
4987   for (i = 0x81; i < 0xFF; i++)
4988     emacs_code_class[i] = EMACS_invalid_code;
4989   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
4990   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
4991   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
4992   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
4993
4994   /* ISO2022 specific initialize routine.  */
4995   for (i = 0; i < 0x20; i++)
4996     iso_code_class[i] = ISO_control_code;
4997   for (i = 0x21; i < 0x7F; i++)
4998     iso_code_class[i] = ISO_graphic_plane_0;
4999   for (i = 0x80; i < 0xA0; i++)
5000     iso_code_class[i] = ISO_control_code;
5001   for (i = 0xA1; i < 0xFF; i++)
5002     iso_code_class[i] = ISO_graphic_plane_1;
5003   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
5004   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
5005   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
5006   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
5007   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
5008   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
5009   iso_code_class[ISO_CODE_ESC] = ISO_escape;
5010   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
5011   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
5012   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
5013
5014   conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
5015   conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
5016
5017   setup_coding_system (Qnil, &keyboard_coding);
5018   setup_coding_system (Qnil, &terminal_coding);
5019   setup_coding_system (Qnil, &safe_terminal_coding);
5020
5021   bzero (coding_system_table, sizeof coding_system_table);
5022
5023 #if defined (MSDOS) || defined (WINDOWSNT)
5024   system_eol_type = CODING_EOL_CRLF;
5025 #else
5026   system_eol_type = CODING_EOL_LF;
5027 #endif
5028 }
5029
5030 #ifdef emacs
5031
5032 void
5033 syms_of_coding ()
5034 {
5035   Qtarget_idx = intern ("target-idx");
5036   staticpro (&Qtarget_idx);
5037
5038   Qcoding_system_history = intern ("coding-system-history");
5039   staticpro (&Qcoding_system_history);
5040   Fset (Qcoding_system_history, Qnil);
5041
5042   /* Target FILENAME is the first argument.  */
5043   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
5044   /* Target FILENAME is the third argument.  */
5045   Fput (Qwrite_region, Qtarget_idx, make_number (2));
5046
5047   Qcall_process = intern ("call-process");
5048   staticpro (&Qcall_process);
5049   /* Target PROGRAM is the first argument.  */
5050   Fput (Qcall_process, Qtarget_idx, make_number (0));
5051
5052   Qcall_process_region = intern ("call-process-region");
5053   staticpro (&Qcall_process_region);
5054   /* Target PROGRAM is the third argument.  */
5055   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
5056
5057   Qstart_process = intern ("start-process");
5058   staticpro (&Qstart_process);
5059   /* Target PROGRAM is the third argument.  */
5060   Fput (Qstart_process, Qtarget_idx, make_number (2));
5061
5062   Qopen_network_stream = intern ("open-network-stream");
5063   staticpro (&Qopen_network_stream);
5064   /* Target SERVICE is the fourth argument.  */
5065   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
5066
5067   Qcoding_system = intern ("coding-system");
5068   staticpro (&Qcoding_system);
5069
5070   Qeol_type = intern ("eol-type");
5071   staticpro (&Qeol_type);
5072
5073   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
5074   staticpro (&Qbuffer_file_coding_system);
5075
5076   Qpost_read_conversion = intern ("post-read-conversion");
5077   staticpro (&Qpost_read_conversion);
5078
5079   Qpre_write_conversion = intern ("pre-write-conversion");
5080   staticpro (&Qpre_write_conversion);
5081
5082   Qno_conversion = intern ("no-conversion");
5083   staticpro (&Qno_conversion);
5084
5085   Qundecided = intern ("undecided");
5086   staticpro (&Qundecided);
5087
5088   Qcoding_system_p = intern ("coding-system-p");
5089   staticpro (&Qcoding_system_p);
5090
5091   Qcoding_system_error = intern ("coding-system-error");
5092   staticpro (&Qcoding_system_error);
5093
5094   Fput (Qcoding_system_error, Qerror_conditions,
5095         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
5096   Fput (Qcoding_system_error, Qerror_message,
5097         build_string ("Invalid coding system"));
5098
5099   Qcoding_category = intern ("coding-category");
5100   staticpro (&Qcoding_category);
5101   Qcoding_category_index = intern ("coding-category-index");
5102   staticpro (&Qcoding_category_index);
5103
5104   Vcoding_category_table
5105     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
5106   staticpro (&Vcoding_category_table);
5107   {
5108     int i;
5109     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
5110       {
5111         XVECTOR (Vcoding_category_table)->contents[i]
5112           = intern (coding_category_name[i]);
5113         Fput (XVECTOR (Vcoding_category_table)->contents[i],
5114               Qcoding_category_index, make_number (i));
5115       }
5116   }
5117
5118   Qcharacter_unification_table = intern ("character-unification-table");
5119   staticpro (&Qcharacter_unification_table);
5120   Fput (Qcharacter_unification_table, Qchar_table_extra_slots,
5121         make_number (0));
5122
5123   Qcharacter_unification_table_for_decode
5124     = intern ("character-unification-table-for-decode");
5125   staticpro (&Qcharacter_unification_table_for_decode);
5126
5127   Qcharacter_unification_table_for_encode
5128     = intern ("character-unification-table-for-encode");
5129   staticpro (&Qcharacter_unification_table_for_encode);
5130
5131   Qsafe_charsets = intern ("safe-charsets");
5132   staticpro (&Qsafe_charsets);
5133
5134   Qemacs_mule = intern ("emacs-mule");
5135   staticpro (&Qemacs_mule);
5136
5137   Qraw_text = intern ("raw-text");
5138   staticpro (&Qraw_text);
5139
5140   defsubr (&Scoding_system_p);
5141   defsubr (&Sread_coding_system);
5142   defsubr (&Sread_non_nil_coding_system);
5143   defsubr (&Scheck_coding_system);
5144   defsubr (&Sdetect_coding_region);
5145   defsubr (&Sdetect_coding_string);
5146   defsubr (&Sdecode_coding_region);
5147   defsubr (&Sencode_coding_region);
5148   defsubr (&Sdecode_coding_string);
5149   defsubr (&Sencode_coding_string);
5150   defsubr (&Sdecode_sjis_char);
5151   defsubr (&Sencode_sjis_char);
5152   defsubr (&Sdecode_big5_char);
5153   defsubr (&Sencode_big5_char);
5154   defsubr (&Sset_terminal_coding_system_internal);
5155   defsubr (&Sset_safe_terminal_coding_system_internal);
5156   defsubr (&Sterminal_coding_system);
5157   defsubr (&Sset_keyboard_coding_system_internal);
5158   defsubr (&Skeyboard_coding_system);
5159   defsubr (&Sfind_operation_coding_system);
5160   defsubr (&Supdate_iso_coding_systems);
5161
5162   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
5163     "List of coding systems.\n\
5164 \n\
5165 Do not alter the value of this variable manually.  This variable should be\n\
5166 updated by the functions `make-coding-system' and\n\
5167 `define-coding-system-alias'.");
5168   Vcoding_system_list = Qnil;
5169
5170   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
5171     "Alist of coding system names.\n\
5172 Each element is one element list of coding system name.\n\
5173 This variable is given to `completing-read' as TABLE argument.\n\
5174 \n\
5175 Do not alter the value of this variable manually.  This variable should be\n\
5176 updated by the functions `make-coding-system' and\n\
5177 `define-coding-system-alias'.");
5178   Vcoding_system_alist = Qnil;
5179
5180   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
5181     "List of coding-categories (symbols) ordered by priority.");
5182   {
5183     int i;
5184
5185     Vcoding_category_list = Qnil;
5186     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
5187       Vcoding_category_list
5188         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
5189                  Vcoding_category_list);
5190   }
5191
5192   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
5193     "Specify the coding system for read operations.\n\
5194 It is useful to bind this variable with `let', but do not set it globally.\n\
5195 If the value is a coding system, it is used for decoding on read operation.\n\
5196 If not, an appropriate element is used from one of the coding system alists:\n\
5197 There are three such tables, `file-coding-system-alist',\n\
5198 `process-coding-system-alist', and `network-coding-system-alist'.");
5199   Vcoding_system_for_read = Qnil;
5200
5201   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
5202     "Specify the coding system for write operations.\n\
5203 It is useful to bind this variable with `let', but do not set it globally.\n\
5204 If the value is a coding system, it is used for encoding on write operation.\n\
5205 If not, an appropriate element is used from one of the coding system alists:\n\
5206 There are three such tables, `file-coding-system-alist',\n\
5207 `process-coding-system-alist', and `network-coding-system-alist'.");
5208   Vcoding_system_for_write = Qnil;
5209
5210   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
5211     "Coding system used in the latest file or process I/O.");
5212   Vlast_coding_system_used = Qnil;
5213
5214   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
5215     "*Non-nil inhibit code conversion of end-of-line format in any cases.");
5216   inhibit_eol_conversion = 0;
5217
5218   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
5219     "Non-nil means process buffer inherits coding system of process output.\n\
5220 Bind it to t if the process output is to be treated as if it were a file\n\
5221 read from some filesystem.");
5222   inherit_process_coding_system = 0;
5223
5224   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
5225     "Alist to decide a coding system to use for a file I/O operation.\n\
5226 The format is ((PATTERN . VAL) ...),\n\
5227 where PATTERN is a regular expression matching a file name,\n\
5228 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5229 If VAL is a coding system, it is used for both decoding and encoding\n\
5230 the file contents.\n\
5231 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5232 and the cdr part is used for encoding.\n\
5233 If VAL is a function symbol, the function must return a coding system\n\
5234 or a cons of coding systems which are used as above.\n\
5235 \n\
5236 See also the function `find-operation-coding-system'.");
5237   Vfile_coding_system_alist = Qnil;
5238
5239   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
5240     "Alist to decide a coding system to use for a process I/O operation.\n\
5241 The format is ((PATTERN . VAL) ...),\n\
5242 where PATTERN is a regular expression matching a program name,\n\
5243 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5244 If VAL is a coding system, it is used for both decoding what received\n\
5245 from the program and encoding what sent to the program.\n\
5246 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5247 and the cdr part is used for encoding.\n\
5248 If VAL is a function symbol, the function must return a coding system\n\
5249 or a cons of coding systems which are used as above.\n\
5250 \n\
5251 See also the function `find-operation-coding-system'.");
5252   Vprocess_coding_system_alist = Qnil;
5253
5254   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
5255     "Alist to decide a coding system to use for a network I/O operation.\n\
5256 The format is ((PATTERN . VAL) ...),\n\
5257 where PATTERN is a regular expression matching a network service name\n\
5258 or is a port number to connect to,\n\
5259 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5260 If VAL is a coding system, it is used for both decoding what received\n\
5261 from the network stream and encoding what sent to the network stream.\n\
5262 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5263 and the cdr part is used for encoding.\n\
5264 If VAL is a function symbol, the function must return a coding system\n\
5265 or a cons of coding systems which are used as above.\n\
5266 \n\
5267 See also the function `find-operation-coding-system'.");
5268   Vnetwork_coding_system_alist = Qnil;
5269
5270   DEFVAR_INT ("eol-mnemonic-unix", &eol_mnemonic_unix,
5271     "Mnemonic character indicating UNIX-like end-of-line format (i.e. LF) .");
5272   eol_mnemonic_unix = ':';
5273
5274   DEFVAR_INT ("eol-mnemonic-dos", &eol_mnemonic_dos,
5275     "Mnemonic character indicating DOS-like end-of-line format (i.e. CRLF).");
5276   eol_mnemonic_dos = '\\';
5277
5278   DEFVAR_INT ("eol-mnemonic-mac", &eol_mnemonic_mac,
5279     "Mnemonic character indicating MAC-like end-of-line format (i.e. CR).");
5280   eol_mnemonic_mac = '/';
5281
5282   DEFVAR_INT ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
5283     "Mnemonic character indicating end-of-line format is not yet decided.");
5284   eol_mnemonic_undecided = ':';
5285
5286   DEFVAR_LISP ("enable-character-unification", &Venable_character_unification,
5287     "Non-nil means ISO 2022 encoder/decoder do character unification.");
5288   Venable_character_unification = Qt;
5289
5290   DEFVAR_LISP ("standard-character-unification-table-for-decode",
5291     &Vstandard_character_unification_table_for_decode,
5292     "Table for unifying characters when reading.");
5293   Vstandard_character_unification_table_for_decode = Qnil;
5294
5295   DEFVAR_LISP ("standard-character-unification-table-for-encode",
5296     &Vstandard_character_unification_table_for_encode,
5297     "Table for unifying characters when writing.");
5298   Vstandard_character_unification_table_for_encode = Qnil;
5299
5300   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
5301     "Alist of charsets vs revision numbers.\n\
5302 While encoding, if a charset (car part of an element) is found,\n\
5303 designate it with the escape sequence identifying the revision (cdr part of the element).");
5304   Vcharset_revision_alist = Qnil;
5305
5306   DEFVAR_LISP ("default-process-coding-system",
5307                &Vdefault_process_coding_system,
5308     "Cons of coding systems used for process I/O by default.\n\
5309 The car part is used for decoding a process output,\n\
5310 the cdr part is used for encoding a text to be sent to a process.");
5311   Vdefault_process_coding_system = Qnil;
5312
5313   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
5314     "Table of extra Latin codes in the range 128..159 (inclusive).\n\
5315 This is a vector of length 256.\n\
5316 If Nth element is non-nil, the existence of code N in a file\n\
5317 \(or output of subprocess) doesn't prevent it from being detected as\n\
5318 a coding system of ISO 2022 variant which has a flag\n\
5319 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file\n\
5320 or reading output of a subprocess.\n\
5321 Only the 128th through 159th elements have a meaning.");
5322   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
5323
5324   DEFVAR_LISP ("select-safe-coding-system-function",
5325                &Vselect_safe_coding_system_function,
5326     "Function to call to select safe coding system for encoding a text.\n\
5327 \n\
5328 If set, this function is called to force a user to select a proper\n\
5329 coding system which can encode the text in the case that a default\n\
5330 coding system used in each operation can't encode the text.\n\
5331 \n\
5332 The default value is `select-safe-coding-system' (which see).");
5333   Vselect_safe_coding_system_function = Qnil;
5334
5335 }
5336
5337 #endif /* emacs */