src/coding.c

   1 /* Coding system handler (conversion, detection, etc.).
   2    Copyright (C) 1995,97,1998,2002,2003  Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4    Copyright (C) 2001,2002,2003  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 2, or (at your option)
  11 any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs; see the file COPYING.  If not, write to
  20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  21 Boston, MA 02111-1307, USA.  */
  22
  23 /*** TABLE OF CONTENTS ***
  24
  25   0. General comments
  26   1. Preamble
  27   2. Emacs' internal format (emacs-mule) handlers
  28   3. ISO2022 handlers
  29   4. Shift-JIS and BIG5 handlers
  30   5. CCL handlers
  31   6. End-of-line handlers
  32   7. C library functions
  33   8. Emacs Lisp library functions
  34   9. Post-amble
  35
  36 */
  37
  38 /*** 0. General comments ***/
  39
  40
  41 /*** GENERAL NOTE on CODING SYSTEMS ***
  42
  43   A coding system is an encoding mechanism for one or more character
  44   sets.  Here's a list of coding systems which Emacs can handle.  When
  45   we say "decode", it means converting some other coding system to
  46   Emacs' internal format (emacs-mule), and when we say "encode",
  47   it means converting the coding system emacs-mule to some other
  48   coding system.
  49
  50   0. Emacs' internal format (emacs-mule)
  51
  52   Emacs itself holds a multi-lingual character in buffers and strings
  53   in a special format.  Details are described in section 2.
  54
  55   1. ISO2022
  56
  57   The most famous coding system for multiple character sets.  X's
  58   Compound Text, various EUCs (Extended Unix Code), and coding
  59   systems used in Internet communication such as ISO-2022-JP are
  60   all variants of ISO2022.  Details are described in section 3.
  61
  62   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  63
  64   A coding system to encode character sets: ASCII, JISX0201, and
  65   JISX0208.  Widely used for PC's in Japan.  Details are described in
  66   section 4.
  67
  68   3. BIG5
  69
  70   A coding system to encode the character sets ASCII and Big5.  Widely
  71   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  72   described in section 4.  In this file, when we write "BIG5"
  73   (all uppercase), we mean the coding system, and when we write
  74   "Big5" (capitalized), we mean the character set.
  75
  76   4. Raw text
  77
  78   A coding system for text containing random 8-bit code.  Emacs does
  79   no code conversion on such text except for end-of-line format.
  80
  81   5. Other
  82
  83   If a user wants to read/write text encoded in a coding system not
  84   listed above, he can supply a decoder and an encoder for it as CCL
  85   (Code Conversion Language) programs.  Emacs executes the CCL program
  86   while reading/writing.
  87
  88   Emacs represents a coding system by a Lisp symbol that has a property
  89   `coding-system'.  But, before actually using the coding system, the
  90   information about it is set in a structure of type `struct
  91   coding_system' for rapid processing.  See section 6 for more details.
  92
  93 */
  94
  95 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  96
  97   How end-of-line of text is encoded depends on the operating system.
  98   For instance, Unix's format is just one byte of `line-feed' code,
  99   whereas DOS's format is two-byte sequence of `carriage-return' and
 100   `line-feed' codes.  MacOS's format is usually one byte of
 101   `carriage-return'.
 102
 103   Since text character encoding and end-of-line encoding are
 104   independent, any coding system described above can have any
 105   end-of-line format.  So Emacs has information about end-of-line
 106   format in each coding-system.  See section 6 for more details.
 107
 108 */
 109
 110 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 111
 112   These functions check if a text between SRC and SRC_END is encoded
 113   in the coding system category XXX.  Each returns an integer value in
 114   which appropriate flag bits for the category XXX are set.  The flag
 115   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 116   template for these functions.  If MULTIBYTEP is nonzero, 8-bit codes
 117   of the range 0x80..0x9F are in multibyte form.  */
 118 #if 0
 119 int
 120 detect_coding_emacs_mule (src, src_end, multibytep)
 121      unsigned char *src, *src_end;
 122      int multibytep;
 123 {
 124   ...
 125 }
 126 #endif
 127
 128 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 129
 130   These functions decode SRC_BYTES length of unibyte text at SOURCE
 131   encoded in CODING to Emacs' internal format.  The resulting
 132   multibyte text goes to a place pointed to by DESTINATION, the length
 133   of which should not exceed DST_BYTES.
 134
 135   These functions set the information about original and decoded texts
 136   in the members `produced', `produced_char', `consumed', and
 137   `consumed_char' of the structure *CODING.  They also set the member
 138   `result' to one of CODING_FINISH_XXX indicating how the decoding
 139   finished.
 140
 141   DST_BYTES zero means that the source area and destination area are
 142   overlapped, which means that we can produce a decoded text until it
 143   reaches the head of the not-yet-decoded source text.
 144
 145   Below is a template for these functions.  */
 146 #if 0
 147 static void
 148 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 149      struct coding_system *coding;
 150      unsigned char *source, *destination;
 151      int src_bytes, dst_bytes;
 152 {
 153   ...
 154 }
 155 #endif
 156
 157 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 158
 159   These functions encode SRC_BYTES length text at SOURCE from Emacs'
 160   internal multibyte format to CODING.  The resulting unibyte text
 161   goes to a place pointed to by DESTINATION, the length of which
 162   should not exceed DST_BYTES.
 163
 164   These functions set the information about original and encoded texts
 165   in the members `produced', `produced_char', `consumed', and
 166   `consumed_char' of the structure *CODING.  They also set the member
 167   `result' to one of CODING_FINISH_XXX indicating how the encoding
 168   finished.
 169
 170   DST_BYTES zero means that the source area and destination area are
 171   overlapped, which means that we can produce encoded text until it
  172   reaches the head of the not-yet-encoded source text.
 173
 174   Below is a template for these functions.  */
 175 #if 0
 176 static void
 177 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 178      struct coding_system *coding;
 179      unsigned char *source, *destination;
 180      int src_bytes, dst_bytes;
 181 {
 182   ...
 183 }
 184 #endif
 185
 186 /*** COMMONLY USED MACROS ***/
 187
 188 /* The following two macros ONE_MORE_BYTE and TWO_MORE_BYTES safely
  189    get one and two bytes from the source text respectively.
 190    If there are not enough bytes in the source, they jump to
 191    `label_end_of_loop'.  The caller should set variables `coding',
 192    `src' and `src_end' to appropriate pointer in advance.  These
 193    macros are called from decoding routines `decode_coding_XXX', thus
 194    it is assumed that the source text is unibyte.  */
 195
 196 #define ONE_MORE_BYTE(c1)                                       \
 197   do {                                                          \
 198     if (src >= src_end)                                         \
 199       {                                                         \
 200         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 201         goto label_end_of_loop;                                 \
 202       }                                                         \
 203     c1 = *src++;                                                \
 204   } while (0)
 205
 206 #define TWO_MORE_BYTES(c1, c2)                                  \
 207   do {                                                          \
 208     if (src + 1 >= src_end)                                     \
 209       {                                                         \
 210         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 211         goto label_end_of_loop;                                 \
 212       }                                                         \
 213     c1 = *src++;                                                \
 214     c2 = *src++;                                                \
 215   } while (0)
 216
 217
 218 /* Like ONE_MORE_BYTE, but 8-bit bytes of data at SRC are in multibyte
 219    form if MULTIBYTEP is nonzero.  */
 220
 221 #define ONE_MORE_BYTE_CHECK_MULTIBYTE(c1, multibytep)           \
 222   do {                                                          \
 223     if (src >= src_end)                                         \
 224       {                                                         \
 225         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 226         goto label_end_of_loop;                                 \
 227       }                                                         \
 228     c1 = *src++;                                                \
 229     if (multibytep && c1 == LEADING_CODE_8_BIT_CONTROL)         \
 230       c1 = *src++ - 0x20;                                       \
 231   } while (0)
 232
 233 /* Set C to the next character at the source text pointed by `src'.
 234    If there are not enough characters in the source, jump to
 235    `label_end_of_loop'.  The caller should set variables `coding'
 236    `src', `src_end', and `translation_table' to appropriate pointers
 237    in advance.  This macro is used in encoding routines
 238    `encode_coding_XXX', thus it assumes that the source text is in
 239    multibyte form except for 8-bit characters.  8-bit characters are
 240    in multibyte form if coding->src_multibyte is nonzero, else they
 241    are represented by a single byte.  */
 242
 243 #define ONE_MORE_CHAR(c)                                        \
 244   do {                                                          \
 245     int len = src_end - src;                                    \
 246     int bytes;                                                  \
 247     if (len <= 0)                                               \
 248       {                                                         \
 249         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 250         goto label_end_of_loop;                                 \
 251       }                                                         \
 252     if (coding->src_multibyte                                   \
 253         || UNIBYTE_STR_AS_MULTIBYTE_P (src, len, bytes))        \
 254       c = STRING_CHAR_AND_LENGTH (src, len, bytes);             \
 255     else                                                        \
 256       c = *src, bytes = 1;                                      \
 257     if (!NILP (translation_table))                              \
 258       c = translate_char (translation_table, c, -1, 0, 0);      \
 259     src += bytes;                                               \
 260   } while (0)
 261
 262
 263 /* Produce a multibyte form of character C to `dst'.  Jump to
 264    `label_end_of_loop' if there's not enough space at `dst'.
 265
 266    If we are now in the middle of a composition sequence, the decoded
 267    character may be ALTCHAR (for the current composition).  In that
 268    case, the character goes to coding->cmp_data->data instead of
 269    `dst'.
 270
 271    This macro is used in decoding routines.  */
 272
 273 #define EMIT_CHAR(c)                                                    \
 274   do {                                                                  \
 275     if (! COMPOSING_P (coding)                                          \
 276         || coding->composing == COMPOSITION_RELATIVE                    \
 277         || coding->composing == COMPOSITION_WITH_RULE)                  \
 278       {                                                                 \
 279         int bytes = CHAR_BYTES (c);                                     \
 280         if ((dst + bytes) > (dst_bytes ? dst_end : src))                \
 281           {                                                             \
 282             coding->result = CODING_FINISH_INSUFFICIENT_DST;            \
 283             goto label_end_of_loop;                                     \
 284           }                                                             \
 285         dst += CHAR_STRING (c, dst);                                    \
 286         coding->produced_char++;                                        \
 287       }                                                                 \
 288                                                                         \
 289     if (COMPOSING_P (coding)                                            \
 290         && coding->composing != COMPOSITION_RELATIVE)                   \
 291       {                                                                 \
 292         CODING_ADD_COMPOSITION_COMPONENT (coding, c);                   \
 293         coding->composition_rule_follows                                \
 294           = coding->composing != COMPOSITION_WITH_ALTCHARS;             \
 295       }                                                                 \
 296   } while (0)
 297
 298
 299 #define EMIT_ONE_BYTE(c)                                        \
 300   do {                                                          \
 301     if (dst >= (dst_bytes ? dst_end : src))                     \
 302       {                                                         \
 303         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 304         goto label_end_of_loop;                                 \
 305       }                                                         \
 306     *dst++ = c;                                                 \
 307   } while (0)
 308
 309 #define EMIT_TWO_BYTES(c1, c2)                                  \
 310   do {                                                          \
 311     if (dst + 2 > (dst_bytes ? dst_end : src))                  \
 312       {                                                         \
 313         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 314         goto label_end_of_loop;                                 \
 315       }                                                         \
 316     *dst++ = c1, *dst++ = c2;                                   \
 317   } while (0)
 318
 319 #define EMIT_BYTES(from, to)                                    \
 320   do {                                                          \
 321     if (dst + (to - from) > (dst_bytes ? dst_end : src))        \
 322       {                                                         \
 323         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 324         goto label_end_of_loop;                                 \
 325       }                                                         \
 326     while (from < to)                                           \
 327       *dst++ = *from++;                                         \
 328   } while (0)
 329
 330 \f
 331 /*** 1. Preamble ***/
 332
 333 #ifdef emacs
 334 #include <config.h>
 335 #endif
 336
 337 #include <stdio.h>
 338
 339 #ifdef emacs
 340
 341 #include "lisp.h"
 342 #include "buffer.h"
 343 #include "charset.h"
 344 #include "composite.h"
 345 #include "ccl.h"
 346 #include "coding.h"
 347 #include "window.h"
 348 #include "intervals.h"
 349
 350 #else  /* not emacs */
 351
 352 #include "mulelib.h"
 353
 354 #endif /* not emacs */
 355
 356 Lisp_Object Qcoding_system, Qeol_type;
 357 Lisp_Object Qbuffer_file_coding_system;
 358 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 359 Lisp_Object Qno_conversion, Qundecided;
 360 Lisp_Object Qcoding_system_history;
 361 Lisp_Object Qsafe_chars;
 362 Lisp_Object Qvalid_codes;
 363
 364 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 365 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 366 Lisp_Object Qstart_process, Qopen_network_stream;
 367 Lisp_Object Qtarget_idx;
 368
 369 /* If a symbol has this property, evaluate the value to define the
 370    symbol as a coding system.  */
 371 Lisp_Object Qcoding_system_define_form;
 372
 373 Lisp_Object Vselect_safe_coding_system_function;
 374
 375 int coding_system_require_warning;
 376
 377 /* Mnemonic string for each format of end-of-line.  */
 378 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 379 /* Mnemonic string to indicate format of end-of-line is not yet
 380    decided.  */
 381 Lisp_Object eol_mnemonic_undecided;
 382
 383 /* Format of end-of-line decided by system.  This is CODING_EOL_LF on
 384    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
 385 int system_eol_type;
 386
 387 #ifdef emacs
 388
 389 /* Information about which coding system is safe for which chars.
 390    The value has the form (GENERIC-LIST . NON-GENERIC-ALIST).
 391
 392    GENERIC-LIST is a list of generic coding systems which can encode
 393    any characters.
 394
 395    NON-GENERIC-ALIST is an alist of non generic coding systems vs the
 396    corresponding char table that contains safe chars.  */
 397 Lisp_Object Vcoding_system_safe_chars;
 398
 399 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 400
 401 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 402
 403 /* Coding system emacs-mule and raw-text are for converting only
 404    end-of-line format.  */
 405 Lisp_Object Qemacs_mule, Qraw_text;
 406
 407 Lisp_Object Qutf_8;
 408
 409 /* Coding-systems are handed between Emacs Lisp programs and C internal
 410    routines by the following three variables.  */
 411 /* Coding-system for reading files and receiving data from process.  */
 412 Lisp_Object Vcoding_system_for_read;
 413 /* Coding-system for writing files and sending data to process.  */
 414 Lisp_Object Vcoding_system_for_write;
 415 /* Coding-system actually used in the latest I/O.  */
 416 Lisp_Object Vlast_coding_system_used;
 417
 418 /* A vector of length 256 which contains information about special
 419    Latin codes (especially for dealing with Microsoft codes).  */
 420 Lisp_Object Vlatin_extra_code_table;
 421
 422 /* Flag to inhibit code conversion of end-of-line format.  */
 423 int inhibit_eol_conversion;
 424
 425 /* Flag to inhibit ISO2022 escape sequence detection.  */
 426 int inhibit_iso_escape_detection;
 427
 428 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 429 int inherit_process_coding_system;
 430
 431 /* Coding system to be used to encode text for terminal display.  */
 432 struct coding_system terminal_coding;
 433
 434 /* Coding system to be used to encode text for terminal display when
 435    terminal coding system is nil.  */
 436 struct coding_system safe_terminal_coding;
 437
 438 /* Coding system of what is sent from terminal keyboard.  */
 439 struct coding_system keyboard_coding;
 440
 441 /* Default coding system to be used to write a file.  */
 442 struct coding_system default_buffer_file_coding;
 443
 444 Lisp_Object Vfile_coding_system_alist;
 445 Lisp_Object Vprocess_coding_system_alist;
 446 Lisp_Object Vnetwork_coding_system_alist;
 447
 448 Lisp_Object Vlocale_coding_system;
 449
 450 #endif /* emacs */
 451
 452 Lisp_Object Qcoding_category, Qcoding_category_index;
 453
 454 /* List of symbols `coding-category-xxx' ordered by priority.  */
 455 Lisp_Object Vcoding_category_list;
 456
 457 /* Table of coding categories (Lisp symbols).  */
 458 Lisp_Object Vcoding_category_table;
 459
 460 /* Table of names of symbol for each coding-category.  */
 461 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
 462   "coding-category-emacs-mule",
 463   "coding-category-sjis",
 464   "coding-category-iso-7",
 465   "coding-category-iso-7-tight",
 466   "coding-category-iso-8-1",
 467   "coding-category-iso-8-2",
 468   "coding-category-iso-7-else",
 469   "coding-category-iso-8-else",
 470   "coding-category-ccl",
 471   "coding-category-big5",
 472   "coding-category-utf-8",
 473   "coding-category-utf-16-be",
 474   "coding-category-utf-16-le",
 475   "coding-category-raw-text",
 476   "coding-category-binary"
 477 };
 478
 479 /* Table of pointers to coding systems corresponding to each coding
 480    categories.  */
 481 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
 482
 483 /* Table of coding category masks.  Nth element is a mask for a coding
 484    category of which priority is Nth.  */
 485 static
 486 int coding_priorities[CODING_CATEGORY_IDX_MAX];
 487
 488 /* Flag to tell if we look up translation table on character code
 489    conversion.  */
 490 Lisp_Object Venable_character_translation;
 491 /* Standard translation table to look up on decoding (reading).  */
 492 Lisp_Object Vstandard_translation_table_for_decode;
 493 /* Standard translation table to look up on encoding (writing).  */
 494 Lisp_Object Vstandard_translation_table_for_encode;
 495
 496 Lisp_Object Qtranslation_table;
 497 Lisp_Object Qtranslation_table_id;
 498 Lisp_Object Qtranslation_table_for_decode;
 499 Lisp_Object Qtranslation_table_for_encode;
 500
 501 /* Alist of charsets vs revision number.  */
 502 Lisp_Object Vcharset_revision_alist;
 503
 504 /* Default coding systems used for process I/O.  */
 505 Lisp_Object Vdefault_process_coding_system;
 506
 507 /* Char table for translating Quail and self-inserting input.  */
 508 Lisp_Object Vtranslation_table_for_input;
 509
 510 /* Global flag to tell that we can't call post-read-conversion and
 511    pre-write-conversion functions.  Usually the value is zero, but it
 512    is set to 1 temporarily while such functions are running.  This is
 513    to avoid infinite recursive call.  */
 514 static int inhibit_pre_post_conversion;
 515
 516 Lisp_Object Qchar_coding_system;
 517
 518 /* Return `safe-chars' property of CODING_SYSTEM (symbol).  Don't check
 519    its validity.  */
 520
 521 Lisp_Object
 522 coding_safe_chars (coding_system)
 523      Lisp_Object coding_system;
 524 {
 525   Lisp_Object coding_spec, plist, safe_chars;
 526
 527   coding_spec = Fget (coding_system, Qcoding_system);
 528   plist = XVECTOR (coding_spec)->contents[3];
 529   safe_chars = Fplist_get (XVECTOR (coding_spec)->contents[3], Qsafe_chars);
 530   return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt);
 531 }
 532
 533 #define CODING_SAFE_CHAR_P(safe_chars, c) \
 534   (EQ (safe_chars, Qt) || !NILP (CHAR_TABLE_REF (safe_chars, c)))
 535
 536 \f
 537 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 538
 539 /* Emacs' internal format for representation of multiple character
 540    sets is a kind of multi-byte encoding, i.e. characters are
 541    represented by variable-length sequences of one-byte codes.
 542
 543    ASCII characters and control characters (e.g. `tab', `newline') are
 544    represented by one-byte sequences which are their ASCII codes, in
 545    the range 0x00 through 0x7F.
 546
 547    8-bit characters of the range 0x80..0x9F are represented by
 548    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
 549    code + 0x20).
 550
 551    8-bit characters of the range 0xA0..0xFF are represented by
 552    one-byte sequences which are their 8-bit code.
 553
 554    The other characters are represented by a sequence of `base
 555    leading-code', optional `extended leading-code', and one or two
 556    `position-code's.  The length of the sequence is determined by the
 557    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
 558    whereas extended leading-code and position-code take the range 0xA0
 559    through 0xFF.  See `charset.h' for more details about leading-code
 560    and position-code.
 561
 562    --- CODE RANGE of Emacs' internal format ---
 563    character set        range
 564    -------------        -----
 565    ascii                0x00..0x7F
 566    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
  567    eight-bit-graphic    0xA0..0xFF
 568    ELSE                 0x81..0x9D + [0xA0..0xFF]+
 569    ---------------------------------------------
 570
 571    As this is the internal character representation, the format is
 572    usually not used externally (i.e. in a file or in a data sent to a
 573    process).  But, it is possible to have a text externally in this
 574    format (i.e. by encoding by the coding system `emacs-mule').
 575
 576    In that case, a sequence of one-byte codes has a slightly different
 577    form.
 578
 579    Firstly, all characters in eight-bit-control are represented by
 580    one-byte sequences which are their 8-bit code.
 581
 582    Next, character composition data are represented by the byte
 583    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
 584    where,
 585         METHOD is 0xF0 plus one of composition method (enum
 586         composition_method),
 587
 588         BYTES is 0xA0 plus the byte length of these composition data,
 589
 590         CHARS is 0xA0 plus the number of characters composed by these
 591         data,
 592
 593         COMPONENTs are characters of multibyte form or composition
 594         rules encoded by two-byte of ASCII codes.
 595
 596    In addition, for backward compatibility, the following formats are
 597    also recognized as composition data on decoding.
 598
 599    0x80 MSEQ ...
 600    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
 601
 602    Here,
  603         MSEQ is a multibyte form but in one of these special formats:
 604           ASCII: 0xA0 ASCII_CODE+0x80,
 605           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
 606         RULE is a one byte code of the range 0xA0..0xF0 that
 607         represents a composition rule.
 608   */
 609
 610 enum emacs_code_class_type emacs_code_class[256];
 611
 612 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 613    Check if a text is encoded in Emacs' internal format.  If it is,
 614    return CODING_CATEGORY_MASK_EMACS_MULE, else return 0.  */
 615
 616 static int
 617 detect_coding_emacs_mule (src, src_end, multibytep)
 618       unsigned char *src, *src_end;
 619       int multibytep;
 620 {
 621   unsigned char c;
 622   int composing = 0;
 623   /* Dummy for ONE_MORE_BYTE.  */
 624   struct coding_system dummy_coding;
 625   struct coding_system *coding = &dummy_coding;
 626
 627   while (1)
 628     {
 629       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
 630
 631       if (composing)
 632         {
 633           if (c < 0xA0)
 634             composing = 0;
 635           else if (c == 0xA0)
 636             {
 637               ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
 638               c &= 0x7F;
 639             }
 640           else
 641             c -= 0x20;
 642         }
 643
 644       if (c < 0x20)
 645         {
 646           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
 647             return 0;
 648         }
 649       else if (c >= 0x80 && c < 0xA0)
 650         {
 651           if (c == 0x80)
 652             /* Old leading code for a composite character.  */
 653             composing = 1;
 654           else
 655             {
 656               unsigned char *src_base = src - 1;
 657               int bytes;
 658
 659               if (!UNIBYTE_STR_AS_MULTIBYTE_P (src_base, src_end - src_base,
 660                                                bytes))
 661                 return 0;
 662               src = src_base + bytes;
 663             }
 664         }
 665     }
 666  label_end_of_loop:
 667   return CODING_CATEGORY_MASK_EMACS_MULE;
 668 }
 669
 670
 671 /* Record the starting position START and METHOD of one composition.  */
 672
 673 #define CODING_ADD_COMPOSITION_START(coding, start, method)     \
 674   do {                                                          \
 675     struct composition_data *cmp_data = coding->cmp_data;       \
 676     int *data = cmp_data->data + cmp_data->used;                \
 677     coding->cmp_data_start = cmp_data->used;                    \
 678     data[0] = -1;                                               \
 679     data[1] = cmp_data->char_offset + start;                    \
 680     data[3] = (int) method;                                     \
 681     cmp_data->used += 4;                                        \
 682   } while (0)
 683
 684 /* Record the ending position END of the current composition.  */
 685
 686 #define CODING_ADD_COMPOSITION_END(coding, end)                 \
 687   do {                                                          \
 688     struct composition_data *cmp_data = coding->cmp_data;       \
 689     int *data = cmp_data->data + coding->cmp_data_start;        \
 690     data[0] = cmp_data->used - coding->cmp_data_start;          \
 691     data[2] = cmp_data->char_offset + end;                      \
 692   } while (0)
 693
 694 /* Record one COMPONENT (alternate character or composition rule).  */
 695
 696 #define CODING_ADD_COMPOSITION_COMPONENT(coding, component)             \
 697   do {                                                                  \
 698     coding->cmp_data->data[coding->cmp_data->used++] = component;       \
 699     if (coding->cmp_data->used - coding->cmp_data_start                 \
 700         == COMPOSITION_DATA_MAX_BUNCH_LENGTH)                           \
 701       {                                                                 \
 702         CODING_ADD_COMPOSITION_END (coding, coding->produced_char);     \
 703         coding->composing = COMPOSITION_NO;                             \
 704       }                                                                 \
 705   } while (0)
 706
 707
 708 /* Get one byte from a data pointed by SRC and increment SRC.  If SRC
 709    is not less than SRC_END, return -1 without incrementing Src.  */
 710
 711 #define SAFE_ONE_MORE_BYTE() (src >= src_end ? -1 : *src++)
 712
 713
 714 /* Decode a character represented as a component of a composition
 715    sequence of Emacs 20 style at SRC.  Set C to that character, store
 716    its multibyte form sequence at P, and set P to the end of that
 717    sequence.  If no valid character is found, set C to -1.

        Two forms are accepted.  The byte 0xA0 followed by a byte B that
        is not less than 0xA0 yields the character B - 0xA0, stored at P
        as a single byte.  Otherwise the first byte minus 0x20 must
        satisfy BASE_LEADING_CODE_P; it is stored as the leading code and
        the following bytes, as many as BYTES_BY_CHAR_HEAD asks for, are
        copied after it.  A first byte that satisfies CHAR_HEAD_P is
        rejected.

        C is also -1 when the source is already exhausted at the first
        byte.  Bytes stored at P before a multibyte sequence turns out to
        be invalid are not withdrawn, so P may have advanced even though C
        is -1.

        The expansion reads and advances `src', and reads `src_end' and
        `coding', all taken from the scope where the macro is used.  */
 718
 719 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(c, p)                \
 720   do {                                                          \
 721     int bytes;                                                  \
 722     /* A negative C means the source is exhausted.  */          \
 723     c = SAFE_ONE_MORE_BYTE ();                                  \
 724     if (c < 0)                                                  \
 725       break;                                                    \
 726     if (CHAR_HEAD_P (c))                                        \
 727       c = -1;                                                   \
 728     else if (c == 0xA0)                                         \
 729       {                                                         \
 730         c = SAFE_ONE_MORE_BYTE ();                              \
 731         if (c < 0xA0)                                           \
 732           c = -1;                                               \
 733         else                                                    \
 734           {                                                     \
 735             c -= 0xA0;                                          \
 736             *p++ = c;                                           \
 737           }                                                     \
 738       }                                                         \
 739     else if (BASE_LEADING_CODE_P (c - 0x20))                    \
 740       {                                                         \
 741         unsigned char *p0 = p;                                  \
 742         /* Remove the 0x20 offset to get the leading code.  */  \
 743         c -= 0x20;                                              \
 744         *p++ = c;                                               \
 745         bytes = BYTES_BY_CHAR_HEAD (c);                         \
 746         while (--bytes)                                         \
 747           {                                                     \
 748             c = SAFE_ONE_MORE_BYTE ();                          \
 749             if (c < 0)                                          \
 750               break;                                            \
 751             *p++ = c;                                           \
 752           }                                                     \
 753         if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes)      \
 754             || (coding->flags /* We are recovering a file.  */  \
 755                 && p0[0] == LEADING_CODE_8_BIT_CONTROL          \
 756                 && ! CHAR_HEAD_P (p0[1])))                      \
 757           c = STRING_CHAR (p0, bytes);                          \
 758         else                                                    \
 759           c = -1;                                               \
 760       }                                                         \
 761     else                                                        \
 762       c = -1;                                                   \
 763   } while (0)
 764
 765
 766 /* Decode a composition rule represented as a component of a
 767    composition sequence of Emacs 20 style at SRC.  Set C to the rule.
 768    If no valid rule is found, set C to -1.

        The rule occupies one byte whose value minus 0xA0 must lie in the
        range 0..80.  That value divided by 9 is stored in `gref' and its
        remainder in `nref'; both are variables of the scope where the
        macro is used, and C becomes COMPOSITION_ENCODE_RULE (gref, nref).
        An exhausted source also gives -1, because SAFE_ONE_MORE_BYTE then
        yields -1 and the subtraction keeps the value negative.

        The expansion reads and advances `src' and reads `src_end'.  */
 769
 770 #define DECODE_EMACS_MULE_COMPOSITION_RULE(c)           \
 771   do {                                                  \
 772     c = SAFE_ONE_MORE_BYTE ();                          \
 773     c -= 0xA0;                                          \
 774     if (c < 0 || c >= 81)                               \
 775       c = -1;                                           \
 776     else                                                \
 777       {                                                 \
 778         gref = c / 9, nref = c % 9;                     \
 779         c = COMPOSITION_ENCODE_RULE (gref, nref);       \
 780       }                                                 \
 781   } while (0)
 782
 783
 784 /* Decode a composition sequence encoded by `emacs-mule' at the source
 785    pointed to by SRC.  SRC_END is the end of the source.  Store the
 786    information of the composition in CODING->cmp_data.
 787
 788    For backward compatibility, also decode a composition sequence of
 789    Emacs 20 style.  In that case, the composition sequence contains
 790    characters that should be extracted into a buffer or string.  Store
 791    those characters at *DESTINATION in multibyte form and advance
        *DESTINATION past them.  The destination is limited by DST_END
        when DST_BYTES is nonzero, and by SRC when DST_BYTES is zero.
 792
 793    If we encounter an invalid byte sequence, return 0.
 794    If we encounter an insufficient source or destination, or
 795    insufficient space in CODING->cmp_data, return -1.  (In the last
        case CODING->result is set to CODING_FINISH_INSUFFICIENT_CMP.)
 796    Otherwise, return the number of bytes consumed in the source.
 797
 798 */
 799 static INLINE int
 800 decode_composition_emacs_mule (coding, src, src_end,
 801                                destination, dst_end, dst_bytes)
 802      struct coding_system *coding;
 803      unsigned char *src, *src_end, **destination, *dst_end;
 804      int dst_bytes;
 805 {
 806   unsigned char *dst = *destination;
 807   int method, data_len, nchars;
       /* Remember where the sequence starts and step over its first byte.  */
 808   unsigned char *src_base = src++;
 809   /* Store components of composition.  */
 810   int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH];
 811   int ncomponent;
 812   /* Store multibyte form of characters to be composed.  This is for
 813      Emacs 20 style composition sequence.  */
 814   unsigned char buf[MAX_COMPOSITION_COMPONENTS * MAX_MULTIBYTE_LENGTH];
 815   unsigned char *bufp = buf;
 816   int c, i, gref, nref;
 817
       /* Refuse to start unless a composition of maximum length would
          still fit in the composition data.  */
 818   if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
 819       >= COMPOSITION_DATA_SIZE)
 820     {
 821       coding->result = CODING_FINISH_INSUFFICIENT_CMP;
 822       return -1;
 823     }
 824
       /* NOTE(review): ONE_MORE_BYTE is defined outside this function;
          presumably it jumps to label_end_of_loop when the source is
          exhausted -- confirm against its definition.  */
 825   ONE_MORE_BYTE (c);
 826   if (c - 0xF0 >= COMPOSITION_RELATIVE
 827            && c - 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS)
 828     {
           /* Current format: 0x80 METHOD BYTES CHARS COMPONENT..., where
              METHOD is biased by 0xF0 and BYTES and CHARS by 0xA0.  */
 829       int with_rule;
 830
 831       method = c - 0xF0;
 832       with_rule = (method == COMPOSITION_WITH_RULE
 833                    || method == COMPOSITION_WITH_RULE_ALTCHARS);
 834       ONE_MORE_BYTE (c);
 835       data_len = c - 0xA0;
           /* DATA_LEN counts from SRC_BASE, so it includes the four
              header bytes and must end inside the source.  */
 836       if (data_len < 4
 837           || src_base + data_len > src_end)
 838         return 0;
 839       ONE_MORE_BYTE (c);
 840       nchars = c - 0xA0;
           /* The count of composed characters is stored biased by 0xA0, so
              it is the unbiased value that must be positive.  Testing the
              raw byte would accept counts that are zero or negative.  */
 841       if (nchars < 1)
 842         return 0;
 843       for (ncomponent = 0; src < src_base + data_len; ncomponent++)
 844         {
 845           /* If it is longer than this, it can't be valid.  */
 846           if (ncomponent >= COMPOSITION_DATA_MAX_BUNCH_LENGTH)
 847             return 0;
 848
               /* With a rule-based method the odd components are rules,
                  each given as two bytes biased by 32.  */
 849           if (ncomponent % 2 && with_rule)
 850             {
 851               ONE_MORE_BYTE (gref);
 852               gref -= 32;
 853               ONE_MORE_BYTE (nref);
 854               nref -= 32;
 855               c = COMPOSITION_ENCODE_RULE (gref, nref);
 856             }
 857           else
 858             {
 859               int bytes;
 860               if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
 861                   || (coding->flags /* We are recovering a file.  */
 862                       && src[0] == LEADING_CODE_8_BIT_CONTROL
 863                       && ! CHAR_HEAD_P (src[1])))
 864                 c = STRING_CHAR (src, bytes);
 865               else
                     /* Not a valid multibyte form: take the single byte.  */
 866                 c = *src, bytes = 1;
 867               src += bytes;
 868             }
 869           component[ncomponent] = c;
 870         }
 871     }
 872   else
 873     {
 874       /* This may be an old Emacs 20 style format.  See the comment at
 875          the section 2 of this file.  */
 876       while (src < src_end && !CHAR_HEAD_P (*src)) src++;
           /* The run of non-head bytes reached the end of the source and
              this is not the last block, so more bytes may follow.  */
 877       if (src == src_end
 878           && !(coding->mode & CODING_MODE_LAST_BLOCK))
 879         goto label_end_of_loop;
 880
           /* Confine decoding to the run just scanned and restart at the
              byte that follows the first one.  */
 881       src_end = src;
 882       src = src_base + 1;
 883       if (c < 0xC0)
 884         {
 885           method = COMPOSITION_RELATIVE;
 886           for (ncomponent = 0; ncomponent < MAX_COMPOSITION_COMPONENTS;)
 887             {
 888               DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 889               if (c < 0)
 890                 break;
 891               component[ncomponent++] = c;
 892             }
 893           if (ncomponent < 2)
 894             return 0;
 895           nchars = ncomponent;
 896         }
 897       else if (c == 0xFF)
 898         {
 899           method = COMPOSITION_WITH_RULE;
               /* Skip the 0xFF byte that marks a rule-based sequence.  */
 900           src++;
 901           DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 902           if (c < 0)
 903             return 0;
 904           component[0] = c;
 905           for (ncomponent = 1;
 906                ncomponent < MAX_COMPOSITION_COMPONENTS * 2 - 1;)
 907             {
 908               DECODE_EMACS_MULE_COMPOSITION_RULE (c);
 909               if (c < 0)
 910                 break;
 911               component[ncomponent++] = c;
 912               DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 913               if (c < 0)
 914                 break;
 915               component[ncomponent++] = c;
 916             }
 917           if (ncomponent < 3)
 918             return 0;
               /* Components alternate character, rule, character, ...  */
 919           nchars = (ncomponent + 1) / 2;
 920         }
 921       else
 922         return 0;
 923     }
 924
       /* Record the composition only if the characters collected in BUF
          (none for the current format) fit in the destination.  */
 925   if (buf == bufp || dst + (bufp - buf) <= (dst_bytes ? dst_end : src))
 926     {
 927       CODING_ADD_COMPOSITION_START (coding, coding->produced_char, method);
 928       for (i = 0; i < ncomponent; i++)
 929         CODING_ADD_COMPOSITION_COMPONENT (coding, component[i]);
 930       CODING_ADD_COMPOSITION_END (coding, coding->produced_char + nchars);
 931       if (buf < bufp)
 932         {
 933           unsigned char *p = buf;
 934           EMIT_BYTES (p, bufp);
 935           *destination += bufp - buf;
 936           coding->produced_char += nchars;
 937         }
 938       return (src - src_base);
 939     }
 940  label_end_of_loop:
 941   return -1;
 942 }
 943
 944 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".

        Decode the SRC_BYTES bytes at SOURCE into DESTINATION.  Carriage
        returns and newlines are handled according to CODING->eol_type.
        When CODING->cmp_data is non-null, a sequence starting with 0x80
        is passed to decode_composition_emacs_mule.  Every other byte
        sequence is copied as it is if it is an acceptable multibyte form,
        and otherwise its first byte is converted by CHAR_STRING.

        On return CODING->consumed and CODING->consumed_char both hold the
        number of source bytes consumed, CODING->produced the number of
        bytes written and CODING->produced_char the number of characters
        produced.  CODING->result is set to CODING_FINISH_INCONSISTENT_EOL
        or CODING_FINISH_INSUFFICIENT_DST when decoding stops for one of
        those reasons.  */
 945
 946 static void
 947 decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
 948      struct coding_system *coding;
 949      unsigned char *source, *destination;
 950      int src_bytes, dst_bytes;
 951 {
 952   unsigned char *src = source;
 953   unsigned char *src_end = source + src_bytes;
 954   unsigned char *dst = destination;
 955   unsigned char *dst_end = destination + dst_bytes;
 956   /* SRC_BASE remembers the start position in source in each loop.
 957      The loop will be exited when there's not enough source code, or
 958      when there's not enough destination area to produce a
 959      character.  */
 960   unsigned char *src_base;
 961
 962   coding->produced_char = 0;
 963   while ((src_base = src) < src_end)
 964     {
           /* TMP receives the bytes made by CHAR_STRING; P points at the
              BYTES bytes to copy, either in TMP or in the source.  */
 965       unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p;
 966       int bytes;
 967
 968       if (*src == '\r')
 969         {
 970           int c = *src++;
 971
               /* A lone CR is a line end for CODING_EOL_CR.  For
                  CODING_EOL_CRLF a CR LF pair becomes one newline, and a
                  CR not followed by LF is kept as CR.  */
 972           if (coding->eol_type == CODING_EOL_CR)
 973             c = '\n';
 974           else if (coding->eol_type == CODING_EOL_CRLF)
 975             {
                   /* NOTE(review): ONE_MORE_BYTE is defined elsewhere;
                      presumably it leaves through label_end_of_loop when
                      no byte is left -- confirm.  */
 976               ONE_MORE_BYTE (c);
 977               if (c != '\n')
 978                 {
 979                   src--;
 980                   c = '\r';
 981                 }
 982             }
 983           *dst++ = c;
 984           coding->produced_char++;
 985           continue;
 986         }
 987       else if (*src == '\n')
 988         {
               /* A bare newline contradicts a CR or CRLF convention; stop
                  here if the caller asked for that to be reported.  */
 989           if ((coding->eol_type == CODING_EOL_CR
 990                || coding->eol_type == CODING_EOL_CRLF)
 991               && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
 992             {
 993               coding->result = CODING_FINISH_INCONSISTENT_EOL;
 994               goto label_end_of_loop;
 995             }
 996           *dst++ = *src++;
 997           coding->produced_char++;
 998           continue;
 999         }
1000       else if (*src == 0x80 && coding->cmp_data)
1001         {
1002           /* Start of composition data.  */
1003           int consumed  = decode_composition_emacs_mule (coding, src, src_end,
1004                                                          &dst, dst_end,
1005                                                          dst_bytes);
               /* Negative: source, destination or composition data ran
                  short.  Positive: that many source bytes were decoded.
                  Zero: not a valid composition, so the 0x80 byte is
                  treated below as an ordinary byte.  */
1006           if (consumed < 0)
1007             goto label_end_of_loop;
1008           else if (consumed > 0)
1009             {
1010               src += consumed;
1011               continue;
1012             }
1013           bytes = CHAR_STRING (*src, tmp);
1014           p = tmp;
1015           src++;
1016         }
1017       else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
1018                || (coding->flags /* We are recovering a file.  */
1019                    && src[0] == LEADING_CODE_8_BIT_CONTROL
1020                    && ! CHAR_HEAD_P (src[1])))
1021         {
               /* Acceptable multibyte form: copy it straight from the
                  source.  */
1022           p = src;
1023           src += bytes;
1024         }
1025       else
1026         {
               /* Anything else: convert the single byte with CHAR_STRING.  */
1027           bytes = CHAR_STRING (*src, tmp);
1028           p = tmp;
1029           src++;
1030         }
           /* The limit is DST_END when DST_BYTES is nonzero and SRC
              otherwise.  Leaving through `break' keeps SRC_BASE at the
              start of this character, so it is not counted as consumed.  */
1031       if (dst + bytes >= (dst_bytes ? dst_end : src))
1032         {
1033           coding->result = CODING_FINISH_INSUFFICIENT_DST;
1034           break;
1035         }
1036       while (bytes--) *dst++ = *p++;
1037       coding->produced_char++;
1038     }
1039  label_end_of_loop:
       /* SRC_BASE, not SRC, marks how far the source was fully handled.  */
1040   coding->consumed = coding->consumed_char = src_base - source;
1041   coding->produced = dst - destination;
1042 }
1043
1044
1045 /* Encode composition data stored at DATA into a special byte sequence
1046    starting by 0x80.  Update CODING->cmp_data_start and maybe
1047    CODING->cmp_data for the next call.

        As read here, DATA[0] is the number of elements of this entry,
        DATA[2] - DATA[1] is the number of composed characters, DATA[3] is
        the composition method, and the components start at DATA[4].  For
        the two rule-based methods the components alternate character,
        rule, character, ...; each rule is written as two bytes, 0x20 plus
        GREF and 0x20 plus NREF.

        The sequence written is
          0x80  0xF0+METHOD  0xA0+BYTES  0xA0+CHARS  COMPONENT ...
        where BYTES is the length of the whole sequence.  It is assembled
        in a local 1024-byte array; no check against that size is made in
        this macro.

        The expansion uses `dst', `dst_end', `dst_bytes' and `src' from the
        scope where the macro is used.  When the destination lacks room it
        sets CODING->result to CODING_FINISH_INSUFFICIENT_DST and jumps to
        the label `label_end_of_loop' of that scope.  */
1048
1049 #define ENCODE_COMPOSITION_EMACS_MULE(coding, data)                     \
1050   do {                                                                  \
1051     unsigned char buf[1024], *p0 = buf, *p;                             \
1052     int len = data[0];                                                  \
1053     int i;                                                              \
1054     /* 4-byte header; buf[2] is filled in once the length is known.  */ \
1055     buf[0] = 0x80;                                                      \
1056     buf[1] = 0xF0 + data[3];    /* METHOD */                            \
1057     buf[3] = 0xA0 + (data[2] - data[1]); /* COMPOSED-CHARS */           \
1058     p = buf + 4;                                                        \
1059     if (data[3] == COMPOSITION_WITH_RULE                                \
1060         || data[3] == COMPOSITION_WITH_RULE_ALTCHARS)                   \
1061       {                                                                 \
1062         p += CHAR_STRING (data[4], p);                                  \
1063         for (i = 5; i < len; i += 2)                                    \
1064           {                                                             \
1065             int gref, nref;                                             \
1066              COMPOSITION_DECODE_RULE (data[i], gref, nref);             \
1067             *p++ = 0x20 + gref;                                         \
1068             *p++ = 0x20 + nref;                                         \
1069             p += CHAR_STRING (data[i + 1], p);                          \
1070           }                                                             \
1071       }                                                                 \
1072     else                                                                \
1073       {                                                                 \
1074         for (i = 4; i < len; i++)                                       \
1075           p += CHAR_STRING (data[i], p);                                \
1076       }                                                                 \
1077     buf[2] = 0xA0 + (p - buf);  /* COMPONENTS-BYTES */                  \
1078     /* Give up before writing anything if DST lacks room.  */           \
1079     if (dst + (p - buf) + 4 > (dst_bytes ? dst_end : src))              \
1080       {                                                                 \
1081         coding->result = CODING_FINISH_INSUFFICIENT_DST;                \
1082         goto label_end_of_loop;                                         \
1083       }                                                                 \
1084     while (p0 < p)                                                      \
1085       *dst++ = *p0++;                                                   \
1086     coding->cmp_data_start += data[0];                                  \
1087     if (coding->cmp_data_start == coding->cmp_data->used                \
1088         && coding->cmp_data->next)                                      \
1089       {                                                                 \
1090         coding->cmp_data = coding->cmp_data->next;                      \
1091         coding->cmp_data_start = 0;                                     \
1092       }                                                                 \
1093   } while (0)
1094
1095
     /* Forward declaration.  encode_coding_emacs_mule calls encode_eol
        with its own arguments, unchanged, when there is no composition
        data to encode.  */
1096 static void encode_eol P_ ((struct coding_system *, const unsigned char *,
1097                             unsigned char *, int, int));
1098
     /* Encode the SRC_BYTES bytes at SOURCE into DESTINATION.

        When CODING has no composition data, or the data is empty, the
        whole job is left to encode_eol.  Otherwise each composition
        recorded in CODING->cmp_data is written by
        ENCODE_COMPOSITION_EMACS_MULE just before the character at which it
        starts, a newline is replaced by CR LF or CR according to
        CODING->eol_type, and the other characters are written one by one.

        On return CODING->consumed holds the number of source bytes
        consumed, and CODING->produced and CODING->produced_char both hold
        the number of bytes written.  CODING->consumed_char is incremented
        for each character encoded.  */
1099 static void
1100 encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
1101      struct coding_system *coding;
1102      unsigned char *source, *destination;
1103      int src_bytes, dst_bytes;
1104 {
1105   unsigned char *src = source;
1106   unsigned char *src_end = source + src_bytes;
1107   unsigned char *dst = destination;
1108   unsigned char *dst_end = destination + dst_bytes;
1109   unsigned char *src_base;
1110   int c;
1111   int char_offset;
1112   int *data;
1113
       /* NOTE(review): not referenced by name below; presumably one of the
          macros used in the loop (e.g. ONE_MORE_CHAR) reads it -- confirm
          against their definitions.  */
1114   Lisp_Object translation_table;
1115
1116   translation_table = Qnil;
1117
1118   /* Optimization for the case that there's no composition.  */
1119   if (!coding->cmp_data || coding->cmp_data->used == 0)
1120     {
1121       encode_eol (coding, source, destination, src_bytes, dst_bytes);
1122       return;
1123     }
1124
       /* DATA points at the next composition entry that is still to be
          written.  */
1125   char_offset = coding->cmp_data->char_offset;
1126   data = coding->cmp_data->data + coding->cmp_data_start;
       /* The loop has no exit condition of its own.  It is left through
          label_end_of_loop: the jump is visible in
          ENCODE_COMPOSITION_EMACS_MULE and is presumably also made by
          ONE_MORE_CHAR and the EMIT_* macros, which are defined
          elsewhere.  */
1127   while (1)
1128     {
1129       src_base = src;
1130
1131       /* If SRC starts a composition, encode the information about the
1132          composition in advance.  */
1133       if (coding->cmp_data_start < coding->cmp_data->used
1134           && char_offset + coding->consumed_char == data[1])
1135         {
1136           ENCODE_COMPOSITION_EMACS_MULE (coding, data);
               /* The macro may have advanced CODING->cmp_data to the next
                  chunk, so reload both values.  */
1137           char_offset = coding->cmp_data->char_offset;
1138           data = coding->cmp_data->data + coding->cmp_data_start;
1139         }
1140
1141       ONE_MORE_CHAR (c);
1142       if (c == '\n' && (coding->eol_type == CODING_EOL_CRLF
1143                         || coding->eol_type == CODING_EOL_CR))
1144         {
1145           if (coding->eol_type == CODING_EOL_CRLF)
1146             EMIT_TWO_BYTES ('\r', c);
1147           else
1148             EMIT_ONE_BYTE ('\r');
1149         }
1150       else if (SINGLE_BYTE_CHAR_P (c))
1151         {
1152           if (coding->flags && ! ASCII_BYTE_P (c))
1153             {
1154               /* As we are auto saving, retain the multibyte form for
1155                  8-bit chars.  */
1156               unsigned char buf[MAX_MULTIBYTE_LENGTH];
1157               int bytes = CHAR_STRING (c, buf);
1158
1159               if (bytes == 1)
1160                 EMIT_ONE_BYTE (buf[0]);
1161               else
1162                 EMIT_TWO_BYTES (buf[0], buf[1]);
1163             }
1164           else
1165             EMIT_ONE_BYTE (c);
1166         }
1167       else
             /* A multibyte character is copied as it stands in the source.  */
1168         EMIT_BYTES (src_base, src);
1169       coding->consumed_char++;
1170     }
1171  label_end_of_loop:
       /* SRC_BASE is the start of the character that was not completed.  */
1172   coding->consumed = src_base - source;
1173   coding->produced = coding->produced_char = dst - destination;
1174   return;
1175 }
1176
1177 \f
1178 /*** 3. ISO2022 handlers ***/
1179
1180 /* The following note describes the coding system ISO2022 briefly.
1181    Since the intention of this note is to help understand the
1182    functions in this file, some parts are NOT ACCURATE or are OVERLY
1183    SIMPLIFIED.  For thorough understanding, please refer to the
1184    original document of ISO2022.  This is equivalent to the standard
1185    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
1186
1187    ISO2022 provides many mechanisms to encode several character sets
1188    in 7-bit and 8-bit environments.  For 7-bit environments, all text
1189    is encoded using bytes less than 128.  This may make the encoded
1190    text a little bit longer, but the text passes more easily through
1191    several types of gateway, some of which strip off the MSB (Most
1192    Significant Bit).
1193
1194    There are two kinds of character sets: control character sets and
1195    graphic character sets.  The former contain control characters such
1196    as `newline' and `escape' to provide control functions (control
1197    functions are also provided by escape sequences).  The latter
1198    contain graphic characters such as 'A' and '-'.  Emacs recognizes
1199    two control character sets and many graphic character sets.
1200
1201    Graphic character sets are classified into one of the following
1202    four classes, according to the number of bytes (DIMENSION) and
1203    number of characters in one dimension (CHARS) of the set:
1204    - DIMENSION1_CHARS94
1205    - DIMENSION1_CHARS96
1206    - DIMENSION2_CHARS94
1207    - DIMENSION2_CHARS96
1208
1209    In addition, each character set is assigned an identification tag,
1210    unique for each set, called the "final character" (denoted as <F>
1211    hereafter).  The <F> of each character set is decided by ECMA(*)
1212    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
1213    (0x30..0x3F are for private use only).
1214
1215    Note (*): ECMA = European Computer Manufacturers Association
1216
1217    Here are examples of graphic character sets [NAME(<F>)]:
1218         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
1219         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
1220         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
1221         o DIMENSION2_CHARS96 -- none for the moment
1222
1223    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
1224         C0 [0x00..0x1F] -- control character plane 0
1225         GL [0x20..0x7F] -- graphic character plane 0
1226         C1 [0x80..0x9F] -- control character plane 1
1227         GR [0xA0..0xFF] -- graphic character plane 1
1228
1229    A control character set is directly designated and invoked to C0 or
1230    C1 by an escape sequence.  The most common case is that:
1231    - ISO646's  control character set is designated/invoked to C0, and
1232    - ISO6429's control character set is designated/invoked to C1,
1233    and usually these designations/invocations are omitted in encoded
1234    text.  In a 7-bit environment, only C0 can be used, and a control
1235    character for C1 is encoded by an appropriate escape sequence to
1236    fit into the environment.  All control characters for C1 are
1237    defined to have corresponding escape sequences.
1238
1239    A graphic character set is at first designated to one of four
1240    graphic registers (G0 through G3), then these graphic registers are
1241    invoked to GL or GR.  These designations and invocations can be
1242    done independently.  The most common case is that G0 is invoked to
1243    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
1244    these invocations and designations are omitted in encoded text.
1245    In a 7-bit environment, only GL can be used.
1246
1247    When a graphic character set of CHARS94 is invoked to GL, codes
1248    0x20 and 0x7F of the GL area work as control characters SPACE and
1249    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
1250    be used.
1251
1252    There are two ways of invocation: locking-shift and single-shift.
1253    With locking-shift, the invocation lasts until the next different
1254    invocation, whereas with single-shift, the invocation affects the
1255    following character only and doesn't affect the locking-shift
1256    state.  Invocations are done by the following control characters or
1257    escape sequences:
1258
1259    ----------------------------------------------------------------------
1260    abbrev  function                  cntrl escape seq   description
1261    ----------------------------------------------------------------------
1262    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
1263    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
1264    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
1265    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
1266    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
1267    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
1268    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
1269    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
1270    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
1271    ----------------------------------------------------------------------
1272    (*) These are not used by any known coding system.
1273
1274    Control characters for these functions are defined by macros
1275    ISO_CODE_XXX in `coding.h'.
1276
1277    Designations are done by the following escape sequences:
1278    ----------------------------------------------------------------------
1279    escape sequence      description
1280    ----------------------------------------------------------------------
1281    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
1282    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
1283    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
1284    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
1285    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
1286    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
1287    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
1288    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
1289    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
1290    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
1291    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
1292    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
1293    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
1294    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
1295    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
1296    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
1297    ----------------------------------------------------------------------
1298
1299    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
1300    of dimension 1, chars 94, and final character <F>, etc...
1301
1302    Note (*): Although these designations are not allowed in ISO2022,
1303    Emacs accepts them on decoding, and produces them on encoding
1304    CHARS96 character sets in a coding system which is characterized as
1305    7-bit environment, non-locking-shift, and non-single-shift.
1306
1307    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
1308    '(' can be omitted.  We refer to this as "short-form" hereafter.
1309
1310    Now you may notice that there are a lot of ways of encoding the
1311    same multilingual text in ISO2022.  Actually, there exist many
1312    coding systems such as Compound Text (used in X11's inter client
1313    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
1314    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
1315    localized platforms), and all of these are variants of ISO2022.
1316
1317    In addition to the above, Emacs handles two more kinds of escape
1318    sequences: ISO6429's direction specification and Emacs' private
1319    sequence for specifying character composition.
1320
1321    ISO6429's direction specification takes the following form:
1322         o CSI ']'      -- end of the current direction
1323         o CSI '0' ']'  -- end of the current direction
1324         o CSI '1' ']'  -- start of left-to-right text
1325         o CSI '2' ']'  -- start of right-to-left text
1326    The control character CSI (0x9B: control sequence introducer) is
1327    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
1328
1329    Character composition specification takes the following form:
1330         o ESC '0' -- start relative composition
1331         o ESC '1' -- end composition
1332         o ESC '2' -- start rule-base composition (*)
1333         o ESC '3' -- start relative composition with alternate chars  (**)
1334         o ESC '4' -- start rule-base composition with alternate chars  (**)
1335   Since these are not standard escape sequences of any ISO standard,
1336   the use of them with these meanings is restricted to Emacs only.
1337
1338   (*) This form is used only in Emacs 20.5 and older versions,
1339   but the newer versions can safely decode it.
1340   (**) This form is used only in Emacs 21.1 and newer versions,
1341   and the older versions can't decode it.
1342
1343   Here's a list of example usages of these composition escape
1344   sequences (categorized by `enum composition_method').
1345
1346   COMPOSITION_RELATIVE:
1347         ESC 0 CHAR [ CHAR ] ESC 1
1348   COMPOSITION_WITH_RULE:
1349         ESC 2 CHAR [ RULE CHAR ] ESC 1
1350   COMPOSITION_WITH_ALTCHARS:
1351         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
1352   COMPOSITION_WITH_RULE_ALTCHARS:
1353         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
1354
     /* Table of 256 `enum iso_code_class_type' values.
        NOTE(review): presumably indexed by byte value and filled in
        elsewhere in this file -- confirm against its users.  */
1355 enum iso_code_class_type iso_code_class[256];
1356
     /* Nonzero if the coding system at coding_system_table[IDX] exists and
        accepts character C of CHARSET.  That requires that CHARSET is
        CHARSET_ASCII or C satisfies CODING_SAFE_CHAR_P for the safe
        characters of that coding system, and that the coding system's
        requested designation for CHARSET is something other than
        CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION.

        As a side effect the expansion may assign to `safe_chars', a
        variable that must exist in the scope where the macro is used.  */
1357 #define CHARSET_OK(idx, charset, c)                                     \
1358   (coding_system_table[idx]                                             \
1359    && (charset == CHARSET_ASCII                                         \
1360        || (safe_chars = coding_safe_chars (coding_system_table[idx]->symbol), \
1361            CODING_SAFE_CHAR_P (safe_chars, c)))                         \
1362    && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \
1363                                               charset)                  \
1364        != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
1365
     /* Nonzero if CODING_SPEC_ISO_INITIAL_DESIGNATION, called with 1 as its
        second argument, is not negative for the coding system at
        coding_system_table[IDX].  The table entry is used without a null
        check.
        NOTE(review): the argument 1 presumably selects graphic register
        G1 -- confirm against the macro's definition.  */
1366 #define SHIFT_OUT_OK(idx) \
1367   (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
1368
     /* Nonzero unless the `composing' member of the coding system at
        coding_system_table[IDX] is COMPOSITION_DISABLED.  The table entry
        is dereferenced without a null check.  */
1369 #define COMPOSITION_OK(idx)     \
1370   (coding_system_table[idx]->composing != COMPOSITION_DISABLED)
1371
1372 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1373    Check if a text is encoded in ISO2022.  If it is, return an
1374    integer in which the appropriate flag bits among:
1375         CODING_CATEGORY_MASK_ISO_7
1376         CODING_CATEGORY_MASK_ISO_7_TIGHT
1377         CODING_CATEGORY_MASK_ISO_8_1
1378         CODING_CATEGORY_MASK_ISO_8_2
1379         CODING_CATEGORY_MASK_ISO_7_ELSE
1380         CODING_CATEGORY_MASK_ISO_8_ELSE
1381    are set.  If a code which should never appear in ISO2022 is found,
1382    return 0.  */
1383
1384 static int
1385 detect_coding_iso2022 (src, src_end, multibytep)
1386      unsigned char *src, *src_end;
1387      int multibytep;
1388 {
1389   int mask = CODING_CATEGORY_MASK_ISO;
1390   int mask_found = 0;
1391   int reg[4], shift_out = 0, single_shifting = 0;
1392   int c, c1, charset;
1393   /* Dummy for ONE_MORE_BYTE.  */
1394   struct coding_system dummy_coding;
1395   struct coding_system *coding = &dummy_coding;
1396   Lisp_Object safe_chars;
1397
1398   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
1399   while (mask && src < src_end)
1400     {
1401       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1402     retry:
1403       switch (c)
1404         {
1405         case ISO_CODE_ESC:
1406           if (inhibit_iso_escape_detection)
1407             break;
1408           single_shifting = 0;
1409           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1410           if (c >= '(' && c <= '/')
1411             {
1412               /* Designation sequence for a charset of dimension 1.  */
1413               ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
1414               if (c1 < ' ' || c1 >= 0x80
1415                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
1416                 /* Invalid designation sequence.  Just ignore.  */
1417                 break;
1418               reg[(c - '(') % 4] = charset;
1419             }
1420           else if (c == '$')
1421             {
1422               /* Designation sequence for a charset of dimension 2.  */
1423               ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1424               if (c >= '@' && c <= 'B')
1425                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
1426                 reg[0] = charset = iso_charset_table[1][0][c];
1427               else if (c >= '(' && c <= '/')
1428                 {
1429                   ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
1430                   if (c1 < ' ' || c1 >= 0x80
1431                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
1432                     /* Invalid designation sequence.  Just ignore.  */
1433                     break;
1434                   reg[(c - '(') % 4] = charset;
1435                 }
1436               else
1437                 /* Invalid designation sequence.  Just ignore.  */
1438                 break;
1439             }
1440           else if (c == 'N' || c == 'O')
1441             {
1442               /* ESC <Fe> for SS2 or SS3.  */
1443               mask &= CODING_CATEGORY_MASK_ISO_7_ELSE;
1444               break;
1445             }
1446           else if (c >= '0' && c <= '4')
1447             {
1448               /* ESC <Fp> for start/end composition.  */
1449               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7))
1450                 mask_found |= CODING_CATEGORY_MASK_ISO_7;
1451               else
1452                 mask &= ~CODING_CATEGORY_MASK_ISO_7;
1453               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT))
1454                 mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
1455               else
1456                 mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
1457               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_1))
1458                 mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
1459               else
1460                 mask &= ~CODING_CATEGORY_MASK_ISO_8_1;
1461               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_2))
1462                 mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
1463               else
1464                 mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
1465               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_ELSE))
1466                 mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
1467               else
1468                 mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
1469               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_ELSE))
1470                 mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
1471               else
1472                 mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
1473               break;
1474             }
1475           else
1476             /* Invalid escape sequence.  Just ignore.  */
1477             break;
1478
1479           /* We found a valid designation sequence for CHARSET.  */
1480           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
1481           c = MAKE_CHAR (charset, 0, 0);
1482           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset, c))
1483             mask_found |= CODING_CATEGORY_MASK_ISO_7;
1484           else
1485             mask &= ~CODING_CATEGORY_MASK_ISO_7;
1486           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset, c))
1487             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
1488           else
1489             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
1490           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset, c))
1491             mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
1492           else
1493             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
1494           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset, c))
1495             mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
1496           else
1497             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
1498           break;
1499
1500         case ISO_CODE_SO:
1501           if (inhibit_iso_escape_detection)
1502             break;
1503           single_shifting = 0;
1504           if (shift_out == 0
1505               && (reg[1] >= 0
1506                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
1507                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
1508             {
1509               /* Locking shift out.  */
1510               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
1511               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
1512             }
1513           break;
1514
1515         case ISO_CODE_SI:
1516           if (inhibit_iso_escape_detection)
1517             break;
1518           single_shifting = 0;
1519           if (shift_out == 1)
1520             {
1521               /* Locking shift in.  */
1522               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
1523               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
1524             }
1525           break;
1526
1527         case ISO_CODE_CSI:
1528           single_shifting = 0;
1529         case ISO_CODE_SS2:
1530         case ISO_CODE_SS3:
1531           {
1532             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
1533
1534             if (inhibit_iso_escape_detection)
1535               break;
1536             if (c != ISO_CODE_CSI)
1537               {
1538                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1539                     & CODING_FLAG_ISO_SINGLE_SHIFT)
1540                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1541                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1542                     & CODING_FLAG_ISO_SINGLE_SHIFT)
1543                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1544                 single_shifting = 1;
1545               }
1546             if (VECTORP (Vlatin_extra_code_table)
1547                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1548               {
1549                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1550                     & CODING_FLAG_ISO_LATIN_EXTRA)
1551                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1552                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1553                     & CODING_FLAG_ISO_LATIN_EXTRA)
1554                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1555               }
1556             mask &= newmask;
1557             mask_found |= newmask;
1558           }
1559           break;
1560
1561         default:
1562           if (c < 0x80)
1563             {
1564               single_shifting = 0;
1565               break;
1566             }
1567           else if (c < 0xA0)
1568             {
1569               single_shifting = 0;
1570               if (VECTORP (Vlatin_extra_code_table)
1571                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1572                 {
1573                   int newmask = 0;
1574
1575                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1576                       & CODING_FLAG_ISO_LATIN_EXTRA)
1577                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1578                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1579                       & CODING_FLAG_ISO_LATIN_EXTRA)
1580                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1581                   mask &= newmask;
1582                   mask_found |= newmask;
1583                 }
1584               else
1585                 return 0;
1586             }
1587           else
1588             {
1589               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
1590                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
1591               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
1592               /* Check the length of succeeding codes of the range
1593                  0xA0..0FF.  If the byte length is odd, we exclude
1594                  CODING_CATEGORY_MASK_ISO_8_2.  We can check this only
1595                  when we are not single shifting.  */
1596               if (!single_shifting
1597                   && mask & CODING_CATEGORY_MASK_ISO_8_2)
1598                 {
1599                   int i = 1;
1600
1601                   c = -1;
1602                   while (src < src_end)
1603                     {
1604                       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1605                       if (c < 0xA0)
1606                         break;
1607                       i++;
1608                     }
1609
1610                   if (i & 1 && src < src_end)
1611                     mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
1612                   else
1613                     mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
1614                   if (c >= 0)
1615                     /* This means that we have read one extra byte.  */
1616                     goto retry;
1617                 }
1618             }
1619           break;
1620         }
1621     }
1622  label_end_of_loop:
1623   return (mask & mask_found);
1624 }
1625
1626 /* Decode a character of which charset is CHARSET, the 1st position
1627    code is C1, the 2nd position code is C2, and return the decoded
1628    character code.  If the variable `translation_table' is non-nil,
1629    return the code given by translate_char with that table instead.  */
1630
1631 #define DECODE_ISO_CHARACTER(charset, c1, c2)   \
1632   (NILP (translation_table)                     \
1633    ? MAKE_CHAR (charset, c1, c2)                \
1634    : translate_char (translation_table, -1, charset, c1, c2))
1635
1636 /* Set designation state into CODING: designate the charset found by ISO_CHARSET_TABLE for DIMENSION, CHARS and FINAL_CHAR to register REG.  Uses `coding', `safe_chars' and the label `label_invalid_code' of the enclosing function; jumps to that label when the designation is rejected.  */
1637 #define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
1638   do {                                                                     \
1639     int charset, c;                                                        \
1640                                                                            \
1641     if (final_char < '0' || final_char >= 128)                             \
1642       goto label_invalid_code;                                             \
1643     charset = ISO_CHARSET_TABLE (make_number (dimension),                  \
1644                                  make_number (chars),                      \
1645                                  make_number (final_char));                \
1646     c = MAKE_CHAR (charset, 0, 0);                                         \
1647     if (charset >= 0                                                       \
1648         && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
1649             || CODING_SAFE_CHAR_P (safe_chars, c)))                        \
1650       {                                                                    \
1651         if (coding->spec.iso2022.last_invalid_designation_register == 0    \
1652             && reg == 0                                                    \
1653             && charset == CHARSET_ASCII)                                   \
1654           {                                                                \
1655             /* We should insert this designation sequence as is so         \
1656                that it is surely written back to a file.  */               \
1657             coding->spec.iso2022.last_invalid_designation_register = -1;   \
1658             goto label_invalid_code;                                       \
1659           }                                                                \
1660         coding->spec.iso2022.last_invalid_designation_register = -1;       \
1661         if ((coding->mode & CODING_MODE_DIRECTION)                         \
1662             && CHARSET_REVERSE_CHARSET (charset) >= 0)                     \
1663           charset = CHARSET_REVERSE_CHARSET (charset);                     \
1664         CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;               \
1665       }                                                                    \
1666     else                                                                   \
1667       {                                                                    \
1668         coding->spec.iso2022.last_invalid_designation_register = reg;      \
1669         goto label_invalid_code;                                           \
1670       }                                                                    \
1671   } while (0)
1672
1673 /* Allocate (with xmalloc) a memory block for storing information about compositions, record CHAR_OFFSET in it, and make it CODING->cmp_data.
1674    The block is chained after the previous CODING->cmp_data, if any, and CODING->cmp_data_start is reset to 0.  */
1675
1676 void
1677 coding_allocate_composition_data (coding, char_offset)
1678      struct coding_system *coding;
1679      int char_offset;
1680 {
1681   struct composition_data *cmp_data
1682     = (struct composition_data *) xmalloc (sizeof *cmp_data);
1683
1684   cmp_data->char_offset = char_offset;
1685   cmp_data->used = 0;
1686   cmp_data->prev = coding->cmp_data;   /* The block that was current until now (may be NULL).  */
1687   cmp_data->next = NULL;
1688   if (coding->cmp_data)
1689     coding->cmp_data->next = cmp_data;   /* Complete the link in both directions.  */
1690   coding->cmp_data = cmp_data;
1691   coding->cmp_data_start = 0;
1692 }
1693
1694 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
1695    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
1696    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
1697    ESC 3 : altchar composition :  ESC 3 ALT ... ESC 0 CHAR ... ESC 1
1698    ESC 4 : alt&rule composition : ESC 4 ALT RULE .. ALT ESC 0 CHAR ... ESC 1
1699    C1 is the byte that followed ESC.  Uses `coding', `dst' and the label `label_end_of_loop' of the enclosing function.  */
1700
1701 #define DECODE_COMPOSITION_START(c1)                                       \
1702   do {                                                                     \
1703     if (coding->composing == COMPOSITION_DISABLED)                         \
1704       {                                                                    \
1705         *dst++ = ISO_CODE_ESC;    /* Emit the two bytes as is.  */         \
1706         *dst++ = c1 & 0x7f;                                                \
1707         coding->produced_char += 2;                                        \
1708       }                                                                    \
1709     else if (!COMPOSING_P (coding))                                        \
1710       {                                                                    \
1711         /* This is surely the start of a composition.  We must be sure     \
1712            that coding->cmp_data has enough space to store the             \
1713            information about the composition.  If not, terminate the       \
1714            current decoding loop, allocate one more memory block for       \
1715            coding->cmp_data in the caller, then start the decoding         \
1716            loop again.  We can't allocate memory here directly because     \
1717            it may cause buffer/string relocation.  */                      \
1718         if (!coding->cmp_data                                              \
1719             || (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \
1720                 >= COMPOSITION_DATA_SIZE))                                 \
1721           {                                                                \
1722             coding->result = CODING_FINISH_INSUFFICIENT_CMP;               \
1723             goto label_end_of_loop;                                        \
1724           }                                                                \
1725         coding->composing = (c1 == '0' ? COMPOSITION_RELATIVE              \
1726                              : c1 == '2' ? COMPOSITION_WITH_RULE           \
1727                              : c1 == '3' ? COMPOSITION_WITH_ALTCHARS       \
1728                              : COMPOSITION_WITH_RULE_ALTCHARS);            \
1729         CODING_ADD_COMPOSITION_START (coding, coding->produced_char,       \
1730                                       coding->composing);                  \
1731         coding->composition_rule_follows = 0;                              \
1732       }                                                                    \
1733     else                                                                   \
1734       {                                                                    \
1735         /* We are already handling a composition.  If the method is        \
1736            the following two, the codes following the current escape       \
1737            sequence are actual characters stored in a buffer.  */          \
1738         if (coding->composing == COMPOSITION_WITH_ALTCHARS                 \
1739             || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)        \
1740           {                                                                \
1741             coding->composing = COMPOSITION_RELATIVE;                      \
1742             coding->composition_rule_follows = 0;                          \
1743           }                                                                \
1744       }                                                                    \
1745   } while (0)
1746
1747 /* Handle composition end sequence ESC 1.  If no composition is in progress, emit ESC and C1 to `dst' as is; otherwise record the end of the composition and set coding->composing to COMPOSITION_NO.  Uses `coding' and `dst' of the enclosing function.  */
1748
1749 #define DECODE_COMPOSITION_END(c1)                                      \
1750   do {                                                                  \
1751     if (! COMPOSING_P (coding))                                         \
1752       {                                                                 \
1753         *dst++ = ISO_CODE_ESC;                                          \
1754         *dst++ = c1;                                                    \
1755         coding->produced_char += 2;                                     \
1756       }                                                                 \
1757     else                                                                \
1758       {                                                                 \
1759         CODING_ADD_COMPOSITION_END (coding, coding->produced_char);     \
1760         coding->composing = COMPOSITION_NO;                             \
1761       }                                                                 \
1762   } while (0)
1763
1764 /* Decode a composition rule from the byte C1 (and maybe one more byte
1765    from SRC, read into `c2' of the enclosing function) and store one encoded composition rule in
1766    coding->cmp_data.  C1 must be an lvalue: 32 is subtracted from it in place.  If the result is 93 or more, a rule of 0 is stored.  */
1767
1768 #define DECODE_COMPOSITION_RULE(c1)                                     \
1769   do {                                                                  \
1770     int rule = 0;                                                       \
1771     (c1) -= 32;                                                         \
1772     if (c1 < 81)                /* old format (before ver.21) */        \
1773       {                                                                 \
1774         int gref = (c1) / 9;                                            \
1775         int nref = (c1) % 9;                                            \
1776         if (gref == 4) gref = 10;                                       \
1777         if (nref == 4) nref = 10;                                       \
1778         rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
1779       }                                                                 \
1780     else if (c1 < 93)           /* new format (after ver.21) */         \
1781       {                                                                 \
1782         ONE_MORE_BYTE (c2);                                             \
1783         rule = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);              \
1784       }                                                                 \
1785     CODING_ADD_COMPOSITION_COMPONENT (coding, rule);                    \
1786     coding->composition_rule_follows = 0;                               \
1787   } while (0)
1788
1789
1790 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1791
1792 static void
1793 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1794      struct coding_system *coding;
1795      unsigned char *source, *destination;
1796      int src_bytes, dst_bytes;
1797 {
1798   unsigned char *src = source;
1799   unsigned char *src_end = source + src_bytes;
1800   unsigned char *dst = destination;
1801   unsigned char *dst_end = destination + dst_bytes;
1802   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1803   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1804   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1805   /* SRC_BASE remembers the start position in source in each loop.
1806      The loop will be exited when there's not enough source code
1807      (within macro ONE_MORE_BYTE), or when there's not enough
1808      destination area to produce a character (within macro
1809      EMIT_CHAR).  */
1810   unsigned char *src_base;
1811   int c, charset;
1812   Lisp_Object translation_table;
1813   Lisp_Object safe_chars;
1814
1815   safe_chars = coding_safe_chars (coding->symbol);
1816
1817   if (NILP (Venable_character_translation))
1818     translation_table = Qnil;
1819   else
1820     {
1821       translation_table = coding->translation_table_for_decode;
1822       if (NILP (translation_table))
1823         translation_table = Vstandard_translation_table_for_decode;
1824     }
1825
1826   coding->result = CODING_FINISH_NORMAL;
1827
1828   while (1)
1829     {
1830       int c1, c2 = 0;
1831
1832       src_base = src;
1833       ONE_MORE_BYTE (c1);
1834
1835       /* We produce no character or one character.  */
1836       switch (iso_code_class [c1])
1837         {
1838         case ISO_0x20_or_0x7F:
1839           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1840             {
1841               DECODE_COMPOSITION_RULE (c1);
1842               continue;
1843             }
1844           if (charset0 < 0 || CHARSET_CHARS (charset0) == 94)
1845             {
1846               /* This is SPACE or DEL.  */
1847               charset = CHARSET_ASCII;
1848               break;
1849             }
1850           /* This is a graphic character, we fall down ...  */
1851
1852         case ISO_graphic_plane_0:
1853           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1854             {
1855               DECODE_COMPOSITION_RULE (c1);
1856               continue;
1857             }
1858           charset = charset0;
1859           break;
1860
1861         case ISO_0xA0_or_0xFF:
1862           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1863               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1864             goto label_invalid_code;
1865           /* This is a graphic character, we fall down ... */
1866
1867         case ISO_graphic_plane_1:
1868           if (charset1 < 0)
1869             goto label_invalid_code;
1870           charset = charset1;
1871           break;
1872
1873         case ISO_control_0:
1874           if (COMPOSING_P (coding))
1875             DECODE_COMPOSITION_END ('1');
1876
1877           /* All ISO2022 control characters in this class have the
1878              same representation in Emacs internal format.  */
1879           if (c1 == '\n'
1880               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1881               && (coding->eol_type == CODING_EOL_CR
1882                   || coding->eol_type == CODING_EOL_CRLF))
1883             {
1884               coding->result = CODING_FINISH_INCONSISTENT_EOL;
1885               goto label_end_of_loop;
1886             }
1887           charset = CHARSET_ASCII;
1888           break;
1889
1890         case ISO_control_1:
1891           if (COMPOSING_P (coding))
1892             DECODE_COMPOSITION_END ('1');
1893           goto label_invalid_code;
1894
1895         case ISO_carriage_return:
1896           if (COMPOSING_P (coding))
1897             DECODE_COMPOSITION_END ('1');
1898
1899           if (coding->eol_type == CODING_EOL_CR)
1900             c1 = '\n';
1901           else if (coding->eol_type == CODING_EOL_CRLF)
1902             {
1903               ONE_MORE_BYTE (c1);
1904               if (c1 != ISO_CODE_LF)
1905                 {
1906                   src--;
1907                   c1 = '\r';
1908                 }
1909             }
1910           charset = CHARSET_ASCII;
1911           break;
1912
1913         case ISO_shift_out:
1914           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1915               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1916             goto label_invalid_code;
1917           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1918           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1919           continue;
1920
1921         case ISO_shift_in:
1922           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1923             goto label_invalid_code;
1924           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1925           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1926           continue;
1927
1928         case ISO_single_shift_2_7:
1929         case ISO_single_shift_2:
1930           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1931             goto label_invalid_code;
1932           /* SS2 is handled as an escape sequence of ESC 'N' */
1933           c1 = 'N';
1934           goto label_escape_sequence;
1935
1936         case ISO_single_shift_3:
1937           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1938             goto label_invalid_code;
1939           /* SS3 is handled as an escape sequence of ESC 'O' */
1940           c1 = 'O';
1941           goto label_escape_sequence;
1942
1943         case ISO_control_sequence_introducer:
1944           /* CSI is handled as an escape sequence of ESC '[' ...  */
1945           c1 = '[';
1946           goto label_escape_sequence;
1947
1948         case ISO_escape:
1949           ONE_MORE_BYTE (c1);
1950         label_escape_sequence:
1951           /* Escape sequences handled by Emacs are invocation,
1952              designation, direction specification, and character
1953              composition specification.  */
1954           switch (c1)
1955             {
1956             case '&':           /* revision of following character set */
1957               ONE_MORE_BYTE (c1);
1958               if (!(c1 >= '@' && c1 <= '~'))
1959                 goto label_invalid_code;
1960               ONE_MORE_BYTE (c1);
1961               if (c1 != ISO_CODE_ESC)
1962                 goto label_invalid_code;
1963               ONE_MORE_BYTE (c1);
1964               goto label_escape_sequence;
1965
1966             case '$':           /* designation of 2-byte character set */
1967               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1968                 goto label_invalid_code;
1969               ONE_MORE_BYTE (c1);
1970               if (c1 >= '@' && c1 <= 'B')
1971                 {       /* designation of JISX0208.1978, GB2312.1980,
1972                            or JISX0208.1980 */
1973                   DECODE_DESIGNATION (0, 2, 94, c1);
1974                 }
1975               else if (c1 >= 0x28 && c1 <= 0x2B)
1976                 {       /* designation of DIMENSION2_CHARS94 character set */
1977                   ONE_MORE_BYTE (c2);
1978                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1979                 }
1980               else if (c1 >= 0x2C && c1 <= 0x2F)
1981                 {       /* designation of DIMENSION2_CHARS96 character set */
1982                   ONE_MORE_BYTE (c2);
1983                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1984                 }
1985               else
1986                 goto label_invalid_code;
1987               /* We must update these variables now.  */
1988               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1989               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1990               continue;
1991
1992             case 'n':           /* invocation of locking-shift-2 */
1993               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1994                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1995                 goto label_invalid_code;
1996               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1997               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1998               continue;
1999
2000             case 'o':           /* invocation of locking-shift-3 */
2001               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
2002                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
2003                 goto label_invalid_code;
2004               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
2005               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
2006               continue;
2007
2008             case 'N':           /* invocation of single-shift-2 */
2009               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2010                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
2011                 goto label_invalid_code;
2012               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
2013               ONE_MORE_BYTE (c1);
2014               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
2015                 goto label_invalid_code;
2016               break;
2017
2018             case 'O':           /* invocation of single-shift-3 */
2019               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2020                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
2021                 goto label_invalid_code;
2022               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
2023               ONE_MORE_BYTE (c1);
2024               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
2025                 goto label_invalid_code;
2026               break;
2027
2028             case '0': case '2': case '3': case '4': /* start composition */
2029               DECODE_COMPOSITION_START (c1);
2030               continue;
2031
2032             case '1':           /* end composition */
2033               DECODE_COMPOSITION_END (c1);
2034               continue;
2035
2036             case '[':           /* specification of direction */
2037               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
2038                 goto label_invalid_code;
2039               /* For the moment, nested direction is not supported.
2040                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
2041                  left-to-right, and nonzero means right-to-left.  */
2042               ONE_MORE_BYTE (c1);
2043               switch (c1)
2044                 {
2045                 case ']':       /* end of the current direction */
2046                   coding->mode &= ~CODING_MODE_DIRECTION;
2047
2048                 case '0':       /* end of the current direction */
2049                 case '1':       /* start of left-to-right direction */
2050                   ONE_MORE_BYTE (c1);
2051                   if (c1 == ']')
2052                     coding->mode &= ~CODING_MODE_DIRECTION;
2053                   else
2054                     goto label_invalid_code;
2055                   break;
2056
2057                 case '2':       /* start of right-to-left direction */
2058                   ONE_MORE_BYTE (c1);
2059                   if (c1 == ']')
2060                     coding->mode |= CODING_MODE_DIRECTION;
2061                   else
2062                     goto label_invalid_code;
2063                   break;
2064
2065                 default:
2066                   goto label_invalid_code;
2067                 }
2068               continue;
2069
2070             case '%':
2071               if (COMPOSING_P (coding))
2072                 DECODE_COMPOSITION_END ('1');
2073               ONE_MORE_BYTE (c1);
2074               if (c1 == '/')
2075                 {
2076                   /* CTEXT extended segment:
2077                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
2078                      We keep these bytes as is for the moment.
2079                      They may be decoded by post-read-conversion.  */
2080                   int dim, M, L;
2081                   int size, required;
2082                   int produced_chars;
2083
2084                   ONE_MORE_BYTE (dim);
2085                   ONE_MORE_BYTE (M);
2086                   ONE_MORE_BYTE (L);
2087                   size = ((M - 128) * 128) + (L - 128);
2088                   required = 8 + size * 2;
2089                   if (dst + required > (dst_bytes ? dst_end : src))
2090                     goto label_end_of_loop;
2091                   *dst++ = ISO_CODE_ESC;
2092                   *dst++ = '%';
2093                   *dst++ = '/';
2094                   *dst++ = dim;
2095                   produced_chars = 4;
2096                   dst += CHAR_STRING (M, dst), produced_chars++;
2097                   dst += CHAR_STRING (L, dst), produced_chars++;
2098                   while (size-- > 0)
2099                     {
2100                       ONE_MORE_BYTE (c1);
2101                       dst += CHAR_STRING (c1, dst), produced_chars++;
2102                     }
2103                   coding->produced_char += produced_chars;
2104                 }
2105               else if (c1 == 'G')
2106                 {
2107                   unsigned char *d = dst;
2108                   int produced_chars;
2109
2110                   /* XFree86 extension for embedding UTF-8 in CTEXT:
2111                      ESC % G --UTF-8-BYTES-- ESC % @
2112                      We keep these bytes as is for the moment.
2113                      They may be decoded by post-read-conversion.  */
2114                   if (d + 6 > (dst_bytes ? dst_end : src))
2115                     goto label_end_of_loop;
2116                   *d++ = ISO_CODE_ESC;
2117                   *d++ = '%';
2118                   *d++ = 'G';
2119                   produced_chars = 3;
2120                   while (d + 1 < (dst_bytes ? dst_end : src))
2121                     {
2122                       ONE_MORE_BYTE (c1);
2123                       if (c1 == ISO_CODE_ESC
2124                           && src + 1 < src_end
2125                           && src[0] == '%'
2126                           && src[1] == '@')
2127                         break;
2128                       d += CHAR_STRING (c1, d), produced_chars++;
2129                     }
2130                   if (d + 3 > (dst_bytes ? dst_end : src))
2131                     goto label_end_of_loop;
2132                   *d++ = ISO_CODE_ESC;
2133                   *d++ = '%';
2134                   *d++ = '@';
2135                   dst = d;
2136                   coding->produced_char += produced_chars + 3;
2137                 }
2138               else
2139                 goto label_invalid_code;
2140               continue;
2141
2142             default:
2143               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
2144                 goto label_invalid_code;
2145               if (c1 >= 0x28 && c1 <= 0x2B)
2146                 {       /* designation of DIMENSION1_CHARS94 character set */
2147                   ONE_MORE_BYTE (c2);
2148                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
2149                 }
2150               else if (c1 >= 0x2C && c1 <= 0x2F)
2151                 {       /* designation of DIMENSION1_CHARS96 character set */
2152                   ONE_MORE_BYTE (c2);
2153                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
2154                 }
2155               else
2156                 goto label_invalid_code;
2157               /* We must update these variables now.  */
2158               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
2159               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
2160               continue;
2161             }
2162         }
2163
2164       /* Now we know CHARSET and 1st position code C1 of a character.
2165          Produce a multibyte sequence for that character while getting
2166          2nd position code C2 if necessary.  */
2167       if (CHARSET_DIMENSION (charset) == 2)
2168         {
2169           ONE_MORE_BYTE (c2);
2170           if (c1 < 0x80 ? c2 < 0x20 || c2 >= 0x80 : c2 < 0xA0)
2171             /* C2 is not in a valid range.  */
2172             goto label_invalid_code;
2173         }
2174       c = DECODE_ISO_CHARACTER (charset, c1, c2);
2175       EMIT_CHAR (c);
2176       continue;
2177
2178     label_invalid_code:
2179       coding->errors++;
2180       if (COMPOSING_P (coding))
2181         DECODE_COMPOSITION_END ('1');
2182       src = src_base;
2183       c = *src++;
2184       EMIT_CHAR (c);
2185     }
2186
2187  label_end_of_loop:
2188   coding->consumed = coding->consumed_char = src_base - source;
2189   coding->produced = dst - destination;
2190   return;
2191 }
2192
2193
2194 /* ISO2022 encoding stuff.  */
2195
/*
   Just saying "ISO2022" is not enough to specify an encoding; more
   details are needed.  In Emacs, each ISO2022 coding system
   variant has the following specifications:
        1. Initial designation to G0 through G3.
        2. Allows short-form designation?
        3. ASCII should be designated to G0 before control characters?
        4. ASCII should be designated to G0 at end of line?
        5. 7-bit environment or 8-bit environment?
        6. Use locking-shift?
        7. Use Single-shift?
   And the following two are only for Japanese:
        8. Use ASCII in place of JISX0201-1976-Roman?
        9. Use JISX0208-1983 in place of JISX0208-1978?
   These specifications are encoded in `coding->flags' as flag bits
   defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
   details.
*/
2214
/* Emit at DST the escape sequence that designates CHARSET to graphic
   register REG, advancing DST past it, and record the designation in
   CODING.  The sequence is built as follows:
     - ESC '&' ('@' + revision), only when the revision number that
       CODING holds for CHARSET is below 255;
     - ESC, then '$' for a charset whose dimension is not 1;
     - an intermediate byte selected by REG from "()*+" (94-char
       sets) or ",-./" (all other sets);
     - the <final-char> of CHARSET.
   The intermediate byte is left out (short form) only for a
   multi-dimension 94-char set designated to register 0 whose
   <final-char> is '@', 'A', or 'B', and only if CODING has
   CODING_FLAG_ISO_SHORT_FORM set.  The macro uses the variable `dst'
   of the expansion site and evaluates its arguments more than
   once.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset);        \
    char *intermediate_char_94 = "()*+";                                \
    char *intermediate_char_96 = ",-./";                                \
    int revision = CODING_SPEC_ISO_REVISION_NUMBER (coding, charset);   \
                                                                        \
    /* Revision announcement, if any, precedes the designation.  */     \
    if (revision < 255)                                                 \
      {                                                                 \
        dst[0] = ISO_CODE_ESC;                                          \
        dst[1] = '&';                                                   \
        dst[2] = '@' + revision;                                        \
        dst += 3;                                                       \
      }                                                                 \
    *dst++ = ISO_CODE_ESC;                                              \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      {                                                                 \
        *dst++ = '$';                                                   \
        if (CHARSET_CHARS (charset) != 94)                              \
          *dst++ = (unsigned char) (intermediate_char_96[reg]);         \
        else if (! ((coding->flags & CODING_FLAG_ISO_SHORT_FORM)        \
                    && reg == 0                                         \
                    && final_char >= '@' && final_char <= 'B'))         \
          /* Short form is not applicable; be explicit.  */             \
          *dst++ = (unsigned char) (intermediate_char_94[reg]);         \
      }                                                                 \
    else if (CHARSET_CHARS (charset) == 94)                             \
      *dst++ = (unsigned char) (intermediate_char_94[reg]);             \
    else                                                                \
      *dst++ = (unsigned char) (intermediate_char_96[reg]);             \
    *dst++ = final_char;                                                \
    CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;                \
  } while (0)
2257
2258 /* The following two macros produce codes (control character or escape
2259    sequence) for ISO2022 single-shift functions (single-shift-2 and
2260    single-shift-3).  */
2261
/* Emit single-shift-2 at `dst': the two bytes ESC 'N' when
   CODING_FLAG_ISO_SEVEN_BITS is set in coding->flags, otherwise the
   single byte ISO_CODE_SS2.  Then mark `coding' as being in the
   single-shifting state.  */
#define ENCODE_SINGLE_SHIFT_2                                   \
  do {                                                          \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))         \
      *dst++ = ISO_CODE_SS2;                                    \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'N';                                           \
      }                                                         \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)
2270
/* Emit single-shift-3 at `dst': the two bytes ESC 'O' when
   CODING_FLAG_ISO_SEVEN_BITS is set in coding->flags, otherwise the
   single byte ISO_CODE_SS3.  Then mark `coding' as being in the
   single-shifting state.  */
#define ENCODE_SINGLE_SHIFT_3                                   \
  do {                                                          \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))         \
      *dst++ = ISO_CODE_SS3;                                    \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'O';                                           \
      }                                                         \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)
2279
2280 /* The following four macros produce codes (control character or
2281    escape sequence) for ISO2022 locking-shift functions (shift-in,
2282    shift-out, locking-shift-2, and locking-shift-3).  */
2283
/* Shift-in: emit ISO_CODE_SI at `dst' and record that graphic
   register 0 is now invoked to graphic plane 0.  */
#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    dst[0] = ISO_CODE_SI;                               \
    dst += 1;                                           \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;         \
  } while (0)
2289
/* Shift-out: emit ISO_CODE_SO at `dst' and record that graphic
   register 1 is now invoked to graphic plane 0.  */
#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    dst[0] = ISO_CODE_SO;                               \
    dst += 1;                                           \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;         \
  } while (0)
2295
/* Locking-shift-2: emit ESC 'n' at `dst' and record that graphic
   register 2 is now invoked to graphic plane 0.  */
#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    dst[0] = ISO_CODE_ESC;                              \
    dst[1] = 'n';                                       \
    dst += 2;                                           \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;         \
  } while (0)
2301
/* Locking-shift-3: emit ESC 'o' at `dst' and record that graphic
   register 3 is now invoked to graphic plane 0.  */
#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    dst[0] = ISO_CODE_ESC;                              \
    dst[1] = 'o';                                       \
    dst += 2;                                           \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;         \
  } while (0)
2307
/* Emit at `dst' the single byte for a one-dimensional character of
   CHARSET whose position code is C1.

   The byte has its 8th bit cleared when CHARSET is on graphic plane 0
   (or a single shift is pending and CODING_FLAG_ISO_SEVEN_BITS is
   set), and has it set when CHARSET is on graphic plane 1 (or a
   single shift is pending in an 8-bit environment).  A pending single
   shift is consumed.  If CHARSET is on neither plane,
   encode_invocation_designation is called to emit whatever
   designation/invocation is needed, and the checks are retried.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        /* The preceding single-shift applies to this byte only.  */    \
        *dst++ = ((coding->flags & CODING_FLAG_ISO_SEVEN_BITS)          \
                  ? (c1 & 0x7F) : (c1 | 0x80));                         \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))           \
      {                                                                 \
        *dst++ = c1 & 0x7F;                                             \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))           \
      {                                                                 \
        *dst++ = c1 | 0x80;                                             \
        break;                                                          \
      }                                                                 \
    /* CHARSET is not reachable through either graphic plane yet.       \
       Designate and/or invoke it, then go around again to emit the     \
       character itself.  */                                            \
    dst = encode_invocation_designation (charset, coding, dst);         \
  } while (1)
2340
/* Emit at `dst' the two bytes for a two-dimensional character of
   CHARSET whose position codes are C1 and C2.

   Both bytes have their 8th bit cleared when CHARSET is on graphic
   plane 0 (or a single shift is pending and
   CODING_FLAG_ISO_SEVEN_BITS is set), and have it set when CHARSET is
   on graphic plane 1 (or a single shift is pending in an 8-bit
   environment).  A pending single shift is consumed.  If CHARSET is
   on neither plane, encode_invocation_designation is called to emit
   whatever designation/invocation is needed, and the checks are
   retried.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        /* The preceding single-shift applies to this character.  */    \
        if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
          {                                                             \
            *dst++ = c1 & 0x7F;                                         \
            *dst++ = c2 & 0x7F;                                         \
          }                                                             \
        else                                                            \
          {                                                             \
            *dst++ = c1 | 0x80;                                         \
            *dst++ = c2 | 0x80;                                         \
          }                                                             \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))           \
      {                                                                 \
        *dst++ = c1 & 0x7F;                                             \
        *dst++ = c2 & 0x7F;                                             \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))           \
      {                                                                 \
        *dst++ = c1 | 0x80;                                             \
        *dst++ = c2 | 0x80;                                             \
        break;                                                          \
      }                                                                 \
    /* CHARSET is not reachable through either graphic plane yet.       \
       Designate and/or invoke it, then go around again to emit the     \
       character itself.  */                                            \
    dst = encode_invocation_designation (charset, coding, dst);         \
  } while (1)
2373
/* Encode character C at `dst'.  C is split by SPLIT_CHAR into a
   charset and position codes.  If the charset is not defined, the
   position codes are written out unchanged (the second one only when
   it is non-negative).  Otherwise the character goes through the
   DIMENSION1 or DIMENSION2 encoder, after these substitutions
   requested by coding->flags:
     CODING_FLAG_ISO_USE_ROMAN:  ASCII -> charset_latin_jisx0201
     CODING_FLAG_ISO_USE_OLDJIS: charset_jisx0208 -> charset_jisx0208_1978  */
#define ENCODE_ISO_CHARACTER(c)                                         \
  do {                                                                  \
    int charset, c1, c2;                                                \
                                                                        \
    SPLIT_CHAR (c, charset, c1, c2);                                    \
    if (! CHARSET_DEFINED_P (charset))                                  \
      {                                                                 \
        *dst++ = c1;                                                    \
        if (c2 >= 0)                                                    \
          *dst++ = c2;                                                  \
      }                                                                 \
    else if (CHARSET_DIMENSION (charset) == 1)                          \
      {                                                                 \
        if (charset == CHARSET_ASCII                                    \
            && (coding->flags & CODING_FLAG_ISO_USE_ROMAN))             \
          charset = charset_latin_jisx0201;                             \
        ENCODE_ISO_CHARACTER_DIMENSION1 (charset, c1);                  \
      }                                                                 \
    else                                                                \
      {                                                                 \
        if (charset == charset_jisx0208                                 \
            && (coding->flags & CODING_FLAG_ISO_USE_OLDJIS))            \
          charset = charset_jisx0208_1978;                              \
        ENCODE_ISO_CHARACTER_DIMENSION2 (charset, c1, c2);              \
      }                                                                 \
  } while (0)
2403
2404
/* Rather than encoding character C itself, encode
   CODING_REPLACEMENT_CHARACTER in its place: once, or twice when the
   width of C's charset is greater than 1.  */

#define ENCODE_UNSAFE_CHARACTER(c)                              \
  do {                                                          \
    ENCODE_ISO_CHARACTER (CODING_REPLACEMENT_CHARACTER);        \
    if (CHARSET_WIDTH (CHAR_CHARSET (c)) <= 1)                  \
      break;                                                    \
    /* A wide character gets a second replacement.  */          \
    ENCODE_ISO_CHARACTER (CODING_REPLACEMENT_CHARACTER);        \
  } while (0)
2413
2414
2415 /* Produce designation and invocation codes at a place pointed by DST
2416    to use CHARSET.  The element `spec.iso2022' of *CODING is updated.
2417    Return new DST.  */
2418
2419 unsigned char *
2420 encode_invocation_designation (charset, coding, dst)
2421      int charset;
2422      struct coding_system *coding;
2423      unsigned char *dst;
2424 {
2425   int reg;                      /* graphic register number */
2426
2427   /* At first, check designations.  */
2428   for (reg = 0; reg < 4; reg++)
2429     if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
2430       break;
2431
2432   if (reg >= 4)
2433     {
2434       /* CHARSET is not yet designated to any graphic registers.  */
2435       /* At first check the requested designation.  */
2436       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
2437       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
2438         /* Since CHARSET requests no special designation, designate it
2439            to graphic register 0.  */
2440         reg = 0;
2441
2442       ENCODE_DESIGNATION (charset, reg, coding);
2443     }
2444
2445   if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
2446       && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
2447     {
2448       /* Since the graphic register REG is not invoked to any graphic
2449          planes, invoke it to graphic plane 0.  */
2450       switch (reg)
2451         {
2452         case 0:                 /* graphic register 0 */
2453           ENCODE_SHIFT_IN;
2454           break;
2455
2456         case 1:                 /* graphic register 1 */
2457           ENCODE_SHIFT_OUT;
2458           break;
2459
2460         case 2:                 /* graphic register 2 */
2461           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2462             ENCODE_SINGLE_SHIFT_2;
2463           else
2464             ENCODE_LOCKING_SHIFT_2;
2465           break;
2466
2467         case 3:                 /* graphic register 3 */
2468           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2469             ENCODE_SINGLE_SHIFT_3;
2470           else
2471             ENCODE_LOCKING_SHIFT_3;
2472           break;
2473         }
2474     }
2475
2476   return dst;
2477 }
2478
/* Emit at `dst' the two bytes that represent the encoded composition
   rule RULE.  RULE is unpacked by COMPOSITION_DECODE_RULE into GREF
   and NREF; the bytes written are (32 + 81 + GREF) and (32 + NREF).  */

#define ENCODE_COMPOSITION_RULE(rule)                   \
  do {                                                  \
    int gref, nref;                                     \
                                                        \
    COMPOSITION_DECODE_RULE (rule, gref, nref);         \
    dst[0] = 32 + 81 + gref;                            \
    dst[1] = 32 + nref;                                 \
    dst += 2;                                           \
  } while (0)
2488
/* Emit at `dst' the escape sequence that opens a composition and
   remember the composition method (DATA[3]) in CODING->composing.
   DATA points to an array of integers describing the composition;
   see the comment in coding.h for its format.

   The sequence is ESC 0 for COMPOSITION_RELATIVE, ESC 3 for
   COMPOSITION_WITH_ALTCHARS, and ESC 4 for any other method.  For
   the latter two, CODING is also set up to walk the composition
   components, starting at index cmp_data_start + 4 with no
   composition rule expected next.  */

#define ENCODE_COMPOSITION_START(coding, data)                          \
  do {                                                                  \
    coding->composing = data[3];                                        \
    *dst++ = ISO_CODE_ESC;                                              \
    if (coding->composing != COMPOSITION_RELATIVE)                      \
      {                                                                 \
        if (coding->composing == COMPOSITION_WITH_ALTCHARS)             \
          *dst++ = '3';                                                 \
        else                                                            \
          *dst++ = '4';                                                 \
        coding->cmp_data_index = coding->cmp_data_start + 4;            \
        coding->composition_rule_follows = 0;                           \
      }                                                                 \
    else                                                                \
      *dst++ = '0';                                                     \
  } while (0)
2508
/* Emit ESC 1 at `dst' to close the current composition, and advance
   CODING past the composition record DATA: cmp_data_start grows by
   DATA[0] and the composing state becomes COMPOSITION_NO.  When that
   exhausts the current composition data block and a next block
   exists, switch to the next block and restart at its beginning.  */

#define ENCODE_COMPOSITION_END(coding, data)                    \
  do {                                                          \
    dst[0] = ISO_CODE_ESC;                                      \
    dst[1] = '1';                                               \
    dst += 2;                                                   \
    coding->cmp_data_start += data[0];                          \
    coding->composing = COMPOSITION_NO;                         \
    if (coding->cmp_data_start == coding->cmp_data->used)       \
      {                                                         \
        if (coding->cmp_data->next)                             \
          {                                                     \
            coding->cmp_data = coding->cmp_data->next;          \
            coding->cmp_data_start = 0;                         \
          }                                                     \
      }                                                         \
  } while (0)
2524
/* Emit the composition start sequence ESC 0 at `dst' and switch
   CODING to COMPOSITION_RELATIVE.  This does not begin a new
   composition: it marks that the components (alternate chars and
   composition rules) of the current composition have just been
   produced and that the actual text follows in SRC.  */

#define ENCODE_COMPOSITION_FAKE_START(coding)           \
  do {                                                  \
    dst[0] = ISO_CODE_ESC;                              \
    dst[1] = '0';                                       \
    dst += 2;                                           \
    coding->composing = COMPOSITION_RELATIVE;           \
  } while (0)
2536
/* The following three macros produce codes for indicating direction
   of text.  All of them use the variables `coding' and `dst' of the
   expansion site, advance `dst' past the bytes written, and expand to
   a single statement.  */

/* Emit the control sequence introducer: the two bytes ESC '[' when
   CODING_FLAG_ISO_SEVEN_BITS is set in coding->flags, otherwise the
   single byte ISO_CODE_CSI.  coding->flags is a set of flag bits, so
   the flag must be tested with `&'; comparing the whole word with
   `==' would select the 7-bit form only when no other flag is set.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
  do {                                                  \
    if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
      *dst++ = ISO_CODE_ESC, *dst++ = '[';              \
    else                                                \
      *dst++ = ISO_CODE_CSI;                            \
  } while (0)

/* Emit the start of right-to-left direction: CSI '2' ']'.  */
#define ENCODE_DIRECTION_R2L                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '2';                               \
    *dst++ = ']';                               \
  } while (0)

/* Emit the end of the current direction: CSI '0' ']'.  */
#define ENCODE_DIRECTION_L2R                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '0';                               \
    *dst++ = ']';                               \
  } while (0)
2552
/* Emit at `dst' the codes that bring the graphic planes and registers
   back to their initial state: a shift-in if something other than
   graphic register 0 is invoked to plane 0, then, for every register
   whose initial designation is non-negative and differs from the
   current one, a designation of the initial charset.  */
#define ENCODE_RESET_PLANE_AND_REGISTER                                     \
  do {                                                                      \
    int reg = 0;                                                            \
                                                                            \
    if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                        \
      ENCODE_SHIFT_IN;                                                      \
    while (reg < 4)                                                         \
      {                                                                     \
        if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0          \
            && (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg)           \
                != CODING_SPEC_ISO_DESIGNATION (coding, reg)))              \
          ENCODE_DESIGNATION                                                \
            (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg,        \
             coding);                                                       \
        reg++;                                                              \
      }                                                                     \
  } while (0)
2567
2568 /* Produce designation sequences of charsets in the line started from
2569    SRC to a place pointed by DST, and return updated DST.
2570
2571    If the current block ends before any end-of-line, we may fail to
2572    find all the necessary designations.  */
2573
2574 static unsigned char *
2575 encode_designation_at_bol (coding, translation_table, src, src_end, dst)
2576      struct coding_system *coding;
2577      Lisp_Object translation_table;
2578      unsigned char *src, *src_end, *dst;
2579 {
2580   int charset, c, found = 0, reg;
2581   /* Table of charsets to be designated to each graphic register.  */
2582   int r[4];
2583
2584   for (reg = 0; reg < 4; reg++)
2585     r[reg] = -1;
2586
2587   while (found < 4)
2588     {
2589       ONE_MORE_CHAR (c);
2590       if (c == '\n')
2591         break;
2592
2593       charset = CHAR_CHARSET (c);
2594       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
2595       if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
2596         {
2597           found++;
2598           r[reg] = charset;
2599         }
2600     }
2601
2602  label_end_of_loop:
2603   if (found)
2604     {
2605       for (reg = 0; reg < 4; reg++)
2606         if (r[reg] >= 0
2607             && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
2608           ENCODE_DESIGNATION (r[reg], reg, coding);
2609     }
2610
2611   return dst;
2612 }
2613
2614 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
2615
2616 static void
2617 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
2618      struct coding_system *coding;
2619      unsigned char *source, *destination;
2620      int src_bytes, dst_bytes;
2621 {
2622   unsigned char *src = source;
2623   unsigned char *src_end = source + src_bytes;
2624   unsigned char *dst = destination;
2625   unsigned char *dst_end = destination + dst_bytes;
2626   /* Since the maximum bytes produced by each loop is 20, we subtract 19
2627      from DST_END to assure overflow checking is necessary only at the
2628      head of loop.  */
2629   unsigned char *adjusted_dst_end = dst_end - 19;
2630   /* SRC_BASE remembers the start position in source in each loop.
2631      The loop will be exited when there's not enough source text to
2632      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2633      there's not enough destination area to produce encoded codes
2634      (within macro EMIT_BYTES).  */
2635   unsigned char *src_base;
2636   int c;
2637   Lisp_Object translation_table;
2638   Lisp_Object safe_chars;
2639
2640   if (coding->flags & CODING_FLAG_ISO_SAFE)
2641     coding->mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
2642
2643   safe_chars = coding_safe_chars (coding->symbol);
2644
2645   if (NILP (Venable_character_translation))
2646     translation_table = Qnil;
2647   else
2648     {
2649       translation_table = coding->translation_table_for_encode;
2650       if (NILP (translation_table))
2651         translation_table = Vstandard_translation_table_for_encode;
2652     }
2653
2654   coding->consumed_char = 0;
2655   coding->errors = 0;
2656   while (1)
2657     {
2658       src_base = src;
2659
2660       if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19)))
2661         {
2662           coding->result = CODING_FINISH_INSUFFICIENT_DST;
2663           break;
2664         }
2665
2666       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
2667           && CODING_SPEC_ISO_BOL (coding))
2668         {
2669           /* We have to produce designation sequences if any now.  */
2670           dst = encode_designation_at_bol (coding, translation_table,
2671                                            src, src_end, dst);
2672           CODING_SPEC_ISO_BOL (coding) = 0;
2673         }
2674
2675       /* Check composition start and end.  */
2676       if (coding->composing != COMPOSITION_DISABLED
2677           && coding->cmp_data_start < coding->cmp_data->used)
2678         {
2679           struct composition_data *cmp_data = coding->cmp_data;
2680           int *data = cmp_data->data + coding->cmp_data_start;
2681           int this_pos = cmp_data->char_offset + coding->consumed_char;
2682
2683           if (coding->composing == COMPOSITION_RELATIVE)
2684             {
2685               if (this_pos == data[2])
2686                 {
2687                   ENCODE_COMPOSITION_END (coding, data);
2688                   cmp_data = coding->cmp_data;
2689                   data = cmp_data->data + coding->cmp_data_start;
2690                 }
2691             }
2692           else if (COMPOSING_P (coding))
2693             {
2694               /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHAR  */
2695               if (coding->cmp_data_index == coding->cmp_data_start + data[0])
2696                 /* We have consumed components of the composition.
2697                    What follows in SRC is the composition's base
2698                    text.  */
2699                 ENCODE_COMPOSITION_FAKE_START (coding);
2700               else
2701                 {
2702                   int c = cmp_data->data[coding->cmp_data_index++];
2703                   if (coding->composition_rule_follows)
2704                     {
2705                       ENCODE_COMPOSITION_RULE (c);
2706                       coding->composition_rule_follows = 0;
2707                     }
2708                   else
2709                     {
2710                       if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR
2711                           && ! CODING_SAFE_CHAR_P (safe_chars, c))
2712                         ENCODE_UNSAFE_CHARACTER (c);
2713                       else
2714                         ENCODE_ISO_CHARACTER (c);
2715                       if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
2716                         coding->composition_rule_follows = 1;
2717                     }
2718                   continue;
2719                 }
2720             }
2721           if (!COMPOSING_P (coding))
2722             {
2723               if (this_pos == data[1])
2724                 {
2725                   ENCODE_COMPOSITION_START (coding, data);
2726                   continue;
2727                 }
2728             }
2729         }
2730
2731       ONE_MORE_CHAR (c);
2732
2733       /* Now encode the character C.  */
2734       if (c < 0x20 || c == 0x7F)
2735         {
2736           if (c == '\r')
2737             {
2738               if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2739                 {
2740                   if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2741                     ENCODE_RESET_PLANE_AND_REGISTER;
2742                   *dst++ = c;
2743                   continue;
2744                 }
2745               /* fall down to treat '\r' as '\n' ...  */
2746               c = '\n';
2747             }
2748           if (c == '\n')
2749             {
2750               if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
2751                 ENCODE_RESET_PLANE_AND_REGISTER;
2752               if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
2753                 bcopy (coding->spec.iso2022.initial_designation,
2754                        coding->spec.iso2022.current_designation,
2755                        sizeof coding->spec.iso2022.initial_designation);
2756               if (coding->eol_type == CODING_EOL_LF
2757                   || coding->eol_type == CODING_EOL_UNDECIDED)
2758                 *dst++ = ISO_CODE_LF;
2759               else if (coding->eol_type == CODING_EOL_CRLF)
2760                 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
2761               else
2762                 *dst++ = ISO_CODE_CR;
2763               CODING_SPEC_ISO_BOL (coding) = 1;
2764             }
2765           else
2766             {
2767               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2768                 ENCODE_RESET_PLANE_AND_REGISTER;
2769               *dst++ = c;
2770             }
2771         }
2772       else if (ASCII_BYTE_P (c))
2773         ENCODE_ISO_CHARACTER (c);
2774       else if (SINGLE_BYTE_CHAR_P (c))
2775         {
2776           *dst++ = c;
2777           coding->errors++;
2778         }
2779       else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR
2780                && ! CODING_SAFE_CHAR_P (safe_chars, c))
2781         ENCODE_UNSAFE_CHARACTER (c);
2782       else
2783         ENCODE_ISO_CHARACTER (c);
2784
2785       coding->consumed_char++;
2786     }
2787
2788  label_end_of_loop:
2789   coding->consumed = src_base - source;
2790   coding->produced = coding->produced_char = dst - destination;
2791 }
2792
2793 \f
2794 /*** 4. SJIS and BIG5 handlers ***/
2795
2796 /* Although SJIS and BIG5 are not ISO coding systems, they are used
2797    quite widely.  So, for the moment, Emacs supports them in the bare
2798    C code.  But, in the future, they may be supported only by CCL.  */
2799
/* SJIS is a coding system encoding three character sets: ASCII, right
   half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
   as is.  A character of charset katakana-jisx0201 is encoded by
   "position-code + 0x80".  A character of charset japanese-jisx0208
   is encoded in two bytes, with its two position-codes divided and
   shifted so that they fit in the ranges below.
2806
2807    --- CODE RANGE of SJIS ---
2808    (character set)      (range)
2809    ASCII                0x00 .. 0x7F
2810    KATAKANA-JISX0201    0xA1 .. 0xDF
2811    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
2812             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
2813    -------------------------------
2814
2815 */
2816
2817 /* BIG5 is a coding system encoding two character sets: ASCII and
2818    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
2819    character set and is encoded in two bytes.
2820
2821    --- CODE RANGE of BIG5 ---
2822    (character set)      (range)
2823    ASCII                0x00 .. 0x7F
2824    Big5 (1st byte)      0xA1 .. 0xFE
2825         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
2826    --------------------------
2827
2828    Since the number of characters in Big5 is larger than maximum
2829    characters in Emacs' charset (96x96), it can't be handled as one
2830    charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
2831    and `charset-big5-2'.  Both are DIMENSION2 and CHARS94.  The former
2832    contains frequently used characters and the latter contains less
2833    frequently used characters.  */
2834
/* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
   are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
   C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
   format.  CHARSET is `charset_big5_1' or `charset_big5_2'.

   Every argument is parenthesized in the expansions, so arbitrary
   expressions may be passed.  Arguments are evaluated more than
   once and therefore must not have side effects.  The output
   arguments may be the same lvalues as the input arguments (as in
   `DECODE_BIG5 (c1, c2, charset, c1, c2)'): the inputs are folded
   into TEMP before any output is stored.  */

/* Number of Big5 characters which have the same code in 1st byte:
   63 trail bytes in 0x40..0x7E plus 94 trail bytes in 0xA1..0xFE.  */
#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)

/* Set CHARSET, C1 and C2 from the BIG5 bytes B1 and B2.  Lead bytes
   below 0xC9 belong to `charset_big5_1', the others to
   `charset_big5_2'.  */
#define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = (((b1) - 0xA1) * BIG5_SAME_ROW                                  \
         + (b2) - ((b2) < 0x7F ? 0x40 : 0x62));                         \
    if ((b1) < 0xC9)                                                    \
      (charset) = charset_big5_1;                                       \
    else                                                                \
      {                                                                 \
        (charset) = charset_big5_2;                                     \
        temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                          \
      }                                                                 \
    (c1) = temp / (0xFF - 0xA1) + 0x21;                                 \
    (c2) = temp % (0xFF - 0xA1) + 0x21;                                 \
  } while (0)

/* Set the BIG5 bytes B1 and B2 from CHARSET, C1 and C2.  This is the
   inverse of DECODE_BIG5.  */
#define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
  do {                                                                  \
    unsigned int temp = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);  \
    if ((charset) == charset_big5_2)                                    \
      temp += BIG5_SAME_ROW * (0xC9 - 0xA1);                            \
    (b1) = temp / BIG5_SAME_ROW + 0xA1;                                 \
    (b2) = temp % BIG5_SAME_ROW;                                        \
    (b2) += (b2) < 0x3F ? 0x40 : 0x62;                                  \
  } while (0)
2867
2868 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2869    Check if a text is encoded in SJIS.  If it is, return
2870    CODING_CATEGORY_MASK_SJIS, else return 0.  */
2871
2872 static int
2873 detect_coding_sjis (src, src_end, multibytep)
2874      unsigned char *src, *src_end;
2875      int multibytep;
2876 {
2877   int c;
2878   /* Dummy for ONE_MORE_BYTE.  */
2879   struct coding_system dummy_coding;
2880   struct coding_system *coding = &dummy_coding;
2881
2882   while (1)
2883     {
2884       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2885       if (c < 0x80)
2886         continue;
2887       if (c == 0x80 || c == 0xA0 || c > 0xEF)
2888         return 0;
2889       if (c <= 0x9F || c >= 0xE0)
2890         {
2891           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2892           if (c < 0x40 || c == 0x7F || c > 0xFC)
2893             return 0;
2894         }
2895     }
2896  label_end_of_loop:
2897   return CODING_CATEGORY_MASK_SJIS;
2898 }
2899
2900 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2901    Check if a text is encoded in BIG5.  If it is, return
2902    CODING_CATEGORY_MASK_BIG5, else return 0.  */
2903
2904 static int
2905 detect_coding_big5 (src, src_end, multibytep)
2906      unsigned char *src, *src_end;
2907      int multibytep;
2908 {
2909   int c;
2910   /* Dummy for ONE_MORE_BYTE.  */
2911   struct coding_system dummy_coding;
2912   struct coding_system *coding = &dummy_coding;
2913
2914   while (1)
2915     {
2916       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2917       if (c < 0x80)
2918         continue;
2919       if (c < 0xA1 || c > 0xFE)
2920         return 0;
2921       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2922       if (c < 0x40 || (c > 0x7F && c < 0xA1) || c > 0xFE)
2923         return 0;
2924     }
2925  label_end_of_loop:
2926   return CODING_CATEGORY_MASK_BIG5;
2927 }
2928
2929 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2930    Check if a text is encoded in UTF-8.  If it is, return
2931    CODING_CATEGORY_MASK_UTF_8, else return 0.  */
2932
2933 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
2934 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
2935 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
2936 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
2937 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
2938 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
2939 #define UTF_8_6_OCTET_LEADING_P(c) (((c) & 0xFE) == 0xFC)
2940
2941 static int
2942 detect_coding_utf_8 (src, src_end, multibytep)
2943      unsigned char *src, *src_end;
2944      int multibytep;
2945 {
2946   unsigned char c;
2947   int seq_maybe_bytes;
2948   /* Dummy for ONE_MORE_BYTE.  */
2949   struct coding_system dummy_coding;
2950   struct coding_system *coding = &dummy_coding;
2951
2952   while (1)
2953     {
2954       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2955       if (UTF_8_1_OCTET_P (c))
2956         continue;
2957       else if (UTF_8_2_OCTET_LEADING_P (c))
2958         seq_maybe_bytes = 1;
2959       else if (UTF_8_3_OCTET_LEADING_P (c))
2960         seq_maybe_bytes = 2;
2961       else if (UTF_8_4_OCTET_LEADING_P (c))
2962         seq_maybe_bytes = 3;
2963       else if (UTF_8_5_OCTET_LEADING_P (c))
2964         seq_maybe_bytes = 4;
2965       else if (UTF_8_6_OCTET_LEADING_P (c))
2966         seq_maybe_bytes = 5;
2967       else
2968         return 0;
2969
2970       do
2971         {
2972           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2973           if (!UTF_8_EXTRA_OCTET_P (c))
2974             return 0;
2975           seq_maybe_bytes--;
2976         }
2977       while (seq_maybe_bytes > 0);
2978     }
2979
2980  label_end_of_loop:
2981   return CODING_CATEGORY_MASK_UTF_8;
2982 }
2983
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
   Check if a text starts with a UTF-16 byte order mark.  If the
   first two bytes are 0xFE 0xFF, return
   CODING_CATEGORY_MASK_UTF_16_BE; if they are 0xFF 0xFE, return
   CODING_CATEGORY_MASK_UTF_16_LE; else return 0.  */
2989
/* Nonzero if the 16-bit value VAL is 0xFFFE or 0xFFFF.  */
#define UTF_16_INVALID_P(val)   \
  (((val) == 0xFFFE)            \
   || ((val) == 0xFFFF))

/* Nonzero if VAL is a high (leading) surrogate, 0xD800..0xDBFF.  The
   top six bits select the surrogate kind, hence the 0xFC00 mask; a
   mask of 0xD800 would also accept low surrogates and values such
   as 0xFFFF.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* Nonzero if VAL is a low (trailing) surrogate, 0xDC00..0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)
2999
3000 static int
3001 detect_coding_utf_16 (src, src_end, multibytep)
3002      unsigned char *src, *src_end;
3003      int multibytep;
3004 {
3005   unsigned char c1, c2;
3006   /* Dummy for ONE_MORE_BYTE_CHECK_MULTIBYTE.  */
3007   struct coding_system dummy_coding;
3008   struct coding_system *coding = &dummy_coding;
3009
3010   ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
3011   ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep);
3012
3013   if ((c1 == 0xFF) && (c2 == 0xFE))
3014     return CODING_CATEGORY_MASK_UTF_16_LE;
3015   else if ((c1 == 0xFE) && (c2 == 0xFF))
3016     return CODING_CATEGORY_MASK_UTF_16_BE;
3017
3018  label_end_of_loop:
3019   return 0;
3020 }
3021
3022 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3023    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
3024
3025 static void
3026 decode_coding_sjis_big5 (coding, source, destination,
3027                          src_bytes, dst_bytes, sjis_p)
3028      struct coding_system *coding;
3029      unsigned char *source, *destination;
3030      int src_bytes, dst_bytes;
3031      int sjis_p;
3032 {
3033   unsigned char *src = source;
3034   unsigned char *src_end = source + src_bytes;
3035   unsigned char *dst = destination;
3036   unsigned char *dst_end = destination + dst_bytes;
3037   /* SRC_BASE remembers the start position in source in each loop.
3038      The loop will be exited when there's not enough source code
3039      (within macro ONE_MORE_BYTE), or when there's not enough
3040      destination area to produce a character (within macro
3041      EMIT_CHAR).  */
3042   unsigned char *src_base;
3043   Lisp_Object translation_table;
3044
3045   if (NILP (Venable_character_translation))
3046     translation_table = Qnil;
3047   else
3048     {
3049       translation_table = coding->translation_table_for_decode;
3050       if (NILP (translation_table))
3051         translation_table = Vstandard_translation_table_for_decode;
3052     }
3053
3054   coding->produced_char = 0;
3055   while (1)
3056     {
3057       int c, charset, c1, c2 = 0;
3058
3059       src_base = src;
3060       ONE_MORE_BYTE (c1);
3061
3062       if (c1 < 0x80)
3063         {
3064           charset = CHARSET_ASCII;
3065           if (c1 < 0x20)
3066             {
3067               if (c1 == '\r')
3068                 {
3069                   if (coding->eol_type == CODING_EOL_CRLF)
3070                     {
3071                       ONE_MORE_BYTE (c2);
3072                       if (c2 == '\n')
3073                         c1 = c2;
3074                       else
3075                         /* To process C2 again, SRC is subtracted by 1.  */
3076                         src--;
3077                     }
3078                   else if (coding->eol_type == CODING_EOL_CR)
3079                     c1 = '\n';
3080                 }
3081               else if (c1 == '\n'
3082                        && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
3083                        && (coding->eol_type == CODING_EOL_CR
3084                            || coding->eol_type == CODING_EOL_CRLF))
3085                 {
3086                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
3087                   goto label_end_of_loop;
3088                 }
3089             }
3090         }
3091       else
3092         {
3093           if (sjis_p)
3094             {
3095               if (c1 == 0x80 || c1 == 0xA0 || c1 > 0xEF)
3096                 goto label_invalid_code;
3097               if (c1 <= 0x9F || c1 >= 0xE0)
3098                 {
3099                   /* SJIS -> JISX0208 */
3100                   ONE_MORE_BYTE (c2);
3101                   if (c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
3102                     goto label_invalid_code;
3103                   DECODE_SJIS (c1, c2, c1, c2);
3104                   charset = charset_jisx0208;
3105                 }
3106               else
3107                 /* SJIS -> JISX0201-Kana */
3108                 charset = charset_katakana_jisx0201;
3109             }
3110           else
3111             {
3112               /* BIG5 -> Big5 */
3113               if (c1 < 0xA0 || c1 > 0xFE)
3114                 goto label_invalid_code;
3115               ONE_MORE_BYTE (c2);
3116               if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE)
3117                 goto label_invalid_code;
3118               DECODE_BIG5 (c1, c2, charset, c1, c2);
3119             }
3120         }
3121
3122       c = DECODE_ISO_CHARACTER (charset, c1, c2);
3123       EMIT_CHAR (c);
3124       continue;
3125
3126     label_invalid_code:
3127       coding->errors++;
3128       src = src_base;
3129       c = *src++;
3130       EMIT_CHAR (c);
3131     }
3132
3133  label_end_of_loop:
3134   coding->consumed = coding->consumed_char = src_base - source;
3135   coding->produced = dst - destination;
3136   return;
3137 }
3138
3139 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
3140    This function can encode charsets `ascii', `katakana-jisx0201',
3141    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
3142    are sure that all these charsets are registered as official charset
3143    (i.e. do not have extended leading-codes).  Characters of other
3144    charsets are produced without any encoding.  If SJIS_P is 1, encode
3145    SJIS text, else encode BIG5 text.  */
3146
3147 static void
3148 encode_coding_sjis_big5 (coding, source, destination,
3149                          src_bytes, dst_bytes, sjis_p)
3150      struct coding_system *coding;
3151      unsigned char *source, *destination;
3152      int src_bytes, dst_bytes;
3153      int sjis_p;
3154 {
3155   unsigned char *src = source;
3156   unsigned char *src_end = source + src_bytes;
3157   unsigned char *dst = destination;
3158   unsigned char *dst_end = destination + dst_bytes;
3159   /* SRC_BASE remembers the start position in source in each loop.
3160      The loop will be exited when there's not enough source text to
3161      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
3162      there's not enough destination area to produce encoded codes
3163      (within macro EMIT_BYTES).  */
3164   unsigned char *src_base;
3165   Lisp_Object translation_table;
3166
3167   if (NILP (Venable_character_translation))
3168     translation_table = Qnil;
3169   else
3170     {
3171       translation_table = coding->translation_table_for_encode;
3172       if (NILP (translation_table))
3173         translation_table = Vstandard_translation_table_for_encode;
3174     }
3175
3176   while (1)
3177     {
3178       int c, charset, c1, c2;
3179
3180       src_base = src;
3181       ONE_MORE_CHAR (c);
3182
3183       /* Now encode the character C.  */
3184       if (SINGLE_BYTE_CHAR_P (c))
3185         {
3186           switch (c)
3187             {
3188             case '\r':
3189               if (!(coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
3190                 {
3191                   EMIT_ONE_BYTE (c);
3192                   break;
3193                 }
3194               c = '\n';
3195             case '\n':
3196               if (coding->eol_type == CODING_EOL_CRLF)
3197                 {
3198                   EMIT_TWO_BYTES ('\r', c);
3199                   break;
3200                 }
3201               else if (coding->eol_type == CODING_EOL_CR)
3202                 c = '\r';
3203             default:
3204               EMIT_ONE_BYTE (c);
3205             }
3206         }
3207       else
3208         {
3209           SPLIT_CHAR (c, charset, c1, c2);
3210           if (sjis_p)
3211             {
3212               if (charset == charset_jisx0208
3213                   || charset == charset_jisx0208_1978)
3214                 {
3215                   ENCODE_SJIS (c1, c2, c1, c2);
3216                   EMIT_TWO_BYTES (c1, c2);
3217                 }
3218               else if (charset == charset_katakana_jisx0201)
3219                 EMIT_ONE_BYTE (c1 | 0x80);
3220               else if (charset == charset_latin_jisx0201)
3221                 EMIT_ONE_BYTE (c1);
3222               else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR)
3223                 {
3224                   EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3225                   if (CHARSET_WIDTH (charset) > 1)
3226                     EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3227                 }
3228               else
3229                 /* There's no way other than producing the internal
3230                    codes as is.  */
3231                 EMIT_BYTES (src_base, src);
3232             }
3233           else
3234             {
3235               if (charset == charset_big5_1 || charset == charset_big5_2)
3236                 {
3237                   ENCODE_BIG5 (charset, c1, c2, c1, c2);
3238                   EMIT_TWO_BYTES (c1, c2);
3239                 }
3240               else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR)
3241                 {
3242                   EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3243                   if (CHARSET_WIDTH (charset) > 1)
3244                     EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3245                 }
3246               else
3247                 /* There's no way other than producing the internal
3248                    codes as is.  */
3249                 EMIT_BYTES (src_base, src);
3250             }
3251         }
3252       coding->consumed_char++;
3253     }
3254
3255  label_end_of_loop:
3256   coding->consumed = src_base - source;
3257   coding->produced = coding->produced_char = dst - destination;
3258 }
3259
3260 \f
3261 /*** 5. CCL handlers ***/
3262
3263 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3264    Check if a text is encoded in a coding system of which
3265    encoder/decoder are written in CCL program.  If it is, return
3266    CODING_CATEGORY_MASK_CCL, else return 0.  */
3267
3268 static int
3269 detect_coding_ccl (src, src_end, multibytep)
3270      unsigned char *src, *src_end;
3271      int multibytep;
3272 {
3273   unsigned char *valid;
3274   int c;
3275   /* Dummy for ONE_MORE_BYTE.  */
3276   struct coding_system dummy_coding;
3277   struct coding_system *coding = &dummy_coding;
3278
3279   /* No coding system is assigned to coding-category-ccl.  */
3280   if (!coding_system_table[CODING_CATEGORY_IDX_CCL])
3281     return 0;
3282
3283   valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
3284   while (1)
3285     {
3286       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
3287       if (! valid[c])
3288         return 0;
3289     }
3290  label_end_of_loop:
3291   return CODING_CATEGORY_MASK_CCL;
3292 }
3293
3294 \f
3295 /*** 6. End-of-line handlers ***/
3296
3297 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3298
3299 static void
3300 decode_eol (coding, source, destination, src_bytes, dst_bytes)
3301      struct coding_system *coding;
3302      unsigned char *source, *destination;
3303      int src_bytes, dst_bytes;
3304 {
3305   unsigned char *src = source;
3306   unsigned char *dst = destination;
3307   unsigned char *src_end = src + src_bytes;
3308   unsigned char *dst_end = dst + dst_bytes;
3309   Lisp_Object translation_table;
3310   /* SRC_BASE remembers the start position in source in each loop.
3311      The loop will be exited when there's not enough source code
3312      (within macro ONE_MORE_BYTE), or when there's not enough
3313      destination area to produce a character (within macro
3314      EMIT_CHAR).  */
3315   unsigned char *src_base;
3316   int c;
3317
3318   translation_table = Qnil;
3319   switch (coding->eol_type)
3320     {
3321     case CODING_EOL_CRLF:
3322       while (1)
3323         {
3324           src_base = src;
3325           ONE_MORE_BYTE (c);
3326           if (c == '\r')
3327             {
3328               ONE_MORE_BYTE (c);
3329               if (c != '\n')
3330                 {
3331                   src--;
3332                   c = '\r';
3333                 }
3334             }
3335           else if (c == '\n'
3336                    && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
3337             {
3338               coding->result = CODING_FINISH_INCONSISTENT_EOL;
3339               goto label_end_of_loop;
3340             }
3341           EMIT_CHAR (c);
3342         }
3343       break;
3344
3345     case CODING_EOL_CR:
3346       while (1)
3347         {
3348           src_base = src;
3349           ONE_MORE_BYTE (c);
3350           if (c == '\n')
3351             {
3352               if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
3353                 {
3354                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
3355                   goto label_end_of_loop;
3356                 }
3357             }
3358           else if (c == '\r')
3359             c = '\n';
3360           EMIT_CHAR (c);
3361         }
3362       break;
3363
3364     default:                    /* no need for EOL handling */
3365       while (1)
3366         {
3367           src_base = src;
3368           ONE_MORE_BYTE (c);
3369           EMIT_CHAR (c);
3370         }
3371     }
3372
3373  label_end_of_loop:
3374   coding->consumed = coding->consumed_char = src_base - source;
3375   coding->produced = dst - destination;
3376   return;
3377 }
3378
3379 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
3380    format of end-of-line according to `coding->eol_type'.  It also
3381    convert multibyte form 8-bit characters to unibyte if
3382    CODING->src_multibyte is nonzero.  If `coding->mode &
3383    CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code '\r' in source text
3384    also means end-of-line.  */
3385
3386 static void
3387 encode_eol (coding, source, destination, src_bytes, dst_bytes)
3388      struct coding_system *coding;
3389      const unsigned char *source;
3390      unsigned char *destination;
3391      int src_bytes, dst_bytes;
3392 {
3393   const unsigned char *src = source;
3394   unsigned char *dst = destination;
3395   const unsigned char *src_end = src + src_bytes;
3396   unsigned char *dst_end = dst + dst_bytes;
3397   Lisp_Object translation_table;
3398   /* SRC_BASE remembers the start position in source in each loop.
3399      The loop will be exited when there's not enough source text to
3400      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
3401      there's not enough destination area to produce encoded codes
3402      (within macro EMIT_BYTES).  */
3403   const unsigned char *src_base;
3404   unsigned char *tmp;
3405   int c;
3406   int selective_display = coding->mode & CODING_MODE_SELECTIVE_DISPLAY;
3407
3408   translation_table = Qnil;
3409   if (coding->src_multibyte
3410       && *(src_end - 1) == LEADING_CODE_8_BIT_CONTROL)
3411     {
3412       src_end--;
3413       src_bytes--;
3414       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
3415     }
3416
3417   if (coding->eol_type == CODING_EOL_CRLF)
3418     {
3419       while (src < src_end)
3420         {
3421           src_base = src;
3422           c = *src++;
3423           if (c >= 0x20)
3424             EMIT_ONE_BYTE (c);
3425           else if (c == '\n' || (c == '\r' && selective_display))
3426             EMIT_TWO_BYTES ('\r', '\n');
3427           else
3428             EMIT_ONE_BYTE (c);
3429         }
3430       src_base = src;
3431     label_end_of_loop:
3432       ;
3433     }
3434   else
3435     {
3436       if (!dst_bytes || src_bytes <= dst_bytes)
3437         {
3438           safe_bcopy (src, dst, src_bytes);
3439           src_base = src_end;
3440           dst += src_bytes;
3441         }
3442       else
3443         {
3444           if (coding->src_multibyte
3445               && *(src + dst_bytes - 1) == LEADING_CODE_8_BIT_CONTROL)
3446             dst_bytes--;
3447           safe_bcopy (src, dst, dst_bytes);
3448           src_base = src + dst_bytes;
3449           dst = destination + dst_bytes;
3450           coding->result = CODING_FINISH_INSUFFICIENT_DST;
3451         }
3452       if (coding->eol_type == CODING_EOL_CR)
3453         {
3454           for (tmp = destination; tmp < dst; tmp++)
3455             if (*tmp == '\n') *tmp = '\r';
3456         }
3457       else if (selective_display)
3458         {
3459           for (tmp = destination; tmp < dst; tmp++)
3460             if (*tmp == '\r') *tmp = '\n';
3461         }
3462     }
3463   if (coding->src_multibyte)
3464     dst = destination + str_as_unibyte (destination, dst - destination);
3465
3466   coding->consumed = src_base - source;
3467   coding->produced = dst - destination;
3468   coding->produced_char = coding->produced;
3469 }
3470
3471 \f
3472 /*** 7. C library functions ***/
3473
3474 /* In Emacs Lisp, a coding system is represented by a Lisp symbol which
3475    has a property `coding-system'.  The value of this property is a
3476    vector of length 5 (called the coding-vector).  Among elements of
3477    this vector, the first (element[0]) and the fifth (element[4])
3478    carry important information for decoding/encoding.  Before
3479    decoding/encoding, this information should be set in fields of a
3480    structure of type `coding_system'.
3481
3482    The value of the property `coding-system' can be a symbol of another
3483    subsidiary coding-system.  In that case, Emacs gets coding-vector
3484    from that symbol.
3485
3486    `element[0]' contains information to be set in `coding->type'.  The
3487    value and its meaning is as follows:
3488
3489    0 -- coding_type_emacs_mule
3490    1 -- coding_type_sjis
3491    2 -- coding_type_iso2022
3492    3 -- coding_type_big5
3493    4 -- coding_type_ccl encoder/decoder written in CCL
3494    nil -- coding_type_no_conversion
3495    t -- coding_type_undecided (automatic conversion on decoding,
3496                                no-conversion on encoding)
3497
3498    `element[4]' contains information to be set in `coding->flags' and
3499    `coding->spec'.  The meaning varies by `coding->type'.
3500
3501    If `coding->type' is `coding_type_iso2022', element[4] is a vector
3502    of length 32 (of which the first 13 sub-elements are used now).
3503    Meanings of these sub-elements are:
3504
3505    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
3506         If the value is an integer of valid charset, the charset is
3507         assumed to be designated to graphic register N initially.
3508
        If the value is negative, its absolute value is a charset
        which reserves graphic register N, which means that the
        charset is not designated initially but should be designated
        to graphic register N just before encoding a character in
        that charset.
3513
3514         If the value is nil, graphic register N is never used on
3515         encoding.
3516
3517    sub-element[N] where N is 4 through 11: to be set in `coding->flags'
3518         Each value takes t or nil.  See the section ISO2022 of
3519         `coding.h' for more information.
3520
3521    If `coding->type' is `coding_type_big5', element[4] is t to denote
3522    BIG5-ETen or nil to denote BIG5-HKU.
3523
   If `coding->type' has any other value, element[4] is ignored.
3525
3526    Emacs Lisp's coding systems also carry information about format of
3527    end-of-line in a value of property `eol-type'.  If the value is
3528    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
3529    means CODING_EOL_CR.  If it is not integer, it should be a vector
3530    of subsidiary coding systems of which property `eol-type' has one
3531    of the above values.
3532
3533 */
3534
3535 /* Extract information for decoding/encoding from the coding system
3536    symbol CODING_SYSTEM and set it in CODING.  If CODING_SYSTEM is
3537    invalid, CODING is set up so that no conversion is necessary and
3538    -1 is returned; otherwise 0 is returned.  */
3539
3540 int
3541 setup_coding_system (coding_system, coding)
3542      Lisp_Object coding_system;
3543      struct coding_system *coding;
3544 {
3545   Lisp_Object coding_spec, coding_type, eol_type, plist;
3546   Lisp_Object val;
3547
3548   /* At first, zero clear all members.  */
3549   bzero (coding, sizeof (struct coding_system));
3550
3551   /* Initialize some fields required for all kinds of coding systems.  */
3552   coding->symbol = coding_system;
3553   coding->heading_ascii = -1;
3554   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
3555   coding->composing = COMPOSITION_DISABLED;
3556   coding->cmp_data = NULL;
3557
3558   if (NILP (coding_system))
3559     goto label_invalid_coding_system;
3560
       /* The coding spec must be a vector of exactly 5 elements whose
          element[3] (used as the property list PLIST below) is a cons.  */
3561   coding_spec = Fget (coding_system, Qcoding_system);
3562
3563   if (!VECTORP (coding_spec)
3564       || XVECTOR (coding_spec)->size != 5
3565       || !CONSP (XVECTOR (coding_spec)->contents[3]))
3566     goto label_invalid_coding_system;
3567
       /* Decide the end-of-line format.  When inhibit_eol_conversion is
          nonzero, the Qeol_type property is not consulted and nil is used
          instead.  A vector means the format is still to be detected.
          NOTE(review): the two XFASTINT tests below run without an
          INTEGERP check, so EOL_TYPE may be nil or another non-integer
          there -- confirm that XFASTINT is safe on such values.  */
3568   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
3569   if (VECTORP (eol_type))
3570     {
3571       coding->eol_type = CODING_EOL_UNDECIDED;
3572       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
3573     }
3574   else if (XFASTINT (eol_type) == 1)
3575     {
3576       coding->eol_type = CODING_EOL_CRLF;
3577       coding->common_flags
3578         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3579     }
3580   else if (XFASTINT (eol_type) == 2)
3581     {
3582       coding->eol_type = CODING_EOL_CR;
3583       coding->common_flags
3584         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3585     }
3586   else
3587     coding->eol_type = CODING_EOL_LF;
3588
3589   coding_type = XVECTOR (coding_spec)->contents[0];
3590   /* Try short cut.  A symbol in element[0] needs no further setup:
          t selects coding_type_undecided (detection required), any other
          symbol selects coding_type_no_conversion.  */
3591   if (SYMBOLP (coding_type))
3592     {
3593       if (EQ (coding_type, Qt))
3594         {
3595           coding->type = coding_type_undecided;
3596           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
3597         }
3598       else
3599         coding->type = coding_type_no_conversion;
3600       /* Initialize this member.  Anything other than
3601          CODING_CATEGORY_IDX_UTF_16_BE and
3602          CODING_CATEGORY_IDX_UTF_16_LE is ok because those two have
3603          special treatment in detect_eol.  */
3604       coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
3605
3606       return 0;
3607     }
3608
3609   /* Get values of coding system properties:
3610      `post-read-conversion', `pre-write-conversion',
3611      `translation-table-for-decode', `translation-table-for-encode'.  */
3612   plist = XVECTOR (coding_spec)->contents[3];
3613   /* Pre & post conversion functions should be disabled if
3614      inhibit_pre_post_conversion is nonzero.  This is the case that a
3615      code conversion function is called while those functions are running.  */
3616   if (! inhibit_pre_post_conversion)
3617     {
3618       coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
3619       coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
3620     }
       /* A translation table may be given directly or through a symbol
          whose property of the same name holds it.  Anything that does
          not end up as a char-table is replaced by nil.  */
3621   val = Fplist_get (plist, Qtranslation_table_for_decode);
3622   if (SYMBOLP (val))
3623     val = Fget (val, Qtranslation_table_for_decode);
3624   coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
3625   val = Fplist_get (plist, Qtranslation_table_for_encode);
3626   if (SYMBOLP (val))
3627     val = Fget (val, Qtranslation_table_for_encode);
3628   coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
       /* The Qcoding_category property of PLIST must be non-nil and its
          Qcoding_category_index property must be an integer; otherwise
          the coding system is treated as invalid.  */
3629   val = Fplist_get (plist, Qcoding_category);
3630   if (!NILP (val))
3631     {
3632       val = Fget (val, Qcoding_category_index);
3633       if (INTEGERP (val))
3634         coding->category_idx = XINT (val);
3635       else
3636         goto label_invalid_coding_system;
3637     }
3638   else
3639     goto label_invalid_coding_system;
3640
3641   /* If the coding system has non-nil `composition' property, enable
3642      composition handling.  */
3643   val = Fplist_get (plist, Qcomposition);
3644   if (!NILP (val))
3645     coding->composing = COMPOSITION_NO;
3646
       /* Dispatch on the integer type code stored in element[0].
          NOTE(review): XFASTINT is applied without an INTEGERP check;
          only symbols were filtered out above -- confirm that element[0]
          cannot be any other kind of object here.  */
3647   switch (XFASTINT (coding_type))
3648     {
3649     case 0:
3650       coding->type = coding_type_emacs_mule;
3651       coding->common_flags
3652         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
           /* NOTE(review): both masks were already set just above, so the
              two tests below currently add nothing.  */
3653       if (!NILP (coding->post_read_conversion))
3654         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
3655       if (!NILP (coding->pre_write_conversion))
3656         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
3657       break;
3658
3659     case 1:
3660       coding->type = coding_type_sjis;
3661       coding->common_flags
3662         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3663       break;
3664
3665     case 2:
3666       coding->type = coding_type_iso2022;
3667       coding->common_flags
3668         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3669       {
             /* This VAL shadows the function-level VAL inside this block.  */
3670         Lisp_Object val, temp;
3671         Lisp_Object *flags;
3672         int i, charset, reg_bits = 0;
3673
3674         val = XVECTOR (coding_spec)->contents[4];
3675
             /* For ISO2022, element[4] must be a vector of 32 elements.  */
3676         if (!VECTORP (val) || XVECTOR (val)->size != 32)
3677           goto label_invalid_coding_system;
3678
             /* FLAGS[4] through FLAGS[16] each turn on one
                CODING_FLAG_ISO_XXX bit when non-nil.  */
3679         flags = XVECTOR (val)->contents;
3680         coding->flags
3681           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
3682              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
3683              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
3684              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
3685              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
3686              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
3687              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
3688              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
3689              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
3690              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
3691              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
3692              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
3693              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
3694              );
3695
3696         /* Invoke graphic register 0 to plane 0.  */
3697         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
3698         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
3699         CODING_SPEC_ISO_INVOCATION (coding, 1)
3700           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
3701         /* Not single shifting at first.  */
3702         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
3703         /* Beginning of buffer should also be regarded as bol. */
3704         CODING_SPEC_ISO_BOL (coding) = 1;
3705
             /* Default every charset's revision number to 255, then
                override it from Vcharset_revision_alist for each entry
                (CHARSET . REVISION) that names a known charset and whose
                REVISION is a non-negative integer with REVISION + '@'
                below 128.  */
3706         for (charset = 0; charset <= MAX_CHARSET; charset++)
3707           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
3708         val = Vcharset_revision_alist;
3709         while (CONSP (val))
3710           {
3711             charset = get_charset_id (Fcar_safe (XCAR (val)));
3712             if (charset >= 0
3713                 && (temp = Fcdr_safe (XCAR (val)), INTEGERP (temp))
3714                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
3715               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
3716             val = XCDR (val);
3717           }
3718
3719         /* Checks FLAGS[REG] (REG = 0, 1, 2, 3) and decide designations.
3720            FLAGS[REG] can be one of below:
3721                 integer CHARSET: CHARSET occupies register REG,
3722                 t: designate nothing to REG initially, but can be used
3723                   by any charsets,
3724                 list of integer, nil, or t: designate the first
3725                   element (if integer) to REG initially, the remaining
3726                   elements (if integer) is designated to REG on request,
3727                   if an element is t, REG can be used by any charsets,
3728                 nil: REG is never used.
                Bit REG of REG_BITS is set when REG can be used by any
                charset (a t was found for it).  */
3729         for (charset = 0; charset <= MAX_CHARSET; charset++)
3730           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3731             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
3732         for (i = 0; i < 4; i++)
3733           {
                 /* A charset is accepted either as a valid integer id or
                    as whatever get_charset_id resolves to an id >= 0.  */
3734             if ((INTEGERP (flags[i])
3735                  && (charset = XINT (flags[i]), CHARSET_VALID_P (charset)))
3736                 || (charset = get_charset_id (flags[i])) >= 0)
3737               {
3738                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3739                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
3740               }
3741             else if (EQ (flags[i], Qt))
3742               {
3743                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3744                 reg_bits |= 1 << i;
3745                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3746               }
3747             else if (CONSP (flags[i]))
3748               {
3749                 Lisp_Object tail;
3750                 tail = flags[i];
3751
3752                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
                     /* The first element decides the initial designation.  */
3753                 if ((INTEGERP (XCAR (tail))
3754                      && (charset = XINT (XCAR (tail)),
3755                          CHARSET_VALID_P (charset)))
3756                     || (charset = get_charset_id (XCAR (tail))) >= 0)
3757                   {
3758                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3759                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
3760                   }
3761                 else
3762                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
                     /* The remaining elements only request register I.  */
3763                 tail = XCDR (tail);
3764                 while (CONSP (tail))
3765                   {
3766                     if ((INTEGERP (XCAR (tail))
3767                          && (charset = XINT (XCAR (tail)),
3768                              CHARSET_VALID_P (charset)))
3769                         || (charset = get_charset_id (XCAR (tail))) >= 0)
3770                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3771                         = i;
3772                     else if (EQ (XCAR (tail), Qt))
3773                       reg_bits |= 1 << i;
3774                     tail = XCDR (tail);
3775                   }
3776               }
3777             else
3778               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3779
                 /* The current designation starts out as the initial one.  */
3780             CODING_SPEC_ISO_DESIGNATION (coding, i)
3781               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
3782           }
3783
3784         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
3785           {
3786             /* REG 1 can be used only by locking shift in 7-bit env.  */
3787             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
3788               reg_bits &= ~2;
3789             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3790               /* Without any shifting, only REG 0 and 1 can be used.  */
3791               reg_bits &= 3;
3792           }
3793
             /* Give every defined charset that has no explicit request a
                default register chosen from the ones left in REG_BITS.  */
3794         if (reg_bits)
3795           for (charset = 0; charset <= MAX_CHARSET; charset++)
3796             {
3797               if (CHARSET_DEFINED_P (charset)
3798                   && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3799                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
3800                 {
3801                   /* There exist some default graphic registers to be
3802                      used by CHARSET.  */
3803
3804                   /* We had better avoid designating a charset of
3805                      CHARS96 to REG 0 as far as possible.  */
3806                   if (CHARSET_CHARS (charset) == 96)
3807                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3808                       = (reg_bits & 2
3809                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3810                   else
3811                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3812                       = (reg_bits & 1
3813                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3814                 }
3815             }
3816       }
3817       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3818       coding->spec.iso2022.last_invalid_designation_register = -1;
3819       break;
3820
3821     case 3:
3822       coding->type = coding_type_big5;
3823       coding->common_flags
3824         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
           /* A nil element[4] selects the HKU flag, anything else ETEN.  */
3825       coding->flags
3826         = (NILP (XVECTOR (coding_spec)->contents[4])
3827            ? CODING_FLAG_BIG5_HKU
3828            : CODING_FLAG_BIG5_ETEN);
3829       break;
3830
3831     case 4:
3832       coding->type = coding_type_ccl;
3833       coding->common_flags
3834         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3835       {
             /* element[4] must be a cons whose car sets up the decoder
                and whose cdr sets up the encoder CCL program.  */
3836         val = XVECTOR (coding_spec)->contents[4];
3837         if (! CONSP (val)
3838             || setup_ccl_program (&(coding->spec.ccl.decoder),
3839                                   XCAR (val)) < 0
3840             || setup_ccl_program (&(coding->spec.ccl.encoder),
3841                                   XCDR (val)) < 0)
3842           goto label_invalid_coding_system;
3843
             /* Mark in valid_codes[] each byte value named by the
                Qvalid_codes property.  An element is either one integer
                or a cons (START . END) giving an inclusive range;
                elements outside 0..255 are ignored.  */
3844         bzero (coding->spec.ccl.valid_codes, 256);
3845         val = Fplist_get (plist, Qvalid_codes);
3846         if (CONSP (val))
3847           {
3848             Lisp_Object this;
3849
3850             for (; CONSP (val); val = XCDR (val))
3851               {
3852                 this = XCAR (val);
3853                 if (INTEGERP (this)
3854                     && XINT (this) >= 0 && XINT (this) < 256)
3855                   coding->spec.ccl.valid_codes[XINT (this)] = 1;
3856                 else if (CONSP (this)
3857                          && INTEGERP (XCAR (this))
3858                          && INTEGERP (XCDR (this)))
3859                   {
3860                     int start = XINT (XCAR (this));
3861                     int end = XINT (XCDR (this));
3862
3863                     if (start >= 0 && start <= end && end < 256)
3864                       while (start <= end)
3865                         coding->spec.ccl.valid_codes[start++] = 1;
3866                   }
3867               }
3868           }
3869       }
3870       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3871       coding->spec.ccl.cr_carryover = 0;
3872       coding->spec.ccl.eight_bit_carryover[0] = 0;
3873       break;
3874
3875     case 5:
3876       coding->type = coding_type_raw_text;
3877       break;
3878
3879     default:
3880       goto label_invalid_coding_system;
3881     }
3882   return 0;
3883
       /* Common exit for a nil or malformed coding system: make CODING
          describe no conversion with LF end-of-line and report failure.  */
3884  label_invalid_coding_system:
3885   coding->type = coding_type_no_conversion;
3886   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3887   coding->common_flags = 0;
3888   coding->eol_type = CODING_EOL_LF;
3889   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3890   return -1;
3891 }
3892
3893 /* Release every memory block that holds composition information for
3894    CODING, then clear coding->cmp_data.  */
3895
3896 void
3897 coding_free_composition_data (coding)
3898      struct coding_system *coding;
3899 {
3900   struct composition_data *block = coding->cmp_data, *following;
3901
3902   if (block == NULL)
3903     return;
3904   /* The blocks are linked in both directions.  Walk back to the head
3905      of the chain first, then free the blocks in forward order.  */
3906   while (block->prev != NULL)
3907     block = block->prev;
3908   for (; block != NULL; block = following)
3909     {
3910       following = block->next;
3911       xfree (block);
3912     }
3913   coding->cmp_data = NULL;
3914 }
3915
3916 /* Store POS in the `char_offset' member of every memory block
3917    reachable from coding->cmp_data through the `next' links.  */
3918
3919 void
3920 coding_adjust_composition_offset (coding, pos)
3921      struct coding_system *coding;
3922      int pos;
3923 {
3924   struct composition_data *block = coding->cmp_data;
3925
3926   while (block != NULL)
3927     block->char_offset = pos, block = block->next;
3928 }
3929
3930 /* Make CODING describe raw-text, or the raw-text subsidiary that
3931    matches the eol_type already stored in CODING.  CODING must have
3932    been set up for some coding system beforehand.  Nothing is done if
3933    CODING is already of type raw-text.  */
3934
3935 void
3936 setup_raw_text_coding_system (coding)
3937      struct coding_system *coding;
3938 {
3939   Lisp_Object eol_variants;
3940
3941   if (coding->type == coding_type_raw_text)
3942     return;
3943
3944   coding->symbol = Qraw_text;
3945   coding->type = coding_type_raw_text;
3946   if (coding->eol_type != CODING_EOL_UNDECIDED)
3947     {
3948       /* Pick the subsidiary for the known end-of-line format, provided
3949          the `eol-type' property of raw-text is a 3-element vector.  */
3950       eol_variants = Fget (Qraw_text, Qeol_type);
3951       if (VECTORP (eol_variants) && XVECTOR (eol_variants)->size == 3)
3952         coding->symbol = XVECTOR (eol_variants)->contents[coding->eol_type];
3953     }
3954
3955   setup_coding_system (coding->symbol, coding);
3956 }
3957
3958 /* Emacs has a mechanism to automatically detect a coding system if it
3959    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But
3960    it's impossible to distinguish some coding systems accurately
3961    because they use the same range of codes.  So coding systems are
3962    first grouped into the categories listed below:
3963
3964    o coding-category-emacs-mule
3965
3966         The category for a coding system which has the same code range
3967         as Emacs' internal format.  Assigned the coding-system (Lisp
3968         symbol) `emacs-mule' by default.
3969
3970    o coding-category-sjis
3971
3972         The category for a coding system which has the same code range
3973         as SJIS.  Assigned the coding-system (Lisp
3974         symbol) `japanese-shift-jis' by default.
3975
3976    o coding-category-iso-7
3977
3978         The category for a coding system which has the same code range
3979         as ISO2022 of 7-bit environment.  This doesn't use any locking
3980         shift and single shift functions.  This can encode/decode all
3981         charsets.  Assigned the coding-system (Lisp symbol)
3982         `iso-2022-7bit' by default.
3983
3984    o coding-category-iso-7-tight
3985
3986         Same as coding-category-iso-7 except that this can
3987         encode/decode only the specified charsets.
3988
3989    o coding-category-iso-8-1
3990
3991         The category for a coding system which has the same code range
3992         as ISO2022 of 8-bit environment and graphic plane 1 used only
3993         for DIMENSION1 charset.  This doesn't use any locking shift
3994         and single shift functions.  Assigned the coding-system (Lisp
3995         symbol) `iso-latin-1' by default.
3996
3997    o coding-category-iso-8-2
3998
3999         The category for a coding system which has the same code range
4000         as ISO2022 of 8-bit environment and graphic plane 1 used only
4001         for DIMENSION2 charset.  This doesn't use any locking shift
4002         and single shift functions.  Assigned the coding-system (Lisp
4003         symbol) `japanese-iso-8bit' by default.
4004
4005    o coding-category-iso-7-else
4006
4007         The category for a coding system which has the same code range
4008         as ISO2022 of 7-bit environment but uses locking shift or
4009         single shift functions.  Assigned the coding-system (Lisp
4010         symbol) `iso-2022-7bit-lock' by default.
4011
4012    o coding-category-iso-8-else
4013
4014         The category for a coding system which has the same code range
4015         as ISO2022 of 8-bit environment but uses locking shift or
4016         single shift functions.  Assigned the coding-system (Lisp
4017         symbol) `iso-2022-8bit-ss2' by default.
4018
4019    o coding-category-big5
4020
4021         The category for a coding system which has the same code range
4022         as BIG5.  Assigned the coding-system (Lisp symbol)
4023         `cn-big5' by default.
4024
4025    o coding-category-utf-8
4026
4027         The category for a coding system which has the same code range
4028         as UTF-8 (cf. RFC2279).  Assigned the coding-system (Lisp
4029         symbol) `utf-8' by default.
4030
4031    o coding-category-utf-16-be
4032
4033         The category for a coding system in which a text has an
4034         Unicode signature (cf. Unicode Standard) in the order of BIG
4035         endian at the head.  Assigned the coding-system (Lisp symbol)
4036         `utf-16-be' by default.
4037
4038    o coding-category-utf-16-le
4039
4040         The category for a coding system in which a text has an
4041         Unicode signature (cf. Unicode Standard) in the order of
4042         LITTLE endian at the head.  Assigned the coding-system (Lisp
4043         symbol) `utf-16-le' by default.
4044
4045    o coding-category-ccl
4046
4047         The category for a coding system of which encoder/decoder is
4048         written in CCL programs.  The default value is nil, i.e., no
4049         coding system is assigned.
4050
4051    o coding-category-binary
4052
4053         The category for a coding system not categorized in any of the
4054         above.  Assigned the coding-system (Lisp symbol)
4055         `no-conversion' by default.
4056
4057    Each of them is a Lisp symbol and the value is an actual
4058    `coding-system' (this is also a Lisp symbol) assigned by a user.
4059    What Emacs does actually is to detect a category of coding system.
4060    Then, it uses a `coding-system' assigned to it.  If Emacs can't
4061    decide a single possible category, it selects a category of the
4062    highest priority.  Priorities of categories are also specified by a
4063    user in a Lisp variable `coding-category-list'.
4064
4065 */
4066
4067 static
4068 int ascii_skip_code[256];  /* Indexed by byte value: detect_coding_mask
                                   skips leading bytes whose entry is nonzero.  */
4069
4070 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
4071    If it detects possible coding systems, return an integer in which
4072    appropriate flag bits are set.  Flag bits are defined by macros
4073    CODING_CATEGORY_MASK_XXX in `coding.h'.  If PRIORITIES is non-NULL,
4074    it should point to the table `coding_priorities'.  In that case, only
4075    the flag bit for a coding system of the highest priority is set in
4076    the returned value.  If MULTIBYTEP is nonzero, 8-bit codes of the
4077    range 0x80..0x9F are in multibyte form.
4078
        Return 0 if the text consists only of bytes that are skipped as
        ASCII.

4079    How many ASCII characters are at the head is returned as *SKIP.  */
4080
4081 static int
4082 detect_coding_mask (source, src_bytes, priorities, skip, multibytep)
4083      unsigned char *source;
4084      int src_bytes, *priorities, *skip;
4085      int multibytep;
4086 {
4087   register unsigned char c;
4088   unsigned char *src = source, *src_end = source + src_bytes;
4089   unsigned int mask, utf16_examined_p, iso2022_examined_p;
4090   int i;
4091
4092   /* At first, skip all ASCII characters and control characters except
4093      for three ISO2022 specific control characters.  */
4094   ascii_skip_code[ISO_CODE_SO] = 0;
4095   ascii_skip_code[ISO_CODE_SI] = 0;
4096   ascii_skip_code[ISO_CODE_ESC] = 0;
4097
       /* Re-entered from below after an ESC, SO or SI that did not start
          a valid ISO2022 sequence; such a byte has then been marked as
          skippable in ascii_skip_code.  */
4098  label_loop_detect_coding:
4099   while (src < src_end && ascii_skip_code[*src]) src++;
4100   *skip = src - source;
4101
4102   if (src >= src_end)
4103     /* We found nothing other than ASCII.  There's nothing to do.  */
4104     return 0;
4105
4106   c = *src;
4107   /* The text seems to be encoded in some multilingual coding system.
4108      Now, try to find in which coding system the text is encoded.  */
4109   if (c < 0x80)
4110     {
4111       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
4112       /* C is an ISO2022 specific control code of C0.  */
4113       mask = detect_coding_iso2022 (src, src_end, multibytep);
4114       if (mask == 0)
4115         {
4116           /* No valid ISO2022 code follows C.  Try again.  */
4117           src++;
4118           if (c == ISO_CODE_ESC)
4119             ascii_skip_code[ISO_CODE_ESC] = 1;
4120           else
4121             ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
4122           goto label_loop_detect_coding;
4123         }
           /* With PRIORITIES, return the first table entry that shares a
              bit with MASK, or the raw-text flag if none does.  Without
              PRIORITIES, control continues to the final return below.  */
4124       if (priorities)
4125         {
4126           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
4127             {
4128               if (mask & priorities[i])
4129                 return priorities[i];
4130             }
4131           return CODING_CATEGORY_MASK_RAW_TEXT;
4132         }
4133     }
4134   else
4135     {
           /* Set of categories worth testing for the byte C.  */
4136       int try;
4137
           /* Presumably an 8-bit control code in multibyte form is the
              byte LEADING_CODE_8_BIT_CONTROL followed by the code plus
              0x20, which the next statement undoes.
              NOTE(review): src[1] is read without checking that
              src + 1 < src_end -- confirm a second byte is guaranteed.  */
4138       if (multibytep && c == LEADING_CODE_8_BIT_CONTROL)
4139         c = src[1] - 0x20;
4140
4141       if (c < 0xA0)
4142         {
4143           /* C is the first byte of SJIS character code,
4144              or a leading-code of Emacs' internal format (emacs-mule),
4145              or the first byte of UTF-16.  */
4146           try = (CODING_CATEGORY_MASK_SJIS
4147                   | CODING_CATEGORY_MASK_EMACS_MULE
4148                   | CODING_CATEGORY_MASK_UTF_16_BE
4149                   | CODING_CATEGORY_MASK_UTF_16_LE);
4150
4151           /* Or, if C is a special latin extra code,
4152              or is an ISO2022 specific control code of C1 (SS2 or SS3),
4153              or is an ISO2022 control-sequence-introducer (CSI),
4154              we should also consider the possibility of ISO2022 codings.
                  NOTE(review): SRC has not been advanced in this branch, so
                  *src is still a byte >= 0x80 and the comparisons of *src
                  with ']', '0', '1' and '2' cannot succeed as written --
                  confirm whether the byte after C was meant.  */
4155           if ((VECTORP (Vlatin_extra_code_table)
4156                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
4157               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
4158               || (c == ISO_CODE_CSI
4159                   && (src < src_end
4160                       && (*src == ']'
4161                           || ((*src == '0' || *src == '1' || *src == '2')
4162                               && src + 1 < src_end
4163                               && src[1] == ']')))))
4164             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
4165                      | CODING_CATEGORY_MASK_ISO_8BIT);
4166         }
4167       else
4168         /* C is a character of ISO2022 in graphic plane right,
4169            or a SJIS's 1-byte character code (i.e. JISX0201),
4170            or the first byte of BIG5's 2-byte code,
4171            or the first byte of UTF-8/16.  */
4172         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
4173                 | CODING_CATEGORY_MASK_ISO_8BIT
4174                 | CODING_CATEGORY_MASK_SJIS
4175                 | CODING_CATEGORY_MASK_BIG5
4176                 | CODING_CATEGORY_MASK_UTF_8
4177                 | CODING_CATEGORY_MASK_UTF_16_BE
4178                 | CODING_CATEGORY_MASK_UTF_16_LE);
4179
4180       /* Or, we may have to consider the possibility of CCL.  */
4181       if (coding_system_table[CODING_CATEGORY_IDX_CCL]
4182           && (coding_system_table[CODING_CATEGORY_IDX_CCL]
4183               ->spec.ccl.valid_codes)[c])
4184         try |= CODING_CATEGORY_MASK_CCL;
4185
4186       mask = 0;
4187       utf16_examined_p = iso2022_examined_p = 0;
           /* With PRIORITIES, visit the table entries in order.  Each
              entry triggers at most one detector (the ISO2022 and UTF-16
              detectors run at most once overall), and the entry itself is
              returned as soon as MASK contains one of its bits.  If no
              entry matches, the raw-text flag is returned.  */
4188       if (priorities)
4189         {
4190           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
4191             {
4192               if (!iso2022_examined_p
4193                   && (priorities[i] & try & CODING_CATEGORY_MASK_ISO))
4194                 {
4195                   mask |= detect_coding_iso2022 (src, src_end, multibytep);
4196                   iso2022_examined_p = 1;
4197                 }
4198               else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
4199                 mask |= detect_coding_sjis (src, src_end, multibytep);
4200               else if (priorities[i] & try & CODING_CATEGORY_MASK_UTF_8)
4201                 mask |= detect_coding_utf_8 (src, src_end, multibytep);
4202               else if (!utf16_examined_p
4203                        && (priorities[i] & try &
4204                            CODING_CATEGORY_MASK_UTF_16_BE_LE))
4205                 {
4206                   mask |= detect_coding_utf_16 (src, src_end, multibytep);
4207                   utf16_examined_p = 1;
4208                 }
4209               else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
4210                 mask |= detect_coding_big5 (src, src_end, multibytep);
4211               else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
4212                 mask |= detect_coding_emacs_mule (src, src_end, multibytep);
4213               else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
4214                 mask |= detect_coding_ccl (src, src_end, multibytep);
4215               else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
4216                 mask |= CODING_CATEGORY_MASK_RAW_TEXT;
4217               else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
4218                 mask |= CODING_CATEGORY_MASK_BINARY;
4219               if (mask & priorities[i])
4220                 return priorities[i];
4221             }
4222           return CODING_CATEGORY_MASK_RAW_TEXT;
4223         }
           /* Without PRIORITIES, run every detector selected in TRY and
              collect all of their results.  */
4224       if (try & CODING_CATEGORY_MASK_ISO)
4225         mask |= detect_coding_iso2022 (src, src_end, multibytep);
4226       if (try & CODING_CATEGORY_MASK_SJIS)
4227         mask |= detect_coding_sjis (src, src_end, multibytep);
4228       if (try & CODING_CATEGORY_MASK_BIG5)
4229         mask |= detect_coding_big5 (src, src_end, multibytep);
4230       if (try & CODING_CATEGORY_MASK_UTF_8)
4231         mask |= detect_coding_utf_8 (src, src_end, multibytep);
4232       if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE)
4233         mask |= detect_coding_utf_16 (src, src_end, multibytep);
4234       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
4235         mask |= detect_coding_emacs_mule (src, src_end, multibytep);
4236       if (try & CODING_CATEGORY_MASK_CCL)
4237         mask |= detect_coding_ccl (src, src_end, multibytep);
4238     }
       /* Only reached when PRIORITIES is NULL: raw-text and binary are
          always reported in addition to the detected categories.  */
4239   return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
4240 }
4241
4242 /* Guess the coding system of the SRC_BYTES bytes of text at SRC and
4243    set up CODING for it.  When detect_coding_mask reports nothing,
4244    only the heading_ascii member of CODING is updated.  */
4245
4246 void
4247 detect_coding (coding, src, src_bytes)
4248      struct coding_system *coding;
4249      const unsigned char *src;
4250      int src_bytes;
4251 {
4252   Lisp_Object detected, eol_variants;
4253   unsigned int category;
4254   int head_len, found;
4255   int saved_src_multibyte, saved_dst_multibyte;
4256
4257   found = detect_coding_mask (src, src_bytes, coding_priorities,
4258                               &head_len, coding->src_multibyte);
4259   coding->heading_ascii = head_len;
4260   if (found == 0)
4261     return;
4262
4263   /* FOUND is expected to hold the flag of the highest-priority
4264      category; turn the position of its lowest set bit into an index.  */
4265   for (category = 0; found && (found & 1) == 0; category++)
4266     found >>= 1;
4267   if (found == 0)
4268     category = CODING_CATEGORY_IDX_RAW_TEXT;
4269
4270   detected
4271     = SYMBOL_VALUE (XVECTOR (Vcoding_category_table)->contents[category]);
4272
4273   /* With a known end-of-line format, prefer the matching subsidiary.  */
4274   if (coding->eol_type != CODING_EOL_UNDECIDED)
4275     {
4276       eol_variants = Fget (detected, Qeol_type);
4277       if (VECTORP (eol_variants))
4278         detected = XVECTOR (eol_variants)->contents[coding->eol_type];
4279     }
4280
4281   /* Set up CODING for the detected coding system, carrying over the
4282      slots that must survive the setup.  */
4283   saved_src_multibyte = coding->src_multibyte;
4284   saved_dst_multibyte = coding->dst_multibyte;
4285   setup_coding_system (detected, coding);
4286   coding->src_multibyte = saved_src_multibyte;
4287   coding->dst_multibyte = saved_dst_multibyte;
4288   coding->heading_ascii = head_len;
4289 }
4290
4291 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
4292    SOURCE is encoded.  Return CODING_EOL_LF, CODING_EOL_CRLF or
4293    CODING_EOL_CR if every end-of-line examined has that format,
4294    CODING_EOL_INCONSISTENT if two of them differ, and
4295    CODING_EOL_UNDECIDED if the text contains no end-of-line.  At most
        MAX_EOL_CHECK_COUNT end-of-lines are examined.

        The number of bytes preceding the first end-of-line is returned
        as *SKIP; it is SRC_BYTES if there is no end-of-line at all.  */
4296
4297 #define MAX_EOL_CHECK_COUNT 3
4298
4299 static int
4300 detect_eol_type (source, src_bytes, skip)
4301      unsigned char *source;
4302      int src_bytes, *skip;
4303 {
4304   unsigned char *src = source, *src_end = src + src_bytes;
4305   unsigned char c;
4306   int total = 0;                /* How many end-of-lines are found so far.  */
4307   int eol_type = CODING_EOL_UNDECIDED;
4308   int this_eol_type;
4309
4310   *skip = 0;
4311
4312   while (src < src_end && total < MAX_EOL_CHECK_COUNT)
4313     {
4314       c = *src++;
4315       if (c == '\n' || c == '\r')
4316         {
               /* Record the offset of the first end-of-line only.  TOTAL,
                  not *SKIP, tells whether one was seen already: an
                  end-of-line at offset 0 legitimately leaves *SKIP at 0
                  and must not be overwritten by a later one.  */
4317           if (total == 0)
4318             *skip = src - 1 - source;
4319           total++;
4320           if (c == '\n')
4321             this_eol_type = CODING_EOL_LF;
4322           else if (src >= src_end || *src != '\n')
4323             this_eol_type = CODING_EOL_CR;
4324           else
                 /* CR followed by LF: consume the LF as well.  */
4325             this_eol_type = CODING_EOL_CRLF, src++;
4326
4327           if (eol_type == CODING_EOL_UNDECIDED)
4328             /* This is the first end-of-line.  */
4329             eol_type = this_eol_type;
4330           else if (eol_type != this_eol_type)
4331             {
4332               /* The found type is different from what found before.  */
4333               eol_type = CODING_EOL_INCONSISTENT;
4334               break;
4335             }
4336         }
4337     }
4338
       /* No end-of-line at all: the whole text counts as the head.  */
4339   if (total == 0)
4340     *skip = src_end - source;
4341   return eol_type;
4342 }
4343
4344 /* Like detect_eol_type, but detect EOL type in 2-octet
4345    big-endian/little-endian format for coding systems utf-16-be and
4346    utf-16-le.  */
4347
4348 static int
4349 detect_eol_type_in_2_octet_form (source, src_bytes, skip, big_endian_p)
4350      unsigned char *source;
4351      int src_bytes, *skip, big_endian_p;
4352 {
4353   unsigned char *src = source, *src_end = src + src_bytes;
4354   unsigned int c1, c2;
4355   int total = 0;                /* How many end-of-lines are found so far.  */
4356   int eol_type = CODING_EOL_UNDECIDED;
4357   int this_eol_type;
4358   int msb, lsb;
4359
4360   if (big_endian_p)
4361     msb = 0, lsb = 1;
4362   else
4363     msb = 1, lsb = 0;
4364
4365   *skip = 0;
4366
4367   while ((src + 1) < src_end && total < MAX_EOL_CHECK_COUNT)
4368     {
4369       c1 = (src[msb] << 8) | (src[lsb]);
4370       src += 2;
4371
4372       if (c1 == '\n' || c1 == '\r')
4373         {
4374           if (*skip == 0)
4375             *skip = src - 2 - source;
4376           total++;
4377           if (c1 == '\n')
4378             {
4379               this_eol_type = CODING_EOL_LF;
4380             }
4381           else
4382             {
4383               if ((src + 1) >= src_end)
4384                 {
4385                   this_eol_type = CODING_EOL_CR;
4386                 }
4387               else
4388                 {
4389                   c2 = (src[msb] << 8) | (src[lsb]);
4390                   if (c2 == '\n')
4391                     this_eol_type = CODING_EOL_CRLF, src += 2;
4392                   else
4393                     this_eol_type = CODING_EOL_CR;
4394                 }
4395             }
4396
4397           if (eol_type == CODING_EOL_UNDECIDED)
4398             /* This is the first end-of-line.  */
4399             eol_type = this_eol_type;
4400           else if (eol_type != this_eol_type)
4401             {
4402               /* The found type is different from what found before.  */
4403               eol_type = CODING_EOL_INCONSISTENT;
4404               break;
4405             }
4406         }
4407     }
4408
4409   if (*skip == 0)
4410     *skip = src_end - source;
4411   return eol_type;
4412 }
4413
4414 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
4415    is encoded.  If it detects an appropriate format of end-of-line, it
4416    sets the information in *CODING.  */
4417
4418 void
4419 detect_eol (coding, src, src_bytes)
4420      struct coding_system *coding;
4421      const unsigned char *src;
4422      int src_bytes;
4423 {
4424   Lisp_Object val;
4425   int skip;
4426   int eol_type;
4427
4428   switch (coding->category_idx)
4429     {
4430     case CODING_CATEGORY_IDX_UTF_16_BE:
4431       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 1);
4432       break;
4433     case CODING_CATEGORY_IDX_UTF_16_LE:
4434       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 0);
4435       break;
4436     default:
4437       eol_type = detect_eol_type (src, src_bytes, &skip);
4438       break;
4439     }
4440
4441   if (coding->heading_ascii > skip)
4442     coding->heading_ascii = skip;
4443   else
4444     skip = coding->heading_ascii;
4445
4446   if (eol_type == CODING_EOL_UNDECIDED)
4447     return;
4448   if (eol_type == CODING_EOL_INCONSISTENT)
4449     {
4450 #if 0
4451       /* This code is suppressed until we find a better way to
4452          distinguish raw text file and binary file.  */
4453
4454       /* If we have already detected that the coding is raw-text, the
4455          coding should actually be no-conversion.  */
4456       if (coding->type == coding_type_raw_text)
4457         {
4458           setup_coding_system (Qno_conversion, coding);
4459           return;
4460         }
4461       /* Else, let's decode only text code anyway.  */
4462 #endif /* 0 */
4463       eol_type = CODING_EOL_LF;
4464     }
4465
4466   val = Fget (coding->symbol, Qeol_type);
4467   if (VECTORP (val) && XVECTOR (val)->size == 3)
4468     {
4469       int src_multibyte = coding->src_multibyte;
4470       int dst_multibyte = coding->dst_multibyte;
4471       struct composition_data *cmp_data = coding->cmp_data;
4472
4473       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
4474       coding->src_multibyte = src_multibyte;
4475       coding->dst_multibyte = dst_multibyte;
4476       coding->heading_ascii = skip;
4477       coding->cmp_data = cmp_data;
4478     }
4479 }
4480
/* Number of bytes added to every buffer size computed by
   decoding_buffer_size and encoding_buffer_size.  */
#define CONVERSION_BUFFER_EXTRA_ROOM 256

/* Factor by which decoding_buffer_size multiplies the source length
   for CODING (a pointer to struct coding_system): 3 for ISO2022, the
   decoder's own buf_magnification for CCL, and 2 for anything else.
   The argument is parenthesized so that any pointer-valued
   expression, e.g. `&coding', can be passed.  */
#define DECODING_BUFFER_MAG(coding)                             \
  ((coding)->type == coding_type_iso2022                        \
   ? 3                                                          \
   : ((coding)->type == coding_type_ccl                         \
      ? (coding)->spec.ccl.decoder.buf_magnification            \
      : 2))
4489
4490 /* Return maximum size (bytes) of a buffer enough for decoding
4491    SRC_BYTES of text encoded in CODING.  */
4492
4493 int
4494 decoding_buffer_size (coding, src_bytes)
4495      struct coding_system *coding;
4496      int src_bytes;
4497 {
4498   return (src_bytes * DECODING_BUFFER_MAG (coding)
4499           + CONVERSION_BUFFER_EXTRA_ROOM);
4500 }
4501
4502 /* Return maximum size (bytes) of a buffer enough for encoding
4503    SRC_BYTES of text to CODING.  */
4504
4505 int
4506 encoding_buffer_size (coding, src_bytes)
4507      struct coding_system *coding;
4508      int src_bytes;
4509 {
4510   int magnification;
4511
4512   if (coding->type == coding_type_ccl)
4513     {
4514       magnification = coding->spec.ccl.encoder.buf_magnification;
4515       if (coding->eol_type == CODING_EOL_CRLF)
4516         magnification *= 2;
4517     }
4518   else if (CODING_REQUIRE_ENCODING (coding))
4519     magnification = 3;
4520   else
4521     magnification = 1;
4522
4523   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
4524 }
4525
/* Working buffer for code conversion.  It is set up by
   allocate_conversion_buffer, enlarged by extend_conversion_buffer,
   and released by free_conversion_buffer.  */
struct conversion_buffer
{
  int size;                     /* Number of bytes allocated for DATA.  */
  int on_stack;                 /* 1 if DATA was allocated by alloca,
                                   0 if it was allocated by xmalloc or
                                   xrealloc.  */
  unsigned char *data;          /* The storage itself.  */
};
4533
/* Don't use alloca for allocating memory space larger than this, lest
   we overflow the stack.  The value is parenthesized so that the
   macro can be used safely inside any expression.  */
#define MAX_ALLOCA (16*1024)
4537
/* Allocate LEN bytes of memory for BUF (struct conversion_buffer).
   BUF is the structure itself, not a pointer to it.  The memory is
   taken by alloca if LEN is less than MAX_ALLOCA, otherwise by
   xmalloc; BUF.on_stack records which one was used.  LEN is
   evaluated only once, so it may have side effects.  */
#define allocate_conversion_buffer(buf, len)                    \
  do {                                                          \
    (buf).size = (len);                                         \
    if ((buf).size < MAX_ALLOCA)                                \
      {                                                         \
        (buf).data = (unsigned char *) alloca ((buf).size);     \
        (buf).on_stack = 1;                                     \
      }                                                         \
    else                                                        \
      {                                                         \
        (buf).data = (unsigned char *) xmalloc ((buf).size);    \
        (buf).on_stack = 0;                                     \
      }                                                         \
  } while (0)
4553
4554 /* Double the allocated memory for *BUF.  */
4555 static void
4556 extend_conversion_buffer (buf)
4557      struct conversion_buffer *buf;
4558 {
4559   if (buf->on_stack)
4560     {
4561       unsigned char *save = buf->data;
4562       buf->data = (unsigned char *) xmalloc (buf->size * 2);
4563       bcopy (save, buf->data, buf->size);
4564       buf->on_stack = 0;
4565     }
4566   else
4567     {
4568       buf->data = (unsigned char *) xrealloc (buf->data, buf->size * 2);
4569     }
4570   buf->size *= 2;
4571 }
4572
4573 /* Free the allocated memory for BUF if it is not on stack.  */
4574 static void
4575 free_conversion_buffer (buf)
4576      struct conversion_buffer *buf;
4577 {
4578   if (!buf->on_stack)
4579     xfree (buf->data);
4580 }
4581
/* Run the CCL program of CODING on SRC_BYTES bytes at SOURCE, writing
   the result to DESTINATION.  The encoder program is used if ENCODEP
   is nonzero, the decoder program otherwise.

   CODING->produced, CODING->produced_char, CODING->consumed and
   CODING->result are set, and CODING->result is returned.  On
   decoding, trailing 8-bit bytes that may be the head of a multibyte
   sequence are kept in CODING->spec.ccl.eight_bit_carryover and are
   put at the head of DESTINATION by the next call.

   NOTE(review): DST_BYTES of zero is treated specially below;
   presumably it means the conversion is done in place -- confirm
   against the callers.  */

int
ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
     struct coding_system *coding;
     unsigned char *source, *destination;
     int src_bytes, dst_bytes, encodep;
{
  struct ccl_program *ccl
    = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
  unsigned char *dst = destination;

  /* Hand the relevant state of CODING over to the CCL program.  */
  ccl->suppress_error = coding->suppress_error;
  ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
  if (encodep)
    {
      /* On encoding, EOL format is converted within ccl_driver.  For
         that, setup proper information in the structure CCL.  */
      ccl->eol_type = coding->eol_type;
      if (ccl->eol_type ==CODING_EOL_UNDECIDED)
        ccl->eol_type = CODING_EOL_LF;
      ccl->cr_consumed = coding->spec.ccl.cr_carryover;
      ccl->eight_bit_control = coding->dst_multibyte;
    }
  else
    ccl->eight_bit_control = 1;
  ccl->multibyte = coding->src_multibyte;
  if (coding->spec.ccl.eight_bit_carryover[0] != 0)
    {
      /* Move carryover bytes to DESTINATION.  */
      unsigned char *p = coding->spec.ccl.eight_bit_carryover;
      while (*p)
        *dst++ = *p++;
      coding->spec.ccl.eight_bit_carryover[0] = 0;
      /* The carryover bytes use up a part of the room in
         DESTINATION.  */
      if (dst_bytes)
        dst_bytes -= dst - destination;
    }

  /* The carryover bytes already in DESTINATION count as produced
     too.  */
  coding->produced = (ccl_driver (ccl, source, dst, src_bytes, dst_bytes,
                                  &(coding->consumed))
                      + dst - destination);

  if (encodep)
    {
      /* Each produced byte is counted as one character.  */
      coding->produced_char = coding->produced;
      coding->spec.ccl.cr_carryover = ccl->cr_consumed;
    }
  else if (!ccl->eight_bit_control)
    {
      /* The produced bytes form a valid multibyte sequence.  */
      coding->produced_char
        = multibyte_chars_in_text (destination, coding->produced);
      coding->spec.ccl.eight_bit_carryover[0] = 0;
    }
  else
    {
      /* On decoding, the destination should always be multibyte.
         But, the CCL program might have generated an invalid
         multibyte sequence.  Here we make such a sequence valid as
         multibyte.  */
      int bytes
        = dst_bytes ? dst_bytes : source + coding->consumed - destination;

      if ((coding->consumed < src_bytes
           || !ccl->last_block)
          && coding->produced >= 1
          && destination[coding->produced - 1] >= 0x80)
        {
          /* We should not convert the trailing 8-bit codes to
             multibyte form even if they don't form a valid
             multibyte sequence.  They may form a valid sequence in
             the next call.  */
          int carryover = 0;

          /* Look at the last three bytes at most for a byte in the
             range 0x80..0x9F; CARRYOVER becomes the number of bytes
             from that byte to the end.  */
          if (destination[coding->produced - 1] < 0xA0)
            carryover = 1;
          else if (coding->produced >= 2)
            {
              if (destination[coding->produced - 2] >= 0x80)
                {
                  if (destination[coding->produced - 2] < 0xA0)
                    carryover = 2;
                  else if (coding->produced >= 3
                           && destination[coding->produced - 3] >= 0x80
                           && destination[coding->produced - 3] < 0xA0)
                    carryover = 3;
                }
            }
          if (carryover > 0)
            {
              /* Save the bytes as a zero-terminated sequence and take
                 them out of the result of this call.  */
              BCOPY_SHORT (destination + coding->produced - carryover,
                           coding->spec.ccl.eight_bit_carryover,
                           carryover);
              coding->spec.ccl.eight_bit_carryover[carryover] = 0;
              coding->produced -= carryover;
            }
        }
      coding->produced = str_as_multibyte (destination, bytes,
                                           coding->produced,
                                           &(coding->produced_char));
    }

  /* Translate the status of the CCL program into a result code of
     the coding system.  */
  switch (ccl->status)
    {
    case CCL_STAT_SUSPEND_BY_SRC:
      coding->result = CODING_FINISH_INSUFFICIENT_SRC;
      break;
    case CCL_STAT_SUSPEND_BY_DST:
      coding->result = CODING_FINISH_INSUFFICIENT_DST;
      break;
    case CCL_STAT_QUIT:
    case CCL_STAT_INVALID_CMD:
      coding->result = CODING_FINISH_INTERRUPT;
      break;
    default:
      coding->result = CODING_FINISH_NORMAL;
      break;
    }
  return coding->result;
}
4700
4701 /* Decode EOL format of the text at PTR of BYTES length destructively
4702    according to CODING->eol_type.  This is called after the CCL
4703    program produced a decoded text at PTR.  If we do CRLF->LF
4704    conversion, update CODING->produced and CODING->produced_char.  */
4705
4706 static void
4707 decode_eol_post_ccl (coding, ptr, bytes)
4708      struct coding_system *coding;
4709      unsigned char *ptr;
4710      int bytes;
4711 {
4712   Lisp_Object val, saved_coding_symbol;
4713   unsigned char *pend = ptr + bytes;
4714   int dummy;
4715
4716   /* Remember the current coding system symbol.  We set it back when
4717      an inconsistent EOL is found so that `last-coding-system-used' is
4718      set to the coding system that doesn't specify EOL conversion.  */
4719   saved_coding_symbol = coding->symbol;
4720
4721   coding->spec.ccl.cr_carryover = 0;
4722   if (coding->eol_type == CODING_EOL_UNDECIDED)
4723     {
4724       /* Here, to avoid the call of setup_coding_system, we directly
4725          call detect_eol_type.  */
4726       coding->eol_type = detect_eol_type (ptr, bytes, &dummy);
4727       if (coding->eol_type == CODING_EOL_INCONSISTENT)
4728         coding->eol_type = CODING_EOL_LF;
4729       if (coding->eol_type != CODING_EOL_UNDECIDED)
4730         {
4731           val = Fget (coding->symbol, Qeol_type);
4732           if (VECTORP (val) && XVECTOR (val)->size == 3)
4733             coding->symbol = XVECTOR (val)->contents[coding->eol_type];
4734         }
4735       coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4736     }
4737
4738   if (coding->eol_type == CODING_EOL_LF
4739       || coding->eol_type == CODING_EOL_UNDECIDED)
4740     {
4741       /* We have nothing to do.  */
4742       ptr = pend;
4743     }
4744   else if (coding->eol_type == CODING_EOL_CRLF)
4745     {
4746       unsigned char *pstart = ptr, *p = ptr;
4747
4748       if (! (coding->mode & CODING_MODE_LAST_BLOCK)
4749           && *(pend - 1) == '\r')
4750         {
4751           /* If the last character is CR, we can't handle it here
4752              because LF will be in the not-yet-decoded source text.
4753              Record that the CR is not yet processed.  */
4754           coding->spec.ccl.cr_carryover = 1;
4755           coding->produced--;
4756           coding->produced_char--;
4757           pend--;
4758         }
4759       while (ptr < pend)
4760         {
4761           if (*ptr == '\r')
4762             {
4763               if (ptr + 1 < pend && *(ptr + 1) == '\n')
4764                 {
4765                   *p++ = '\n';
4766                   ptr += 2;
4767                 }
4768               else
4769                 {
4770                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4771                     goto undo_eol_conversion;
4772                   *p++ = *ptr++;
4773                 }
4774             }
4775           else if (*ptr == '\n'
4776                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4777             goto undo_eol_conversion;
4778           else
4779             *p++ = *ptr++;
4780           continue;
4781
4782         undo_eol_conversion:
4783           /* We have faced with inconsistent EOL format at PTR.
4784              Convert all LFs before PTR back to CRLFs.  */
4785           for (p--, ptr--; p >= pstart; p--)
4786             {
4787               if (*p == '\n')
4788                 *ptr-- = '\n', *ptr-- = '\r';
4789               else
4790                 *ptr-- = *p;
4791             }
4792           /*  If carryover is recorded, cancel it because we don't
4793               convert CRLF anymore.  */
4794           if (coding->spec.ccl.cr_carryover)
4795             {
4796               coding->spec.ccl.cr_carryover = 0;
4797               coding->produced++;
4798               coding->produced_char++;
4799               pend++;
4800             }
4801           p = ptr = pend;
4802           coding->eol_type = CODING_EOL_LF;
4803           coding->symbol = saved_coding_symbol;
4804         }
4805       if (p < pend)
4806         {
4807           /* As each two-byte sequence CRLF was converted to LF, (PEND
4808              - P) is the number of deleted characters.  */
4809           coding->produced -= pend - p;
4810           coding->produced_char -= pend - p;
4811         }
4812     }
4813   else                  /* i.e. coding->eol_type == CODING_EOL_CR */
4814     {
4815       unsigned char *p = ptr;
4816
4817       for (; ptr < pend; ptr++)
4818         {
4819           if (*ptr == '\r')
4820             *ptr = '\n';
4821           else if (*ptr == '\n'
4822                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4823             {
4824               for (; p < ptr; p++)
4825                 {
4826                   if (*p == '\n')
4827                     *p = '\r';
4828                 }
4829               ptr = pend;
4830               coding->eol_type = CODING_EOL_LF;
4831               coding->symbol = saved_coding_symbol;
4832             }
4833         }
4834     }
4835 }
4836
/* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
   decoding, it may detect coding system and format of end-of-line if
   those are not yet decided.  The source should be unibyte, the
   result is multibyte if CODING->dst_multibyte is nonzero, else
   unibyte.

   The slots produced, produced_char, consumed, consumed_char, errors
   and result of CODING are reset at first and then set according to
   what was done.  The return value is CODING->result.  */

int
decode_coding (coding, source, destination, src_bytes, dst_bytes)
     struct coding_system *coding;
     const unsigned char *source;
     unsigned char *destination;
     int src_bytes, dst_bytes;
{
  /* Number of bytes put in DESTINATION ahead of the CCL output (the
     CR carried over from the previous call).  */
  int extra = 0;

  if (coding->type == coding_type_undecided)
    detect_coding (coding, source, src_bytes);

  /* For CCL, the end-of-line format is detected on the decoded text
     by decode_eol_post_ccl instead.  */
  if (coding->eol_type == CODING_EOL_UNDECIDED
      && coding->type != coding_type_ccl)
    {
      detect_eol (coding, source, src_bytes);
      /* We had better recover the original eol format if we
         encounter an inconsistent eol format while decoding.  */
      coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
    }

  coding->produced = coding->produced_char = 0;
  coding->consumed = coding->consumed_char = 0;
  coding->errors = 0;
  coding->result = CODING_FINISH_NORMAL;

  switch (coding->type)
    {
    case coding_type_sjis:
      /* The last argument is 1 for SJIS and 0 for BIG5.  */
      decode_coding_sjis_big5 (coding, source, destination,
                               src_bytes, dst_bytes, 1);
      break;

    case coding_type_iso2022:
      decode_coding_iso2022 (coding, source, destination,
                             src_bytes, dst_bytes);
      break;

    case coding_type_big5:
      decode_coding_sjis_big5 (coding, source, destination,
                               src_bytes, dst_bytes, 0);
      break;

    case coding_type_emacs_mule:
      decode_coding_emacs_mule (coding, source, destination,
                                src_bytes, dst_bytes);
      break;

    case coding_type_ccl:
      if (coding->spec.ccl.cr_carryover)
        {
          /* Put the CR which was not processed by the previous call
             of decode_eol_post_ccl in DESTINATION.  It will be
             decoded together with the following LF by the call to
             decode_eol_post_ccl below.  */
          *destination = '\r';
          coding->produced++;
          coding->produced_char++;
          dst_bytes--;
          extra = coding->spec.ccl.cr_carryover;
        }
      ccl_coding_driver (coding, source, destination + extra,
                         src_bytes, dst_bytes, 0);
      if (coding->eol_type != CODING_EOL_LF)
        {
          /* ccl_coding_driver set the counters anew; count the
             carried-over CR again.  */
          coding->produced += extra;
          coding->produced_char += extra;
          decode_eol_post_ccl (coding, destination, coding->produced);
        }
      break;

    default:
      /* Only the end-of-line format is decoded.  */
      decode_eol (coding, source, destination, src_bytes, dst_bytes);
    }

  /* If this is the last block and all of the source was consumed,
     a shortage of source is not a problem.  */
  if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
      && coding->mode & CODING_MODE_LAST_BLOCK
      && coding->consumed == src_bytes)
    coding->result = CODING_FINISH_NORMAL;

  if (coding->mode & CODING_MODE_LAST_BLOCK
      && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
    {
      /* The last block ends with bytes that could not be decoded.
         Count it as an error and pass those bytes through one by
         one.  */
      const unsigned char *src = source + coding->consumed;
      unsigned char *dst = destination + coding->produced;

      src_bytes -= coding->consumed;
      coding->errors++;
      if (COMPOSING_P (coding))
        DECODE_COMPOSITION_END ('1');
      while (src_bytes--)
        {
          int c = *src++;
          dst += CHAR_STRING (c, dst);
          coding->produced_char++;
        }
      coding->consumed = coding->consumed_char = src - source;
      coding->produced = dst - destination;
      coding->result = CODING_FINISH_NORMAL;
    }

  if (!coding->dst_multibyte)
    {
      /* The caller wants a unibyte result; in that form each byte is
         counted as one character.  */
      coding->produced = str_as_unibyte (destination, coding->produced);
      coding->produced_char = coding->produced;
    }

  return coding->result;
}
4952
/* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  The
   multibyteness of the source is CODING->src_multibyte, the
   multibyteness of the result is always unibyte.

   The slots produced, produced_char, consumed, consumed_char, errors
   and result of CODING are reset at first and then set according to
   what was done.  The return value is CODING->result.  */

int
encode_coding (coding, source, destination, src_bytes, dst_bytes)
     struct coding_system *coding;
     const unsigned char *source;
     unsigned char *destination;
     int src_bytes, dst_bytes;
{
  coding->produced = coding->produced_char = 0;
  coding->consumed = coding->consumed_char = 0;
  coding->errors = 0;
  coding->result = CODING_FINISH_NORMAL;

  switch (coding->type)
    {
    case coding_type_sjis:
      /* The last argument is 1 for SJIS and 0 for BIG5.  */
      encode_coding_sjis_big5 (coding, source, destination,
                               src_bytes, dst_bytes, 1);
      break;

    case coding_type_iso2022:
      encode_coding_iso2022 (coding, source, destination,
                             src_bytes, dst_bytes);
      break;

    case coding_type_big5:
      encode_coding_sjis_big5 (coding, source, destination,
                               src_bytes, dst_bytes, 0);
      break;

    case coding_type_emacs_mule:
      encode_coding_emacs_mule (coding, source, destination,
                                src_bytes, dst_bytes);
      break;

    case coding_type_ccl:
      /* The last argument 1 selects the encoder program.  */
      ccl_coding_driver (coding, source, destination,
                         src_bytes, dst_bytes, 1);
      break;

    default:
      /* Only the end-of-line format is encoded.  */
      encode_eol (coding, source, destination, src_bytes, dst_bytes);
    }

  if (coding->mode & CODING_MODE_LAST_BLOCK
      && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
    {
      /* This is the last block, so no more source will come.  Finish
         the output, and copy what is left of the source as is.  */
      const unsigned char *src = source + coding->consumed;
      unsigned char *dst = destination + coding->produced;

      if (coding->type == coding_type_iso2022)
        ENCODE_RESET_PLANE_AND_REGISTER;
      if (COMPOSING_P (coding))
        /* Emit the sequence ESC 1 as we are still composing.  */
        *dst++ = ISO_CODE_ESC, *dst++ = '1';
      if (coding->consumed < src_bytes)
        {
          int len = src_bytes - coding->consumed;

          BCOPY_SHORT (src, dst, len);
          if (coding->src_multibyte)
            len = str_as_unibyte (dst, len);
          dst += len;
          coding->consumed = src_bytes;
        }
      coding->produced = coding->produced_char = dst - destination;
      coding->result = CODING_FINISH_NORMAL;
    }

  /* If all of the source was consumed, a shortage of source is not a
     problem.  */
  if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
      && coding->consumed == src_bytes)
    coding->result = CODING_FINISH_NORMAL;

  return coding->result;
}
5030
5031 /* Scan text in the region between *BEG and *END (byte positions),
5032    skip characters which we don't have to decode by coding system
5033    CODING at the head and tail, then set *BEG and *END to the region
5034    of the text we actually have to convert.  The caller should move
5035    the gap out of the region in advance if the region is from a
5036    buffer.
5037
5038    If STR is not NULL, *BEG and *END are indices into STR.  */
5039
5040 static void
5041 shrink_decoding_region (beg, end, coding, str)
5042      int *beg, *end;
5043      struct coding_system *coding;
5044      unsigned char *str;
5045 {
5046   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
5047   int eol_conversion;
5048   Lisp_Object translation_table;
5049
5050   if (coding->type == coding_type_ccl
5051       || coding->type == coding_type_undecided
5052       || coding->eol_type != CODING_EOL_LF
5053       || !NILP (coding->post_read_conversion)
5054       || coding->composing != COMPOSITION_DISABLED)
5055     {
5056       /* We can't skip any data.  */
5057       return;
5058     }
5059   if (coding->type == coding_type_no_conversion
5060       || coding->type == coding_type_raw_text
5061       || coding->type == coding_type_emacs_mule)
5062     {
5063       /* We need no conversion, but don't have to skip any data here.
5064          Decoding routine handles them effectively anyway.  */
5065       return;
5066     }
5067
5068   translation_table = coding->translation_table_for_decode;
5069   if (NILP (translation_table) && !NILP (Venable_character_translation))
5070     translation_table = Vstandard_translation_table_for_decode;
5071   if (CHAR_TABLE_P (translation_table))
5072     {
5073       int i;
5074       for (i = 0; i < 128; i++)
5075         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
5076           break;
5077       if (i < 128)
5078         /* Some ASCII character should be translated.  We give up
5079            shrinking.  */
5080         return;
5081     }
5082
5083   if (coding->heading_ascii >= 0)
5084     /* Detection routine has already found how much we can skip at the
5085        head.  */
5086     *beg += coding->heading_ascii;
5087
5088   if (str)
5089     {
5090       begp_orig = begp = str + *beg;
5091       endp_orig = endp = str + *end;
5092     }
5093   else
5094     {
5095       begp_orig = begp = BYTE_POS_ADDR (*beg);
5096       endp_orig = endp = begp + *end - *beg;
5097     }
5098
5099   eol_conversion = (coding->eol_type == CODING_EOL_CR
5100                     || coding->eol_type == CODING_EOL_CRLF);
5101
5102   switch (coding->type)
5103     {
5104     case coding_type_sjis:
5105     case coding_type_big5:
5106       /* We can skip all ASCII characters at the head.  */
5107       if (coding->heading_ascii < 0)
5108         {
5109           if (eol_conversion)
5110             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
5111           else
5112             while (begp < endp && *begp < 0x80) begp++;
5113         }
5114       /* We can skip all ASCII characters at the tail except for the
5115          second byte of SJIS or BIG5 code.  */
5116       if (eol_conversion)
5117         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
5118       else
5119         while (begp < endp && endp[-1] < 0x80) endp--;
5120       /* Do not consider LF as ascii if preceded by CR, since that
5121          confuses eol decoding. */
5122       if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
5123         endp++;
5124       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
5125         endp++;
5126       break;
5127
5128     case coding_type_iso2022:
5129       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
5130         /* We can't skip any data.  */
5131         break;
5132       if (coding->heading_ascii < 0)
5133         {
5134           /* We can skip all ASCII characters at the head except for a
5135              few control codes.  */
5136           while (begp < endp && (c = *begp) < 0x80
5137                  && c != ISO_CODE_CR && c != ISO_CODE_SO
5138                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
5139                  && (!eol_conversion || c != ISO_CODE_LF))
5140             begp++;
5141         }
5142       switch (coding->category_idx)
5143         {
5144         case CODING_CATEGORY_IDX_ISO_8_1:
5145         case CODING_CATEGORY_IDX_ISO_8_2:
5146           /* We can skip all ASCII characters at the tail.  */
5147           if (eol_conversion)
5148             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
5149           else
5150             while (begp < endp && endp[-1] < 0x80) endp--;
5151           /* Do not consider LF as ascii if preceded by CR, since that
5152              confuses eol decoding. */
5153           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
5154             endp++;
5155           break;
5156
5157         case CODING_CATEGORY_IDX_ISO_7:
5158         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
5159           {
5160             /* We can skip all characters at the tail except for 8-bit
5161                codes and ESC and the following 2-byte at the tail.  */
5162             unsigned char *eight_bit = NULL;
5163
5164             if (eol_conversion)
5165               while (begp < endp
5166                      && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
5167                 {
5168                   if (!eight_bit && c & 0x80) eight_bit = endp;
5169                   endp--;
5170                 }
5171             else
5172               while (begp < endp
5173                      && (c = endp[-1]) != ISO_CODE_ESC)
5174                 {
5175                   if (!eight_bit && c & 0x80) eight_bit = endp;
5176                   endp--;
5177                 }
5178             /* Do not consider LF as ascii if preceded by CR, since that
5179                confuses eol decoding. */
5180             if (begp < endp && endp < endp_orig
5181                 && endp[-1] == '\r' && endp[0] == '\n')
5182               endp++;
5183             if (begp < endp && endp[-1] == ISO_CODE_ESC)
5184               {
5185                 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
5186                   /* This is an ASCII designation sequence.  We can
5187                      surely skip the tail.  But, if we have
5188                      encountered an 8-bit code, skip only the codes
5189                      after that.  */
5190                   endp = eight_bit ? eight_bit : endp + 2;
5191                 else
5192                   /* Hmmm, we can't skip the tail.  */
5193                   endp = endp_orig;
5194               }
5195             else if (eight_bit)
5196               endp = eight_bit;
5197           }
5198         }
5199       break;
5200
5201     default:
5202       abort ();
5203     }
5204   *beg += begp - begp_orig;
5205   *end += endp - endp_orig;
5206   return;
5207 }
5208
5209 /* Like shrink_decoding_region but for encoding.  */
5210
5211 static void
5212 shrink_encoding_region (beg, end, coding, str)
5213      int *beg, *end;
5214      struct coding_system *coding;
5215      unsigned char *str;
5216 {
5217   unsigned char *begp_orig, *begp, *endp_orig, *endp;
5218   int eol_conversion;
5219   Lisp_Object translation_table;
5220
5221   if (coding->type == coding_type_ccl
5222       || coding->eol_type == CODING_EOL_CRLF
5223       || coding->eol_type == CODING_EOL_CR
5224       || (coding->cmp_data && coding->cmp_data->used > 0))
5225     {
5226       /* We can't skip any data.  */
5227       return;
5228     }
5229   if (coding->type == coding_type_no_conversion
5230       || coding->type == coding_type_raw_text
5231       || coding->type == coding_type_emacs_mule
5232       || coding->type == coding_type_undecided)
5233     {
5234       /* We need no conversion, but don't have to skip any data here.
5235          Encoding routine handles them effectively anyway.  */
5236       return;
5237     }
5238
5239   translation_table = coding->translation_table_for_encode;
5240   if (NILP (translation_table) && !NILP (Venable_character_translation))
5241     translation_table = Vstandard_translation_table_for_encode;
5242   if (CHAR_TABLE_P (translation_table))
5243     {
5244       int i;
5245       for (i = 0; i < 128; i++)
5246         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
5247           break;
5248       if (i < 128)
5249         /* Some ASCII character should be translated.  We give up
5250            shrinking.  */
5251         return;
5252     }
5253
5254   if (str)
5255     {
5256       begp_orig = begp = str + *beg;
5257       endp_orig = endp = str + *end;
5258     }
5259   else
5260     {
5261       begp_orig = begp = BYTE_POS_ADDR (*beg);
5262       endp_orig = endp = begp + *end - *beg;
5263     }
5264
5265   eol_conversion = (coding->eol_type == CODING_EOL_CR
5266                     || coding->eol_type == CODING_EOL_CRLF);
5267
5268   /* Here, we don't have to check coding->pre_write_conversion because
5269      the caller is expected to have handled it already.  */
5270   switch (coding->type)
5271     {
5272     case coding_type_iso2022:
5273       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
5274         /* We can't skip any data.  */
5275         break;
5276       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
5277         {
5278           unsigned char *bol = begp;
5279           while (begp < endp && *begp < 0x80)
5280             {
5281               begp++;
5282               if (begp[-1] == '\n')
5283                 bol = begp;
5284             }
5285           begp = bol;
5286           goto label_skip_tail;
5287         }
5288       /* fall down ... */
5289
5290     case coding_type_sjis:
5291     case coding_type_big5:
5292       /* We can skip all ASCII characters at the head and tail.  */
5293       if (eol_conversion)
5294         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
5295       else
5296         while (begp < endp && *begp < 0x80) begp++;
5297     label_skip_tail:
5298       if (eol_conversion)
5299         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
5300       else
5301         while (begp < endp && *(endp - 1) < 0x80) endp--;
5302       break;
5303
5304     default:
5305       abort ();
5306     }
5307
5308   *beg += begp - begp_orig;
5309   *end += endp - endp_orig;
5310   return;
5311 }
5312
/* Shrinking the conversion region has some overhead of its own, so
   it is attempted only when the region is longer than this many
   bytes.  */
static int shrink_conversion_region_threshhold = 1024;

/* Shrink the region *BEG .. *END when it is longer than
   shrink_conversion_region_threshhold, using the encoding variant if
   ENCODEP is nonzero and the decoding variant otherwise.  BEG, END,
   CODING and STR are handed to the chosen function unchanged.  */
#define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep)        \
  do {                                                                  \
    if (*(end) - *(beg) > shrink_conversion_region_threshhold)          \
      {                                                                 \
        if (!(encodep))                                                 \
          shrink_decoding_region (beg, end, coding, str);               \
        else                                                            \
          shrink_encoding_region (beg, end, coding, str);               \
      }                                                                 \
  } while (0)
5326
5327 static Lisp_Object
5328 code_convert_region_unwind (arg)
5329      Lisp_Object arg;
5330 {
5331   inhibit_pre_post_conversion = 0;
5332   Vlast_coding_system_used = arg;
5333   return Qnil;
5334 }
5335
5336 /* Store information about all compositions in the range FROM and TO
5337    of OBJ in memory blocks pointed by CODING->cmp_data.  OBJ is a
5338    buffer or a string, defaults to the current buffer.  */
5339
5340 void
5341 coding_save_composition (coding, from, to, obj)
5342      struct coding_system *coding;
5343      int from, to;
5344      Lisp_Object obj;
5345 {
5346   Lisp_Object prop;
5347   int start, end;
5348
5349   if (coding->composing == COMPOSITION_DISABLED)
5350     return;
5351   if (!coding->cmp_data)
5352     coding_allocate_composition_data (coding, from);
5353   if (!find_composition (from, to, &start, &end, &prop, obj)
5354       || end > to)
5355     return;
5356   if (start < from
5357       && (!find_composition (end, to, &start, &end, &prop, obj)
5358           || end > to))
5359     return;
5360   coding->composing = COMPOSITION_NO;
5361   do
5362     {
5363       if (COMPOSITION_VALID_P (start, end, prop))
5364         {
5365           enum composition_method method = COMPOSITION_METHOD (prop);
5366           if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
5367               >= COMPOSITION_DATA_SIZE)
5368             coding_allocate_composition_data (coding, from);
5369           /* For relative composition, we remember start and end
5370              positions, for the other compositions, we also remember
5371              components.  */
5372           CODING_ADD_COMPOSITION_START (coding, start - from, method);
5373           if (method != COMPOSITION_RELATIVE)
5374             {
5375               /* We must store a*/
5376               Lisp_Object val, ch;
5377
5378               val = COMPOSITION_COMPONENTS (prop);
5379               if (CONSP (val))
5380                 while (CONSP (val))
5381                   {
5382                     ch = XCAR (val), val = XCDR (val);
5383                     CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
5384                   }
5385               else if (VECTORP (val) || STRINGP (val))
5386                 {
5387                   int len = (VECTORP (val)
5388                              ? XVECTOR (val)->size : SCHARS (val));
5389                   int i;
5390                   for (i = 0; i < len; i++)
5391                     {
5392                       ch = (STRINGP (val)
5393                             ? Faref (val, make_number (i))
5394                             : XVECTOR (val)->contents[i]);
5395                       CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
5396                     }
5397                 }
5398               else              /* INTEGERP (val) */
5399                 CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (val));
5400             }
5401           CODING_ADD_COMPOSITION_END (coding, end - from);
5402         }
5403       start = end;
5404     }
5405   while (start < to
5406          && find_composition (start, to, &start, &end, &prop, obj)
5407          && end <= to);
5408
5409   /* Make coding->cmp_data point to the first memory block.  */
5410   while (coding->cmp_data->prev)
5411     coding->cmp_data = coding->cmp_data->prev;
5412   coding->cmp_data_start = 0;
5413 }
5414
5415 /* Reflect the saved information about compositions to OBJ.
5416    CODING->cmp_data points to a memory block for the information.  OBJ
5417    is a buffer or a string, defaults to the current buffer.  */
5418
5419 void
5420 coding_restore_composition (coding, obj)
5421      struct coding_system *coding;
5422      Lisp_Object obj;
5423 {
5424   struct composition_data *cmp_data = coding->cmp_data;
5425
5426   if (!cmp_data)
5427     return;
5428
5429   while (cmp_data->prev)
5430     cmp_data = cmp_data->prev;
5431
5432   while (cmp_data)
5433     {
5434       int i;
5435
5436       for (i = 0; i < cmp_data->used && cmp_data->data[i] > 0;
5437            i += cmp_data->data[i])
5438         {
5439           int *data = cmp_data->data + i;
5440           enum composition_method method = (enum composition_method) data[3];
5441           Lisp_Object components;
5442
5443           if (method == COMPOSITION_RELATIVE)
5444             components = Qnil;
5445           else
5446             {
5447               int len = data[0] - 4, j;
5448               Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5449
5450               if (method == COMPOSITION_WITH_RULE_ALTCHARS
5451                   && len % 2 == 0)
5452                 len --;
5453               for (j = 0; j < len; j++)
5454                 args[j] = make_number (data[4 + j]);
5455               components = (method == COMPOSITION_WITH_ALTCHARS
5456                             ? Fstring (len, args) : Fvector (len, args));
5457             }
5458           compose_text (data[1], data[2], components, Qnil, obj);
5459         }
5460       cmp_data = cmp_data->next;
5461     }
5462 }
5463
5464 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
5465    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
5466    coding system CODING, and return the status code of code conversion
5467    (currently, this value has no meaning).
5468
5469    How many characters (and bytes) are converted to how many
5470    characters (and bytes) are recorded in members of the structure
5471    CODING.
5472
5473    If REPLACE is nonzero, we do various things as if the original text
5474    is deleted and a new text is inserted.  See the comments in
5475    replace_range (insdel.c) to know what we are doing.
5476
5477    If REPLACE is zero, it is assumed that the source text is unibyte.
5478    Otherwise, it is assumed that the source text is multibyte.  */
5479
5480 int
5481 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
5482      int from, from_byte, to, to_byte, encodep, replace;
5483      struct coding_system *coding;
5484 {
5485   int len = to - from, len_byte = to_byte - from_byte;
5486   int nchars_del = 0, nbytes_del = 0;
5487   int require, inserted, inserted_byte;
5488   int head_skip, tail_skip, total_skip = 0;
5489   Lisp_Object saved_coding_symbol;
5490   int first = 1;
5491   unsigned char *src, *dst;
5492   Lisp_Object deletion;
5493   int orig_point = PT, orig_len = len;
5494   int prev_Z;
5495   int multibyte_p = !NILP (current_buffer->enable_multibyte_characters);
5496
5497   deletion = Qnil;
5498   saved_coding_symbol = coding->symbol;
5499
5500   if (from < PT && PT < to)
5501     {
5502       TEMP_SET_PT_BOTH (from, from_byte);
5503       orig_point = from;
5504     }
5505
5506   if (replace)
5507     {
5508       int saved_from = from;
5509       int saved_inhibit_modification_hooks;
5510
5511       prepare_to_modify_buffer (from, to, &from);
5512       if (saved_from != from)
5513         {
5514           to = from + len;
5515           from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
5516           len_byte = to_byte - from_byte;
5517         }
5518
5519       /* The code conversion routine can not preserve text properties
5520          for now.  So, we must remove all text properties in the
5521          region.  Here, we must suppress all modification hooks.  */
5522       saved_inhibit_modification_hooks = inhibit_modification_hooks;
5523       inhibit_modification_hooks = 1;
5524       Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
5525       inhibit_modification_hooks = saved_inhibit_modification_hooks;
5526     }
5527
5528   if (! encodep && CODING_REQUIRE_DETECTION (coding))
5529     {
5530       /* We must detect encoding of text and eol format.  */
5531
5532       if (from < GPT && to > GPT)
5533         move_gap_both (from, from_byte);
5534       if (coding->type == coding_type_undecided)
5535         {
5536           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
5537           if (coding->type == coding_type_undecided)
5538             {
5539               /* It seems that the text contains only ASCII, but we
5540                  should not leave it undecided because the deeper
5541                  decoding routine (decode_coding) tries to detect the
5542                  encodings again in vain.  */
5543               coding->type = coding_type_emacs_mule;
5544               coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
5545               /* As emacs-mule decoder will handle composition, we
5546                  need this setting to allocate coding->cmp_data
5547                  later.  */
5548               coding->composing = COMPOSITION_NO;
5549             }
5550         }
5551       if (coding->eol_type == CODING_EOL_UNDECIDED
5552           && coding->type != coding_type_ccl)
5553         {
5554           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
5555           if (coding->eol_type == CODING_EOL_UNDECIDED)
5556             coding->eol_type = CODING_EOL_LF;
5557           /* We had better recover the original eol format if we
5558              encounter an inconsistent eol format while decoding.  */
5559           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
5560         }
5561     }
5562
5563   /* Now we convert the text.  */
5564
5565   /* For encoding, we must process pre-write-conversion in advance.  */
5566   if (! inhibit_pre_post_conversion
5567       && encodep
5568       && SYMBOLP (coding->pre_write_conversion)
5569       && ! NILP (Ffboundp (coding->pre_write_conversion)))
5570     {
5571       /* The function in pre-write-conversion may put a new text in a
5572          new buffer.  */
5573       struct buffer *prev = current_buffer;
5574       Lisp_Object new;
5575
5576       record_unwind_protect (code_convert_region_unwind,
5577                              Vlast_coding_system_used);
5578       /* We should not call any more pre-write/post-read-conversion
5579          functions while this pre-write-conversion is running.  */
5580       inhibit_pre_post_conversion = 1;
5581       call2 (coding->pre_write_conversion,
5582              make_number (from), make_number (to));
5583       inhibit_pre_post_conversion = 0;
5584       /* Discard the unwind protect.  */
5585       specpdl_ptr--;
5586
5587       if (current_buffer != prev)
5588         {
5589           len = ZV - BEGV;
5590           new = Fcurrent_buffer ();
5591           set_buffer_internal_1 (prev);
5592           del_range_2 (from, from_byte, to, to_byte, 0);
5593           TEMP_SET_PT_BOTH (from, from_byte);
5594           insert_from_buffer (XBUFFER (new), 1, len, 0);
5595           Fkill_buffer (new);
5596           if (orig_point >= to)
5597             orig_point += len - orig_len;
5598           else if (orig_point > from)
5599             orig_point = from;
5600           orig_len = len;
5601           to = from + len;
5602           from_byte = CHAR_TO_BYTE (from);
5603           to_byte = CHAR_TO_BYTE (to);
5604           len_byte = to_byte - from_byte;
5605           TEMP_SET_PT_BOTH (from, from_byte);
5606         }
5607     }
5608
5609   if (replace)
5610     {
5611       if (! EQ (current_buffer->undo_list, Qt))
5612         deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
5613       else
5614         {
5615           nchars_del = to - from;
5616           nbytes_del = to_byte - from_byte;
5617         }
5618     }
5619
5620   if (coding->composing != COMPOSITION_DISABLED)
5621     {
5622       if (encodep)
5623         coding_save_composition (coding, from, to, Fcurrent_buffer ());
5624       else
5625         coding_allocate_composition_data (coding, from);
5626     }
5627
5628   /* Try to skip the heading and tailing ASCIIs.  */
5629   if (coding->type != coding_type_ccl)
5630     {
5631       int from_byte_orig = from_byte, to_byte_orig = to_byte;
5632
5633       if (from < GPT && GPT < to)
5634         move_gap_both (from, from_byte);
5635       SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
5636       if (from_byte == to_byte
5637           && (encodep || NILP (coding->post_read_conversion))
5638           && ! CODING_REQUIRE_FLUSHING (coding))
5639         {
5640           coding->produced = len_byte;
5641           coding->produced_char = len;
5642           if (!replace)
5643             /* We must record and adjust for this new text now.  */
5644             adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
5645           return 0;
5646         }
5647
5648       head_skip = from_byte - from_byte_orig;
5649       tail_skip = to_byte_orig - to_byte;
5650       total_skip = head_skip + tail_skip;
5651       from += head_skip;
5652       to -= tail_skip;
5653       len -= total_skip; len_byte -= total_skip;
5654     }
5655
5656   /* For conversion, we must put the gap before the text in addition to
5657      making the gap larger for efficient decoding.  The required gap
5658      size starts from 2000 which is the magic number used in make_gap.
5659      But, after one batch of conversion, it will be incremented if we
5660      find that it is not enough .  */
5661   require = 2000;
5662
5663   if (GAP_SIZE  < require)
5664     make_gap (require - GAP_SIZE);
5665   move_gap_both (from, from_byte);
5666
5667   inserted = inserted_byte = 0;
5668
5669   GAP_SIZE += len_byte;
5670   ZV -= len;
5671   Z -= len;
5672   ZV_BYTE -= len_byte;
5673   Z_BYTE -= len_byte;
5674
5675   if (GPT - BEG < BEG_UNCHANGED)
5676     BEG_UNCHANGED = GPT - BEG;
5677   if (Z - GPT < END_UNCHANGED)
5678     END_UNCHANGED = Z - GPT;
5679
5680   if (!encodep && coding->src_multibyte)
5681     {
5682       /* Decoding routines expects that the source text is unibyte.
5683          We must convert 8-bit characters of multibyte form to
5684          unibyte.  */
5685       int len_byte_orig = len_byte;
5686       len_byte = str_as_unibyte (GAP_END_ADDR - len_byte, len_byte);
5687       if (len_byte < len_byte_orig)
5688         safe_bcopy (GAP_END_ADDR - len_byte_orig, GAP_END_ADDR - len_byte,
5689                     len_byte);
5690       coding->src_multibyte = 0;
5691     }
5692
5693   for (;;)
5694     {
5695       int result;
5696
5697       /* The buffer memory is now:
5698          +--------+converted-text+---------+-------original-text-------+---+
5699          |<-from->|<--inserted-->|---------|<--------len_byte--------->|---|
5700                   |<---------------------- GAP ----------------------->|  */
5701       src = GAP_END_ADDR - len_byte;
5702       dst = GPT_ADDR + inserted_byte;
5703
5704       if (encodep)
5705         result = encode_coding (coding, src, dst, len_byte, 0);
5706       else
5707         {
5708           if (coding->composing != COMPOSITION_DISABLED)
5709             coding->cmp_data->char_offset = from + inserted;
5710           result = decode_coding (coding, src, dst, len_byte, 0);
5711         }
5712
5713       /* The buffer memory is now:
5714          +--------+-------converted-text----+--+------original-text----+---+
5715          |<-from->|<-inserted->|<-produced->|--|<-(len_byte-consumed)->|---|
5716                   |<---------------------- GAP ----------------------->|  */
5717
5718       inserted += coding->produced_char;
5719       inserted_byte += coding->produced;
5720       len_byte -= coding->consumed;
5721
5722       if (result == CODING_FINISH_INSUFFICIENT_CMP)
5723         {
5724           coding_allocate_composition_data (coding, from + inserted);
5725           continue;
5726         }
5727
5728       src += coding->consumed;
5729       dst += coding->produced;
5730
5731       if (result == CODING_FINISH_NORMAL)
5732         {
5733           src += len_byte;
5734           break;
5735         }
5736       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
5737         {
5738           unsigned char *pend = dst, *p = pend - inserted_byte;
5739           Lisp_Object eol_type;
5740
5741           /* Encode LFs back to the original eol format (CR or CRLF).  */
5742           if (coding->eol_type == CODING_EOL_CR)
5743             {
5744               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
5745             }
5746           else
5747             {
5748               int count = 0;
5749
5750               while (p < pend) if (*p++ == '\n') count++;
5751               if (src - dst < count)
5752                 {
5753                   /* We don't have sufficient room for encoding LFs
5754                      back to CRLF.  We must record converted and
5755                      not-yet-converted text back to the buffer
5756                      content, enlarge the gap, then record them out of
5757                      the buffer contents again.  */
5758                   int add = len_byte + inserted_byte;
5759
5760                   GAP_SIZE -= add;
5761                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5762                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
5763                   make_gap (count - GAP_SIZE);
5764                   GAP_SIZE += add;
5765                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5766                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5767                   /* Don't forget to update SRC, DST, and PEND.  */
5768                   src = GAP_END_ADDR - len_byte;
5769                   dst = GPT_ADDR + inserted_byte;
5770                   pend = dst;
5771                 }
5772               inserted += count;
5773               inserted_byte += count;
5774               coding->produced += count;
5775               p = dst = pend + count;
5776               while (count)
5777                 {
5778                   *--p = *--pend;
5779                   if (*p == '\n') count--, *--p = '\r';
5780                 }
5781             }
5782
5783           /* Suppress eol-format conversion in the further conversion.  */
5784           coding->eol_type = CODING_EOL_LF;
5785
5786           /* Set the coding system symbol to that for Unix-like EOL.  */
5787           eol_type = Fget (saved_coding_symbol, Qeol_type);
5788           if (VECTORP (eol_type)
5789               && XVECTOR (eol_type)->size == 3
5790               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
5791             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
5792           else
5793             coding->symbol = saved_coding_symbol;
5794
5795           continue;
5796         }
5797       if (len_byte <= 0)
5798         {
5799           if (coding->type != coding_type_ccl
5800               || coding->mode & CODING_MODE_LAST_BLOCK)
5801             break;
5802           coding->mode |= CODING_MODE_LAST_BLOCK;
5803           continue;
5804         }
5805       if (result == CODING_FINISH_INSUFFICIENT_SRC)
5806         {
5807           /* The source text ends in invalid codes.  Let's just
5808              make them valid buffer contents, and finish conversion.  */
5809           if (multibyte_p)
5810             {
5811               unsigned char *start = dst;
5812
5813               inserted += len_byte;
5814               while (len_byte--)
5815                 {
5816                   int c = *src++;
5817                   dst += CHAR_STRING (c, dst);
5818                 }
5819
5820               inserted_byte += dst - start;
5821             }
5822           else
5823             {
5824               inserted += len_byte;
5825               inserted_byte += len_byte;
5826               while (len_byte--)
5827                 *dst++ = *src++;
5828             }
5829           break;
5830         }
5831       if (result == CODING_FINISH_INTERRUPT)
5832         {
5833           /* The conversion procedure was interrupted by a user.  */
5834           break;
5835         }
5836       /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST  */
5837       if (coding->consumed < 1)
5838         {
5839           /* It's quite strange to require more memory without
5840              consuming any bytes.  Perhaps CCL program bug.  */
5841           break;
5842         }
5843       if (first)
5844         {
5845           /* We have just done the first batch of conversion which was
5846              stopped because of insufficient gap.  Let's reconsider the
5847              required gap size (i.e. SRT - DST) now.
5848
5849              We have converted ORIG bytes (== coding->consumed) into
5850              NEW bytes (coding->produced).  To convert the remaining
5851              LEN bytes, we may need REQUIRE bytes of gap, where:
5852                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
5853                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
5854              Here, we are sure that NEW >= ORIG.  */
5855           float ratio;
5856
5857           if (coding->produced <= coding->consumed)
5858             {
5859               /* This happens because of CCL-based coding system with
5860                  eol-type CRLF.  */
5861               require = 0;
5862             }
5863           else
5864             {
5865               ratio = (coding->produced - coding->consumed) / coding->consumed;
5866               require = len_byte * ratio;
5867             }
5868           first = 0;
5869         }
5870       if ((src - dst) < (require + 2000))
5871         {
5872           /* See the comment above the previous call of make_gap.  */
5873           int add = len_byte + inserted_byte;
5874
5875           GAP_SIZE -= add;
5876           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5877           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5878           make_gap (require + 2000);
5879           GAP_SIZE += add;
5880           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5881           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5882         }
5883     }
5884   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
5885
5886   if (encodep && coding->dst_multibyte)
5887     {
5888       /* The output is unibyte.  We must convert 8-bit characters to
5889          multibyte form.  */
5890       if (inserted_byte * 2 > GAP_SIZE)
5891         {
5892           GAP_SIZE -= inserted_byte;
5893           ZV += inserted_byte; Z += inserted_byte;
5894           ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte;
5895           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5896           make_gap (inserted_byte - GAP_SIZE);
5897           GAP_SIZE += inserted_byte;
5898           ZV -= inserted_byte; Z -= inserted_byte;
5899           ZV_BYTE -= inserted_byte; Z_BYTE -= inserted_byte;
5900           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5901         }
5902       inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte);
5903     }
5904
5905   /* If we shrank the conversion area, adjust it now.  */
5906   if (total_skip > 0)
5907     {
5908       if (tail_skip > 0)
5909         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
5910       inserted += total_skip; inserted_byte += total_skip;
5911       GAP_SIZE += total_skip;
5912       GPT -= head_skip; GPT_BYTE -= head_skip;
5913       ZV -= total_skip; ZV_BYTE -= total_skip;
5914       Z -= total_skip; Z_BYTE -= total_skip;
5915       from -= head_skip; from_byte -= head_skip;
5916       to += tail_skip; to_byte += tail_skip;
5917     }
5918
5919   prev_Z = Z;
5920   if (! EQ (current_buffer->undo_list, Qt))
5921     adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
5922   else
5923     adjust_after_replace_noundo (from, from_byte, nchars_del, nbytes_del,
5924                                  inserted, inserted_byte);
5925   inserted = Z - prev_Z;
5926
5927   if (!encodep && coding->cmp_data && coding->cmp_data->used)
5928     coding_restore_composition (coding, Fcurrent_buffer ());
5929   coding_free_composition_data (coding);
5930
5931   if (! inhibit_pre_post_conversion
5932       && ! encodep && ! NILP (coding->post_read_conversion))
5933     {
5934       Lisp_Object val;
5935       Lisp_Object saved_coding_system;
5936
5937       if (from != PT)
5938         TEMP_SET_PT_BOTH (from, from_byte);
5939       prev_Z = Z;
5940       record_unwind_protect (code_convert_region_unwind,
5941                              Vlast_coding_system_used);
5942       saved_coding_system = Vlast_coding_system_used;
5943       Vlast_coding_system_used = coding->symbol;
5944       /* We should not call any more pre-write/post-read-conversion
5945          functions while this post-read-conversion is running.  */
5946       inhibit_pre_post_conversion = 1;
5947       val = call1 (coding->post_read_conversion, make_number (inserted));
5948       inhibit_pre_post_conversion = 0;
5949       coding->symbol = Vlast_coding_system_used;
5950       Vlast_coding_system_used = saved_coding_system;
5951       /* Discard the unwind protect.  */
5952       specpdl_ptr--;
5953       CHECK_NUMBER (val);
5954       inserted += Z - prev_Z;
5955     }
5956
5957   if (orig_point >= from)
5958     {
5959       if (orig_point >= from + orig_len)
5960         orig_point += inserted - orig_len;
5961       else
5962         orig_point = from;
5963       TEMP_SET_PT (orig_point);
5964     }
5965
5966   if (replace)
5967     {
5968       signal_after_change (from, to - from, inserted);
5969       update_compositions (from, from + inserted, CHECK_BORDER);
5970     }
5971
5972   {
5973     coding->consumed = to_byte - from_byte;
5974     coding->consumed_char = to - from;
5975     coding->produced = inserted_byte;
5976     coding->produced_char = inserted;
5977   }
5978
5979   return 0;
5980 }
5981
5982 Lisp_Object
5983 run_pre_post_conversion_on_str (str, coding, encodep)
5984      Lisp_Object str;
5985      struct coding_system *coding;
5986      int encodep;
5987 {
5988   int count = SPECPDL_INDEX ();
5989   struct gcpro gcpro1, gcpro2;
5990   int multibyte = STRING_MULTIBYTE (str);
5991   Lisp_Object buffer;
5992   struct buffer *buf;
5993   Lisp_Object old_deactivate_mark;
5994
5995   record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
5996   record_unwind_protect (code_convert_region_unwind,
5997                          Vlast_coding_system_used);
5998   /* It is not crucial to specbind this.  */
5999   old_deactivate_mark = Vdeactivate_mark;
6000   GCPRO2 (str, old_deactivate_mark);
6001
6002   buffer = Fget_buffer_create (build_string (" *code-converting-work*"));
6003   buf = XBUFFER (buffer);
6004
6005   delete_all_overlays (buf);
6006   buf->directory = current_buffer->directory;
6007   buf->read_only = Qnil;
6008   buf->filename = Qnil;
6009   buf->undo_list = Qt;
6010   eassert (buf->overlays_before == NULL);
6011   eassert (buf->overlays_after == NULL);
6012
6013   set_buffer_internal (buf);
6014   /* We must insert the contents of STR as is without
6015      unibyte<->multibyte conversion.  For that, we adjust the
6016      multibyteness of the working buffer to that of STR.  */
6017   Ferase_buffer ();
6018   buf->enable_multibyte_characters = multibyte ? Qt : Qnil;
6019
6020   insert_from_string (str, 0, 0,
6021                       SCHARS (str), SBYTES (str), 0);
6022   UNGCPRO;
6023   inhibit_pre_post_conversion = 1;
6024   if (encodep)
6025     call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
6026   else
6027     {
6028       Vlast_coding_system_used = coding->symbol;
6029       TEMP_SET_PT_BOTH (BEG, BEG_BYTE);
6030       call1 (coding->post_read_conversion, make_number (Z - BEG));
6031       coding->symbol = Vlast_coding_system_used;
6032     }
6033   inhibit_pre_post_conversion = 0;
6034   Vdeactivate_mark = old_deactivate_mark;
6035   str = make_buffer_string (BEG, Z, 1);
6036   return unbind_to (count, str);
6037 }
6038
6039 Lisp_Object
6040 decode_coding_string (str, coding, nocopy)
6041      Lisp_Object str;
6042      struct coding_system *coding;
6043      int nocopy;
6044 {
6045   int len;
6046   struct conversion_buffer buf;
6047   int from, to_byte;
6048   Lisp_Object saved_coding_symbol;
6049   int result;
6050   int require_decoding;
6051   int shrinked_bytes = 0;
6052   Lisp_Object newstr;
6053   int consumed, consumed_char, produced, produced_char;
6054
6055   from = 0;
6056   to_byte = SBYTES (str);
6057
6058   saved_coding_symbol = coding->symbol;
6059   coding->src_multibyte = STRING_MULTIBYTE (str);
6060   coding->dst_multibyte = 1;
6061   if (CODING_REQUIRE_DETECTION (coding))
6062     {
6063       /* See the comments in code_convert_region.  */
6064       if (coding->type == coding_type_undecided)
6065         {
6066           detect_coding (coding, SDATA (str), to_byte);
6067           if (coding->type == coding_type_undecided)
6068             {
6069               coding->type = coding_type_emacs_mule;
6070               coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
6071               /* As emacs-mule decoder will handle composition, we
6072                  need this setting to allocate coding->cmp_data
6073                  later.  */
6074               coding->composing = COMPOSITION_NO;
6075             }
6076         }
6077       if (coding->eol_type == CODING_EOL_UNDECIDED
6078           && coding->type != coding_type_ccl)
6079         {
6080           saved_coding_symbol = coding->symbol;
6081           detect_eol (coding, SDATA (str), to_byte);
6082           if (coding->eol_type == CODING_EOL_UNDECIDED)
6083             coding->eol_type = CODING_EOL_LF;
6084           /* We had better recover the original eol format if we
6085              encounter an inconsistent eol format while decoding.  */
6086           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
6087         }
6088     }
6089
6090   if (coding->type == coding_type_no_conversion
6091       || coding->type == coding_type_raw_text)
6092     coding->dst_multibyte = 0;
6093
6094   require_decoding = CODING_REQUIRE_DECODING (coding);
6095
6096   if (STRING_MULTIBYTE (str))
6097     {
6098       /* Decoding routines expect the source text to be unibyte.  */
6099       str = Fstring_as_unibyte (str);
6100       to_byte = SBYTES (str);
6101       nocopy = 1;
6102       coding->src_multibyte = 0;
6103     }
6104
6105   /* Try to skip the heading and tailing ASCIIs.  */
6106   if (require_decoding && coding->type != coding_type_ccl)
6107     {
6108       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str),
6109                                 0);
6110       if (from == to_byte)
6111         require_decoding = 0;
6112       shrinked_bytes = from + (SBYTES (str) - to_byte);
6113     }
6114
6115   if (!require_decoding
6116       && !(SYMBOLP (coding->post_read_conversion)
6117            && !NILP (Ffboundp (coding->post_read_conversion))))
6118     {
6119       coding->consumed = SBYTES (str);
6120       coding->consumed_char = SCHARS (str);
6121       if (coding->dst_multibyte)
6122         {
6123           str = Fstring_as_multibyte (str);
6124           nocopy = 1;
6125         }
6126       coding->produced = SBYTES (str);
6127       coding->produced_char = SCHARS (str);
6128       return (nocopy ? str : Fcopy_sequence (str));
6129     }
6130
6131   if (coding->composing != COMPOSITION_DISABLED)
6132     coding_allocate_composition_data (coding, from);
6133   len = decoding_buffer_size (coding, to_byte - from);
6134   allocate_conversion_buffer (buf, len);
6135
6136   consumed = consumed_char = produced = produced_char = 0;
6137   while (1)
6138     {
6139       result = decode_coding (coding, SDATA (str) + from + consumed,
6140                               buf.data + produced, to_byte - from - consumed,
6141                               buf.size - produced);
6142       consumed += coding->consumed;
6143       consumed_char += coding->consumed_char;
6144       produced += coding->produced;
6145       produced_char += coding->produced_char;
6146       if (result == CODING_FINISH_NORMAL
6147           || (result == CODING_FINISH_INSUFFICIENT_SRC
6148               && coding->consumed == 0))
6149         break;
6150       if (result == CODING_FINISH_INSUFFICIENT_CMP)
6151         coding_allocate_composition_data (coding, from + produced_char);
6152       else if (result == CODING_FINISH_INSUFFICIENT_DST)
6153         extend_conversion_buffer (&buf);
6154       else if (result == CODING_FINISH_INCONSISTENT_EOL)
6155         {
6156           Lisp_Object eol_type;
6157
6158           /* Recover the original EOL format.  */
6159           if (coding->eol_type == CODING_EOL_CR)
6160             {
6161               unsigned char *p;
6162               for (p = buf.data; p < buf.data + produced; p++)
6163                 if (*p == '\n') *p = '\r';
6164             }
6165           else if (coding->eol_type == CODING_EOL_CRLF)
6166             {
6167               int num_eol = 0;
6168               unsigned char *p0, *p1;
6169               for (p0 = buf.data, p1 = p0 + produced; p0 < p1; p0++)
6170                 if (*p0 == '\n') num_eol++;
6171               if (produced + num_eol >= buf.size)
6172                 extend_conversion_buffer (&buf);
6173               for (p0 = buf.data + produced, p1 = p0 + num_eol; p0 > buf.data;)
6174                 {
6175                   *--p1 = *--p0;
6176                   if (*p0 == '\n') *--p1 = '\r';
6177                 }
6178               produced += num_eol;
6179               produced_char += num_eol;
6180             }
6181           /* Suppress eol-format conversion in the further conversion.  */
6182           coding->eol_type = CODING_EOL_LF;
6183
6184           /* Set the coding system symbol to that for Unix-like EOL.  */
6185           eol_type = Fget (saved_coding_symbol, Qeol_type);
6186           if (VECTORP (eol_type)
6187               && XVECTOR (eol_type)->size == 3
6188               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
6189             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
6190           else
6191             coding->symbol = saved_coding_symbol;
6192
6193
6194         }
6195     }
6196
6197   coding->consumed = consumed;
6198   coding->consumed_char = consumed_char;
6199   coding->produced = produced;
6200   coding->produced_char = produced_char;
6201
6202   if (coding->dst_multibyte)
6203     newstr = make_uninit_multibyte_string (produced_char + shrinked_bytes,
6204                                            produced + shrinked_bytes);
6205   else
6206     newstr = make_uninit_string (produced + shrinked_bytes);
6207   if (from > 0)
6208     STRING_COPYIN (newstr, 0, SDATA (str), from);
6209   STRING_COPYIN (newstr, from, buf.data, produced);
6210   if (shrinked_bytes > from)
6211     STRING_COPYIN (newstr, from + produced,
6212                    SDATA (str) + to_byte,
6213                    shrinked_bytes - from);
6214   free_conversion_buffer (&buf);
6215
6216   if (coding->cmp_data && coding->cmp_data->used)
6217     coding_restore_composition (coding, newstr);
6218   coding_free_composition_data (coding);
6219
6220   if (SYMBOLP (coding->post_read_conversion)
6221       && !NILP (Ffboundp (coding->post_read_conversion)))
6222     newstr = run_pre_post_conversion_on_str (newstr, coding, 0);
6223
6224   return newstr;
6225 }
6226
6227 Lisp_Object
6228 encode_coding_string (str, coding, nocopy)
6229      Lisp_Object str;
6230      struct coding_system *coding;
6231      int nocopy;
6232 {
6233   int len;
6234   struct conversion_buffer buf;
6235   int from, to, to_byte;
6236   int result;
6237   int shrinked_bytes = 0;
6238   Lisp_Object newstr;
6239   int consumed, consumed_char, produced, produced_char;
6240
6241   if (SYMBOLP (coding->pre_write_conversion)
6242       && !NILP (Ffboundp (coding->pre_write_conversion)))
6243     str = run_pre_post_conversion_on_str (str, coding, 1);
6244
6245   from = 0;
6246   to = SCHARS (str);
6247   to_byte = SBYTES (str);
6248
6249   /* Encoding routines determine the multibyteness of the source text
6250      by coding->src_multibyte.  */
6251   coding->src_multibyte = STRING_MULTIBYTE (str);
6252   coding->dst_multibyte = 0;
6253   if (! CODING_REQUIRE_ENCODING (coding))
6254     {
6255       coding->consumed = SBYTES (str);
6256       coding->consumed_char = SCHARS (str);
6257       if (STRING_MULTIBYTE (str))
6258         {
6259           str = Fstring_as_unibyte (str);
6260           nocopy = 1;
6261         }
6262       coding->produced = SBYTES (str);
6263       coding->produced_char = SCHARS (str);
6264       return (nocopy ? str : Fcopy_sequence (str));
6265     }
6266
6267   if (coding->composing != COMPOSITION_DISABLED)
6268     coding_save_composition (coding, from, to, str);
6269
6270   /* Try to skip the heading and tailing ASCIIs.  */
6271   if (coding->type != coding_type_ccl)
6272     {
6273       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str),
6274                                 1);
6275       if (from == to_byte)
6276         return (nocopy ? str : Fcopy_sequence (str));
6277       shrinked_bytes = from + (SBYTES (str) - to_byte);
6278     }
6279
6280   len = encoding_buffer_size (coding, to_byte - from);
6281   allocate_conversion_buffer (buf, len);
6282
6283   consumed = consumed_char = produced = produced_char = 0;
6284   while (1)
6285     {
6286       result = encode_coding (coding, SDATA (str) + from + consumed,
6287                               buf.data + produced, to_byte - from - consumed,
6288                               buf.size - produced);
6289       consumed += coding->consumed;
6290       consumed_char += coding->consumed_char;
6291       produced += coding->produced;
6292       produced_char += coding->produced_char;
6293       if (result == CODING_FINISH_NORMAL
6294           || (result == CODING_FINISH_INSUFFICIENT_SRC
6295               && coding->consumed == 0))
6296         break;
6297       /* Now result should be CODING_FINISH_INSUFFICIENT_DST.  */
6298       extend_conversion_buffer (&buf);
6299     }
6300
6301   coding->consumed = consumed;
6302   coding->consumed_char = consumed_char;
6303   coding->produced = produced;
6304   coding->produced_char = produced_char;
6305
6306   newstr = make_uninit_string (produced + shrinked_bytes);
6307   if (from > 0)
6308     STRING_COPYIN (newstr, 0, SDATA (str), from);
6309   STRING_COPYIN (newstr, from, buf.data, produced);
6310   if (shrinked_bytes > from)
6311     STRING_COPYIN (newstr, from + produced,
6312                    SDATA (str) + to_byte,
6313                    shrinked_bytes - from);
6314
6315   free_conversion_buffer (&buf);
6316   coding_free_composition_data (coding);
6317
6318   return newstr;
6319 }
6320
6321 \f
6322 #ifdef emacs
6323 /*** 8. Emacs Lisp library functions ***/
6324
6325 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6326        doc: /* Return t if OBJECT is nil or a coding-system.
6327 See the documentation of `make-coding-system' for information
6328 about coding-system objects.  */)
6329      (obj)
6330      Lisp_Object obj;
6331 {
6332   if (NILP (obj))
6333     return Qt;
6334   if (!SYMBOLP (obj))
6335     return Qnil;
6336   if (! NILP (Fget (obj, Qcoding_system_define_form)))
6337     return Qt;
6338   /* Get coding-spec vector for OBJ.  */
6339   obj = Fget (obj, Qcoding_system);
6340   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
6341           ? Qt : Qnil);
6342 }
6343
6344 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6345        Sread_non_nil_coding_system, 1, 1, 0,
6346        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
6347      (prompt)
6348      Lisp_Object prompt;
6349 {
6350   Lisp_Object val;
6351   do
6352     {
6353       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6354                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
6355     }
6356   while (SCHARS (val) == 0);
6357   return (Fintern (val, Qnil));
6358 }
6359
6360 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
6361        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6362 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.  */)
6363      (prompt, default_coding_system)
6364      Lisp_Object prompt, default_coding_system;
6365 {
6366   Lisp_Object val;
6367   if (SYMBOLP (default_coding_system))
6368     default_coding_system = SYMBOL_NAME (default_coding_system);
6369   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6370                           Qt, Qnil, Qcoding_system_history,
6371                           default_coding_system, Qnil);
6372   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
6373 }
6374
6375 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6376        1, 1, 0,
6377        doc: /* Check validity of CODING-SYSTEM.
6378 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
6379 It is valid if it is a symbol with a non-nil `coding-system' property.
6380 The value of property should be a vector of length 5.  */)
6381      (coding_system)
6382      Lisp_Object coding_system;
6383 {
6384   Lisp_Object define_form;
6385
6386   define_form = Fget (coding_system, Qcoding_system_define_form);
6387   if (! NILP (define_form))
6388     {
6389       Fput (coding_system, Qcoding_system_define_form, Qnil);
6390       safe_eval (define_form);
6391     }
6392   if (!NILP (Fcoding_system_p (coding_system)))
6393     return coding_system;
6394   while (1)
6395     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
6396 }
6397 \f
6398 Lisp_Object
6399 detect_coding_system (src, src_bytes, highest, multibytep)
6400      const unsigned char *src;
6401      int src_bytes, highest;
6402      int multibytep;
6403 {
6404   int coding_mask, eol_type;
6405   Lisp_Object val, tmp;
6406   int dummy;
6407
6408   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy, multibytep);
6409   eol_type  = detect_eol_type (src, src_bytes, &dummy);
6410   if (eol_type == CODING_EOL_INCONSISTENT)
6411     eol_type = CODING_EOL_UNDECIDED;
6412
6413   if (!coding_mask)
6414     {
6415       val = Qundecided;
6416       if (eol_type != CODING_EOL_UNDECIDED)
6417         {
6418           Lisp_Object val2;
6419           val2 = Fget (Qundecided, Qeol_type);
6420           if (VECTORP (val2))
6421             val = XVECTOR (val2)->contents[eol_type];
6422         }
6423       return (highest ? val : Fcons (val, Qnil));
6424     }
6425
6426   /* At first, gather possible coding systems in VAL.  */
6427   val = Qnil;
6428   for (tmp = Vcoding_category_list; CONSP (tmp); tmp = XCDR (tmp))
6429     {
6430       Lisp_Object category_val, category_index;
6431
6432       category_index = Fget (XCAR (tmp), Qcoding_category_index);
6433       category_val = Fsymbol_value (XCAR (tmp));
6434       if (!NILP (category_val)
6435           && NATNUMP (category_index)
6436           && (coding_mask & (1 << XFASTINT (category_index))))
6437         {
6438           val = Fcons (category_val, val);
6439           if (highest)
6440             break;
6441         }
6442     }
6443   if (!highest)
6444     val = Fnreverse (val);
6445
6446   /* Then, replace the elements with subsidiary coding systems.  */
6447   for (tmp = val; CONSP (tmp); tmp = XCDR (tmp))
6448     {
6449       if (eol_type != CODING_EOL_UNDECIDED
6450           && eol_type != CODING_EOL_INCONSISTENT)
6451         {
6452           Lisp_Object eol;
6453           eol = Fget (XCAR (tmp), Qeol_type);
6454           if (VECTORP (eol))
6455             XSETCAR (tmp, XVECTOR (eol)->contents[eol_type]);
6456         }
6457     }
6458   return (highest ? XCAR (val) : val);
6459 }
6460
6461 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6462        2, 3, 0,
6463        doc: /* Detect how the byte sequence in the region is encoded.
6464 Return a list of possible coding systems used on decoding a byte
6465 sequence containing the bytes in the region between START and END when
6466 the coding system `undecided' is specified.  The list is ordered by
6467 priority decided in the current language environment.
6468
6469 If only ASCII characters are found, it returns a list of single element
6470 `undecided' or its subsidiary coding system according to a detected
6471 end-of-line format.
6472
6473 If optional argument HIGHEST is non-nil, return the coding system of
6474 highest priority.  */)
6475      (start, end, highest)
6476      Lisp_Object start, end, highest;
6477 {
6478   int from, to;
6479   int from_byte, to_byte;
6480   int include_anchor_byte = 0;
6481
6482   CHECK_NUMBER_COERCE_MARKER (start);
6483   CHECK_NUMBER_COERCE_MARKER (end);
6484
6485   validate_region (&start, &end);
6486   from = XINT (start), to = XINT (end);
6487   from_byte = CHAR_TO_BYTE (from);
6488   to_byte = CHAR_TO_BYTE (to);
6489
6490   if (from < GPT && to >= GPT)
6491     move_gap_both (to, to_byte);
6492   /* If we an anchor byte `\0' follows the region, we include it in
6493      the detecting source.  Then code detectors can handle the tailing
6494      byte sequence more accurately.
6495
6496      Fix me: This is not a perfect solution.  It is better that we
6497      add one more argument, say LAST_BLOCK, to all detect_coding_XXX.
6498   */
6499   if (to == Z || (to == GPT && GAP_SIZE > 0))
6500     include_anchor_byte = 1;
6501   return detect_coding_system (BYTE_POS_ADDR (from_byte),
6502                                to_byte - from_byte + include_anchor_byte,
6503                                !NILP (highest),
6504                                !NILP (current_buffer
6505                                       ->enable_multibyte_characters));
6506 }
6507
6508 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6509        1, 2, 0,
6510        doc: /* Detect how the byte sequence in STRING is encoded.
6511 Return a list of possible coding systems used on decoding a byte
6512 sequence containing the bytes in STRING when the coding system
6513 `undecided' is specified.  The list is ordered by priority decided in
6514 the current language environment.
6515
6516 If only ASCII characters are found, it returns a list of single element
6517 `undecided' or its subsidiary coding system according to a detected
6518 end-of-line format.
6519
6520 If optional argument HIGHEST is non-nil, return the coding system of
6521 highest priority.  */)
6522      (string, highest)
6523      Lisp_Object string, highest;
6524 {
6525   CHECK_STRING (string);
6526
6527   return detect_coding_system (SDATA (string),
6528                                /* "+ 1" is to include the anchor byte
6529                                   `\0'.  With this, code detectors can
6530                                   handle the tailing bytes more
6531                                   accurately.  */
6532                                SBYTES (string) + 1,
6533                                !NILP (highest),
6534                                STRING_MULTIBYTE (string));
6535 }
6536
6537 /*  Subroutine for Fsafe_coding_systems_region_internal.
6538
6539     Return a list of coding systems that safely encode the multibyte
6540     text between P and PEND.  SAFE_CODINGS, if non-nil, is an alist of
6541     possible coding systems.  If it is nil, it means that we have not
6542     yet found any coding systems.
6543
6544     WORK_TABLE is a copy of the char-table Vchar_coding_system_table.  An
6545     element of WORK_TABLE is set to t once the element is looked up.
6546
6547     If a non-ASCII single byte char is found, set
6548     *single_byte_char_found to 1.  */
6549
6550 static Lisp_Object
6551 find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
6552      unsigned char *p, *pend;
6553      Lisp_Object safe_codings, work_table;
6554      int *single_byte_char_found;
6555 {
6556   int c, len;
6557   Lisp_Object val, ch;
6558   Lisp_Object prev, tail;
6559
6560   while (p < pend)
6561     {
6562       c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
6563       p += len;
6564       if (ASCII_BYTE_P (c))
6565         /* We can ignore ASCII characters here.  */
6566         continue;
6567       if (SINGLE_BYTE_CHAR_P (c))
6568         *single_byte_char_found = 1;
6569       if (NILP (safe_codings))
6570         /* Already all coding systems are excluded.  But, we can't
6571            terminate the loop here because non-ASCII single-byte char
6572            must be found.  */
6573         continue;
6574       /* Check the safe coding systems for C.  */
6575       ch = make_number (c);
6576       val = Faref (work_table, ch);
6577       if (EQ (val, Qt))
6578         /* This element was already checked.  Ignore it.  */
6579         continue;
6580       /* Remember that we checked this element.  */
6581       Faset (work_table, ch, Qt);
6582
6583       for (prev = tail = safe_codings; CONSP (tail); tail = XCDR (tail))
6584         {
6585           Lisp_Object elt, translation_table, hash_table, accept_latin_extra;
6586           int encodable;
6587
6588           elt = XCAR (tail);
6589           if (CONSP (XCDR (elt)))
6590             {
6591               /* This entry has this format now:
6592                  ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
6593                           ACCEPT-LATIN-EXTRA ) */
6594               val = XCDR (elt);
6595               encodable = ! NILP (Faref (XCAR (val), ch));
6596               if (! encodable)
6597                 {
6598                   val = XCDR (val);
6599                   translation_table = XCAR (val);
6600                   hash_table = XCAR (XCDR (val));
6601                   accept_latin_extra = XCAR (XCDR (XCDR (val)));
6602                 }
6603             }
6604           else
6605             {
6606               /* This entry has this format now: ( CODING . SAFE-CHARS) */
6607               encodable = ! NILP (Faref (XCDR (elt), ch));
6608               if (! encodable)
6609                 {
6610                   /* Transform the format to:
6611                      ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
6612                        ACCEPT-LATIN-EXTRA )  */
6613                   val = Fget (XCAR (elt), Qcoding_system);
6614                   translation_table
6615                     = Fplist_get (AREF (val, 3),
6616                                   Qtranslation_table_for_encode);
6617                   if (SYMBOLP (translation_table))
6618                     translation_table = Fget (translation_table,
6619                                               Qtranslation_table);
6620                   hash_table
6621                     = (CHAR_TABLE_P (translation_table)
6622                        ? XCHAR_TABLE (translation_table)->extras[1]
6623                        : Qnil);
6624                   accept_latin_extra
6625                     = ((EQ (AREF (val, 0), make_number (2))
6626                         && VECTORP (AREF (val, 4)))
6627                        ? AREF (AREF (val, 4), 16)
6628                        : Qnil);
6629                   XSETCAR (tail, list5 (XCAR (elt), XCDR (elt),
6630                                         translation_table, hash_table,
6631                                         accept_latin_extra));
6632                 }
6633             }
6634
6635           if (! encodable
6636               && ((CHAR_TABLE_P (translation_table)
6637                    && ! NILP (Faref (translation_table, ch)))
6638                   || (HASH_TABLE_P (hash_table)
6639                       && ! NILP (Fgethash (ch, hash_table, Qnil)))
6640                   || (SINGLE_BYTE_CHAR_P (c)
6641                       && ! NILP (accept_latin_extra)
6642                       && VECTORP (Vlatin_extra_code_table)
6643                       && ! NILP (AREF (Vlatin_extra_code_table, c)))))
6644             encodable = 1;
6645           if (encodable)
6646             prev = tail;
6647           else
6648             {
6649               /* Exclude this coding system from SAFE_CODINGS.  */
6650               if (EQ (tail, safe_codings))
6651                 safe_codings = XCDR (safe_codings);
6652               else
6653                 XSETCDR (prev, XCDR (tail));
6654             }
6655         }
6656     }
6657   return safe_codings;
6658 }
6659
6660 DEFUN ("find-coding-systems-region-internal",
6661        Ffind_coding_systems_region_internal,
6662        Sfind_coding_systems_region_internal, 2, 2, 0,
6663        doc: /* Internal use only.  */)
6664      (start, end)
6665      Lisp_Object start, end;
6666 {
6667   Lisp_Object work_table, safe_codings;
6668   int non_ascii_p = 0;
6669   int single_byte_char_found = 0;
6670   const unsigned char *p1, *p1end, *p2, *p2end, *p;
6671
6672   if (STRINGP (start))
6673     {
6674       if (!STRING_MULTIBYTE (start))
6675         return Qt;
6676       p1 = SDATA (start), p1end = p1 + SBYTES (start);
6677       p2 = p2end = p1end;
6678       if (SCHARS (start) != SBYTES (start))
6679         non_ascii_p = 1;
6680     }
6681   else
6682     {
6683       int from, to, stop;
6684
6685       CHECK_NUMBER_COERCE_MARKER (start);
6686       CHECK_NUMBER_COERCE_MARKER (end);
6687       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6688         args_out_of_range (start, end);
6689       if (NILP (current_buffer->enable_multibyte_characters))
6690         return Qt;
6691       from = CHAR_TO_BYTE (XINT (start));
6692       to = CHAR_TO_BYTE (XINT (end));
6693       stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to;
6694       p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from);
6695       if (stop == to)
6696         p2 = p2end = p1end;
6697       else
6698         p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop);
6699       if (XINT (end) - XINT (start) != to - from)
6700         non_ascii_p = 1;
6701     }
6702
6703   if (!non_ascii_p)
6704     {
6705       /* We are sure that the text contains no multibyte character.
6706          Check if it contains eight-bit-graphic.  */
6707       p = p1;
6708       for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++);
6709       if (p == p1end)
6710         {
6711           for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);
6712           if (p == p2end)
6713             return Qt;
6714         }
6715     }
6716
6717   /* The text contains non-ASCII characters.  */
6718
6719   work_table = Fmake_char_table (Qchar_coding_system, Qnil);
6720   safe_codings = Fcopy_sequence (XCDR (Vcoding_system_safe_chars));
6721
6722   safe_codings = find_safe_codings (p1, p1end, safe_codings, work_table,
6723                                     &single_byte_char_found);
6724   if (p2 < p2end)
6725     safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
6726                                       &single_byte_char_found);
6727   if (EQ (safe_codings, XCDR (Vcoding_system_safe_chars)))
6728     safe_codings = Qt;
6729   else
6730     {
6731       /* Turn safe_codings to a list of coding systems... */
6732       Lisp_Object val;
6733
6734       if (single_byte_char_found)
6735         /* ... and append these for eight-bit chars.  */
6736         val = Fcons (Qraw_text,
6737                      Fcons (Qemacs_mule, Fcons (Qno_conversion, Qnil)));
6738       else
6739         /* ... and append generic coding systems.  */
6740         val = Fcopy_sequence (XCAR (Vcoding_system_safe_chars));
6741
6742       for (; CONSP (safe_codings); safe_codings = XCDR (safe_codings))
6743         val = Fcons (XCAR (XCAR (safe_codings)), val);
6744       safe_codings = val;
6745     }
6746
6747   return safe_codings;
6748 }
6749
6750
6751 /* Search from position POS for such characters that are unencodable
6752    accoding to SAFE_CHARS, and return a list of their positions.  P
6753    points where in the memory the character at POS exists.  Limit the
6754    search at PEND or when Nth unencodable characters are found.
6755
6756    If SAFE_CHARS is a char table, an element for an unencodable
6757    character is nil.
6758
6759    If SAFE_CHARS is nil, all non-ASCII characters are unencodable.
6760
6761    Otherwise, SAFE_CHARS is t, and only eight-bit-contrl and
6762    eight-bit-graphic characters are unencodable.  */
6763
6764 static Lisp_Object
6765 unencodable_char_position (safe_chars, pos, p, pend, n)
6766      Lisp_Object safe_chars;
6767      int pos;
6768      unsigned char *p, *pend;
6769      int n;
6770 {
6771   Lisp_Object pos_list;
6772
6773   pos_list = Qnil;
6774   while (p < pend)
6775     {
6776       int len;
6777       int c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, len);
6778
6779       if (c >= 128
6780           && (CHAR_TABLE_P (safe_chars)
6781               ? NILP (CHAR_TABLE_REF (safe_chars, c))
6782               : (NILP (safe_chars) || c < 256)))
6783         {
6784           pos_list = Fcons (make_number (pos), pos_list);
6785           if (--n <= 0)
6786             break;
6787         }
6788       pos++;
6789       p += len;
6790     }
6791   return Fnreverse (pos_list);
6792 }
6793
6794
6795 DEFUN ("unencodable-char-position", Funencodable_char_position,
6796        Sunencodable_char_position, 3, 5, 0,
6797        doc: /*
6798 Return position of first un-encodable character in a region.
6799 START and END specfiy the region and CODING-SYSTEM specifies the
6800 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
6801
6802 If optional 4th argument COUNT is non-nil, it specifies at most how
6803 many un-encodable characters to search.  In this case, the value is a
6804 list of positions.
6805
6806 If optional 5th argument STRING is non-nil, it is a string to search
6807 for un-encodable characters.  In that case, START and END are indexes
6808 to the string.  */)
6809      (start, end, coding_system, count, string)
6810      Lisp_Object start, end, coding_system, count, string;
6811 {
6812   int n;
6813   Lisp_Object safe_chars;
6814   struct coding_system coding;
6815   Lisp_Object positions;
6816   int from, to;
6817   unsigned char *p, *pend;
6818
6819   if (NILP (string))
6820     {
6821       validate_region (&start, &end);
6822       from = XINT (start);
6823       to = XINT (end);
6824       if (NILP (current_buffer->enable_multibyte_characters))
6825         return Qnil;
6826       p = CHAR_POS_ADDR (from);
6827       if (to == GPT)
6828         pend = GPT_ADDR;
6829       else
6830         pend = CHAR_POS_ADDR (to);
6831     }
6832   else
6833     {
6834       CHECK_STRING (string);
6835       CHECK_NATNUM (start);
6836       CHECK_NATNUM (end);
6837       from = XINT (start);
6838       to = XINT (end);
6839       if (from > to
6840           || to > SCHARS (string))
6841         args_out_of_range_3 (string, start, end);
6842       if (! STRING_MULTIBYTE (string))
6843         return Qnil;
6844       p = SDATA (string) + string_char_to_byte (string, from);
6845       pend = SDATA (string) + string_char_to_byte (string, to);
6846     }
6847
6848   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
6849
6850   if (NILP (count))
6851     n = 1;
6852   else
6853     {
6854       CHECK_NATNUM (count);
6855       n = XINT (count);
6856     }
6857
6858   if (coding.type == coding_type_no_conversion
6859       || coding.type == coding_type_raw_text)
6860     return Qnil;
6861
6862   if (coding.type == coding_type_undecided)
6863     safe_chars = Qnil;
6864   else
6865     safe_chars = coding_safe_chars (coding_system);
6866
6867   if (STRINGP (string)
6868       || from >= GPT || to <= GPT)
6869     positions = unencodable_char_position (safe_chars, from, p, pend, n);
6870   else
6871     {
6872       Lisp_Object args[2];
6873
6874       args[0] = unencodable_char_position (safe_chars, from, p, GPT_ADDR, n);
6875       n -= XINT (Flength (args[0]));
6876       if (n <= 0)
6877         positions = args[0];
6878       else
6879         {
6880           args[1] = unencodable_char_position (safe_chars, GPT, GAP_END_ADDR,
6881                                                pend, n);
6882           positions = Fappend (2, args);
6883         }
6884     }
6885
6886   return  (NILP (count) ? Fcar (positions) : positions);
6887 }
6888
6889
6890 Lisp_Object
6891 code_convert_region1 (start, end, coding_system, encodep)
6892      Lisp_Object start, end, coding_system;
6893      int encodep;
6894 {
6895   struct coding_system coding;
6896   int from, to;
6897
6898   CHECK_NUMBER_COERCE_MARKER (start);
6899   CHECK_NUMBER_COERCE_MARKER (end);
6900   CHECK_SYMBOL (coding_system);
6901
6902   validate_region (&start, &end);
6903   from = XFASTINT (start);
6904   to = XFASTINT (end);
6905
6906   if (NILP (coding_system))
6907     return make_number (to - from);
6908
6909   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6910     error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
6911
6912   coding.mode |= CODING_MODE_LAST_BLOCK;
6913   coding.src_multibyte = coding.dst_multibyte
6914     = !NILP (current_buffer->enable_multibyte_characters);
6915   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
6916                        &coding, encodep, 1);
6917   Vlast_coding_system_used = coding.symbol;
6918   return make_number (coding.produced_char);
6919 }
6920
6921 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
6922        3, 3, "r\nzCoding system: ",
6923        doc: /* Decode the current region from the specified coding system.
6924 When called from a program, takes three arguments:
6925 START, END, and CODING-SYSTEM.  START and END are buffer positions.
6926 This function sets `last-coding-system-used' to the precise coding system
6927 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6928 not fully specified.)
6929 It returns the length of the decoded text.  */)
6930      (start, end, coding_system)
6931      Lisp_Object start, end, coding_system;
6932 {
6933   return code_convert_region1 (start, end, coding_system, 0);
6934 }
6935
6936 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
6937        3, 3, "r\nzCoding system: ",
6938        doc: /* Encode the current region into the specified coding system.
6939 When called from a program, takes three arguments:
6940 START, END, and CODING-SYSTEM.  START and END are buffer positions.
6941 This function sets `last-coding-system-used' to the precise coding system
6942 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6943 not fully specified.)
6944 It returns the length of the encoded text.  */)
6945      (start, end, coding_system)
6946      Lisp_Object start, end, coding_system;
6947 {
6948   return code_convert_region1 (start, end, coding_system, 1);
6949 }
6950
6951 Lisp_Object
6952 code_convert_string1 (string, coding_system, nocopy, encodep)
6953      Lisp_Object string, coding_system, nocopy;
6954      int encodep;
6955 {
6956   struct coding_system coding;
6957
6958   CHECK_STRING (string);
6959   CHECK_SYMBOL (coding_system);
6960
6961   if (NILP (coding_system))
6962     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
6963
6964   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6965     error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
6966
6967   coding.mode |= CODING_MODE_LAST_BLOCK;
6968   string = (encodep
6969             ? encode_coding_string (string, &coding, !NILP (nocopy))
6970             : decode_coding_string (string, &coding, !NILP (nocopy)));
6971   Vlast_coding_system_used = coding.symbol;
6972
6973   return string;
6974 }
6975
6976 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
6977        2, 3, 0,
6978        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
6979 Optional arg NOCOPY non-nil means it is OK to return STRING itself
6980 if the decoding operation is trivial.
6981 This function sets `last-coding-system-used' to the precise coding system
6982 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6983 not fully specified.)  */)
6984      (string, coding_system, nocopy)
6985      Lisp_Object string, coding_system, nocopy;
6986 {
6987   return code_convert_string1 (string, coding_system, nocopy, 0);
6988 }
6989
6990 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
6991        2, 3, 0,
6992        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
6993 Optional arg NOCOPY non-nil means it is OK to return STRING itself
6994 if the encoding operation is trivial.
6995 This function sets `last-coding-system-used' to the precise coding system
6996 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6997 not fully specified.)  */)
6998      (string, coding_system, nocopy)
6999      Lisp_Object string, coding_system, nocopy;
7000 {
7001   return code_convert_string1 (string, coding_system, nocopy, 1);
7002 }
7003
7004 /* Encode or decode STRING according to CODING_SYSTEM.
7005    Do not set Vlast_coding_system_used.
7006
7007    This function is called only from macros DECODE_FILE and
7008    ENCODE_FILE, thus we ignore character composition.  */
7009
7010 Lisp_Object
7011 code_convert_string_norecord (string, coding_system, encodep)
7012      Lisp_Object string, coding_system;
7013      int encodep;
7014 {
7015   struct coding_system coding;
7016
7017   CHECK_STRING (string);
7018   CHECK_SYMBOL (coding_system);
7019
7020   if (NILP (coding_system))
7021     return string;
7022
7023   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
7024     error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
7025
7026   coding.composing = COMPOSITION_DISABLED;
7027   coding.mode |= CODING_MODE_LAST_BLOCK;
7028   return (encodep
7029           ? encode_coding_string (string, &coding, 1)
7030           : decode_coding_string (string, &coding, 1));
7031 }
7032 \f
7033 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
7034        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
7035 Return the corresponding character.  */)
7036      (code)
7037      Lisp_Object code;
7038 {
7039   unsigned char c1, c2, s1, s2;
7040   Lisp_Object val;
7041
7042   CHECK_NUMBER (code);
7043   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
7044   if (s1 == 0)
7045     {
7046       if (s2 < 0x80)
7047         XSETFASTINT (val, s2);
7048       else if (s2 >= 0xA0 || s2 <= 0xDF)
7049         XSETFASTINT (val, MAKE_CHAR (charset_katakana_jisx0201, s2, 0));
7050       else
7051         error ("Invalid Shift JIS code: %x", XFASTINT (code));
7052     }
7053   else
7054     {
7055       if ((s1 < 0x80 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF)
7056           || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC))
7057         error ("Invalid Shift JIS code: %x", XFASTINT (code));
7058       DECODE_SJIS (s1, s2, c1, c2);
7059       XSETFASTINT (val, MAKE_CHAR (charset_jisx0208, c1, c2));
7060     }
7061   return val;
7062 }
7063
7064 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
7065        doc: /* Encode a Japanese character CHAR to shift_jis encoding.
7066 Return the corresponding code in SJIS.  */)
7067      (ch)
7068      Lisp_Object ch;
7069 {
7070   int charset, c1, c2, s1, s2;
7071   Lisp_Object val;
7072
7073   CHECK_NUMBER (ch);
7074   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
7075   if (charset == CHARSET_ASCII)
7076     {
7077       val = ch;
7078     }
7079   else if (charset == charset_jisx0208
7080            && c1 > 0x20 && c1 < 0x7F && c2 > 0x20 && c2 < 0x7F)
7081     {
7082       ENCODE_SJIS (c1, c2, s1, s2);
7083       XSETFASTINT (val, (s1 << 8) | s2);
7084     }
7085   else if (charset == charset_katakana_jisx0201
7086            && c1 > 0x20 && c2 < 0xE0)
7087     {
7088       XSETFASTINT (val, c1 | 0x80);
7089     }
7090   else
7091     error ("Can't encode to shift_jis: %d", XFASTINT (ch));
7092   return val;
7093 }
7094
7095 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
7096        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
7097 Return the corresponding character.  */)
7098      (code)
7099      Lisp_Object code;
7100 {
7101   int charset;
7102   unsigned char b1, b2, c1, c2;
7103   Lisp_Object val;
7104
7105   CHECK_NUMBER (code);
7106   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
7107   if (b1 == 0)
7108     {
7109       if (b2 >= 0x80)
7110         error ("Invalid BIG5 code: %x", XFASTINT (code));
7111       val = code;
7112     }
7113   else
7114     {
7115       if ((b1 < 0xA1 || b1 > 0xFE)
7116           || (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE))
7117         error ("Invalid BIG5 code: %x", XFASTINT (code));
7118       DECODE_BIG5 (b1, b2, charset, c1, c2);
7119       XSETFASTINT (val, MAKE_CHAR (charset, c1, c2));
7120     }
7121   return val;
7122 }
7123
7124 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
7125        doc: /* Encode the Big5 character CHAR to BIG5 coding system.
7126 Return the corresponding character code in Big5.  */)
7127      (ch)
7128      Lisp_Object ch;
7129 {
7130   int charset, c1, c2, b1, b2;
7131   Lisp_Object val;
7132
7133   CHECK_NUMBER (ch);
7134   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
7135   if (charset == CHARSET_ASCII)
7136     {
7137       val = ch;
7138     }
7139   else if ((charset == charset_big5_1
7140             && (XFASTINT (ch) >= 0x250a1 && XFASTINT (ch) <= 0x271ec))
7141            || (charset == charset_big5_2
7142                && XFASTINT (ch) >= 0x290a1 && XFASTINT (ch) <= 0x2bdb2))
7143     {
7144       ENCODE_BIG5 (charset, c1, c2, b1, b2);
7145       XSETFASTINT (val, (b1 << 8) | b2);
7146     }
7147   else
7148     error ("Can't encode to Big5: %d", XFASTINT (ch));
7149   return val;
7150 }
7151 \f
7152 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
7153        Sset_terminal_coding_system_internal, 1, 1, 0,
7154        doc: /* Internal use only.  */)
7155      (coding_system)
7156      Lisp_Object coding_system;
7157 {
7158   CHECK_SYMBOL (coding_system);
7159   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
7160   /* We had better not send unsafe characters to terminal.  */
7161   terminal_coding.mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
7162   /* Character composition should be disabled.  */
7163   terminal_coding.composing = COMPOSITION_DISABLED;
7164   /* Error notification should be suppressed.  */
7165   terminal_coding.suppress_error = 1;
7166   terminal_coding.src_multibyte = 1;
7167   terminal_coding.dst_multibyte = 0;
7168   return Qnil;
7169 }
7170
7171 DEFUN ("set-safe-terminal-coding-system-internal", Fset_safe_terminal_coding_system_internal,
7172        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
7173        doc: /* Internal use only.  */)
7174      (coding_system)
7175      Lisp_Object coding_system;
7176 {
7177   CHECK_SYMBOL (coding_system);
7178   setup_coding_system (Fcheck_coding_system (coding_system),
7179                        &safe_terminal_coding);
7180   /* Character composition should be disabled.  */
7181   safe_terminal_coding.composing = COMPOSITION_DISABLED;
7182   /* Error notification should be suppressed.  */
7183   terminal_coding.suppress_error = 1;
7184   safe_terminal_coding.src_multibyte = 1;
7185   safe_terminal_coding.dst_multibyte = 0;
7186   return Qnil;
7187 }
7188
7189 DEFUN ("terminal-coding-system", Fterminal_coding_system,
7190        Sterminal_coding_system, 0, 0, 0,
7191        doc: /* Return coding system specified for terminal output.  */)
7192      ()
7193 {
7194   return terminal_coding.symbol;
7195 }
7196
7197 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
7198        Sset_keyboard_coding_system_internal, 1, 1, 0,
7199        doc: /* Internal use only.  */)
7200      (coding_system)
7201      Lisp_Object coding_system;
7202 {
7203   CHECK_SYMBOL (coding_system);
7204   setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
7205   /* Character composition should be disabled.  */
7206   keyboard_coding.composing = COMPOSITION_DISABLED;
7207   return Qnil;
7208 }
7209
7210 DEFUN ("keyboard-coding-system", Fkeyboard_coding_system,
7211        Skeyboard_coding_system, 0, 0, 0,
7212        doc: /* Return coding system specified for decoding keyboard input.  */)
7213      ()
7214 {
7215   return keyboard_coding.symbol;
7216 }
7217
7218 \f
7219 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7220        Sfind_operation_coding_system,  1, MANY, 0,
7221        doc: /* Choose a coding system for an operation based on the target name.
7222 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7223 DECODING-SYSTEM is the coding system to use for decoding
7224 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7225 for encoding (in case OPERATION does encoding).
7226
7227 The first argument OPERATION specifies an I/O primitive:
7228   For file I/O, `insert-file-contents' or `write-region'.
7229   For process I/O, `call-process', `call-process-region', or `start-process'.
7230   For network I/O, `open-network-stream'.
7231
7232 The remaining arguments should be the same arguments that were passed
7233 to the primitive.  Depending on which primitive, one of those arguments
7234 is selected as the TARGET.  For example, if OPERATION does file I/O,
7235 whichever argument specifies the file name is TARGET.
7236
7237 TARGET has a meaning which depends on OPERATION:
7238   For file I/O, TARGET is a file name.
7239   For process I/O, TARGET is a process name.
7240   For network I/O, TARGET is a service name or a port number
7241
7242 This function looks up what specified for TARGET in,
7243 `file-coding-system-alist', `process-coding-system-alist',
7244 or `network-coding-system-alist' depending on OPERATION.
7245 They may specify a coding system, a cons of coding systems,
7246 or a function symbol to call.
7247 In the last case, we call the function with one argument,
7248 which is a list of all the arguments given to this function.
7249
7250 usage: (find-operation-coding-system OPERATION ARGUMENTS ...)  */)
7251      (nargs, args)
7252      int nargs;
7253      Lisp_Object *args;
7254 {
7255   Lisp_Object operation, target_idx, target, val;
7256   register Lisp_Object chain;
7257
7258   if (nargs < 2)
7259     error ("Too few arguments");
7260   operation = args[0];
7261   if (!SYMBOLP (operation)
7262       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
7263     error ("Invalid first argument");
7264   if (nargs < 1 + XINT (target_idx))
7265     error ("Too few arguments for operation: %s",
7266            SDATA (SYMBOL_NAME (operation)));
7267   /* For write-region, if the 6th argument (i.e. VISIT, the 5th
7268      argument to write-region) is string, it must be treated as a
7269      target file name.  */
7270   if (EQ (operation, Qwrite_region)
7271       && nargs > 5
7272       && STRINGP (args[5]))
7273     target_idx = make_number (4);
7274   target = args[XINT (target_idx) + 1];
7275   if (!(STRINGP (target)
7276         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
7277     error ("Invalid argument %d", XINT (target_idx) + 1);
7278
7279   chain = ((EQ (operation, Qinsert_file_contents)
7280             || EQ (operation, Qwrite_region))
7281            ? Vfile_coding_system_alist
7282            : (EQ (operation, Qopen_network_stream)
7283               ? Vnetwork_coding_system_alist
7284               : Vprocess_coding_system_alist));
7285   if (NILP (chain))
7286     return Qnil;
7287
7288   for (; CONSP (chain); chain = XCDR (chain))
7289     {
7290       Lisp_Object elt;
7291       elt = XCAR (chain);
7292
7293       if (CONSP (elt)
7294           && ((STRINGP (target)
7295                && STRINGP (XCAR (elt))
7296                && fast_string_match (XCAR (elt), target) >= 0)
7297               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
7298         {
7299           val = XCDR (elt);
7300           /* Here, if VAL is both a valid coding system and a valid
7301              function symbol, we return VAL as a coding system.  */
7302           if (CONSP (val))
7303             return val;
7304           if (! SYMBOLP (val))
7305             return Qnil;
7306           if (! NILP (Fcoding_system_p (val)))
7307             return Fcons (val, val);
7308           if (! NILP (Ffboundp (val)))
7309             {
7310               val = call1 (val, Flist (nargs, args));
7311               if (CONSP (val))
7312                 return val;
7313               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7314                 return Fcons (val, val);
7315             }
7316           return Qnil;
7317         }
7318     }
7319   return Qnil;
7320 }
7321
7322 DEFUN ("update-coding-systems-internal",  Fupdate_coding_systems_internal,
7323        Supdate_coding_systems_internal, 0, 0, 0,
7324        doc: /* Update internal database for ISO2022 and CCL based coding systems.
7325 When values of any coding categories are changed, you must
7326 call this function.  */)
7327      ()
7328 {
7329   int i;
7330
7331   for (i = CODING_CATEGORY_IDX_EMACS_MULE; i < CODING_CATEGORY_IDX_MAX; i++)
7332     {
7333       Lisp_Object val;
7334
7335       val = SYMBOL_VALUE (XVECTOR (Vcoding_category_table)->contents[i]);
7336       if (!NILP (val))
7337         {
7338           if (! coding_system_table[i])
7339             coding_system_table[i] = ((struct coding_system *)
7340                                       xmalloc (sizeof (struct coding_system)));
7341           setup_coding_system (val, coding_system_table[i]);
7342         }
7343       else if (coding_system_table[i])
7344         {
7345           xfree (coding_system_table[i]);
7346           coding_system_table[i] = NULL;
7347         }
7348     }
7349
7350   return Qnil;
7351 }
7352
7353 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
7354        Sset_coding_priority_internal, 0, 0, 0,
7355        doc: /* Update internal database for the current value of `coding-category-list'.
7356 This function is internal use only.  */)
7357      ()
7358 {
7359   int i = 0, idx;
7360   Lisp_Object val;
7361
7362   val = Vcoding_category_list;
7363
7364   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
7365     {
7366       if (! SYMBOLP (XCAR (val)))
7367         break;
7368       idx = XFASTINT (Fget (XCAR (val), Qcoding_category_index));
7369       if (idx >= CODING_CATEGORY_IDX_MAX)
7370         break;
7371       coding_priorities[i++] = (1 << idx);
7372       val = XCDR (val);
7373     }
7374   /* If coding-category-list is valid and contains all coding
7375      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
7376      the following code saves Emacs from crashing.  */
7377   while (i < CODING_CATEGORY_IDX_MAX)
7378     coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
7379
7380   return Qnil;
7381 }
7382
7383 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7384        Sdefine_coding_system_internal, 1, 1, 0,
7385        doc: /* Register CODING-SYSTEM as a base coding system.
7386 This function is internal use only.  */)
7387      (coding_system)
7388      Lisp_Object coding_system;
7389 {
7390   Lisp_Object safe_chars, slot;
7391
7392   if (NILP (Fcheck_coding_system (coding_system)))
7393     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
7394   safe_chars = coding_safe_chars (coding_system);
7395   if (! EQ (safe_chars, Qt) && ! CHAR_TABLE_P (safe_chars))
7396     error ("No valid safe-chars property for %s",
7397            SDATA (SYMBOL_NAME (coding_system)));
7398   if (EQ (safe_chars, Qt))
7399     {
7400       if (NILP (Fmemq (coding_system, XCAR (Vcoding_system_safe_chars))))
7401         XSETCAR (Vcoding_system_safe_chars,
7402                  Fcons (coding_system, XCAR (Vcoding_system_safe_chars)));
7403     }
7404   else
7405     {
7406       slot = Fassq (coding_system, XCDR (Vcoding_system_safe_chars));
7407       if (NILP (slot))
7408         XSETCDR (Vcoding_system_safe_chars,
7409                  nconc2 (XCDR (Vcoding_system_safe_chars),
7410                          Fcons (Fcons (coding_system, safe_chars), Qnil)));
7411       else
7412         XSETCDR (slot, safe_chars);
7413     }
7414   return Qnil;
7415 }
7416
7417 #endif /* emacs */
7418
7419 \f
7420 /*** 9. Post-amble ***/
7421
7422 void
7423 init_coding_once ()
7424 {
7425   int i;
7426
7427   /* Emacs' internal format specific initialize routine.  */
7428   for (i = 0; i <= 0x20; i++)
7429     emacs_code_class[i] = EMACS_control_code;
7430   emacs_code_class[0x0A] = EMACS_linefeed_code;
7431   emacs_code_class[0x0D] = EMACS_carriage_return_code;
7432   for (i = 0x21 ; i < 0x7F; i++)
7433     emacs_code_class[i] = EMACS_ascii_code;
7434   emacs_code_class[0x7F] = EMACS_control_code;
7435   for (i = 0x80; i < 0xFF; i++)
7436     emacs_code_class[i] = EMACS_invalid_code;
7437   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
7438   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
7439   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
7440   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
7441
7442   /* ISO2022 specific initialize routine.  */
7443   for (i = 0; i < 0x20; i++)
7444     iso_code_class[i] = ISO_control_0;
7445   for (i = 0x21; i < 0x7F; i++)
7446     iso_code_class[i] = ISO_graphic_plane_0;
7447   for (i = 0x80; i < 0xA0; i++)
7448     iso_code_class[i] = ISO_control_1;
7449   for (i = 0xA1; i < 0xFF; i++)
7450     iso_code_class[i] = ISO_graphic_plane_1;
7451   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
7452   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
7453   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
7454   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
7455   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
7456   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
7457   iso_code_class[ISO_CODE_ESC] = ISO_escape;
7458   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
7459   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
7460   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
7461
7462   setup_coding_system (Qnil, &keyboard_coding);
7463   setup_coding_system (Qnil, &terminal_coding);
7464   setup_coding_system (Qnil, &safe_terminal_coding);
7465   setup_coding_system (Qnil, &default_buffer_file_coding);
7466
7467   bzero (coding_system_table, sizeof coding_system_table);
7468
7469   bzero (ascii_skip_code, sizeof ascii_skip_code);
7470   for (i = 0; i < 128; i++)
7471     ascii_skip_code[i] = 1;
7472
7473 #if defined (MSDOS) || defined (WINDOWSNT)
7474   system_eol_type = CODING_EOL_CRLF;
7475 #else
7476   system_eol_type = CODING_EOL_LF;
7477 #endif
7478
7479   inhibit_pre_post_conversion = 0;
7480 }
7481
7482 #ifdef emacs
7483
7484 void
7485 syms_of_coding ()
7486 {
7487   Qtarget_idx = intern ("target-idx");
7488   staticpro (&Qtarget_idx);
7489
7490   Qcoding_system_history = intern ("coding-system-history");
7491   staticpro (&Qcoding_system_history);
7492   Fset (Qcoding_system_history, Qnil);
7493
7494   /* Target FILENAME is the first argument.  */
7495   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
7496   /* Target FILENAME is the third argument.  */
7497   Fput (Qwrite_region, Qtarget_idx, make_number (2));
7498
7499   Qcall_process = intern ("call-process");
7500   staticpro (&Qcall_process);
7501   /* Target PROGRAM is the first argument.  */
7502   Fput (Qcall_process, Qtarget_idx, make_number (0));
7503
7504   Qcall_process_region = intern ("call-process-region");
7505   staticpro (&Qcall_process_region);
7506   /* Target PROGRAM is the third argument.  */
7507   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
7508
7509   Qstart_process = intern ("start-process");
7510   staticpro (&Qstart_process);
7511   /* Target PROGRAM is the third argument.  */
7512   Fput (Qstart_process, Qtarget_idx, make_number (2));
7513
7514   Qopen_network_stream = intern ("open-network-stream");
7515   staticpro (&Qopen_network_stream);
7516   /* Target SERVICE is the fourth argument.  */
7517   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
7518
7519   Qcoding_system = intern ("coding-system");
7520   staticpro (&Qcoding_system);
7521
7522   Qeol_type = intern ("eol-type");
7523   staticpro (&Qeol_type);
7524
7525   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
7526   staticpro (&Qbuffer_file_coding_system);
7527
7528   Qpost_read_conversion = intern ("post-read-conversion");
7529   staticpro (&Qpost_read_conversion);
7530
7531   Qpre_write_conversion = intern ("pre-write-conversion");
7532   staticpro (&Qpre_write_conversion);
7533
7534   Qno_conversion = intern ("no-conversion");
7535   staticpro (&Qno_conversion);
7536
7537   Qundecided = intern ("undecided");
7538   staticpro (&Qundecided);
7539
7540   Qcoding_system_p = intern ("coding-system-p");
7541   staticpro (&Qcoding_system_p);
7542
7543   Qcoding_system_error = intern ("coding-system-error");
7544   staticpro (&Qcoding_system_error);
7545
7546   Fput (Qcoding_system_error, Qerror_conditions,
7547         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
7548   Fput (Qcoding_system_error, Qerror_message,
7549         build_string ("Invalid coding system"));
7550
7551   Qcoding_category = intern ("coding-category");
7552   staticpro (&Qcoding_category);
7553   Qcoding_category_index = intern ("coding-category-index");
7554   staticpro (&Qcoding_category_index);
7555
7556   Vcoding_category_table
7557     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
7558   staticpro (&Vcoding_category_table);
7559   {
7560     int i;
7561     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
7562       {
7563         XVECTOR (Vcoding_category_table)->contents[i]
7564           = intern (coding_category_name[i]);
7565         Fput (XVECTOR (Vcoding_category_table)->contents[i],
7566               Qcoding_category_index, make_number (i));
7567       }
7568   }
7569
7570   Vcoding_system_safe_chars = Fcons (Qnil, Qnil);
7571   staticpro (&Vcoding_system_safe_chars);
7572
7573   Qtranslation_table = intern ("translation-table");
7574   staticpro (&Qtranslation_table);
7575   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
7576
7577   Qtranslation_table_id = intern ("translation-table-id");
7578   staticpro (&Qtranslation_table_id);
7579
7580   Qtranslation_table_for_decode = intern ("translation-table-for-decode");
7581   staticpro (&Qtranslation_table_for_decode);
7582
7583   Qtranslation_table_for_encode = intern ("translation-table-for-encode");
7584   staticpro (&Qtranslation_table_for_encode);
7585
7586   Qsafe_chars = intern ("safe-chars");
7587   staticpro (&Qsafe_chars);
7588
7589   Qchar_coding_system = intern ("char-coding-system");
7590   staticpro (&Qchar_coding_system);
7591
7592   /* Intern this now in case it isn't already done.
7593      Setting this variable twice is harmless.
7594      But don't staticpro it here--that is done in alloc.c.  */
7595   Qchar_table_extra_slots = intern ("char-table-extra-slots");
7596   Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
7597   Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (0));
7598
7599   Qvalid_codes = intern ("valid-codes");
7600   staticpro (&Qvalid_codes);
7601
7602   Qemacs_mule = intern ("emacs-mule");
7603   staticpro (&Qemacs_mule);
7604
7605   Qraw_text = intern ("raw-text");
7606   staticpro (&Qraw_text);
7607
7608   Qutf_8 = intern ("utf-8");
7609   staticpro (&Qutf_8);
7610
7611   Qcoding_system_define_form = intern ("coding-system-define-form");
7612   staticpro (&Qcoding_system_define_form);
7613
7614   defsubr (&Scoding_system_p);
7615   defsubr (&Sread_coding_system);
7616   defsubr (&Sread_non_nil_coding_system);
7617   defsubr (&Scheck_coding_system);
7618   defsubr (&Sdetect_coding_region);
7619   defsubr (&Sdetect_coding_string);
7620   defsubr (&Sfind_coding_systems_region_internal);
7621   defsubr (&Sunencodable_char_position);
7622   defsubr (&Sdecode_coding_region);
7623   defsubr (&Sencode_coding_region);
7624   defsubr (&Sdecode_coding_string);
7625   defsubr (&Sencode_coding_string);
7626   defsubr (&Sdecode_sjis_char);
7627   defsubr (&Sencode_sjis_char);
7628   defsubr (&Sdecode_big5_char);
7629   defsubr (&Sencode_big5_char);
7630   defsubr (&Sset_terminal_coding_system_internal);
7631   defsubr (&Sset_safe_terminal_coding_system_internal);
7632   defsubr (&Sterminal_coding_system);
7633   defsubr (&Sset_keyboard_coding_system_internal);
7634   defsubr (&Skeyboard_coding_system);
7635   defsubr (&Sfind_operation_coding_system);
7636   defsubr (&Supdate_coding_systems_internal);
7637   defsubr (&Sset_coding_priority_internal);
7638   defsubr (&Sdefine_coding_system_internal);
7639
7640   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
7641                doc: /* List of coding systems.
7642
7643 Do not alter the value of this variable manually.  This variable should be
7644 updated by the functions `make-coding-system' and
7645 `define-coding-system-alias'.  */);
7646   Vcoding_system_list = Qnil;
7647
7648   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
7649                doc: /* Alist of coding system names.
7650 Each element is one element list of coding system name.
7651 This variable is given to `completing-read' as TABLE argument.
7652
7653 Do not alter the value of this variable manually.  This variable should be
7654 updated by the functions `make-coding-system' and
7655 `define-coding-system-alias'.  */);
7656   Vcoding_system_alist = Qnil;
7657
7658   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
7659                doc: /* List of coding-categories (symbols) ordered by priority.
7660
7661 On detecting a coding system, Emacs tries code detection algorithms
7662 associated with each coding-category one by one in this order.  When
7663 one algorithm agrees with a byte sequence of source text, the coding
7664 system bound to the corresponding coding-category is selected.  */);
7665   {
7666     int i;
7667
7668     Vcoding_category_list = Qnil;
7669     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
7670       Vcoding_category_list
7671         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
7672                  Vcoding_category_list);
7673   }
7674
7675   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
7676                doc: /* Specify the coding system for read operations.
7677 It is useful to bind this variable with `let', but do not set it globally.
7678 If the value is a coding system, it is used for decoding on read operation.
7679 If not, an appropriate element is used from one of the coding system alists:
7680 There are three such tables, `file-coding-system-alist',
7681 `process-coding-system-alist', and `network-coding-system-alist'.  */);
7682   Vcoding_system_for_read = Qnil;
7683
7684   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
7685                doc: /* Specify the coding system for write operations.
7686 Programs bind this variable with `let', but you should not set it globally.
7687 If the value is a coding system, it is used for encoding of output,
7688 when writing it to a file and when sending it to a file or subprocess.
7689
7690 If this does not specify a coding system, an appropriate element
7691 is used from one of the coding system alists:
7692 There are three such tables, `file-coding-system-alist',
7693 `process-coding-system-alist', and `network-coding-system-alist'.
7694 For output to files, if the above procedure does not specify a coding system,
7695 the value of `buffer-file-coding-system' is used.  */);
7696   Vcoding_system_for_write = Qnil;
7697
7698   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
7699                doc: /* Coding system used in the latest file or process I/O.
7700 Also set by `encode-coding-region', `decode-coding-region',
7701 `encode-coding-string' and `decode-coding-string'.  */);
7702   Vlast_coding_system_used = Qnil;
7703
7704   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
7705                doc: /* *Non-nil means always inhibit code conversion of end-of-line format.
7706 See info node `Coding Systems' and info node `Text and Binary' concerning
7707 such conversion.  */);
7708   inhibit_eol_conversion = 0;
7709
7710   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
7711                doc: /* Non-nil means process buffer inherits coding system of process output.
7712 Bind it to t if the process output is to be treated as if it were a file
7713 read from some filesystem.  */);
7714   inherit_process_coding_system = 0;
7715
7716   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
7717                doc: /* Alist to decide a coding system to use for a file I/O operation.
7718 The format is ((PATTERN . VAL) ...),
7719 where PATTERN is a regular expression matching a file name,
7720 VAL is a coding system, a cons of coding systems, or a function symbol.
7721 If VAL is a coding system, it is used for both decoding and encoding
7722 the file contents.
7723 If VAL is a cons of coding systems, the car part is used for decoding,
7724 and the cdr part is used for encoding.
7725 If VAL is a function symbol, the function must return a coding system
7726 or a cons of coding systems which are used as above.  The function gets
7727 the arguments with which `find-operation-coding-system' was called.
7728
7729 See also the function `find-operation-coding-system'
7730 and the variable `auto-coding-alist'.  */);
7731   Vfile_coding_system_alist = Qnil;
7732
7733   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
7734     doc: /* Alist to decide a coding system to use for a process I/O operation.
7735 The format is ((PATTERN . VAL) ...),
7736 where PATTERN is a regular expression matching a program name,
7737 VAL is a coding system, a cons of coding systems, or a function symbol.
7738 If VAL is a coding system, it is used for both decoding what received
7739 from the program and encoding what sent to the program.
7740 If VAL is a cons of coding systems, the car part is used for decoding,
7741 and the cdr part is used for encoding.
7742 If VAL is a function symbol, the function must return a coding system
7743 or a cons of coding systems which are used as above.
7744
7745 See also the function `find-operation-coding-system'.  */);
7746   Vprocess_coding_system_alist = Qnil;
7747
7748   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
7749     doc: /* Alist to decide a coding system to use for a network I/O operation.
7750 The format is ((PATTERN . VAL) ...),
7751 where PATTERN is a regular expression matching a network service name
7752 or is a port number to connect to,
7753 VAL is a coding system, a cons of coding systems, or a function symbol.
7754 If VAL is a coding system, it is used for both decoding what received
7755 from the network stream and encoding what sent to the network stream.
7756 If VAL is a cons of coding systems, the car part is used for decoding,
7757 and the cdr part is used for encoding.
7758 If VAL is a function symbol, the function must return a coding system
7759 or a cons of coding systems which are used as above.
7760
7761 See also the function `find-operation-coding-system'.  */);
7762   Vnetwork_coding_system_alist = Qnil;
7763
7764   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
7765                doc: /* Coding system to use with system messages.
7766 Also used for decoding keyboard input on X Window system.  */);
7767   Vlocale_coding_system = Qnil;
7768
7769   /* The eol mnemonics are reset in startup.el system-dependently.  */
7770   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
7771                doc: /* *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
7772   eol_mnemonic_unix = build_string (":");
7773
7774   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
7775                doc: /* *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
7776   eol_mnemonic_dos = build_string ("\\");
7777
7778   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
7779                doc: /* *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
7780   eol_mnemonic_mac = build_string ("/");
7781
7782   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
7783                doc: /* *String displayed in mode line when end-of-line format is not yet determined.  */);
7784   eol_mnemonic_undecided = build_string (":");
7785
7786   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
7787                doc: /* *Non-nil enables character translation while encoding and decoding.  */);
7788   Venable_character_translation = Qt;
7789
7790   DEFVAR_LISP ("standard-translation-table-for-decode",
7791                &Vstandard_translation_table_for_decode,
7792                doc: /* Table for translating characters while decoding.  */);
7793   Vstandard_translation_table_for_decode = Qnil;
7794
7795   DEFVAR_LISP ("standard-translation-table-for-encode",
7796                &Vstandard_translation_table_for_encode,
7797                doc: /* Table for translating characters while encoding.  */);
7798   Vstandard_translation_table_for_encode = Qnil;
7799
7800   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
7801                doc: /* Alist of charsets vs revision numbers.
7802 While encoding, if a charset (car part of an element) is found,
7803 designate it with the escape sequence identifying revision (cdr part of the element).  */);
7804   Vcharset_revision_alist = Qnil;
7805
7806   DEFVAR_LISP ("default-process-coding-system",
7807                &Vdefault_process_coding_system,
7808                doc: /* Cons of coding systems used for process I/O by default.
7809 The car part is used for decoding a process output,
7810 the cdr part is used for encoding a text to be sent to a process.  */);
7811   Vdefault_process_coding_system = Qnil;
7812
7813   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
7814                doc: /* Table of extra Latin codes in the range 128..159 (inclusive).
7815 This is a vector of length 256.
7816 If Nth element is non-nil, the existence of code N in a file
7817 \(or output of subprocess) doesn't prevent it to be detected as
7818 a coding system of ISO 2022 variant which has a flag
7819 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
7820 or reading output of a subprocess.
7821 Only 128th through 159th elements has a meaning.  */);
7822   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
7823
7824   DEFVAR_LISP ("select-safe-coding-system-function",
7825                &Vselect_safe_coding_system_function,
7826                doc: /* Function to call to select safe coding system for encoding a text.
7827
7828 If set, this function is called to force a user to select a proper
7829 coding system which can encode the text in the case that a default
7830 coding system used in each operation can't encode the text.
7831
7832 The default value is `select-safe-coding-system' (which see).  */);
7833   Vselect_safe_coding_system_function = Qnil;
7834
7835   DEFVAR_BOOL ("coding-system-require-warning",
7836                &coding_system_require_warning,
7837                doc: /* Internal use only.
7838 If non-nil, on writing a file, `select-safe-coding-system-function' is
7839 called even if `coding-system-for-write' is non-nil.  The command
7840 `universal-coding-system-argument' binds this variable to t temporarily.  */);
7841   coding_system_require_warning = 0;
7842
7843
7844   DEFVAR_BOOL ("inhibit-iso-escape-detection",
7845                &inhibit_iso_escape_detection,
7846                doc: /* If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
7847
7848 By default, on reading a file, Emacs tries to detect how the text is
7849 encoded.  This code detection is sensitive to escape sequences.  If
7850 the sequence is valid as ISO2022, the code is determined as one of
7851 the ISO2022 encodings, and the file is decoded by the corresponding
7852 coding system (e.g. `iso-2022-7bit').
7853
7854 However, there may be a case that you want to read escape sequences in
7855 a file as is.  In such a case, you can set this variable to non-nil.
7856 Then, as the code detection ignores any escape sequences, no file is
7857 detected as encoded in some ISO2022 encoding.  The result is that all
7858 escape sequences become visible in a buffer.
7859
7860 The default value is nil, and it is strongly recommended not to change
7861 it.  That is because many Emacs Lisp source files that contain
7862 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
7863 in Emacs's distribution, and they won't be decoded correctly on
7864 reading if you suppress escape sequence detection.
7865
7866 The other way to read escape sequences in a file without decoding is
7867 to explicitly specify some coding system that doesn't use ISO2022's
7868 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
7869   inhibit_iso_escape_detection = 0;
7870
7871   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
7872                doc: /* Char table for translating self-inserting characters.
7873 This is applied to the result of input methods, not their input.  See also
7874 `keyboard-translate-table'.  */);
7875     Vtranslation_table_for_input = Qnil;
7876 }
7877
7878 char *
7879 emacs_strerror (error_number)
7880      int error_number;
7881 {
7882   char *str;
7883
7884   synchronize_system_messages_locale ();
7885   str = strerror (error_number);
7886
7887   if (! NILP (Vlocale_coding_system))
7888     {
7889       Lisp_Object dec = code_convert_string_norecord (build_string (str),
7890                                                       Vlocale_coding_system,
7891                                                       0);
7892       str = (char *) SDATA (dec);
7893     }
7894
7895   return str;
7896 }
7897
7898 #endif /* emacs */
7899
7900 /* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
7901    (do not change this comment) */