src/coding.c

   1 /* Coding system handler (conversion, detection, and etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /*** TABLE OF CONTENTS ***
  23
  24   0. General comments
  25   1. Preamble
  26   2. Emacs' internal format (emacs-mule) handlers
  27   3. ISO2022 handlers
  28   4. Shift-JIS and BIG5 handlers
  29   5. CCL handlers
  30   6. End-of-line handlers
  31   7. C library functions
  32   8. Emacs Lisp library functions
  33   9. Post-amble
  34
  35 */
  36
  37 /*** 0. General comments ***/
  38
  39
  40 /*** GENERAL NOTE on CODING SYSTEM ***
  41
  42   Coding system is an encoding mechanism of one or more character
  43   sets.  Here's a list of coding systems which Emacs can handle.  When
  44   we say "decode", it means converting some other coding system to
   45   Emacs' internal format (emacs-mule), and when we say "encode",
  46   it means converting the coding system emacs-mule to some other
  47   coding system.
  48
  49   0. Emacs' internal format (emacs-mule)
  50
  51   Emacs itself holds a multi-lingual character in a buffer and a string
  52   in a special format.  Details are described in section 2.
  53
  54   1. ISO2022
  55
  56   The most famous coding system for multiple character sets.  X's
  57   Compound Text, various EUCs (Extended Unix Code), and coding
  58   systems used in Internet communication such as ISO-2022-JP are
  59   all variants of ISO2022.  Details are described in section 3.
  60
  61   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  62
  63   A coding system to encode character sets: ASCII, JISX0201, and
  64   JISX0208.  Widely used for PC's in Japan.  Details are described in
  65   section 4.
  66
  67   3. BIG5
  68
  69   A coding system to encode character sets: ASCII and Big5.  Widely
  70   used by Chinese (mainly in Taiwan and Hong Kong).  Details are
  71   described in section 4.  In this file, when we write "BIG5"
  72   (all uppercase), we mean the coding system, and when we write
  73   "Big5" (capitalized), we mean the character set.
  74
  75   4. Raw text
  76
  77   A coding system for a text containing random 8-bit code.  Emacs does
  78   no code conversion on such a text except for end-of-line format.
  79
  80   5. Other
  81
  82   If a user wants to read/write a text encoded in a coding system not
  83   listed above, he can supply a decoder and an encoder for it in CCL
  84   (Code Conversion Language) programs.  Emacs executes the CCL program
  85   while reading/writing.
  86
  87   Emacs represents a coding system by a Lisp symbol that has a property
  88   `coding-system'.  But, before actually using the coding system, the
  89   information about it is set in a structure of type `struct
  90   coding_system' for rapid processing.  See section 6 for more details.
  91
  92 */
  93
  94 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  95
  96   How end-of-line of a text is encoded depends on a system.  For
  97   instance, Unix's format is just one byte of `line-feed' code,
  98   whereas DOS's format is two-byte sequence of `carriage-return' and
  99   `line-feed' codes.  MacOS's format is usually one byte of
 100   `carriage-return'.
 101
 102   Since text characters encoding and end-of-line encoding are
 103   independent, any coding system described above can take
 104   any format of end-of-line.  So, Emacs has information of format of
 105   end-of-line in each coding-system.  See section 6 for more details.
 106
 107 */
 108
 109 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 110
 111   These functions check if a text between SRC and SRC_END is encoded
 112   in the coding system category XXX.  Each returns an integer value in
 113   which appropriate flag bits for the category XXX is set.  The flag
 114   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 115   template of these functions.  If MULTIBYTEP is nonzero, 8-bit codes
 116   of the range 0x80..0x9F are in multibyte form.  */
 117 #if 0
 118 int
 119 detect_coding_emacs_mule (src, src_end, multibytep)
 120      unsigned char *src, *src_end;
 121      int multibytep;
 122 {
 123   ...
 124 }
 125 #endif
 126
 127 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 128
 129   These functions decode SRC_BYTES length of unibyte text at SOURCE
 130   encoded in CODING to Emacs' internal format.  The resulting
 131   multibyte text goes to a place pointed to by DESTINATION, the length
 132   of which should not exceed DST_BYTES.
 133
 134   These functions set the information of original and decoded texts in
 135   the members produced, produced_char, consumed, and consumed_char of
 136   the structure *CODING.  They also set the member result to one of
 137   CODING_FINISH_XXX indicating how the decoding finished.
 138
 139   DST_BYTES zero means that source area and destination area are
 140   overlapped, which means that we can produce a decoded text until it
 141   reaches at the head of not-yet-decoded source text.
 142
 143   Below is a template of these functions.  */
 144 #if 0
 145 static void
 146 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 147      struct coding_system *coding;
 148      unsigned char *source, *destination;
 149      int src_bytes, dst_bytes;
 150 {
 151   ...
 152 }
 153 #endif
 154
 155 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 156
 157   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 158   internal multibyte format to CODING.  The resulting unibyte text
 159   goes to a place pointed to by DESTINATION, the length of which
 160   should not exceed DST_BYTES.
 161
 162   These functions set the information of original and encoded texts in
 163   the members produced, produced_char, consumed, and consumed_char of
 164   the structure *CODING.  They also set the member result to one of
 165   CODING_FINISH_XXX indicating how the encoding finished.
 166
 167   DST_BYTES zero means that source area and destination area are
  168   overlapped, which means that we can produce an encoded text until it
 169   reaches at the head of not-yet-encoded source text.
 170
 171   Below is a template of these functions.  */
 172 #if 0
 173 static void
 174 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 175      struct coding_system *coding;
 176      unsigned char *source, *destination;
 177      int src_bytes, dst_bytes;
 178 {
 179   ...
 180 }
 181 #endif
 182
 183 /*** COMMONLY USED MACROS ***/
 184
  185 /* The following two macros ONE_MORE_BYTE and TWO_MORE_BYTES safely
  186    get one and two bytes from the source text respectively.
 187    If there are not enough bytes in the source, they jump to
 188    `label_end_of_loop'.  The caller should set variables `coding',
 189    `src' and `src_end' to appropriate pointer in advance.  These
 190    macros are called from decoding routines `decode_coding_XXX', thus
 191    it is assumed that the source text is unibyte.  */
 192
 193 #define ONE_MORE_BYTE(c1)                                       \
 194   do {                                                          \
 195     if (src >= src_end)                                         \
 196       {                                                         \
 197         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 198         goto label_end_of_loop;                                 \
 199       }                                                         \
 200     c1 = *src++;                                                \
 201   } while (0)
 202
 203 #define TWO_MORE_BYTES(c1, c2)                                  \
 204   do {                                                          \
 205     if (src + 1 >= src_end)                                     \
 206       {                                                         \
 207         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 208         goto label_end_of_loop;                                 \
 209       }                                                         \
 210     c1 = *src++;                                                \
 211     c2 = *src++;                                                \
 212   } while (0)
 213
 214
 215 /* Like ONE_MORE_BYTE, but 8-bit bytes of data at SRC are in multibyte
 216    form if MULTIBYTEP is nonzero.  */
 217
 218 #define ONE_MORE_BYTE_CHECK_MULTIBYTE(c1, multibytep)           \
 219   do {                                                          \
 220     if (src >= src_end)                                         \
 221       {                                                         \
 222         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 223         goto label_end_of_loop;                                 \
 224       }                                                         \
 225     c1 = *src++;                                                \
 226     if (multibytep && c1 == LEADING_CODE_8_BIT_CONTROL)         \
 227       c1 = *src++ - 0x20;                                       \
 228   } while (0)
 229
 230 /* Set C to the next character at the source text pointed by `src'.
 231    If there are not enough characters in the source, jump to
 232    `label_end_of_loop'.  The caller should set variables `coding'
 233    `src', `src_end', and `translation_table' to appropriate pointers
 234    in advance.  This macro is used in encoding routines
 235    `encode_coding_XXX', thus it assumes that the source text is in
 236    multibyte form except for 8-bit characters.  8-bit characters are
 237    in multibyte form if coding->src_multibyte is nonzero, else they
 238    are represented by a single byte.  */
 239
 240 #define ONE_MORE_CHAR(c)                                        \
 241   do {                                                          \
 242     int len = src_end - src;                                    \
 243     int bytes;                                                  \
 244     if (len <= 0)                                               \
 245       {                                                         \
 246         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 247         goto label_end_of_loop;                                 \
 248       }                                                         \
 249     if (coding->src_multibyte                                   \
 250         || UNIBYTE_STR_AS_MULTIBYTE_P (src, len, bytes))        \
 251       c = STRING_CHAR_AND_LENGTH (src, len, bytes);             \
 252     else                                                        \
 253       c = *src, bytes = 1;                                      \
 254     if (!NILP (translation_table))                              \
 255       c = translate_char (translation_table, c, -1, 0, 0);      \
 256     src += bytes;                                               \
 257   } while (0)
 258
 259
 260 /* Produce a multibyte form of characater C to `dst'.  Jump to
 261    `label_end_of_loop' if there's not enough space at `dst'.
 262
 263    If we are now in the middle of composition sequence, the decoded
 264    character may be ALTCHAR (for the current composition).  In that
 265    case, the character goes to coding->cmp_data->data instead of
 266    `dst'.
 267
 268    This macro is used in decoding routines.  */
 269
 270 #define EMIT_CHAR(c)                                                    \
 271   do {                                                                  \
 272     if (! COMPOSING_P (coding)                                          \
 273         || coding->composing == COMPOSITION_RELATIVE                    \
 274         || coding->composing == COMPOSITION_WITH_RULE)                  \
 275       {                                                                 \
 276         int bytes = CHAR_BYTES (c);                                     \
 277         if ((dst + bytes) > (dst_bytes ? dst_end : src))                \
 278           {                                                             \
 279             coding->result = CODING_FINISH_INSUFFICIENT_DST;            \
 280             goto label_end_of_loop;                                     \
 281           }                                                             \
 282         dst += CHAR_STRING (c, dst);                                    \
 283         coding->produced_char++;                                        \
 284       }                                                                 \
 285                                                                         \
 286     if (COMPOSING_P (coding)                                            \
 287         && coding->composing != COMPOSITION_RELATIVE)                   \
 288       {                                                                 \
 289         CODING_ADD_COMPOSITION_COMPONENT (coding, c);                   \
 290         coding->composition_rule_follows                                \
 291           = coding->composing != COMPOSITION_WITH_ALTCHARS;             \
 292       }                                                                 \
 293   } while (0)
 294
 295
 296 #define EMIT_ONE_BYTE(c)                                        \
 297   do {                                                          \
 298     if (dst >= (dst_bytes ? dst_end : src))                     \
 299       {                                                         \
 300         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 301         goto label_end_of_loop;                                 \
 302       }                                                         \
 303     *dst++ = c;                                                 \
 304   } while (0)
 305
 306 #define EMIT_TWO_BYTES(c1, c2)                                  \
 307   do {                                                          \
 308     if (dst + 2 > (dst_bytes ? dst_end : src))                  \
 309       {                                                         \
 310         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 311         goto label_end_of_loop;                                 \
 312       }                                                         \
 313     *dst++ = c1, *dst++ = c2;                                   \
 314   } while (0)
 315
 316 #define EMIT_BYTES(from, to)                                    \
 317   do {                                                          \
 318     if (dst + (to - from) > (dst_bytes ? dst_end : src))        \
 319       {                                                         \
 320         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 321         goto label_end_of_loop;                                 \
 322       }                                                         \
 323     while (from < to)                                           \
 324       *dst++ = *from++;                                         \
 325   } while (0)
 326
 327 \f
 328 /*** 1. Preamble ***/
 329
 330 #ifdef emacs
 331 #include <config.h>
 332 #endif
 333
 334 #include <stdio.h>
 335
 336 #ifdef emacs
 337
 338 #include "lisp.h"
 339 #include "buffer.h"
 340 #include "charset.h"
 341 #include "composite.h"
 342 #include "ccl.h"
 343 #include "coding.h"
 344 #include "window.h"
 345
 346 #else  /* not emacs */
 347
 348 #include "mulelib.h"
 349
 350 #endif /* not emacs */
 351
 352 Lisp_Object Qcoding_system, Qeol_type;
 353 Lisp_Object Qbuffer_file_coding_system;
 354 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 355 Lisp_Object Qno_conversion, Qundecided;
 356 Lisp_Object Qcoding_system_history;
 357 Lisp_Object Qsafe_chars;
 358 Lisp_Object Qvalid_codes;
 359
 360 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 361 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 362 Lisp_Object Qstart_process, Qopen_network_stream;
 363 Lisp_Object Qtarget_idx;
 364
 365 Lisp_Object Vselect_safe_coding_system_function;
 366
 367 /* Mnemonic string for each format of end-of-line.  */
 368 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 369 /* Mnemonic string to indicate format of end-of-line is not yet
 370    decided.  */
 371 Lisp_Object eol_mnemonic_undecided;
 372
 373 /* Format of end-of-line decided by system.  This is CODING_EOL_LF on
 374    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
 375 int system_eol_type;
 376
 377 #ifdef emacs
 378
 379 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 380
 381 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 382
 383 /* Coding system emacs-mule and raw-text are for converting only
 384    end-of-line format.  */
 385 Lisp_Object Qemacs_mule, Qraw_text;
 386
 387 /* Coding-systems are handed between Emacs Lisp programs and C internal
 388    routines by the following three variables.  */
 389 /* Coding-system for reading files and receiving data from process.  */
 390 Lisp_Object Vcoding_system_for_read;
 391 /* Coding-system for writing files and sending data to process.  */
 392 Lisp_Object Vcoding_system_for_write;
 393 /* Coding-system actually used in the latest I/O.  */
 394 Lisp_Object Vlast_coding_system_used;
 395
 396 /* A vector of length 256 which contains information about special
 397    Latin codes (especially for dealing with Microsoft codes).  */
 398 Lisp_Object Vlatin_extra_code_table;
 399
 400 /* Flag to inhibit code conversion of end-of-line format.  */
 401 int inhibit_eol_conversion;
 402
 403 /* Flag to inhibit ISO2022 escape sequence detection.  */
 404 int inhibit_iso_escape_detection;
 405
 406 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 407 int inherit_process_coding_system;
 408
 409 /* Coding system to be used to encode text for terminal display.  */
 410 struct coding_system terminal_coding;
 411
 412 /* Coding system to be used to encode text for terminal display when
 413    terminal coding system is nil.  */
 414 struct coding_system safe_terminal_coding;
 415
 416 /* Coding system of what is sent from terminal keyboard.  */
 417 struct coding_system keyboard_coding;
 418
 419 /* Default coding system to be used to write a file.  */
 420 struct coding_system default_buffer_file_coding;
 421
 422 Lisp_Object Vfile_coding_system_alist;
 423 Lisp_Object Vprocess_coding_system_alist;
 424 Lisp_Object Vnetwork_coding_system_alist;
 425
 426 Lisp_Object Vlocale_coding_system;
 427
 428 #endif /* emacs */
 429
 430 Lisp_Object Qcoding_category, Qcoding_category_index;
 431
 432 /* List of symbols `coding-category-xxx' ordered by priority.  */
 433 Lisp_Object Vcoding_category_list;
 434
 435 /* Table of coding categories (Lisp symbols).  */
 436 Lisp_Object Vcoding_category_table;
 437
 438 /* Table of names of symbol for each coding-category.  */
 439 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
 440   "coding-category-emacs-mule",
 441   "coding-category-sjis",
 442   "coding-category-iso-7",
 443   "coding-category-iso-7-tight",
 444   "coding-category-iso-8-1",
 445   "coding-category-iso-8-2",
 446   "coding-category-iso-7-else",
 447   "coding-category-iso-8-else",
 448   "coding-category-ccl",
 449   "coding-category-big5",
 450   "coding-category-utf-8",
 451   "coding-category-utf-16-be",
 452   "coding-category-utf-16-le",
 453   "coding-category-raw-text",
 454   "coding-category-binary"
 455 };
 456
 457 /* Table of pointers to coding systems corresponding to each coding
 458    categories.  */
 459 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
 460
 461 /* Table of coding category masks.  Nth element is a mask for a coding
 462    cateogry of which priority is Nth.  */
 463 static
 464 int coding_priorities[CODING_CATEGORY_IDX_MAX];
 465
 466 /* Flag to tell if we look up translation table on character code
 467    conversion.  */
 468 Lisp_Object Venable_character_translation;
 469 /* Standard translation table to look up on decoding (reading).  */
 470 Lisp_Object Vstandard_translation_table_for_decode;
 471 /* Standard translation table to look up on encoding (writing).  */
 472 Lisp_Object Vstandard_translation_table_for_encode;
 473
 474 Lisp_Object Qtranslation_table;
 475 Lisp_Object Qtranslation_table_id;
 476 Lisp_Object Qtranslation_table_for_decode;
 477 Lisp_Object Qtranslation_table_for_encode;
 478
 479 /* Alist of charsets vs revision number.  */
 480 Lisp_Object Vcharset_revision_alist;
 481
 482 /* Default coding systems used for process I/O.  */
 483 Lisp_Object Vdefault_process_coding_system;
 484
 485 /* Global flag to tell that we can't call post-read-conversion and
 486    pre-write-conversion functions.  Usually the value is zero, but it
 487    is set to 1 temporarily while such functions are running.  This is
 488    to avoid infinite recursive call.  */
 489 static int inhibit_pre_post_conversion;
 490
 491 /* Char-table containing safe coding systems of each character.  */
 492 Lisp_Object Vchar_coding_system_table;
 493 Lisp_Object Qchar_coding_system;
 494
 495 /* Return `safe-chars' property of coding system CODING.  Don't check
 496    validity of CODING.  */
 497
 498 Lisp_Object
 499 coding_safe_chars (coding)
 500      struct coding_system *coding;
 501 {
 502   Lisp_Object coding_spec, plist, safe_chars;
 503
 504   coding_spec = Fget (coding->symbol, Qcoding_system);
 505   plist = XVECTOR (coding_spec)->contents[3];
 506   safe_chars = Fplist_get (XVECTOR (coding_spec)->contents[3], Qsafe_chars);
 507   return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt);
 508 }
 509
 510 #define CODING_SAFE_CHAR_P(safe_chars, c) \
 511   (EQ (safe_chars, Qt) || !NILP (CHAR_TABLE_REF (safe_chars, c)))
 512
 513 \f
 514 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 515
 516 /* Emacs' internal format for representation of multiple character
 517    sets is a kind of multi-byte encoding, i.e. characters are
 518    represented by variable-length sequences of one-byte codes.
 519
 520    ASCII characters and control characters (e.g. `tab', `newline') are
 521    represented by one-byte sequences which are their ASCII codes, in
 522    the range 0x00 through 0x7F.
 523
 524    8-bit characters of the range 0x80..0x9F are represented by
 525    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
 526    code + 0x20).
 527
 528    8-bit characters of the range 0xA0..0xFF are represented by
 529    one-byte sequences which are their 8-bit code.
 530
 531    The other characters are represented by a sequence of `base
 532    leading-code', optional `extended leading-code', and one or two
 533    `position-code's.  The length of the sequence is determined by the
 534    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
 535    whereas extended leading-code and position-code take the range 0xA0
 536    through 0xFF.  See `charset.h' for more details about leading-code
 537    and position-code.
 538
 539    --- CODE RANGE of Emacs' internal format ---
 540    character set        range
 541    -------------        -----
 542    ascii                0x00..0x7F
 543    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
  544    eight-bit-graphic    0xA0..0xFF
 545    ELSE                 0x81..0x9D + [0xA0..0xFF]+
 546    ---------------------------------------------
 547
 548    As this is the internal character representation, the format is
 549    usually not used externally (i.e. in a file or in a data sent to a
 550    process).  But, it is possible to have a text externally in this
 551    format (i.e. by encoding by the coding system `emacs-mule').
 552
 553    In that case, a sequence of one-byte codes has a slightly different
 554    form.
 555
 556    At first, all characters in eight-bit-control are represented by
 557    one-byte sequences which are their 8-bit code.
 558
 559    Next, character composition data are represented by the byte
 560    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
 561    where,
 562         METHOD is 0xF0 plus one of composition method (enum
 563         composition_method),
 564
 565         BYTES is 0x20 plus a byte length of this composition data,
 566
 567         CHARS is 0x20 plus a number of characters composed by this
 568         data,
 569
  570         COMPONENTs are characters of multibyte form or composition
 571         rules encoded by two-byte of ASCII codes.
 572
 573    In addition, for backward compatibility, the following formats are
 574    also recognized as composition data on decoding.
 575
 576    0x80 MSEQ ...
 577    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
 578
 579    Here,
 580         MSEQ is a multibyte form but in these special format:
 581           ASCII: 0xA0 ASCII_CODE+0x80,
 582           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
 583         RULE is a one byte code of the range 0xA0..0xF0 that
 584         represents a composition rule.
 585   */
 586
 587 enum emacs_code_class_type emacs_code_class[256];
 588
 589 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 590    Check if a text is encoded in Emacs' internal format.  If it is,
 591    return CODING_CATEGORY_MASK_EMACS_MULE, else return 0.  */
 592
 593 static int
 594 detect_coding_emacs_mule (src, src_end, multibytep)
 595       unsigned char *src, *src_end;
 596       int multibytep;
 597 {
 598   unsigned char c;
 599   int composing = 0;
 600   /* Dummy for ONE_MORE_BYTE.  */
 601   struct coding_system dummy_coding;
 602   struct coding_system *coding = &dummy_coding;
 603
 604   while (1)
 605     {
 606       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
 607
 608       if (composing)
 609         {
 610           if (c < 0xA0)
 611             composing = 0;
 612           else if (c == 0xA0)
 613             {
 614               ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
 615               c &= 0x7F;
 616             }
 617           else
 618             c -= 0x20;
 619         }
 620
 621       if (c < 0x20)
 622         {
 623           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
 624             return 0;
 625         }
 626       else if (c >= 0x80 && c < 0xA0)
 627         {
 628           if (c == 0x80)
 629             /* Old leading code for a composite character.  */
 630             composing = 1;
 631           else
 632             {
 633               unsigned char *src_base = src - 1;
 634               int bytes;
 635
 636               if (!UNIBYTE_STR_AS_MULTIBYTE_P (src_base, src_end - src_base,
 637                                                bytes))
 638                 return 0;
 639               src = src_base + bytes;
 640             }
 641         }
 642     }
 643  label_end_of_loop:
 644   return CODING_CATEGORY_MASK_EMACS_MULE;
 645 }
 646
 647
 648 /* Record the starting position START and METHOD of one composition.  */
 649
 650 #define CODING_ADD_COMPOSITION_START(coding, start, method)     \
 651   do {                                                          \
 652     struct composition_data *cmp_data = coding->cmp_data;       \
 653     int *data = cmp_data->data + cmp_data->used;                \
 654     coding->cmp_data_start = cmp_data->used;                    \
 655     data[0] = -1;                                               \
 656     data[1] = cmp_data->char_offset + start;                    \
 657     data[3] = (int) method;                                     \
 658     cmp_data->used += 4;                                        \
 659   } while (0)
 660
 661 /* Record the ending position END of the current composition.  */
 662
 663 #define CODING_ADD_COMPOSITION_END(coding, end)                 \
 664   do {                                                          \
 665     struct composition_data *cmp_data = coding->cmp_data;       \
 666     int *data = cmp_data->data + coding->cmp_data_start;        \
 667     data[0] = cmp_data->used - coding->cmp_data_start;          \
 668     data[2] = cmp_data->char_offset + end;                      \
 669   } while (0)
 670
 671 /* Record one COMPONENT (alternate character or composition rule).  */
 672
 673 #define CODING_ADD_COMPOSITION_COMPONENT(coding, component)     \
 674   (coding->cmp_data->data[coding->cmp_data->used++] = component)
 675
 676
 677 /* Get one byte from a data pointed by SRC and increment SRC.  If SRC
 678    is not less than SRC_END, return -1 without inccrementing Src.  */
 679
 680 #define SAFE_ONE_MORE_BYTE() (src >= src_end ? -1 : *src++)
 681
 682
 683 /* Decode a character represented as a component of composition
 684    sequence of Emacs 20 style at SRC.  Set C to that character, store
 685    its multibyte form sequence at P, and set P to the end of that
 686    sequence.  If no valid character is found, set C to -1.  */
 687
 688 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(c, p)                \
 689   do {                                                          \
 690     int bytes;                                                  \
 691                                                                 \
 692     c = SAFE_ONE_MORE_BYTE ();                                  \
 693     if (c < 0)                                                  \
 694       break;                                                    \
 695     if (CHAR_HEAD_P (c))                                        \
 696       c = -1;                                                   \
 697     else if (c == 0xA0)                                         \
 698       {                                                         \
 699         c = SAFE_ONE_MORE_BYTE ();                              \
 700         if (c < 0xA0)                                           \
 701           c = -1;                                               \
 702         else                                                    \
 703           {                                                     \
 704             c -= 0xA0;                                          \
 705             *p++ = c;                                           \
 706           }                                                     \
 707       }                                                         \
 708     else if (BASE_LEADING_CODE_P (c - 0x20))                    \
 709       {                                                         \
 710         unsigned char *p0 = p;                                  \
 711                                                                 \
 712         c -= 0x20;                                              \
 713         *p++ = c;                                               \
 714         bytes = BYTES_BY_CHAR_HEAD (c);                         \
 715         while (--bytes)                                         \
 716           {                                                     \
 717             c = SAFE_ONE_MORE_BYTE ();                          \
 718             if (c < 0)                                          \
 719               break;                                            \
 720             *p++ = c;                                           \
 721           }                                                     \
 722         if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes))     \
 723           c = STRING_CHAR (p0, bytes);                          \
 724         else                                                    \
 725           c = -1;                                               \
 726       }                                                         \
 727     else                                                        \
 728       c = -1;                                                   \
 729   } while (0)
 730
 731
 732 /* Fetch the next byte at SRC as a composition rule of an Emacs 20
 733    style composition sequence and store the encoded rule in C, or -1
 734    if no valid rule is found.  Uses GREF and NREF of the caller.  */
 735
 736 #define DECODE_EMACS_MULE_COMPOSITION_RULE(c)           \
 737   do {                                                  \
 738     c = SAFE_ONE_MORE_BYTE ();                          \
 739     c -= 0xA0;                                          \
 740     if (c >= 0 && c < 81)  /* GREF * 9 + NREF */        \
 741       {                                                 \
 742         gref = c / 9, nref = c % 9;                     \
 743         c = COMPOSITION_ENCODE_RULE (gref, nref);       \
 744       }                                                 \
 745     else                                                \
 746       c = -1;                                           \
 747   } while (0)
 748
 749
 750 /* Decode composition sequence encoded by `emacs-mule' at the source
 751    pointed by SRC.  SRC_END is the end of source.  Store information
 752    of the composition in CODING->cmp_data.  The sequence has the form
 753      0x80 0xF0+METHOD 0xA0+LENGTH 0xA0+NCHARS COMPONENT ...
 754    where LENGTH counts the bytes of the whole sequence.
 755
 756    For backward compatibility, decode also a composition sequence of
 757    Emacs 20 style.  In that case, the composition sequence contains
 758    characters that should be extracted into a buffer or string.  Store
 759    those characters at *DESTINATION in multibyte form.
 760
 761    If we encounter an invalid byte sequence, return 0.
 762    If we encounter an insufficient source or destination, or
 763    insufficient space in CODING->cmp_data, return -1.
 764    Otherwise, return consumed bytes in the source.  */
 765 static INLINE int
 766 decode_composition_emacs_mule (coding, src, src_end,
 767                                destination, dst_end, dst_bytes)
 768      struct coding_system *coding;
 769      unsigned char *src, *src_end, **destination, *dst_end;
 770      int dst_bytes;
 771 {
 772   unsigned char *dst = *destination;
 773   int method, data_len, nchars;
 774   unsigned char *src_base = src++;      /* Points at the leading 0x80.  */
 775   /* Store components of composition.  */
 776   int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH];
 777   int ncomponent;
 778   /* Store multibyte form of characters to be composed.  This is for
 779      Emacs 20 style composition sequence.  */
 780   unsigned char buf[MAX_COMPOSITION_COMPONENTS * MAX_MULTIBYTE_LENGTH];
 781   unsigned char *bufp = buf;
 782   int c, i, gref, nref;
 783
 784   if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
 785       >= COMPOSITION_DATA_SIZE)
 786     {
 787       coding->result = CODING_FINISH_INSUFFICIENT_CMP;
 788       return -1;
 789     }
 790
 791   ONE_MORE_BYTE (c);
 792   if (c - 0xF0 >= COMPOSITION_RELATIVE
 793            && c - 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS)
 794     {
 795       int with_rule;
 796
 797       method = c - 0xF0;
 798       with_rule = (method == COMPOSITION_WITH_RULE
 799                    || method == COMPOSITION_WITH_RULE_ALTCHARS);
 800       ONE_MORE_BYTE (c);
 801       data_len = c - 0xA0;
 802       if (data_len < 4 || src_base + data_len > src_end)
 803         return 0;
 804       ONE_MORE_BYTE (c);
 805       nchars = c - 0xA0;
 806       if (nchars < 1)           /* Must cover at least one character.  */
 807         return 0;
 808       for (ncomponent = 0; src < src_base + data_len; ncomponent++)
 809         {
 810           /* More would overflow COMPONENT, so it can't be valid.  */
 811           if (ncomponent >= COMPOSITION_DATA_MAX_BUNCH_LENGTH)
 812             return 0;
 813           if (ncomponent % 2 && with_rule)      /* Odd slots hold rules.  */
 814             {
 815               ONE_MORE_BYTE (gref);
 816               gref -= 32;
 817               ONE_MORE_BYTE (nref);
 818               nref -= 32;
 819               c = COMPOSITION_ENCODE_RULE (gref, nref);
 820             }
 821           else
 822             {
 823               int bytes;
 824               if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
 825                 c = STRING_CHAR (src, bytes);
 826               else
 827                 c = *src, bytes = 1;
 828               src += bytes;
 829             }
 830           component[ncomponent] = c;
 831         }
 832     }
 833   else
 834     {
 835       /* This may be an old Emacs 20 style format.  See the comment at
 836          the section 2 of this file.  */
 837       while (src < src_end && !CHAR_HEAD_P (*src)) src++;
 838       if (src == src_end && !(coding->mode & CODING_MODE_LAST_BLOCK))
 839         goto label_end_of_loop;   /* May continue in the next block.  */
 840
 841       src_end = src;            /* Decode only up to the next char head.  */
 842       src = src_base + 1;
 843       if (c < 0xC0)
 844         {
 845           method = COMPOSITION_RELATIVE;
 846           for (ncomponent = 0; ncomponent < MAX_COMPOSITION_COMPONENTS;)
 847             {
 848               DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 849               if (c < 0)
 850                 break;
 851               component[ncomponent++] = c;
 852             }
 853           if (ncomponent < 2)
 854             return 0;
 855           nchars = ncomponent;
 856         }
 857       else if (c == 0xFF)
 858         {
 859           method = COMPOSITION_WITH_RULE;
 860           src++;                /* Skip the 0xFF.  */
 861           DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 862           if (c < 0)
 863             return 0;
 864           component[0] = c;
 865           for (ncomponent = 1; ncomponent < MAX_COMPOSITION_COMPONENTS * 2 - 1;)
 866             {
 867               DECODE_EMACS_MULE_COMPOSITION_RULE (c);
 868               if (c < 0)
 869                 break;
 870               component[ncomponent++] = c;
 871               DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 872               if (c < 0)
 873                 break;
 874               component[ncomponent++] = c;
 875             }
 876           if (ncomponent < 3)
 877             return 0;
 878           nchars = (ncomponent + 1) / 2;        /* Chars and rules alternate.  */
 879         }
 880       else
 881         return 0;
 882     }
 883
 884   if (buf == bufp || dst + (bufp - buf) <= (dst_bytes ? dst_end : src))
 885     {
 886       CODING_ADD_COMPOSITION_START (coding, coding->produced_char, method);
 887       for (i = 0; i < ncomponent; i++)
 888         CODING_ADD_COMPOSITION_COMPONENT (coding, component[i]);
 889       CODING_ADD_COMPOSITION_END (coding, coding->produced_char + nchars);
 890       if (buf < bufp)
 891         {
 892           unsigned char *p = buf;
 893           EMIT_BYTES (p, bufp);
 894           *destination += bufp - buf;
 895           coding->produced_char += nchars;
 896         }
 897       return (src - src_base);
 898     }
 899  label_end_of_loop:
 900   return -1;
 901 }
 902
 903 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
 904 /* Decode emacs-mule text: normalize EOLs, store compositions, copy chars.  */
 905 static void
 906 decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
 907      struct coding_system *coding;
 908      unsigned char *source, *destination;
 909      int src_bytes, dst_bytes;
 910 {
 911   unsigned char *src = source;
 912   unsigned char *src_end = source + src_bytes;
 913   unsigned char *dst = destination;
 914   unsigned char *dst_end = destination + dst_bytes;
 915   /* SRC_BASE remembers the start position in source in each loop.
 916      The loop will be exited when there's not enough source text, or
 917      when there's not enough destination area to produce a
 918      character.  */
 919   unsigned char *src_base;
 920
 921   coding->produced_char = 0;
 922   while ((src_base = src) < src_end)
 923     {
 924       unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p;
 925       int bytes;
 926
 927       if (*src == '\r')
 928         {
 929           int c = *src++;
 930
 931           if (coding->eol_type == CODING_EOL_CR)
 932             c = '\n';               /* CR alone ends a line.  */
 933           else if (coding->eol_type == CODING_EOL_CRLF)
 934             {
 935               ONE_MORE_BYTE (c);
 936               if (c != '\n')
 937                 {
 938                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
 939                     {
 940                       coding->result = CODING_FINISH_INCONSISTENT_EOL;
 941                       goto label_end_of_loop;
 942                     }
 943                   src--;            /* Not CRLF: unread the byte, keep the CR.  */
 944                   c = '\r';
 945                 }
 946             }
 947           *dst++ = c;       /* NOTE(review): stored without a destination-space check.  */
 948           coding->produced_char++;
 949           continue;
 950         }
 951       else if (*src == '\n')
 952         {
 953           if ((coding->eol_type == CODING_EOL_CR
 954                || coding->eol_type == CODING_EOL_CRLF)
 955               && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
 956             {
 957               coding->result = CODING_FINISH_INCONSISTENT_EOL;
 958               goto label_end_of_loop;
 959             }
 960           *dst++ = *src++;  /* NOTE(review): stored without a destination-space check.  */
 961           coding->produced_char++;
 962           continue;
 963         }
 964       else if (*src == 0x80)
 965         {
 966           /* Start of composition data.  */
 967           int consumed  = decode_composition_emacs_mule (coding, src, src_end,
 968                                                          &dst, dst_end,
 969                                                          dst_bytes);
 970           if (consumed < 0)         /* Out of source, destination or cmp_data space.  */
 971             goto label_end_of_loop;
 972           else if (consumed > 0)
 973             {
 974               src += consumed;
 975               continue;
 976             }
 977           bytes = CHAR_STRING (*src, tmp);  /* Invalid sequence: emit the 0x80 by itself.  */
 978           p = tmp;
 979           src++;
 980         }
 981       else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
 982         {
 983           p = src;                  /* Copy BYTES source bytes as they are.  */
 984           src += bytes;
 985         }
 986       else
 987         {
 988           bytes = CHAR_STRING (*src, tmp);  /* Emit the single byte *SRC via TMP.  */
 989           p = tmp;
 990           src++;
 991         }
 992       if (dst + bytes >= (dst_bytes ? dst_end : src))
 993         {                           /* Limit is SRC when DST_BYTES is 0 -- presumably in-place decoding.  */
 994           coding->result = CODING_FINISH_INSUFFICIENT_DST;
 995           break;
 996         }
 997       while (bytes--) *dst++ = *p++;
 998       coding->produced_char++;
 999     }
1000  label_end_of_loop:             /* Report progress up to SRC_BASE only.  */
1001   coding->consumed = coding->consumed_char = src_base - source;
1002   coding->produced = dst - destination;
1003 }
1004
1005
1006 /* Encode composition data stored at DATA into a special byte sequence
1007    starting with 0x80 and emit it at DST.  Update CODING->cmp_data_start
1008    and maybe CODING->cmp_data for the next call.  */
1009 /* Uses DST, DST_END, DST_BYTES, SRC and label_end_of_loop of the caller.  */
1010 #define ENCODE_COMPOSITION_EMACS_MULE(coding, data)                     \
1011   do {                                                                  \
1012     unsigned char buf[1024], *p0 = buf, *p;                             \
1013     int len = data[0];  /* Ints in this bunch.  */                      \
1014     int i;                                                              \
1015     /* DATA is [LENGTH, START, END, METHOD, COMPONENT ...].  */         \
1016     buf[0] = 0x80;                                                      \
1017     buf[1] = 0xF0 + data[3];    /* METHOD */                            \
1018     buf[3] = 0xA0 + (data[2] - data[1]); /* COMPOSED-CHARS */           \
1019     p = buf + 4;  /* Components follow the 4-byte header.  */           \
1020     if (data[3] == COMPOSITION_WITH_RULE                                \
1021         || data[3] == COMPOSITION_WITH_RULE_ALTCHARS)                   \
1022       {                                                                 \
1023         p += CHAR_STRING (data[4], p);                                  \
1024         for (i = 5; i < len; i += 2)  /* (RULE, CHAR) pairs */          \
1025           {                                                             \
1026             int gref, nref;                                             \
1027              COMPOSITION_DECODE_RULE (data[i], gref, nref);             \
1028             *p++ = 0x20 + gref;                                         \
1029             *p++ = 0x20 + nref;                                         \
1030             p += CHAR_STRING (data[i + 1], p);                          \
1031           }                                                             \
1032       }                                                                 \
1033     else                                                                \
1034       {                                                                 \
1035         for (i = 4; i < len; i++)                                       \
1036           p += CHAR_STRING (data[i], p);                                \
1037       }                                                                 \
1038     buf[2] = 0xA0 + (p - buf);  /* BYTES, header included */            \
1039     /* NOTE(review): + 4 looks like slack; P - BUF has the header.  */  \
1040     if (dst + (p - buf) + 4 > (dst_bytes ? dst_end : src))              \
1041       {                                                                 \
1042         coding->result = CODING_FINISH_INSUFFICIENT_DST;                \
1043         goto label_end_of_loop;                                         \
1044       }                                                                 \
1045     while (p0 < p)                                                      \
1046       *dst++ = *p0++;                                                   \
1047     coding->cmp_data_start += data[0];  /* Step to the next bunch.  */  \
1048     if (coding->cmp_data_start == coding->cmp_data->used                \
1049         && coding->cmp_data->next)                                      \
1050       {                                                                 \
1051         coding->cmp_data = coding->cmp_data->next;                      \
1052         coding->cmp_data_start = 0;                                     \
1053       }                                                                 \
1054   } while (0)
1055
1056
1057 static void encode_eol P_ ((struct coding_system *, unsigned char *,
1058                             unsigned char *, int, int));
1059 /* Encode SOURCE into DESTINATION: add composition data, convert EOLs.  */
1060 static void
1061 encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
1062      struct coding_system *coding;
1063      unsigned char *source, *destination;
1064      int src_bytes, dst_bytes;
1065 {
1066   unsigned char *src = source;
1067   unsigned char *src_end = source + src_bytes;
1068   unsigned char *dst = destination;
1069   unsigned char *dst_end = destination + dst_bytes;
1070   unsigned char *src_base;      /* Start of the character being encoded.  */
1071   int c;
1072   int char_offset;
1073   int *data;                    /* Current bunch in CODING->cmp_data.  */
1074
1075   Lisp_Object translation_table;
1076 /* NOTE(review): not used below by name; presumably read by ONE_MORE_CHAR.  */
1077   translation_table = Qnil;
1078
1079   /* Optimization for the case that there's no composition.  */
1080   if (!coding->cmp_data || coding->cmp_data->used == 0)
1081     {
1082       encode_eol (coding, source, destination, src_bytes, dst_bytes);
1083       return;
1084     }
1085
1086   char_offset = coding->cmp_data->char_offset;
1087   data = coding->cmp_data->data + coding->cmp_data_start;
1088   while (1)                     /* Left only by a goto to label_end_of_loop.  */
1089     {
1090       src_base = src;
1091
1092       /* If SRC starts a composition, encode the information about the
1093          composition in advance.  */
1094       if (coding->cmp_data_start < coding->cmp_data->used
1095           && char_offset + coding->consumed_char == data[1])
1096         {
1097           ENCODE_COMPOSITION_EMACS_MULE (coding, data);
1098           char_offset = coding->cmp_data->char_offset;  /* The macro may have moved to the next cmp_data.  */
1099           data = coding->cmp_data->data + coding->cmp_data_start;
1100         }
1101 /* NOTE(review): the macros below presumably jump to label_end_of_loop when out of source or destination.  */
1102       ONE_MORE_CHAR (c);
1103       if (c == '\n' && (coding->eol_type == CODING_EOL_CRLF
1104                         || coding->eol_type == CODING_EOL_CR))
1105         {
1106           if (coding->eol_type == CODING_EOL_CRLF)
1107             EMIT_TWO_BYTES ('\r', c);
1108           else
1109             EMIT_ONE_BYTE ('\r');   /* CODING_EOL_CR */
1110         }
1111       else if (SINGLE_BYTE_CHAR_P (c))
1112         EMIT_ONE_BYTE (c);
1113       else
1114         EMIT_BYTES (src_base, src); /* Emit the character's source bytes.  */
1115       coding->consumed_char++;
1116     }
1117  label_end_of_loop:
1118   coding->consumed = src_base - source;
1119   coding->produced = coding->produced_char = dst - destination;
1120   return;
1121 }
1122
1123 \f
1124 /*** 3. ISO2022 handlers ***/
1125
1126 /* The following note describes the coding system ISO2022 briefly.
1127    Since the intention of this note is to help understand the
1128    functions in this file, some parts are NOT ACCURATE or OVERLY
1129    SIMPLIFIED.  For thorough understanding, please refer to the
1130    original document of ISO2022.
1131
1132    ISO2022 provides many mechanisms to encode several character sets
1133    in 7-bit and 8-bit environments.  For 7-bit environments, all text
1134    is encoded using bytes less than 128.  This may make the encoded
1135    text a little bit longer, but the text passes more easily through
1136    several gateways, some of which strip off MSB (Most Significant Bit).
1137
1138    There are two kinds of character sets: control character set and
1139    graphic character set.  The former contains control characters such
1140    as `newline' and `escape' to provide control functions (control
1141    functions are also provided by escape sequences).  The latter
1142    contains graphic characters such as 'A' and '-'.  Emacs recognizes
1143    two control character sets and many graphic character sets.
1144
1145    Graphic character sets are classified into one of the following
1146    four classes, according to the number of bytes (DIMENSION) and
1147    number of characters in one dimension (CHARS) of the set:
1148    - DIMENSION1_CHARS94
1149    - DIMENSION1_CHARS96
1150    - DIMENSION2_CHARS94
1151    - DIMENSION2_CHARS96
1152
1153    In addition, each character set is assigned an identification tag,
1154    unique for each set, called "final character" (denoted as <F>
1155    hereafter).  The <F> of each character set is decided by ECMA(*)
1156    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
1157    (0x30..0x3F are for private use only).
1158
1159    Note (*): ECMA = European Computer Manufacturers Association
1160
1161    Here are examples of graphic character set [NAME(<F>)]:
1162         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
1163         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
1164         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
1165         o DIMENSION2_CHARS96 -- none for the moment
1166
1167    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
1168         C0 [0x00..0x1F] -- control character plane 0
1169         GL [0x20..0x7F] -- graphic character plane 0
1170         C1 [0x80..0x9F] -- control character plane 1
1171         GR [0xA0..0xFF] -- graphic character plane 1
1172
1173    A control character set is directly designated and invoked to C0 or
1174    C1 by an escape sequence.  The most common case is that:
1175    - ISO646's  control character set is designated/invoked to C0, and
1176    - ISO6429's control character set is designated/invoked to C1,
1177    and usually these designations/invocations are omitted in encoded
1178    text.  In a 7-bit environment, only C0 can be used, and a control
1179    character for C1 is encoded by an appropriate escape sequence to
1180    fit into the environment.  All control characters for C1 are
1181    defined to have corresponding escape sequences.
1182
1183    A graphic character set is at first designated to one of four
1184    graphic registers (G0 through G3), then these graphic registers are
1185    invoked to GL or GR.  These designations and invocations can be
1186    done independently.  The most common case is that G0 is invoked to
1187    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
1188    these invocations and designations are omitted in encoded text.
1189    In a 7-bit environment, only GL can be used.
1190
1191    When a graphic character set of CHARS94 is invoked to GL, codes
1192    0x20 and 0x7F of the GL area work as control characters SPACE and
1193    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
1194    be used.
1195
1196    There are two ways of invocation: locking-shift and single-shift.
1197    With locking-shift, the invocation lasts until the next different
1198    invocation, whereas with single-shift, the invocation affects the
1199    following character only and doesn't affect the locking-shift
1200    state.  Invocations are done by the following control characters or
1201    escape sequences:
1202
1203    ----------------------------------------------------------------------
1204    abbrev  function                  cntrl escape seq   description
1205    ----------------------------------------------------------------------
1206    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
1207    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
1208    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
1209    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
1210    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
1211    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
1212    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
1213    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
1214    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
1215    ----------------------------------------------------------------------
1216    (*) These are not used by any known coding system.
1217
1218    Control characters for these functions are defined by macros
1219    ISO_CODE_XXX in `coding.h'.
1220
1221    Designations are done by the following escape sequences:
1222    ----------------------------------------------------------------------
1223    escape sequence      description
1224    ----------------------------------------------------------------------
1225    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
1226    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
1227    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
1228    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
1229    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
1230    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
1231    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
1232    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
1233    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
1234    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
1235    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
1236    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
1237    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
1238    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
1239    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
1240    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
1241    ----------------------------------------------------------------------
1242
1243    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
1244    of dimension 1, chars 94, and final character <F>, etc...
1245
1246    Note (*): Although these designations are not allowed in ISO2022,
1247    Emacs accepts them on decoding, and produces them on encoding
1248    CHARS96 character sets in a coding system which is characterized as
1249    7-bit environment, non-locking-shift, and non-single-shift.
1250
1251    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
1252    '(' can be omitted.  We refer to this as "short-form" hereafter.
1253
1254    Now you may notice that there are a lot of ways for encoding the
1255    same multilingual text in ISO2022.  Actually, there exist many
1256    coding systems such as Compound Text (used in X11's inter client
1257    communication), ISO-2022-JP (used in Japanese internet), ISO-2022-KR
1258    (used in Korean internet), EUC (Extended UNIX Code, used in Asian
1259    localized platforms), and all of these are variants of ISO2022.
1260
1261    In addition to the above, Emacs handles two more kinds of escape
1262    sequences: ISO6429's direction specification and Emacs' private
1263    sequence for specifying character composition.
1264
1265    ISO6429's direction specification takes the following form:
1266         o CSI ']'      -- end of the current direction
1267         o CSI '0' ']'  -- end of the current direction
1268         o CSI '1' ']'  -- start of left-to-right text
1269         o CSI '2' ']'  -- start of right-to-left text
1270    The control character CSI (0x9B: control sequence introducer) is
1271    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
1272
1273    Character composition specification takes the following form:
1274         o ESC '0' -- start relative composition
1275         o ESC '1' -- end composition
1276         o ESC '2' -- start rule-base composition (*)
1277         o ESC '3' -- start relative composition with alternate chars  (**)
1278         o ESC '4' -- start rule-base composition with alternate chars  (**)
1279   Since these are not standard escape sequences of any ISO standard,
1280   the use of them for these meaning is restricted to Emacs only.
1281
1282   (*) This form is used only in Emacs 20.5 and the older versions,
1283   but the newer versions can safely decode it.
1284   (**) This form is used only in Emacs 21.1 and the newer versions,
1285   and the older versions can't decode it.
1286
1287   Here's a list of example usages of these composition escape
1288   sequences (categorized by `enum composition_method').
1289
1290   COMPOSITION_RELATIVE:
1291         ESC 0 CHAR [ CHAR ] ESC 1
1292   COMPOSITION_WITH_RULE:
1293         ESC 2 CHAR [ RULE CHAR ] ESC 1
1294   COMPOSITION_WITH_ALTCHARS:
1295         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
1296   COMPOSITION_WITH_RULE_ALTCHARS:
1297         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
1298
1299 enum iso_code_class_type iso_code_class[256];  /* 256 entries, presumably one per byte value.  */
1300 /* Nonzero if coding system IDX is set, CHARSET is ASCII or C is safe for it, and CHARSET has a requested designation; assigns SAFE_CHARS.  */
1301 #define CHARSET_OK(idx, charset, c)                                     \
1302   (coding_system_table[idx]                                             \
1303    && (charset == CHARSET_ASCII                                         \
1304        || (safe_chars = coding_safe_chars (coding_system_table[idx]),   \
1305            CODING_SAFE_CHAR_P (safe_chars, c)))                         \
1306    && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \
1307                                               charset)                  \
1308        != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
1309 /* Nonzero if coding system IDX has an initial designation for register 1 (presumably G1).  */
1310 #define SHIFT_OUT_OK(idx) \
1311   (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
1312
1313 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1314    Check if a text is encoded in ISO2022.  If it is, returns an
1315    integer in which the appropriate flag bits, any of:
1316         CODING_CATEGORY_MASK_ISO_7
1317         CODING_CATEGORY_MASK_ISO_7_TIGHT
1318         CODING_CATEGORY_MASK_ISO_8_1
1319         CODING_CATEGORY_MASK_ISO_8_2
1320         CODING_CATEGORY_MASK_ISO_7_ELSE
1321         CODING_CATEGORY_MASK_ISO_8_ELSE
1322    are set.  If a code which should never appear in ISO2022 is found,
1323    returns 0.  */
1324
1325 static int
1326 detect_coding_iso2022 (src, src_end, multibytep)
1327      unsigned char *src, *src_end;
1328      int multibytep;
1329 {
1330   int mask = CODING_CATEGORY_MASK_ISO;
1331   int mask_found = 0;
1332   int reg[4], shift_out = 0, single_shifting = 0;
1333   int c, c1, charset;
1334   /* Dummy for ONE_MORE_BYTE.  */
1335   struct coding_system dummy_coding;
1336   struct coding_system *coding = &dummy_coding;
1337   Lisp_Object safe_chars;
1338
1339   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
1340   while (mask && src < src_end)
1341     {
1342       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1343       switch (c)
1344         {
1345         case ISO_CODE_ESC:
1346           if (inhibit_iso_escape_detection)
1347             break;
1348           single_shifting = 0;
1349           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1350           if (c >= '(' && c <= '/')
1351             {
1352               /* Designation sequence for a charset of dimension 1.  */
1353               ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
1354               if (c1 < ' ' || c1 >= 0x80
1355                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
1356                 /* Invalid designation sequence.  Just ignore.  */
1357                 break;
1358               reg[(c - '(') % 4] = charset;
1359             }
1360           else if (c == '$')
1361             {
1362               /* Designation sequence for a charset of dimension 2.  */
1363               ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1364               if (c >= '@' && c <= 'B')
1365                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
1366                 reg[0] = charset = iso_charset_table[1][0][c];
1367               else if (c >= '(' && c <= '/')
1368                 {
1369                   ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
1370                   if (c1 < ' ' || c1 >= 0x80
1371                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
1372                     /* Invalid designation sequence.  Just ignore.  */
1373                     break;
1374                   reg[(c - '(') % 4] = charset;
1375                 }
1376               else
1377                 /* Invalid designation sequence.  Just ignore.  */
1378                 break;
1379             }
1380           else if (c == 'N' || c == 'O')
1381             {
1382               /* ESC <Fe> for SS2 or SS3.  */
1383               mask &= CODING_CATEGORY_MASK_ISO_7_ELSE;
1384               break;
1385             }
1386           else if (c >= '0' && c <= '4')
1387             {
1388               /* ESC <Fp> for start/end composition.  */
1389               mask_found |= CODING_CATEGORY_MASK_ISO;
1390               break;
1391             }
1392           else
1393             /* Invalid escape sequence.  Just ignore.  */
1394             break;
1395
1396           /* We found a valid designation sequence for CHARSET.  */
1397           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
1398           c = MAKE_CHAR (charset, 0, 0);
1399           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset, c))
1400             mask_found |= CODING_CATEGORY_MASK_ISO_7;
1401           else
1402             mask &= ~CODING_CATEGORY_MASK_ISO_7;
1403           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset, c))
1404             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
1405           else
1406             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
1407           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset, c))
1408             mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
1409           else
1410             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
1411           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset, c))
1412             mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
1413           else
1414             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
1415           break;
1416
1417         case ISO_CODE_SO:
1418           if (inhibit_iso_escape_detection)
1419             break;
1420           single_shifting = 0;
1421           if (shift_out == 0
1422               && (reg[1] >= 0
1423                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
1424                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
1425             {
1426               /* Locking shift out.  */
1427               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
1428               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
1429             }
1430           break;
1431
1432         case ISO_CODE_SI:
1433           if (inhibit_iso_escape_detection)
1434             break;
1435           single_shifting = 0;
1436           if (shift_out == 1)
1437             {
1438               /* Locking shift in.  */
1439               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
1440               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
1441             }
1442           break;
1443
1444         case ISO_CODE_CSI:
1445           single_shifting = 0;
1446         case ISO_CODE_SS2:
1447         case ISO_CODE_SS3:
1448           {
1449             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
1450
1451             if (inhibit_iso_escape_detection)
1452               break;
1453             if (c != ISO_CODE_CSI)
1454               {
1455                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1456                     & CODING_FLAG_ISO_SINGLE_SHIFT)
1457                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1458                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1459                     & CODING_FLAG_ISO_SINGLE_SHIFT)
1460                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1461                 single_shifting = 1;
1462               }
1463             if (VECTORP (Vlatin_extra_code_table)
1464                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1465               {
1466                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1467                     & CODING_FLAG_ISO_LATIN_EXTRA)
1468                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1469                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1470                     & CODING_FLAG_ISO_LATIN_EXTRA)
1471                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1472               }
1473             mask &= newmask;
1474             mask_found |= newmask;
1475           }
1476           break;
1477
1478         default:
1479           if (c < 0x80)
1480             {
1481               single_shifting = 0;
1482               break;
1483             }
1484           else if (c < 0xA0)
1485             {
1486               single_shifting = 0;
1487               if (VECTORP (Vlatin_extra_code_table)
1488                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1489                 {
1490                   int newmask = 0;
1491
1492                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1493                       & CODING_FLAG_ISO_LATIN_EXTRA)
1494                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1495                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1496                       & CODING_FLAG_ISO_LATIN_EXTRA)
1497                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1498                   mask &= newmask;
1499                   mask_found |= newmask;
1500                 }
1501               else
1502                 return 0;
1503             }
1504           else
1505             {
1506               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
1507                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
1508               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
1509               /* Check the length of succeeding codes of the range
1510                  0xA0..0FF.  If the byte length is odd, we exclude
1511                  CODING_CATEGORY_MASK_ISO_8_2.  We can check this only
1512                  when we are not single shifting.  */
1513               if (!single_shifting
1514                   && mask & CODING_CATEGORY_MASK_ISO_8_2)
1515                 {
1516                   int i = 1;
1517                   while (src < src_end)
1518                     {
1519                       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1520                       if (c < 0xA0)
1521                         break;
1522                       i++;
1523                     }
1524
1525                   if (i & 1 && src < src_end)
1526                     mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
1527                   else
1528                     mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
1529                 }
1530             }
1531           break;
1532         }
1533     }
1534  label_end_of_loop:
1535   return (mask & mask_found);
1536 }
1537
1538 /* Build the character of charset CHARSET whose 1st position code is
1539    C1 and whose 2nd position code is C2.  When the variable
1540    `translation_table' is non-nil, return instead the code obtained
1541    from translate_char with that table.  */
1542
1543 #define DECODE_ISO_CHARACTER(charset, c1, c2)                  \
1544   (!NILP (translation_table)                                   \
1545    ? translate_char (translation_table, -1, charset, c1, c2)   \
1546    : MAKE_CHAR (charset, c1, c2))
1547
1548 /* Designate charset DIMENSION/CHARS/FINAL_CHAR to register REG of CODING.  */
1549 #define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
1550   do {                                                                     \
1551     int charset, c;                                                        \
1552                                                                            \
1553     if (final_char < '0' || final_char >= 128)                             \
1554       goto label_invalid_code;  /* FINAL_CHAR is out of range.  */         \
1555     charset = ISO_CHARSET_TABLE (make_number (dimension),                  \
1556                                  make_number (chars),                      \
1557                                  make_number (final_char));                \
1558     c = MAKE_CHAR (charset, 0, 0);                                         \
1559     if (charset >= 0                                                       \
1560         && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
1561             || CODING_SAFE_CHAR_P (safe_chars, c)))                        \
1562       {                                                                    \
1563         if (coding->spec.iso2022.last_invalid_designation_register == 0    \
1564             && reg == 0                                                    \
1565             && charset == CHARSET_ASCII)                                   \
1566           {                                                                \
1567             /* We should insert this designation sequence as is so         \
1568                that it is surely written back to a file.  */               \
1569             coding->spec.iso2022.last_invalid_designation_register = -1;   \
1570             goto label_invalid_code;                                       \
1571           }                                                                \
1572         coding->spec.iso2022.last_invalid_designation_register = -1;       \
1573         if ((coding->mode & CODING_MODE_DIRECTION)                         \
1574             && CHARSET_REVERSE_CHARSET (charset) >= 0)                     \
1575           charset = CHARSET_REVERSE_CHARSET (charset);                     \
1576         CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;               \
1577       }                                                                    \
1578     else  /* Negative charset, or neither requested for REG nor safe.  */  \
1579       {                                                                    \
1580         coding->spec.iso2022.last_invalid_designation_register = reg;      \
1581         goto label_invalid_code;                                           \
1582       }                                                                    \
1583   } while (0)
1584
1585 /* Create a new composition_data block whose char_offset is CHAR_OFFSET
1586    and link it after the block CODING currently points to, if any.  */
1587
1588 void
1589 coding_allocate_composition_data (coding, char_offset)
1590      struct coding_system *coding;
1591      int char_offset;
1592 {
1593   struct composition_data *block
1594     = (struct composition_data *) xmalloc (sizeof *block);
1595
1596   block->used = 0;
1597   block->char_offset = char_offset;
1598   block->next = NULL;
1599   block->prev = coding->cmp_data;
1600   if (block->prev != NULL)
1601     block->prev->next = block;
1602   coding->cmp_data_start = 0;
1603   coding->cmp_data = block;
1604 }
1605
1606 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
1607    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
1608    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
1609    ESC 3 : altchar composition :  ESC 3 ALT ... ESC 0 CHAR ... ESC 1
1610    ESC 4 : alt&rule composition : ESC 4 ALT RULE .. ALT ESC 0 CHAR ... ESC 1
1611    C1 is the byte that followed ESC ('0', '2', '3', or '4').  */
1612
1613 #define DECODE_COMPOSITION_START(c1)                                       \
1614   do {                                                                     \
1615     if (coding->composing == COMPOSITION_DISABLED)                         \
1616       {  /* Composition disabled: emit ESC and C1 & 0x7f as is.  */        \
1617         *dst++ = ISO_CODE_ESC;                                             \
1618         *dst++ = c1 & 0x7f;                                                \
1619         coding->produced_char += 2;                                        \
1620       }                                                                    \
1621     else if (!COMPOSING_P (coding))                                        \
1622       {                                                                    \
1623         /* This is surely the start of a composition.  We must be sure     \
1624            that coding->cmp_data has enough space to store the             \
1625            information about the composition.  If not, terminate the       \
1626            current decoding loop, allocate one more memory block for       \
1627            coding->cmp_data in the caller, then start the decoding         \
1628            loop again.  We can't allocate memory here directly because     \
1629            it may cause buffer/string relocation.  */                      \
1630         if (!coding->cmp_data                                              \
1631             || (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \
1632                 >= COMPOSITION_DATA_SIZE))                                 \
1633           {                                                                \
1634             coding->result = CODING_FINISH_INSUFFICIENT_CMP;               \
1635             goto label_end_of_loop;                                        \
1636           }                                                                \
1637         coding->composing = (c1 == '0' ? COMPOSITION_RELATIVE              \
1638                              : c1 == '2' ? COMPOSITION_WITH_RULE           \
1639                              : c1 == '3' ? COMPOSITION_WITH_ALTCHARS       \
1640                              : COMPOSITION_WITH_RULE_ALTCHARS);            \
1641         CODING_ADD_COMPOSITION_START (coding, coding->produced_char,       \
1642                                       coding->composing);                  \
1643         coding->composition_rule_follows = 0;                              \
1644       }                                                                    \
1645     else                                                                   \
1646       {                                                                    \
1647         /* We are already handling a composition.  If the method is        \
1648            the following two, the codes following the current escape       \
1649            sequence are actual characters stored in a buffer.  */          \
1650         if (coding->composing == COMPOSITION_WITH_ALTCHARS                 \
1651             || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)        \
1652           {                                                                \
1653             coding->composing = COMPOSITION_RELATIVE;                      \
1654             coding->composition_rule_follows = 0;                          \
1655           }                                                                \
1656       }                                                                    \
1657   } while (0)
1658
1659 /* Handle composition end sequence ESC 1; C1 is the byte following ESC.  */
1660
1661 #define DECODE_COMPOSITION_END(c1)                                      \
1662   do {                                                                  \
1663     if (coding->composing != COMPOSITION_DISABLED)                      \
1664       {                                                                 \
1665         CODING_ADD_COMPOSITION_END (coding, coding->produced_char);     \
1666         coding->composing = COMPOSITION_NO;                             \
1667       }                                                                 \
1668     else                                                                \
1669       {  /* Composition disabled: emit ESC C1 undecoded.  */            \
1670         *dst++ = ISO_CODE_ESC;                                          \
1671         *dst++ = c1;                                                    \
1672         coding->produced_char += 2;                                     \
1673       }                                                                 \
1674   } while (0)
1675
1676 /* Decode a composition rule from the byte C1 (and maybe one more byte
1677    from SRC, read into the variable C2) and store one encoded rule in
1678    coding->cmp_data.  C1 is modified; a C1 of 125 or more gives rule 0.  */
1679
1680 #define DECODE_COMPOSITION_RULE(c1)                                     \
1681   do {                                                                  \
1682     int rule = 0;                                                       \
1683     (c1) -= 32;                                                         \
1684     if (c1 < 81)                /* old format (before ver.21) */        \
1685       {                                                                 \
1686         int gref = (c1) / 9;                                            \
1687         int nref = (c1) % 9;                                            \
1688         if (gref == 4) gref = 10;                                       \
1689         if (nref == 4) nref = 10;                                       \
1690         rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
1691       }                                                                 \
1692     else if (c1 < 93)           /* new format (after ver.21) */         \
1693       {                                                                 \
1694         ONE_MORE_BYTE (c2);                                             \
1695         rule = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);              \
1696       }                                                                 \
1697     CODING_ADD_COMPOSITION_COMPONENT (coding, rule);                    \
1698     coding->composition_rule_follows = 0;                               \
1699   } while (0)
1700
1701
1702 /* Decode the ISO2022 byte sequence SOURCE into DESTINATION.  See the
1703    above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1704 static void
1705 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1706      struct coding_system *coding;
1707      unsigned char *source, *destination;
1708      int src_bytes, dst_bytes;
1709 {
1710   unsigned char *src = source;
1711   unsigned char *src_end = source + src_bytes;
1712   unsigned char *dst = destination;
1713   unsigned char *dst_end = destination + dst_bytes;
1714   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1715   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1716   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1717   /* SRC_BASE remembers the start position in source in each loop.
1718      The loop will be exited when there's not enough source code
1719      (within macro ONE_MORE_BYTE), or when there's not enough
1720      destination area to produce a character (within macro
1721      EMIT_CHAR).  */
1722   unsigned char *src_base;
1723   int c, charset;
1724   Lisp_Object translation_table;
1725   Lisp_Object safe_chars;
1726
1727   safe_chars = coding_safe_chars (coding);
1728
1729   if (NILP (Venable_character_translation))
1730     translation_table = Qnil;
1731   else
1732     {
1733       translation_table = coding->translation_table_for_decode;
1734       if (NILP (translation_table))
1735         translation_table = Vstandard_translation_table_for_decode;
1736     }
1737
1738   coding->result = CODING_FINISH_NORMAL;
1739
1740   while (1)
1741     {
1742       int c1, c2 = 0;   /* C2 stays 0 unless a second byte is read.  */
1743
1744       src_base = src;
1745       ONE_MORE_BYTE (c1);
1746
1747       /* We produce no character or one character.  */
1748       switch (iso_code_class [c1])
1749         {
1750         case ISO_0x20_or_0x7F:
1751           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1752             {
1753               DECODE_COMPOSITION_RULE (c1);
1754               continue;
1755             }
1756           if (charset0 < 0 || CHARSET_CHARS (charset0) == 94)
1757             {
1758               /* This is SPACE or DEL.  */
1759               charset = CHARSET_ASCII;
1760               break;
1761             }
1762           /* This is a graphic character, we fall down ...  */
1763
1764         case ISO_graphic_plane_0:
1765           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1766             {
1767               DECODE_COMPOSITION_RULE (c1);
1768               continue;
1769             }
1770           charset = charset0;
1771           break;
1772
1773         case ISO_0xA0_or_0xFF:
1774           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1775               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1776             goto label_invalid_code;
1777           /* This is a graphic character, we fall down ... */
1778
1779         case ISO_graphic_plane_1:
1780           if (charset1 < 0)
1781             goto label_invalid_code;
1782           charset = charset1;
1783           break;
1784
1785         case ISO_control_0:
1786           if (COMPOSING_P (coding))
1787             DECODE_COMPOSITION_END ('1');
1788
1789           /* All ISO2022 control characters in this class have the
1790              same representation in Emacs internal format.  */
1791           if (c1 == '\n'
1792               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1793               && (coding->eol_type == CODING_EOL_CR
1794                   || coding->eol_type == CODING_EOL_CRLF))
1795             {
1796               coding->result = CODING_FINISH_INCONSISTENT_EOL;
1797               goto label_end_of_loop;
1798             }
1799           charset = CHARSET_ASCII;
1800           break;
1801
1802         case ISO_control_1:
1803           if (COMPOSING_P (coding))
1804             DECODE_COMPOSITION_END ('1');
1805           goto label_invalid_code;
1806
1807         case ISO_carriage_return:
1808           if (COMPOSING_P (coding))
1809             DECODE_COMPOSITION_END ('1');
1810
1811           if (coding->eol_type == CODING_EOL_CR)
1812             c1 = '\n';
1813           else if (coding->eol_type == CODING_EOL_CRLF)
1814             {
1815               ONE_MORE_BYTE (c1);
1816               if (c1 != ISO_CODE_LF)
1817                 {
1818                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1819                     {
1820                       coding->result = CODING_FINISH_INCONSISTENT_EOL;
1821                       goto label_end_of_loop;
1822                     }
1823                   src--;
1824                   c1 = '\r';
1825                 }
1826             }
1827           charset = CHARSET_ASCII;
1828           break;
1829
1830         case ISO_shift_out:
1831           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1832               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1833             goto label_invalid_code;
1834           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1835           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1836           continue;
1837
1838         case ISO_shift_in:
1839           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1840             goto label_invalid_code;
1841           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1842           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1843           continue;
1844
1845         case ISO_single_shift_2_7:
1846         case ISO_single_shift_2:
1847           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1848             goto label_invalid_code;
1849           /* SS2 is handled as an escape sequence of ESC 'N' */
1850           c1 = 'N';
1851           goto label_escape_sequence;
1852
1853         case ISO_single_shift_3:
1854           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1855             goto label_invalid_code;
1856           /* SS3 is handled as an escape sequence of ESC 'O' */
1857           c1 = 'O';
1858           goto label_escape_sequence;
1859
1860         case ISO_control_sequence_introducer:
1861           /* CSI is handled as an escape sequence of ESC '[' ...  */
1862           c1 = '[';
1863           goto label_escape_sequence;
1864
1865         case ISO_escape:
1866           ONE_MORE_BYTE (c1);
1867         label_escape_sequence:
1868           /* Escape sequences handled by Emacs are invocation,
1869              designation, direction specification, and character
1870              composition specification.  */
1871           switch (c1)
1872             {
1873             case '&':           /* revision of following character set */
1874               ONE_MORE_BYTE (c1);
1875               if (!(c1 >= '@' && c1 <= '~'))
1876                 goto label_invalid_code;
1877               ONE_MORE_BYTE (c1);
1878               if (c1 != ISO_CODE_ESC)
1879                 goto label_invalid_code;
1880               ONE_MORE_BYTE (c1);
1881               goto label_escape_sequence;
1882
1883             case '$':           /* designation of 2-byte character set */
1884               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1885                 goto label_invalid_code;
1886               ONE_MORE_BYTE (c1);
1887               if (c1 >= '@' && c1 <= 'B')
1888                 {       /* designation of JISX0208.1978, GB2312.1980,
1889                            or JISX0208.1980 */
1890                   DECODE_DESIGNATION (0, 2, 94, c1);
1891                 }
1892               else if (c1 >= 0x28 && c1 <= 0x2B)
1893                 {       /* designation of DIMENSION2_CHARS94 character set */
1894                   ONE_MORE_BYTE (c2);
1895                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1896                 }
1897               else if (c1 >= 0x2C && c1 <= 0x2F)
1898                 {       /* designation of DIMENSION2_CHARS96 character set */
1899                   ONE_MORE_BYTE (c2);
1900                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1901                 }
1902               else
1903                 goto label_invalid_code;
1904               /* We must update these variables now.  */
1905               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1906               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1907               continue;
1908
1909             case 'n':           /* invocation of locking-shift-2 */
1910               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1911                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1912                 goto label_invalid_code;
1913               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1914               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1915               continue;
1916
1917             case 'o':           /* invocation of locking-shift-3 */
1918               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1919                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1920                 goto label_invalid_code;
1921               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
1922               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1923               continue;
1924
1925             case 'N':           /* invocation of single-shift-2 */
1926               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1927                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1928                 goto label_invalid_code;
1929               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
1930               ONE_MORE_BYTE (c1);
1931               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1932                 goto label_invalid_code;
1933               break;
1934
1935             case 'O':           /* invocation of single-shift-3 */
1936               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1937                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1938                 goto label_invalid_code;
1939               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
1940               ONE_MORE_BYTE (c1);
1941               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1942                 goto label_invalid_code;
1943               break;
1944
1945             case '0': case '2': case '3': case '4': /* start composition */
1946               DECODE_COMPOSITION_START (c1);
1947               continue;
1948
1949             case '1':           /* end composition */
1950               DECODE_COMPOSITION_END (c1);
1951               continue;
1952
1953             case '[':           /* specification of direction */
1954               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
1955                 goto label_invalid_code;
1956               /* For the moment, nested direction is not supported.
1957                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
1958                  left-to-right, and nonzero means right-to-left.  */
1959               ONE_MORE_BYTE (c1);
1960               switch (c1)
1961                 {
1962                 case ']':       /* end of the current direction */
1963                   coding->mode &= ~CODING_MODE_DIRECTION;
1964                   break;        /* ESC [ ] is complete; read no more.  */
1965                 case '0':       /* end of the current direction */
1966                 case '1':       /* start of left-to-right direction */
1967                   ONE_MORE_BYTE (c1);
1968                   if (c1 == ']')
1969                     coding->mode &= ~CODING_MODE_DIRECTION;
1970                   else
1971                     goto label_invalid_code;
1972                   break;
1973
1974                 case '2':       /* start of right-to-left direction */
1975                   ONE_MORE_BYTE (c1);
1976                   if (c1 == ']')
1977                     coding->mode |= CODING_MODE_DIRECTION;
1978                   else
1979                     goto label_invalid_code;
1980                   break;
1981
1982                 default:
1983                   goto label_invalid_code;
1984                 }
1985               continue;
1986
1987             default:
1988               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1989                 goto label_invalid_code;
1990               if (c1 >= 0x28 && c1 <= 0x2B)
1991                 {       /* designation of DIMENSION1_CHARS94 character set */
1992                   ONE_MORE_BYTE (c2);
1993                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
1994                 }
1995               else if (c1 >= 0x2C && c1 <= 0x2F)
1996                 {       /* designation of DIMENSION1_CHARS96 character set */
1997                   ONE_MORE_BYTE (c2);
1998                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
1999                 }
2000               else
2001                 goto label_invalid_code;
2002               /* We must update these variables now.  */
2003               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
2004               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
2005               continue;
2006             }
2007         }
2008
2009       /* Now we know CHARSET and 1st position code C1 of a character.
2010          Produce a multibyte sequence for that character while getting
2011          2nd position code C2 if necessary.  */
2012       if (CHARSET_DIMENSION (charset) == 2)
2013         {
2014           ONE_MORE_BYTE (c2);
2015           if (c1 < 0x80 ? c2 < 0x20 || c2 >= 0x80 : c2 < 0xA0)
2016             /* C2 is not in a valid range.  */
2017             goto label_invalid_code;
2018         }
2019       c = DECODE_ISO_CHARACTER (charset, c1, c2);
2020       EMIT_CHAR (c);
2021       continue;
2022
2023     label_invalid_code:
2024       coding->errors++;
2025       if (COMPOSING_P (coding))
2026         DECODE_COMPOSITION_END ('1');
2027       src = src_base;
2028       c = *src++;
2029       EMIT_CHAR (c);
2030     }
2031
2032  label_end_of_loop:
2033   coding->consumed = coding->consumed_char = src_base - source;
2034   coding->produced = dst - destination;
2035   return;
2036 }
2037
2038
2039 /* ISO2022 encoding stuff.  */
2040
2041 /*
2042    It is not enough to say just "ISO2022" on encoding, we have to
2043    specify more details.  In Emacs, each coding system of ISO2022
2044    variant has the following specifications:
2045         1. Initial designation to G0 thru G3.
2046         2. Allows short-form designation?
2047         3. ASCII should be designated to G0 before control characters?
2048         4. ASCII should be designated to G0 at end of line?
2049         5. 7-bit environment or 8-bit environment?
2050         6. Use locking-shift?
2051         7. Use Single-shift?
2052    And the following two are only for Japanese:
2053         8. Use ASCII in place of JISX0201-1976-Roman?
2054         9. Use JISX0208-1983 in place of JISX0208-1978?
2055    These specifications are encoded in `coding->flags' as flag bits
2056    defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
2057    details.
2058 */
2059
/* Emit at DST (advancing DST) the escape sequence which designates
   CHARSET to graphic register REG, and record that designation in
   CODING.  If CODING holds a revision number below 255 for CHARSET,
   the sequence is preceded by `ESC & <'@' + revision>'.  The
   intermediate character is left out (short form) only when CHARSET
   is a 94-character set of dimension other than 1, REG is 0, the
   <final-char> is '@', 'A', or 'B', and CODING has
   CODING_FLAG_ISO_SHORT_FORM set.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset);        \
    int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset);    \
    int multibyte_p = CHARSET_DIMENSION (charset) != 1;                 \
    /* Intermediate characters, indexed by graphic register.  */        \
    char *intermediates                                                 \
      = CHARSET_CHARS (charset) == 94 ? "()*+" : ",-./";                \
                                                                        \
    if (revision < 255)                                                 \
      {                                                                 \
        dst[0] = ISO_CODE_ESC;                                          \
        dst[1] = '&';                                                   \
        dst[2] = '@' + revision;                                        \
        dst += 3;                                                       \
      }                                                                 \
    *dst++ = ISO_CODE_ESC;                                              \
    if (multibyte_p)                                                    \
      *dst++ = '$';                                                     \
    /* Everything but the short form carries an intermediate char.  */  \
    if (! (multibyte_p                                                  \
           && CHARSET_CHARS (charset) == 94                             \
           && (coding->flags & CODING_FLAG_ISO_SHORT_FORM)              \
           && reg == 0                                                  \
           && final_char >= '@' && final_char <= 'B'))                  \
      *dst++ = (unsigned char) (intermediates[reg]);                    \
    *dst++ = final_char;                                                \
    CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;                \
  } while (0)
2102
/* The following two macros emit at DST the ISO2022 single-shift
   functions single-shift-2 and single-shift-3.  With
   CODING_FLAG_ISO_SEVEN_BITS set in CODING->flags the function is
   written as the two-byte escape sequence `ESC N' (resp. `ESC O');
   otherwise the single control code ISO_CODE_SS2 (resp. ISO_CODE_SS3)
   is used.  Both mark CODING as being in the single-shifting state.  */

#define ENCODE_SINGLE_SHIFT_2                                   \
  do {                                                          \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))         \
      *dst++ = ISO_CODE_SS2;                                    \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'N';                                           \
      }                                                         \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)

#define ENCODE_SINGLE_SHIFT_3                                   \
  do {                                                          \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))         \
      *dst++ = ISO_CODE_SS3;                                    \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'O';                                           \
      }                                                         \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)
2124
/* The following four macros emit at DST the ISO2022 locking-shift
   functions shift-in (ISO_CODE_SI), shift-out (ISO_CODE_SO),
   locking-shift-2 (`ESC n'), and locking-shift-3 (`ESC o').  Each one
   records in CODING that graphic register 0, 1, 2, or 3 respectively
   is now invoked to graphic plane 0.  */

#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;         \
    *dst++ = ISO_CODE_SI;                               \
  } while (0)

#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;         \
    *dst++ = ISO_CODE_SO;                               \
  } while (0)

#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;         \
    *dst++ = ISO_CODE_ESC;                              \
    *dst++ = 'n';                                       \
  } while (0)

#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;         \
    *dst++ = ISO_CODE_ESC;                              \
    *dst++ = 'o';                                       \
  } while (0)
2152
/* Emit at DST the code of a DIMENSION1 character of CHARSET whose
   position-code is C1.  If a single-shift is pending, the byte is
   written according to the 7-bit/8-bit flag of CODING and the pending
   state is cleared.  Otherwise the byte is written for whichever
   graphic plane (0: high bit off, 1: high bit on) CHARSET is invoked
   to; if it is invoked to neither, encode_invocation_designation is
   called first and the test is repeated.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    if (! CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                     \
      {                                                                 \
        if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))       \
          {                                                             \
            *dst++ = c1 & 0x7F;                                         \
            break;                                                      \
          }                                                             \
        if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))       \
          {                                                             \
            *dst++ = c1 | 0x80;                                         \
            break;                                                      \
          }                                                             \
        /* CHARSET is on neither graphic plane yet.  Produce the        \
           designation and/or invocation it needs, then go around       \
           once more to produce the character itself.  */               \
        dst = encode_invocation_designation (charset, coding, dst);     \
        continue;                                                       \
      }                                                                 \
    /* The character follows a single-shift function.  */               \
    if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                     \
      *dst++ = c1 & 0x7F;                                               \
    else                                                                \
      *dst++ = c1 | 0x80;                                               \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                       \
    break;                                                              \
  } while (1)
2185
/* Emit at DST the two-byte code of a DIMENSION2 character of CHARSET
   whose position-codes are C1 and C2.  If a single-shift is pending,
   both bytes are written according to the 7-bit/8-bit flag of CODING
   and the pending state is cleared.  Otherwise the bytes are written
   for whichever graphic plane (0: high bits off, 1: high bits on)
   CHARSET is invoked to; if it is invoked to neither,
   encode_invocation_designation is called first and the test is
   repeated.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    if (! CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                     \
      {                                                                 \
        if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))       \
          {                                                             \
            *dst++ = c1 & 0x7F;                                         \
            *dst++ = c2 & 0x7F;                                         \
            break;                                                      \
          }                                                             \
        if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))       \
          {                                                             \
            *dst++ = c1 | 0x80;                                         \
            *dst++ = c2 | 0x80;                                         \
            break;                                                      \
          }                                                             \
        /* CHARSET is on neither graphic plane yet.  Produce the        \
           designation and/or invocation it needs, then go around       \
           once more to produce the character itself.  */               \
        dst = encode_invocation_designation (charset, coding, dst);     \
        continue;                                                       \
      }                                                                 \
    /* The character follows a single-shift function.  */               \
    if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                     \
      {                                                                 \
        *dst++ = c1 & 0x7F;                                             \
        *dst++ = c2 & 0x7F;                                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        *dst++ = c1 | 0x80;                                             \
        *dst++ = c2 | 0x80;                                             \
      }                                                                 \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                       \
    break;                                                              \
  } while (1)
2218
/* Produce codes for character C at DST.  C is split into its charset
   and position-codes.  If the charset is not defined, the
   position-codes are written as they are (the second one only when it
   is not negative).  Otherwise the character is handed to the
   DIMENSION1 or DIMENSION2 encoder, after replacing ASCII by
   latin-jisx0201 when CODING_FLAG_ISO_USE_ROMAN is set, or jisx0208
   by jisx0208-1978 when CODING_FLAG_ISO_USE_OLDJIS is set.  */

#define ENCODE_ISO_CHARACTER(c)                                         \
  do {                                                                  \
    int charset, c1, c2;                                                \
                                                                        \
    SPLIT_CHAR (c, charset, c1, c2);                                    \
    if (! CHARSET_DEFINED_P (charset))                                  \
      {                                                                 \
        *dst++ = c1;                                                    \
        if (c2 >= 0)                                                    \
          *dst++ = c2;                                                  \
      }                                                                 \
    else if (CHARSET_DIMENSION (charset) == 1)                          \
      {                                                                 \
        if ((coding->flags & CODING_FLAG_ISO_USE_ROMAN)                 \
            && charset == CHARSET_ASCII)                                \
          charset = charset_latin_jisx0201;                             \
        ENCODE_ISO_CHARACTER_DIMENSION1 (charset, c1);                  \
      }                                                                 \
    else                                                                \
      {                                                                 \
        if ((coding->flags & CODING_FLAG_ISO_USE_OLDJIS)                \
            && charset == charset_jisx0208)                             \
          charset = charset_jisx0208_1978;                              \
        ENCODE_ISO_CHARACTER_DIMENSION2 (charset, c1, c2);              \
      }                                                                 \
  } while (0)
2248
2249
/* Do not encode character C itself; encode the substitution character
   CODING_INHIBIT_CHARACTER_SUBSTITUTION in its place, once, and a
   second time when the charset of C has a width greater than 1.  */

#define ENCODE_UNSAFE_CHARACTER(c)                                      \
  do {                                                                  \
    ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION);       \
    if (CHARSET_WIDTH (CHAR_CHARSET (c)) >= 2)                          \
      {                                                                 \
        ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION);   \
      }                                                                 \
  } while (0)
2258
2259
2260 /* Produce designation and invocation codes at a place pointed by DST
2261    to use CHARSET.  The element `spec.iso2022' of *CODING is updated.
2262    Return new DST.  */
2263
2264 unsigned char *
2265 encode_invocation_designation (charset, coding, dst)
2266      int charset;
2267      struct coding_system *coding;
2268      unsigned char *dst;
2269 {
2270   int reg;                      /* graphic register number */
2271
2272   /* At first, check designations.  */
2273   for (reg = 0; reg < 4; reg++)
2274     if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
2275       break;
2276
2277   if (reg >= 4)
2278     {
2279       /* CHARSET is not yet designated to any graphic registers.  */
2280       /* At first check the requested designation.  */
2281       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
2282       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
2283         /* Since CHARSET requests no special designation, designate it
2284            to graphic register 0.  */
2285         reg = 0;
2286
2287       ENCODE_DESIGNATION (charset, reg, coding);
2288     }
2289
2290   if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
2291       && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
2292     {
2293       /* Since the graphic register REG is not invoked to any graphic
2294          planes, invoke it to graphic plane 0.  */
2295       switch (reg)
2296         {
2297         case 0:                 /* graphic register 0 */
2298           ENCODE_SHIFT_IN;
2299           break;
2300
2301         case 1:                 /* graphic register 1 */
2302           ENCODE_SHIFT_OUT;
2303           break;
2304
2305         case 2:                 /* graphic register 2 */
2306           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2307             ENCODE_SINGLE_SHIFT_2;
2308           else
2309             ENCODE_LOCKING_SHIFT_2;
2310           break;
2311
2312         case 3:                 /* graphic register 3 */
2313           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2314             ENCODE_SINGLE_SHIFT_3;
2315           else
2316             ENCODE_LOCKING_SHIFT_3;
2317           break;
2318         }
2319     }
2320
2321   return dst;
2322 }
2323
/* Produce at DST the 2-byte code of the encoded composition rule
   RULE: the rule is decoded into GREF and NREF by
   COMPOSITION_DECODE_RULE, and the bytes written are `32 + 81 + GREF'
   followed by `32 + NREF'.  */

#define ENCODE_COMPOSITION_RULE(rule)                   \
  do {                                                  \
    int gref, nref;                                     \
    COMPOSITION_DECODE_RULE (rule, gref, nref);         \
    dst[0] = 32 + 81 + gref;                            \
    dst[1] = 32 + nref;                                 \
    dst += 2;                                           \
  } while (0)
2333
/* Produce at DST the escape sequence which starts a composition:
   `ESC 0' for COMPOSITION_RELATIVE, `ESC 3' for
   COMPOSITION_WITH_ALTCHARS, and `ESC 4' for any other method.  DATA
   points to an array of integers describing the composition (see the
   comment in coding.h for its format); DATA[3] becomes
   CODING->composing.  Except for the relative method,
   CODING->cmp_data_index is set to CODING->cmp_data_start + 4 and
   CODING->composition_rule_follows is cleared.  */

#define ENCODE_COMPOSITION_START(coding, data)                          \
  do {                                                                  \
    coding->composing = data[3];                                        \
    *dst++ = ISO_CODE_ESC;                                              \
    if (coding->composing != COMPOSITION_RELATIVE)                      \
      {                                                                 \
        if (coding->composing == COMPOSITION_WITH_ALTCHARS)             \
          *dst++ = '3';                                                 \
        else                                                            \
          *dst++ = '4';                                                 \
        coding->composition_rule_follows = 0;                           \
        coding->cmp_data_index = coding->cmp_data_start + 4;            \
      }                                                                 \
    else                                                                \
      *dst++ = '0';                                                     \
  } while (0)
2353
/* Produce at DST the escape sequence `ESC 1' which ends the current
   composition.  CODING->cmp_data_start is advanced by DATA[0] and
   CODING->composing is reset to COMPOSITION_NO.  If that exhausts the
   current composition data block and another block follows, CODING is
   switched to the beginning of the next block.  */

#define ENCODE_COMPOSITION_END(coding, data)                    \
  do {                                                          \
    dst[0] = ISO_CODE_ESC;                                      \
    dst[1] = '1';                                               \
    dst += 2;                                                   \
    coding->composing = COMPOSITION_NO;                         \
    coding->cmp_data_start += data[0];                          \
    if (coding->cmp_data->next                                  \
        && coding->cmp_data_start == coding->cmp_data->used)    \
      {                                                         \
        coding->cmp_data_start = 0;                             \
        coding->cmp_data = coding->cmp_data->next;              \
      }                                                         \
  } while (0)
2369
/* Produce the composition start sequence `ESC 0' at DST and set
   CODING->composing to COMPOSITION_RELATIVE.  Here the sequence does
   not start a new composition; it tells that the components
   (alternate chars and composition rules) of the composition have
   just been produced and that the actual text follows in SRC.  */

#define ENCODE_COMPOSITION_FAKE_START(coding)   \
  do {                                          \
    coding->composing = COMPOSITION_RELATIVE;   \
    dst[0] = ISO_CODE_ESC;                      \
    dst[1] = '0';                               \
    dst += 2;                                   \
  } while (0)
2381
/* The following three macros produce codes for indicating direction
   of text.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER produces the control sequence
   introducer at DST: the two bytes `ESC [' when CODING has
   CODING_FLAG_ISO_SEVEN_BITS set, the single byte ISO_CODE_CSI
   otherwise.  The flag is tested as a bit, as everywhere else in this
   file; comparing the whole flag word for equality would select the
   8-bit form whenever any other flag is set as well.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
  do {                                                  \
    if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
      *dst++ = ISO_CODE_ESC, *dst++ = '[';              \
    else                                                \
      *dst++ = ISO_CODE_CSI;                            \
  } while (0)
2391
/* Produce at DST the control sequence introducer followed by `2 ]'
   (ENCODE_DIRECTION_R2L) or `0 ]' (ENCODE_DIRECTION_L2R).

   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro which
   expands to a `do ... while (0)' statement, so it can neither take
   an argument nor be chained with the comma operator; these macros
   are therefore statements as well.  */
#define ENCODE_DIRECTION_R2L                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '2';                               \
    *dst++ = ']';                               \
  } while (0)

#define ENCODE_DIRECTION_L2R                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '0';                               \
    *dst++ = ']';                               \
  } while (0)
2397
/* Produce codes which bring the graphic planes and registers back to
   their initial state: a shift-in if graphic plane 0 does not hold
   graphic register 0, then, for each graphic register which has an
   initial designation (a non-negative value) differing from its
   current one, the sequence designating the initial charset again.  */
#define ENCODE_RESET_PLANE_AND_REGISTER                                     \
  do {                                                                      \
    int reg = 0;                                                            \
                                                                            \
    if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                        \
      ENCODE_SHIFT_IN;                                                      \
    while (reg < 4)                                                         \
      {                                                                     \
        if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0          \
            && (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg)           \
                != CODING_SPEC_ISO_DESIGNATION (coding, reg)))              \
          {                                                                 \
            ENCODE_DESIGNATION                                              \
              (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg),           \
               reg, coding);                                                \
          }                                                                 \
        reg++;                                                              \
      }                                                                     \
  } while (0)
2412
2413 /* Produce designation sequences of charsets in the line started from
2414    SRC to a place pointed by DST, and return updated DST.
2415
2416    If the current block ends before any end-of-line, we may fail to
2417    find all the necessary designations.  */
2418
2419 static unsigned char *
2420 encode_designation_at_bol (coding, translation_table, src, src_end, dst)
2421      struct coding_system *coding;
2422      Lisp_Object translation_table;
2423      unsigned char *src, *src_end, *dst;
2424 {
2425   int charset, c, found = 0, reg;
2426   /* Table of charsets to be designated to each graphic register.  */
2427   int r[4];
2428
2429   for (reg = 0; reg < 4; reg++)
2430     r[reg] = -1;
2431
2432   while (found < 4)
2433     {
2434       ONE_MORE_CHAR (c);
2435       if (c == '\n')
2436         break;
2437
2438       charset = CHAR_CHARSET (c);
2439       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
2440       if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
2441         {
2442           found++;
2443           r[reg] = charset;
2444         }
2445     }
2446
2447  label_end_of_loop:
2448   if (found)
2449     {
2450       for (reg = 0; reg < 4; reg++)
2451         if (r[reg] >= 0
2452             && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
2453           ENCODE_DESIGNATION (r[reg], reg, coding);
2454     }
2455
2456   return dst;
2457 }
2458
2459 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
2460
2461 static void
2462 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
2463      struct coding_system *coding;
2464      unsigned char *source, *destination;
2465      int src_bytes, dst_bytes;
2466 {
2467   unsigned char *src = source;
2468   unsigned char *src_end = source + src_bytes;
2469   unsigned char *dst = destination;
2470   unsigned char *dst_end = destination + dst_bytes;
2471   /* Since the maximum bytes produced by each loop is 20, we subtract 19
2472      from DST_END to assure overflow checking is necessary only at the
2473      head of loop.  */
2474   unsigned char *adjusted_dst_end = dst_end - 19;
2475   /* SRC_BASE remembers the start position in source in each loop.
2476      The loop will be exited when there's not enough source text to
2477      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2478      there's not enough destination area to produce encoded codes
2479      (within macro EMIT_BYTES).  */
2480   unsigned char *src_base;
2481   int c;
2482   Lisp_Object translation_table;
2483   Lisp_Object safe_chars;
2484
2485   safe_chars = coding_safe_chars (coding);
2486
2487   if (NILP (Venable_character_translation))
2488     translation_table = Qnil;
2489   else
2490     {
2491       translation_table = coding->translation_table_for_encode;
2492       if (NILP (translation_table))
2493         translation_table = Vstandard_translation_table_for_encode;
2494     }
2495
2496   coding->consumed_char = 0;
2497   coding->errors = 0;
2498   while (1)
2499     {
2500       src_base = src;
2501
2502       if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19)))
2503         {
2504           coding->result = CODING_FINISH_INSUFFICIENT_DST;
2505           break;
2506         }
2507
2508       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
2509           && CODING_SPEC_ISO_BOL (coding))
2510         {
2511           /* We have to produce designation sequences if any now.  */
2512           dst = encode_designation_at_bol (coding, translation_table,
2513                                            src, src_end, dst);
2514           CODING_SPEC_ISO_BOL (coding) = 0;
2515         }
2516
2517       /* Check composition start and end.  */
2518       if (coding->composing != COMPOSITION_DISABLED
2519           && coding->cmp_data_start < coding->cmp_data->used)
2520         {
2521           struct composition_data *cmp_data = coding->cmp_data;
2522           int *data = cmp_data->data + coding->cmp_data_start;
2523           int this_pos = cmp_data->char_offset + coding->consumed_char;
2524
2525           if (coding->composing == COMPOSITION_RELATIVE)
2526             {
2527               if (this_pos == data[2])
2528                 {
2529                   ENCODE_COMPOSITION_END (coding, data);
2530                   cmp_data = coding->cmp_data;
2531                   data = cmp_data->data + coding->cmp_data_start;
2532                 }
2533             }
2534           else if (COMPOSING_P (coding))
2535             {
2536               /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHAR  */
2537               if (coding->cmp_data_index == coding->cmp_data_start + data[0])
2538                 /* We have consumed components of the composition.
2539                    What follows in SRC is the compositions's base
2540                    text.  */
2541                 ENCODE_COMPOSITION_FAKE_START (coding);
2542               else
2543                 {
2544                   int c = cmp_data->data[coding->cmp_data_index++];
2545                   if (coding->composition_rule_follows)
2546                     {
2547                       ENCODE_COMPOSITION_RULE (c);
2548                       coding->composition_rule_follows = 0;
2549                     }
2550                   else
2551                     {
2552                       if (coding->flags & CODING_FLAG_ISO_SAFE
2553                           && ! CODING_SAFE_CHAR_P (safe_chars, c))
2554                         ENCODE_UNSAFE_CHARACTER (c);
2555                       else
2556                         ENCODE_ISO_CHARACTER (c);
2557                       if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
2558                         coding->composition_rule_follows = 1;
2559                     }
2560                   continue;
2561                 }
2562             }
2563           if (!COMPOSING_P (coding))
2564             {
2565               if (this_pos == data[1])
2566                 {
2567                   ENCODE_COMPOSITION_START (coding, data);
2568                   continue;
2569                 }
2570             }
2571         }
2572
2573       ONE_MORE_CHAR (c);
2574
2575       /* Now encode the character C.  */
2576       if (c < 0x20 || c == 0x7F)
2577         {
2578           if (c == '\r')
2579             {
2580               if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2581                 {
2582                   if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2583                     ENCODE_RESET_PLANE_AND_REGISTER;
2584                   *dst++ = c;
2585                   continue;
2586                 }
2587               /* fall down to treat '\r' as '\n' ...  */
2588               c = '\n';
2589             }
2590           if (c == '\n')
2591             {
2592               if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
2593                 ENCODE_RESET_PLANE_AND_REGISTER;
2594               if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
2595                 bcopy (coding->spec.iso2022.initial_designation,
2596                        coding->spec.iso2022.current_designation,
2597                        sizeof coding->spec.iso2022.initial_designation);
2598               if (coding->eol_type == CODING_EOL_LF
2599                   || coding->eol_type == CODING_EOL_UNDECIDED)
2600                 *dst++ = ISO_CODE_LF;
2601               else if (coding->eol_type == CODING_EOL_CRLF)
2602                 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
2603               else
2604                 *dst++ = ISO_CODE_CR;
2605               CODING_SPEC_ISO_BOL (coding) = 1;
2606             }
2607           else
2608             {
2609               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2610                 ENCODE_RESET_PLANE_AND_REGISTER;
2611               *dst++ = c;
2612             }
2613         }
2614       else if (ASCII_BYTE_P (c))
2615         ENCODE_ISO_CHARACTER (c);
2616       else if (SINGLE_BYTE_CHAR_P (c))
2617         {
2618           *dst++ = c;
2619           coding->errors++;
2620         }
2621       else if (coding->flags & CODING_FLAG_ISO_SAFE
2622                && ! CODING_SAFE_CHAR_P (safe_chars, c))
2623         ENCODE_UNSAFE_CHARACTER (c);
2624       else
2625         ENCODE_ISO_CHARACTER (c);
2626
2627       coding->consumed_char++;
2628     }
2629
2630  label_end_of_loop:
2631   coding->consumed = src_base - source;
2632   coding->produced = coding->produced_char = dst - destination;
2633 }
2634
2635 \f
2636 /*** 4. SJIS and BIG5 handlers ***/
2637
2638 /* Although SJIS and BIG5 are not ISO's coding system, they are used
2639    quite widely.  So, for the moment, Emacs supports them in the bare
2640    C code.  But, in the future, they may be supported only by CCL.  */
2641
/* SJIS is a coding system encoding three character sets: ASCII, right
   half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
   as is.  A character of charset katakana-jisx0201 is encoded by
   "position-code + 0x80".  A character of charset japanese-jisx0208
   is encoded in two bytes, but the two position-codes are divided and
   shifted so that they fit in the ranges below.
2648
2649    --- CODE RANGE of SJIS ---
2650    (character set)      (range)
2651    ASCII                0x00 .. 0x7F
2652    KATAKANA-JISX0201    0xA0 .. 0xDF
2653    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
2654             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
2655    -------------------------------
2656
2657 */
2658
2659 /* BIG5 is a coding system encoding two character sets: ASCII and
2660    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
2661    character set and is encoded in two-byte.
2662
2663    --- CODE RANGE of BIG5 ---
2664    (character set)      (range)
2665    ASCII                0x00 .. 0x7F
2666    Big5 (1st byte)      0xA1 .. 0xFE
2667         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
2668    --------------------------
2669
2670    Since the number of characters in Big5 is larger than maximum
2671    characters in Emacs' charset (96x96), it can't be handled as one
2672    charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
2673    and `charset-big5-2'.  Both are DIMENSION2 and CHARS94.  The former
2674    contains frequently used characters and the latter contains less
2675    frequently used characters.  */
2676
/* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
   are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
   C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
   format.  CHARSET is `charset_big5_1' or `charset_big5_2'.  */
2681
/* Number of Big5 characters which have the same code in 1st byte:
   the 2nd byte takes the values 0x40..0x7E and 0xA1..0xFE.  */
#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)

/* Convert the Big5 code B1, B2 into CHARSET (`charset_big5_1' when B1
   is below 0xC9, `charset_big5_2' otherwise) and the position-codes
   C1, C2.  CHARSET, C1, and C2 must be lvalues.  Every argument is
   parenthesized in the expansion so that expressions can be passed
   safely; note that B1 and B2 are still evaluated more than once.  */
#define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
  do {                                                                  \
    /* Serial number of the character within the whole of Big5.  */     \
    unsigned int temp                                                   \
      = (((b1) - 0xA1) * BIG5_SAME_ROW                                  \
         + (b2) - ((b2) < 0x7F ? 0x40 : 0x62));                         \
    if ((b1) < 0xC9)                                                    \
      (charset) = charset_big5_1;                                       \
    else                                                                \
      {                                                                 \
        (charset) = charset_big5_2;                                     \
        temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                          \
      }                                                                 \
    (c1) = temp / (0xFF - 0xA1) + 0x21;                                 \
    (c2) = temp % (0xFF - 0xA1) + 0x21;                                 \
  } while (0)

/* Convert a character of CHARSET whose position-codes are C1, C2 into
   the Big5 code B1, B2.  B1 and B2 must be lvalues.  Every argument
   is parenthesized in the expansion so that expressions can be passed
   safely.  */
#define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);                  \
    if ((charset) == charset_big5_2)                                    \
      temp += BIG5_SAME_ROW * (0xC9 - 0xA1);                            \
    (b1) = temp / BIG5_SAME_ROW + 0xA1;                                 \
    (b2) = temp % BIG5_SAME_ROW;                                        \
    /* The first 0x3F offsets of a row map to 0x40..0x7E, the rest to   \
       0xA1..0xFE.  */                                                  \
    (b2) += (b2) < 0x3F ? 0x40 : 0x62;                                  \
  } while (0)
2709
2710 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2711    Check if a text is encoded in SJIS.  If it is, return
2712    CODING_CATEGORY_MASK_SJIS, else return 0.  */
2713
2714 static int
2715 detect_coding_sjis (src, src_end, multibytep)
2716      unsigned char *src, *src_end;
2717      int multibytep;
2718 {
2719   int c;
2720   /* Dummy for ONE_MORE_BYTE.  */
2721   struct coding_system dummy_coding;
2722   struct coding_system *coding = &dummy_coding;
2723
2724   while (1)
2725     {
2726       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2727       if (c >= 0x81)
2728         {
2729           if (c <= 0x9F || (c >= 0xE0 && c <= 0xEF))
2730             {
2731               ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2732               if (c < 0x40 || c == 0x7F || c > 0xFC)
2733                 return 0;
2734             }
2735           else if (c > 0xDF)
2736             return 0;
2737         }
2738     }
2739  label_end_of_loop:
2740   return CODING_CATEGORY_MASK_SJIS;
2741 }
2742
2743 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2744    Check if a text is encoded in BIG5.  If it is, return
2745    CODING_CATEGORY_MASK_BIG5, else return 0.  */
2746
2747 static int
2748 detect_coding_big5 (src, src_end, multibytep)
2749      unsigned char *src, *src_end;
2750      int multibytep;
2751 {
2752   int c;
2753   /* Dummy for ONE_MORE_BYTE.  */
2754   struct coding_system dummy_coding;
2755   struct coding_system *coding = &dummy_coding;
2756
2757   while (1)
2758     {
2759       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2760       if (c >= 0xA1)
2761         {
2762           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2763           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
2764             return 0;
2765         }
2766     }
2767  label_end_of_loop:
2768   return CODING_CATEGORY_MASK_BIG5;
2769 }
2770
2771 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2772    Check if a text is encoded in UTF-8.  If it is, return
2773    CODING_CATEGORY_MASK_UTF_8, else return 0.  */
2774
/* Non-zero if the bits of C selected by MASK are equal to BITS.  */
#define UTF_8_OCTET_MATCH_P(c, mask, bits) (((c) & (mask)) == (bits))

/* Classify one octet C of a UTF-8 byte sequence by its high bits.  */
/* 0xxxxxxx: a complete one-octet character.  */
#define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
/* 10xxxxxx: a trailing octet of a multi-octet sequence.  */
#define UTF_8_EXTRA_OCTET_P(c)     UTF_8_OCTET_MATCH_P (c, 0xC0, 0x80)
/* 110xxxxx: leading octet of a two-octet sequence.  */
#define UTF_8_2_OCTET_LEADING_P(c) UTF_8_OCTET_MATCH_P (c, 0xE0, 0xC0)
/* 1110xxxx: leading octet of a three-octet sequence.  */
#define UTF_8_3_OCTET_LEADING_P(c) UTF_8_OCTET_MATCH_P (c, 0xF0, 0xE0)
/* 11110xxx: leading octet of a four-octet sequence.  */
#define UTF_8_4_OCTET_LEADING_P(c) UTF_8_OCTET_MATCH_P (c, 0xF8, 0xF0)
/* 111110xx: leading octet of a five-octet sequence.  */
#define UTF_8_5_OCTET_LEADING_P(c) UTF_8_OCTET_MATCH_P (c, 0xFC, 0xF8)
/* 1111110x: leading octet of a six-octet sequence.  */
#define UTF_8_6_OCTET_LEADING_P(c) UTF_8_OCTET_MATCH_P (c, 0xFE, 0xFC)
2782
2783 static int
2784 detect_coding_utf_8 (src, src_end, multibytep)
2785      unsigned char *src, *src_end;
2786      int multibytep;
2787 {
2788   unsigned char c;
2789   int seq_maybe_bytes;
2790   /* Dummy for ONE_MORE_BYTE.  */
2791   struct coding_system dummy_coding;
2792   struct coding_system *coding = &dummy_coding;
2793
2794   while (1)
2795     {
2796       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2797       if (UTF_8_1_OCTET_P (c))
2798         continue;
2799       else if (UTF_8_2_OCTET_LEADING_P (c))
2800         seq_maybe_bytes = 1;
2801       else if (UTF_8_3_OCTET_LEADING_P (c))
2802         seq_maybe_bytes = 2;
2803       else if (UTF_8_4_OCTET_LEADING_P (c))
2804         seq_maybe_bytes = 3;
2805       else if (UTF_8_5_OCTET_LEADING_P (c))
2806         seq_maybe_bytes = 4;
2807       else if (UTF_8_6_OCTET_LEADING_P (c))
2808         seq_maybe_bytes = 5;
2809       else
2810         return 0;
2811
2812       do
2813         {
2814           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2815           if (!UTF_8_EXTRA_OCTET_P (c))
2816             return 0;
2817           seq_maybe_bytes--;
2818         }
2819       while (seq_maybe_bytes > 0);
2820     }
2821
2822  label_end_of_loop:
2823   return CODING_CATEGORY_MASK_UTF_8;
2824 }
2825
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
   Check if a text is encoded in UTF-16 by examining its first two
   bytes.  If they are the byte order mark FE FF, return
   CODING_CATEGORY_MASK_UTF_16_BE; if they are FF FE, return
   CODING_CATEGORY_MASK_UTF_16_LE; else return 0.  */
2831
/* Non-zero if the 16-bit value VAL is 0xFFFE or 0xFFFF.  */
#define UTF_16_INVALID_P(val)   \
  (((val) == 0xFFFE)            \
   || ((val) == 0xFFFF))

/* Non-zero if VAL is a high (leading) surrogate, i.e. within
   0xD800..0xDBFF.  The top six bits identify the surrogate kind, so
   the mask must be 0xFC00; a mask of 0xD800 would also accept low
   surrogates and values such as 0xFFFF.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* Non-zero if VAL is a low (trailing) surrogate, i.e. within
   0xDC00..0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)
2841
2842 static int
2843 detect_coding_utf_16 (src, src_end, multibytep)
2844      unsigned char *src, *src_end;
2845      int multibytep;
2846 {
2847   unsigned char c1, c2;
2848   /* Dummy for TWO_MORE_BYTES.  */
2849   struct coding_system dummy_coding;
2850   struct coding_system *coding = &dummy_coding;
2851
2852   ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
2853   ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep);
2854
2855   if ((c1 == 0xFF) && (c2 == 0xFE))
2856     return CODING_CATEGORY_MASK_UTF_16_LE;
2857   else if ((c1 == 0xFE) && (c2 == 0xFF))
2858     return CODING_CATEGORY_MASK_UTF_16_BE;
2859
2860  label_end_of_loop:
2861   return 0;
2862 }
2863
2864 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2865    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
2866
2867 static void
2868 decode_coding_sjis_big5 (coding, source, destination,
2869                          src_bytes, dst_bytes, sjis_p)
2870      struct coding_system *coding;
2871      unsigned char *source, *destination;
2872      int src_bytes, dst_bytes;
2873      int sjis_p;
2874 {
2875   unsigned char *src = source;
2876   unsigned char *src_end = source + src_bytes;
2877   unsigned char *dst = destination;
2878   unsigned char *dst_end = destination + dst_bytes;
2879   /* SRC_BASE remembers the start position in source in each loop.
2880      The loop will be exited when there's not enough source code
2881      (within macro ONE_MORE_BYTE), or when there's not enough
2882      destination area to produce a character (within macro
2883      EMIT_CHAR).  */
2884   unsigned char *src_base;
2885   Lisp_Object translation_table;
2886
2887   if (NILP (Venable_character_translation))
2888     translation_table = Qnil;
2889   else
2890     {
2891       translation_table = coding->translation_table_for_decode;
2892       if (NILP (translation_table))
2893         translation_table = Vstandard_translation_table_for_decode;
2894     }
2895
2896   coding->produced_char = 0;
2897   while (1)
2898     {
2899       int c, charset, c1, c2;
2900
2901       src_base = src;
2902       ONE_MORE_BYTE (c1);
2903
2904       if (c1 < 0x80)
2905         {
2906           charset = CHARSET_ASCII;
2907           if (c1 < 0x20)
2908             {
2909               if (c1 == '\r')
2910                 {
2911                   if (coding->eol_type == CODING_EOL_CRLF)
2912                     {
2913                       ONE_MORE_BYTE (c2);
2914                       if (c2 == '\n')
2915                         c1 = c2;
2916                       else if (coding->mode
2917                                & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2918                         {
2919                           coding->result = CODING_FINISH_INCONSISTENT_EOL;
2920                           goto label_end_of_loop;
2921                         }
2922                       else
2923                         /* To process C2 again, SRC is subtracted by 1.  */
2924                         src--;
2925                     }
2926                   else if (coding->eol_type == CODING_EOL_CR)
2927                     c1 = '\n';
2928                 }
2929               else if (c1 == '\n'
2930                        && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2931                        && (coding->eol_type == CODING_EOL_CR
2932                            || coding->eol_type == CODING_EOL_CRLF))
2933                 {
2934                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
2935                   goto label_end_of_loop;
2936                 }
2937             }
2938         }
2939       else
2940         {
2941           if (sjis_p)
2942             {
2943               if (c1 >= 0xF0)
2944                 goto label_invalid_code;
2945               if (c1 < 0xA0 || c1 >= 0xE0)
2946                 {
2947                   /* SJIS -> JISX0208 */
2948                   ONE_MORE_BYTE (c2);
2949                   if (c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
2950                     goto label_invalid_code;
2951                   DECODE_SJIS (c1, c2, c1, c2);
2952                   charset = charset_jisx0208;
2953                 }
2954               else
2955                 /* SJIS -> JISX0201-Kana */
2956                 charset = charset_katakana_jisx0201;
2957             }
2958           else
2959             {
2960               /* BIG5 -> Big5 */
2961               if (c1 < 0xA1 || c1 > 0xFE)
2962                 goto label_invalid_code;
2963               ONE_MORE_BYTE (c2);
2964               if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE)
2965                 goto label_invalid_code;
2966               DECODE_BIG5 (c1, c2, charset, c1, c2);
2967             }
2968         }
2969
2970       c = DECODE_ISO_CHARACTER (charset, c1, c2);
2971       EMIT_CHAR (c);
2972       continue;
2973
2974     label_invalid_code:
2975       coding->errors++;
2976       src = src_base;
2977       c = *src++;
2978       EMIT_CHAR (c);
2979     }
2980
2981  label_end_of_loop:
2982   coding->consumed = coding->consumed_char = src_base - source;
2983   coding->produced = dst - destination;
2984   return;
2985 }
2986
2987 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
2988    This function can encode charsets `ascii', `katakana-jisx0201',
2989    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
2990    are sure that all these charsets are registered as official charset
2991    (i.e. do not have extended leading-codes).  Characters of other
2992    charsets are produced without any encoding.  If SJIS_P is 1, encode
2993    SJIS text, else encode BIG5 text.  */
2994
2995 static void
2996 encode_coding_sjis_big5 (coding, source, destination,
2997                          src_bytes, dst_bytes, sjis_p)
2998      struct coding_system *coding;
2999      unsigned char *source, *destination;
3000      int src_bytes, dst_bytes;
3001      int sjis_p;
3002 {
3003   unsigned char *src = source;
3004   unsigned char *src_end = source + src_bytes;
3005   unsigned char *dst = destination;
3006   unsigned char *dst_end = destination + dst_bytes;
3007   /* SRC_BASE remembers the start position in source in each loop.
3008      The loop will be exited when there's not enough source text to
3009      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
3010      there's not enough destination area to produce encoded codes
3011      (within macro EMIT_BYTES).  */
3012   unsigned char *src_base;
3013   Lisp_Object translation_table;
3014
3015   if (NILP (Venable_character_translation))
3016     translation_table = Qnil;
3017   else
3018     {
3019       translation_table = coding->translation_table_for_encode;
3020       if (NILP (translation_table))
3021         translation_table = Vstandard_translation_table_for_encode;
3022     }
3023
3024   while (1)
3025     {
3026       int c, charset, c1, c2;
3027
3028       src_base = src;
3029       ONE_MORE_CHAR (c);
3030
3031       /* Now encode the character C.  */
3032       if (SINGLE_BYTE_CHAR_P (c))
3033         {
3034           switch (c)
3035             {
3036             case '\r':
3037               if (!coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
3038                 {
3039                   EMIT_ONE_BYTE (c);
3040                   break;
3041                 }
3042               c = '\n';
3043             case '\n':
3044               if (coding->eol_type == CODING_EOL_CRLF)
3045                 {
3046                   EMIT_TWO_BYTES ('\r', c);
3047                   break;
3048                 }
3049               else if (coding->eol_type == CODING_EOL_CR)
3050                 c = '\r';
3051             default:
3052               EMIT_ONE_BYTE (c);
3053             }
3054         }
3055       else
3056         {
3057           SPLIT_CHAR (c, charset, c1, c2);
3058           if (sjis_p)
3059             {
3060               if (charset == charset_jisx0208
3061                   || charset == charset_jisx0208_1978)
3062                 {
3063                   ENCODE_SJIS (c1, c2, c1, c2);
3064                   EMIT_TWO_BYTES (c1, c2);
3065                 }
3066               else if (charset == charset_katakana_jisx0201)
3067                 EMIT_ONE_BYTE (c1 | 0x80);
3068               else if (charset == charset_latin_jisx0201)
3069                 EMIT_ONE_BYTE (c1);
3070               else
3071                 /* There's no way other than producing the internal
3072                    codes as is.  */
3073                 EMIT_BYTES (src_base, src);
3074             }
3075           else
3076             {
3077               if (charset == charset_big5_1 || charset == charset_big5_2)
3078                 {
3079                   ENCODE_BIG5 (charset, c1, c2, c1, c2);
3080                   EMIT_TWO_BYTES (c1, c2);
3081                 }
3082               else
3083                 /* There's no way other than producing the internal
3084                    codes as is.  */
3085                 EMIT_BYTES (src_base, src);
3086             }
3087         }
3088       coding->consumed_char++;
3089     }
3090
3091  label_end_of_loop:
3092   coding->consumed = src_base - source;
3093   coding->produced = coding->produced_char = dst - destination;
3094 }
3095
3096 \f
3097 /*** 5. CCL handlers ***/
3098
3099 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3100    Check if a text is encoded in a coding system of which
3101    encoder/decoder are written in CCL program.  If it is, return
3102    CODING_CATEGORY_MASK_CCL, else return 0.  */
3103
3104 static int
3105 detect_coding_ccl (src, src_end, multibytep)
3106      unsigned char *src, *src_end;
3107      int multibytep;
3108 {
3109   unsigned char *valid;
3110   int c;
3111   /* Dummy for ONE_MORE_BYTE.  */
3112   struct coding_system dummy_coding;
3113   struct coding_system *coding = &dummy_coding;
3114
3115   /* No coding system is assigned to coding-category-ccl.  */
3116   if (!coding_system_table[CODING_CATEGORY_IDX_CCL])
3117     return 0;
3118
3119   valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
3120   while (1)
3121     {
3122       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
3123       if (! valid[c])
3124         return 0;
3125     }
3126  label_end_of_loop:
3127   return CODING_CATEGORY_MASK_CCL;
3128 }
3129
3130 \f
3131 /*** 6. End-of-line handlers ***/
3132
3133 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3134
3135 static void
3136 decode_eol (coding, source, destination, src_bytes, dst_bytes)
3137      struct coding_system *coding;
3138      unsigned char *source, *destination;
3139      int src_bytes, dst_bytes;
3140 {
3141   unsigned char *src = source;
3142   unsigned char *dst = destination;
3143   unsigned char *src_end = src + src_bytes;
3144   unsigned char *dst_end = dst + dst_bytes;
3145   Lisp_Object translation_table;
3146   /* SRC_BASE remembers the start position in source in each loop.
3147      The loop will be exited when there's not enough source code
3148      (within macro ONE_MORE_BYTE), or when there's not enough
3149      destination area to produce a character (within macro
3150      EMIT_CHAR).  */
3151   unsigned char *src_base;
3152   int c;
3153
3154   translation_table = Qnil;
3155   switch (coding->eol_type)
3156     {
3157     case CODING_EOL_CRLF:
3158       while (1)
3159         {
3160           src_base = src;
3161           ONE_MORE_BYTE (c);
3162           if (c == '\r')
3163             {
3164               ONE_MORE_BYTE (c);
3165               if (c != '\n')
3166                 {
3167                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
3168                     {
3169                       coding->result = CODING_FINISH_INCONSISTENT_EOL;
3170                       goto label_end_of_loop;
3171                     }
3172                   src--;
3173                   c = '\r';
3174                 }
3175             }
3176           else if (c == '\n'
3177                    && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
3178             {
3179               coding->result = CODING_FINISH_INCONSISTENT_EOL;
3180               goto label_end_of_loop;
3181             }
3182           EMIT_CHAR (c);
3183         }
3184       break;
3185
3186     case CODING_EOL_CR:
3187       while (1)
3188         {
3189           src_base = src;
3190           ONE_MORE_BYTE (c);
3191           if (c == '\n')
3192             {
3193               if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
3194                 {
3195                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
3196                   goto label_end_of_loop;
3197                 }
3198             }
3199           else if (c == '\r')
3200             c = '\n';
3201           EMIT_CHAR (c);
3202         }
3203       break;
3204
3205     default:                    /* no need for EOL handling */
3206       while (1)
3207         {
3208           src_base = src;
3209           ONE_MORE_BYTE (c);
3210           EMIT_CHAR (c);
3211         }
3212     }
3213
3214  label_end_of_loop:
3215   coding->consumed = coding->consumed_char = src_base - source;
3216   coding->produced = dst - destination;
3217   return;
3218 }
3219
3220 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
3221    format of end-of-line according to `coding->eol_type'.  It also
3222    convert multibyte form 8-bit characers to unibyte if
3223    CODING->src_multibyte is nonzero.  If `coding->mode &
3224    CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code '\r' in source text
3225    also means end-of-line.  */
3226
3227 static void
3228 encode_eol (coding, source, destination, src_bytes, dst_bytes)
3229      struct coding_system *coding;
3230      unsigned char *source, *destination;
3231      int src_bytes, dst_bytes;
3232 {
3233   unsigned char *src = source;
3234   unsigned char *dst = destination;
3235   unsigned char *src_end = src + src_bytes;
3236   unsigned char *dst_end = dst + dst_bytes;
3237   Lisp_Object translation_table;
3238   /* SRC_BASE remembers the start position in source in each loop.
3239      The loop will be exited when there's not enough source text to
3240      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
3241      there's not enough destination area to produce encoded codes
3242      (within macro EMIT_BYTES).  */
3243   unsigned char *src_base;
3244   int c;
3245   int selective_display = coding->mode & CODING_MODE_SELECTIVE_DISPLAY;
3246
3247   translation_table = Qnil;
3248   if (coding->src_multibyte
3249       && *(src_end - 1) == LEADING_CODE_8_BIT_CONTROL)
3250     {
3251       src_end--;
3252       src_bytes--;
3253       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
3254     }
3255
3256   if (coding->eol_type == CODING_EOL_CRLF)
3257     {
3258       while (src < src_end)
3259         {
3260           src_base = src;
3261           c = *src++;
3262           if (c >= 0x20)
3263             EMIT_ONE_BYTE (c);
3264           else if (c == '\n' || (c == '\r' && selective_display))
3265             EMIT_TWO_BYTES ('\r', '\n');
3266           else
3267             EMIT_ONE_BYTE (c);
3268         }
3269       src_base = src;
3270     label_end_of_loop:
3271       ;
3272     }
3273   else
3274     {
3275       if (!dst_bytes || src_bytes <= dst_bytes)
3276         {
3277           safe_bcopy (src, dst, src_bytes);
3278           src_base = src_end;
3279           dst += src_bytes;
3280         }
3281       else
3282         {
3283           if (coding->src_multibyte
3284               && *(src + dst_bytes - 1) == LEADING_CODE_8_BIT_CONTROL)
3285             dst_bytes--;
3286           safe_bcopy (src, dst, dst_bytes);
3287           src_base = src + dst_bytes;
3288           dst = destination + dst_bytes;
3289           coding->result = CODING_FINISH_INSUFFICIENT_DST;
3290         }
3291       if (coding->eol_type == CODING_EOL_CR)
3292         {
3293           for (src = destination; src < dst; src++)
3294             if (*src == '\n') *src = '\r';
3295         }
3296       else if (selective_display)
3297         {
3298           for (src = destination; src < dst; src++)
3299             if (*src == '\r') *src = '\n';
3300         }
3301     }
3302   if (coding->src_multibyte)
3303     dst = destination + str_as_unibyte (destination, dst - destination);
3304
3305   coding->consumed = src_base - source;
3306   coding->produced = dst - destination;
3307   coding->produced_char = coding->produced;
3308 }
3309
3310 \f
3311 /*** 7. C library functions ***/
3312
3313 /* In Emacs Lisp, coding system is represented by a Lisp symbol which
3314    has a property `coding-system'.  The value of this property is a
3315    vector of length 5 (called as coding-vector).  Among elements of
3316    this vector, the first (element[0]) and the fifth (element[4])
3317    carry important information for decoding/encoding.  Before
3318    decoding/encoding, this information should be set in fields of a
3319    structure of type `coding_system'.
3320
3321    A value of property `coding-system' can be a symbol of another
3322    subsidiary coding-system.  In that case, Emacs gets coding-vector
3323    from that symbol.
3324
3325    `element[0]' contains information to be set in `coding->type'.  The
3326    value and its meaning is as follows:
3327
3328    0 -- coding_type_emacs_mule
3329    1 -- coding_type_sjis
3330    2 -- coding_type_iso2022
3331    3 -- coding_type_big5
3332    4 -- coding_type_ccl encoder/decoder written in CCL
3333    nil -- coding_type_no_conversion
3334    t -- coding_type_undecided (automatic conversion on decoding,
3335                                no-conversion on encoding)
3336
3337    `element[4]' contains information to be set in `coding->flags' and
3338    `coding->spec'.  The meaning varies by `coding->type'.
3339
   If `coding->type' is `coding_type_iso2022', element[4] is a vector
   of length 32 (of which the first 17 sub-elements are used now).
   Meanings of these sub-elements are:
3343
3344    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
3345         If the value is an integer of valid charset, the charset is
3346         assumed to be designated to graphic register N initially.
3347
3348         If the value is minus, it is a minus value of charset which
3349         reserves graphic register N, which means that the charset is
3350         not designated initially but should be designated to graphic
3351         register N just before encoding a character in that charset.
3352
3353         If the value is nil, graphic register N is never used on
3354         encoding.
3355
   sub-element[N] where N is 4 through 16: to be set in `coding->flags'
        Each value takes t or nil.  See the section ISO2022 of
        `coding.h' for more information.
3359
3360    If `coding->type' is `coding_type_big5', element[4] is t to denote
3361    BIG5-ETen or nil to denote BIG5-HKU.
3362
3363    If `coding->type' takes the other value, element[4] is ignored.
3364
3365    Emacs Lisp's coding system also carries information about format of
3366    end-of-line in a value of property `eol-type'.  If the value is
3367    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
3368    means CODING_EOL_CR.  If it is not integer, it should be a vector
3369    of subsidiary coding systems of which property `eol-type' has one
3370    of above values.
3371
3372 */
3373
3374 /* Extract information for decoding/encoding from CODING_SYSTEM_SYMBOL
3375    and set it in CODING.  If CODING_SYSTEM_SYMBOL is invalid, CODING
3376    is setup so that no conversion is necessary and return -1, else
3377    return 0.  */
3378
3379 int
3380 setup_coding_system (coding_system, coding)
3381      Lisp_Object coding_system;
3382      struct coding_system *coding;
3383 {
3384   Lisp_Object coding_spec, coding_type, eol_type, plist;
3385   Lisp_Object val;
3386
3387   /* At first, zero clear all members.  */
3388   bzero (coding, sizeof (struct coding_system));
3389
3390   /* Initialize some fields required for all kinds of coding systems.  */
3391   coding->symbol = coding_system;
3392   coding->heading_ascii = -1;
3393   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
3394   coding->composing = COMPOSITION_DISABLED;
3395   coding->cmp_data = NULL;
3396
3397   if (NILP (coding_system))
3398     goto label_invalid_coding_system;
3399
3400   coding_spec = Fget (coding_system, Qcoding_system);
3401
3402   if (!VECTORP (coding_spec)
3403       || XVECTOR (coding_spec)->size != 5
3404       || !CONSP (XVECTOR (coding_spec)->contents[3]))
3405     goto label_invalid_coding_system;
3406
3407   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
3408   if (VECTORP (eol_type))
3409     {
3410       coding->eol_type = CODING_EOL_UNDECIDED;
3411       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
3412     }
3413   else if (XFASTINT (eol_type) == 1)
3414     {
3415       coding->eol_type = CODING_EOL_CRLF;
3416       coding->common_flags
3417         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3418     }
3419   else if (XFASTINT (eol_type) == 2)
3420     {
3421       coding->eol_type = CODING_EOL_CR;
3422       coding->common_flags
3423         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3424     }
3425   else
3426     coding->eol_type = CODING_EOL_LF;
3427
3428   coding_type = XVECTOR (coding_spec)->contents[0];
3429   /* Try short cut.  */
3430   if (SYMBOLP (coding_type))
3431     {
3432       if (EQ (coding_type, Qt))
3433         {
3434           coding->type = coding_type_undecided;
3435           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
3436         }
3437       else
3438         coding->type = coding_type_no_conversion;
3439       /* Initialize this member.  Any thing other than
3440          CODING_CATEGORY_IDX_UTF_16_BE and
3441          CODING_CATEGORY_IDX_UTF_16_LE are ok because they have
3442          special treatment in detect_eol.  */
3443       coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
3444
3445       return 0;
3446     }
3447
3448   /* Get values of coding system properties:
3449      `post-read-conversion', `pre-write-conversion',
3450      `translation-table-for-decode', `translation-table-for-encode'.  */
3451   plist = XVECTOR (coding_spec)->contents[3];
3452   /* Pre & post conversion functions should be disabled if
3453      inhibit_eol_conversion is nozero.  This is the case that a code
3454      conversion function is called while those functions are running.  */
3455   if (! inhibit_pre_post_conversion)
3456     {
3457       coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
3458       coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
3459     }
3460   val = Fplist_get (plist, Qtranslation_table_for_decode);
3461   if (SYMBOLP (val))
3462     val = Fget (val, Qtranslation_table_for_decode);
3463   coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
3464   val = Fplist_get (plist, Qtranslation_table_for_encode);
3465   if (SYMBOLP (val))
3466     val = Fget (val, Qtranslation_table_for_encode);
3467   coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
3468   val = Fplist_get (plist, Qcoding_category);
3469   if (!NILP (val))
3470     {
3471       val = Fget (val, Qcoding_category_index);
3472       if (INTEGERP (val))
3473         coding->category_idx = XINT (val);
3474       else
3475         goto label_invalid_coding_system;
3476     }
3477   else
3478     goto label_invalid_coding_system;
3479
3480   /* If the coding system has non-nil `composition' property, enable
3481      composition handling.  */
3482   val = Fplist_get (plist, Qcomposition);
3483   if (!NILP (val))
3484     coding->composing = COMPOSITION_NO;
3485
3486   switch (XFASTINT (coding_type))
3487     {
3488     case 0:
3489       coding->type = coding_type_emacs_mule;
3490       coding->common_flags
3491         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3492       coding->composing = COMPOSITION_NO;
3493       if (!NILP (coding->post_read_conversion))
3494         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
3495       if (!NILP (coding->pre_write_conversion))
3496         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
3497       break;
3498
3499     case 1:
3500       coding->type = coding_type_sjis;
3501       coding->common_flags
3502         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3503       break;
3504
3505     case 2:
3506       coding->type = coding_type_iso2022;
3507       coding->common_flags
3508         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3509       {
3510         Lisp_Object val, temp;
3511         Lisp_Object *flags;
3512         int i, charset, reg_bits = 0;
3513
3514         val = XVECTOR (coding_spec)->contents[4];
3515
3516         if (!VECTORP (val) || XVECTOR (val)->size != 32)
3517           goto label_invalid_coding_system;
3518
3519         flags = XVECTOR (val)->contents;
3520         coding->flags
3521           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
3522              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
3523              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
3524              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
3525              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
3526              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
3527              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
3528              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
3529              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
3530              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
3531              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
3532              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
3533              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
3534              );
3535
3536         /* Invoke graphic register 0 to plane 0.  */
3537         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
3538         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
3539         CODING_SPEC_ISO_INVOCATION (coding, 1)
3540           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
3541         /* Not single shifting at first.  */
3542         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
3543         /* Beginning of buffer should also be regarded as bol. */
3544         CODING_SPEC_ISO_BOL (coding) = 1;
3545
3546         for (charset = 0; charset <= MAX_CHARSET; charset++)
3547           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
3548         val = Vcharset_revision_alist;
3549         while (CONSP (val))
3550           {
3551             charset = get_charset_id (Fcar_safe (XCAR (val)));
3552             if (charset >= 0
3553                 && (temp = Fcdr_safe (XCAR (val)), INTEGERP (temp))
3554                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
3555               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
3556             val = XCDR (val);
3557           }
3558
3559         /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
3560            FLAGS[REG] can be one of below:
3561                 integer CHARSET: CHARSET occupies register I,
3562                 t: designate nothing to REG initially, but can be used
3563                   by any charsets,
3564                 list of integer, nil, or t: designate the first
3565                   element (if integer) to REG initially, the remaining
3566                   elements (if integer) is designated to REG on request,
3567                   if an element is t, REG can be used by any charsets,
3568                 nil: REG is never used.  */
3569         for (charset = 0; charset <= MAX_CHARSET; charset++)
3570           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3571             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
3572         for (i = 0; i < 4; i++)
3573           {
3574             if (INTEGERP (flags[i])
3575                 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
3576                 || (charset = get_charset_id (flags[i])) >= 0)
3577               {
3578                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3579                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
3580               }
3581             else if (EQ (flags[i], Qt))
3582               {
3583                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3584                 reg_bits |= 1 << i;
3585                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3586               }
3587             else if (CONSP (flags[i]))
3588               {
3589                 Lisp_Object tail;
3590                 tail = flags[i];
3591
3592                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3593                 if (INTEGERP (XCAR (tail))
3594                     && (charset = XINT (XCAR (tail)),
3595                         CHARSET_VALID_P (charset))
3596                     || (charset = get_charset_id (XCAR (tail))) >= 0)
3597                   {
3598                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3599                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
3600                   }
3601                 else
3602                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3603                 tail = XCDR (tail);
3604                 while (CONSP (tail))
3605                   {
3606                     if (INTEGERP (XCAR (tail))
3607                         && (charset = XINT (XCAR (tail)),
3608                             CHARSET_VALID_P (charset))
3609                         || (charset = get_charset_id (XCAR (tail))) >= 0)
3610                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3611                         = i;
3612                     else if (EQ (XCAR (tail), Qt))
3613                       reg_bits |= 1 << i;
3614                     tail = XCDR (tail);
3615                   }
3616               }
3617             else
3618               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3619
3620             CODING_SPEC_ISO_DESIGNATION (coding, i)
3621               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
3622           }
3623
3624         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
3625           {
3626             /* REG 1 can be used only by locking shift in 7-bit env.  */
3627             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
3628               reg_bits &= ~2;
3629             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3630               /* Without any shifting, only REG 0 and 1 can be used.  */
3631               reg_bits &= 3;
3632           }
3633
3634         if (reg_bits)
3635           for (charset = 0; charset <= MAX_CHARSET; charset++)
3636             {
3637               if (CHARSET_VALID_P (charset)
3638                   && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3639                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
3640                 {
3641                   /* There exist some default graphic registers to be
3642                      used by CHARSET.  */
3643
3644                   /* We had better avoid designating a charset of
3645                      CHARS96 to REG 0 as far as possible.  */
3646                   if (CHARSET_CHARS (charset) == 96)
3647                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3648                       = (reg_bits & 2
3649                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3650                   else
3651                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3652                       = (reg_bits & 1
3653                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3654                 }
3655             }
3656       }
3657       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3658       coding->spec.iso2022.last_invalid_designation_register = -1;
3659       break;
3660
3661     case 3:
3662       coding->type = coding_type_big5;
3663       coding->common_flags
3664         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3665       coding->flags
3666         = (NILP (XVECTOR (coding_spec)->contents[4])
3667            ? CODING_FLAG_BIG5_HKU
3668            : CODING_FLAG_BIG5_ETEN);
3669       break;
3670
3671     case 4:
3672       coding->type = coding_type_ccl;
3673       coding->common_flags
3674         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3675       {
3676         val = XVECTOR (coding_spec)->contents[4];
3677         if (! CONSP (val)
3678             || setup_ccl_program (&(coding->spec.ccl.decoder),
3679                                   XCAR (val)) < 0
3680             || setup_ccl_program (&(coding->spec.ccl.encoder),
3681                                   XCDR (val)) < 0)
3682           goto label_invalid_coding_system;
3683
3684         bzero (coding->spec.ccl.valid_codes, 256);
3685         val = Fplist_get (plist, Qvalid_codes);
3686         if (CONSP (val))
3687           {
3688             Lisp_Object this;
3689
3690             for (; CONSP (val); val = XCDR (val))
3691               {
3692                 this = XCAR (val);
3693                 if (INTEGERP (this)
3694                     && XINT (this) >= 0 && XINT (this) < 256)
3695                   coding->spec.ccl.valid_codes[XINT (this)] = 1;
3696                 else if (CONSP (this)
3697                          && INTEGERP (XCAR (this))
3698                          && INTEGERP (XCDR (this)))
3699                   {
3700                     int start = XINT (XCAR (this));
3701                     int end = XINT (XCDR (this));
3702
3703                     if (start >= 0 && start <= end && end < 256)
3704                       while (start <= end)
3705                         coding->spec.ccl.valid_codes[start++] = 1;
3706                   }
3707               }
3708           }
3709       }
3710       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3711       coding->spec.ccl.cr_carryover = 0;
3712       coding->spec.ccl.eight_bit_carryover[0] = 0;
3713       break;
3714
3715     case 5:
3716       coding->type = coding_type_raw_text;
3717       break;
3718
3719     default:
3720       goto label_invalid_coding_system;
3721     }
3722   return 0;
3723
3724  label_invalid_coding_system:
3725   coding->type = coding_type_no_conversion;
3726   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3727   coding->common_flags = 0;
3728   coding->eol_type = CODING_EOL_LF;
3729   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3730   return -1;
3731 }
3732
3733 /* Free memory blocks allocated for storing composition information.  */
3734
3735 void
3736 coding_free_composition_data (coding)
3737      struct coding_system *coding;
3738 {
3739   struct composition_data *cmp_data = coding->cmp_data, *next;
3740
3741   if (!cmp_data)
3742     return;
3743   /* Memory blocks are chained.  At first, rewind to the first, then,
3744      free blocks one by one.  */
3745   while (cmp_data->prev)
3746     cmp_data = cmp_data->prev;
3747   while (cmp_data)
3748     {
3749       next = cmp_data->next;
3750       xfree (cmp_data);
3751       cmp_data = next;
3752     }
3753   coding->cmp_data = NULL;
3754 }
3755
3756 /* Set `char_offset' member of all memory blocks pointed by
3757    coding->cmp_data to POS.  */
3758
3759 void
3760 coding_adjust_composition_offset (coding, pos)
3761      struct coding_system *coding;
3762      int pos;
3763 {
3764   struct composition_data *cmp_data;
3765
3766   for (cmp_data = coding->cmp_data; cmp_data; cmp_data = cmp_data->next)
3767     cmp_data->char_offset = pos;
3768 }
3769
3770 /* Setup raw-text or one of its subsidiaries in the structure
3771    coding_system CODING according to the already setup value eol_type
3772    in CODING.  CODING should be setup for some coding system in
3773    advance.  */
3774
3775 void
3776 setup_raw_text_coding_system (coding)
3777      struct coding_system *coding;
3778 {
3779   if (coding->type != coding_type_raw_text)
3780     {
3781       coding->symbol = Qraw_text;
3782       coding->type = coding_type_raw_text;
3783       if (coding->eol_type != CODING_EOL_UNDECIDED)
3784         {
3785           Lisp_Object subsidiaries;
3786           subsidiaries = Fget (Qraw_text, Qeol_type);
3787
3788           if (VECTORP (subsidiaries)
3789               && XVECTOR (subsidiaries)->size == 3)
3790             coding->symbol
3791               = XVECTOR (subsidiaries)->contents[coding->eol_type];
3792         }
3793       setup_coding_system (coding->symbol, coding);
3794     }
3795   return;
3796 }
3797
3798 /* Emacs has a mechanism to automatically detect a coding system if it
3799    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
3800    it's impossible to distinguish some coding systems accurately
3801    because they use the same range of codes.  So, at first, coding
3802    systems are grouped into the following categories:
3803
3804    o coding-category-emacs-mule
3805
3806         The category for a coding system which has the same code range
3807         as Emacs' internal format.  Assigned the coding-system (Lisp
3808         symbol) `emacs-mule' by default.
3809
3810    o coding-category-sjis
3811
3812         The category for a coding system which has the same code range
3813         as SJIS.  Assigned the coding-system (Lisp
3814         symbol) `japanese-shift-jis' by default.
3815
3816    o coding-category-iso-7
3817
3818         The category for a coding system which has the same code range
3819         as ISO2022 of 7-bit environment.  This doesn't use any locking
3820         shift and single shift functions.  This can encode/decode all
3821         charsets.  Assigned the coding-system (Lisp symbol)
3822         `iso-2022-7bit' by default.
3823
3824    o coding-category-iso-7-tight
3825
3826         Same as coding-category-iso-7 except that this can
3827         encode/decode only the specified charsets.
3828
3829    o coding-category-iso-8-1
3830
3831         The category for a coding system which has the same code range
3832         as ISO2022 of 8-bit environment and graphic plane 1 used only
3833         for DIMENSION1 charset.  This doesn't use any locking shift
3834         and single shift functions.  Assigned the coding-system (Lisp
3835         symbol) `iso-latin-1' by default.
3836
3837    o coding-category-iso-8-2
3838
3839         The category for a coding system which has the same code range
3840         as ISO2022 of 8-bit environment and graphic plane 1 used only
3841         for DIMENSION2 charset.  This doesn't use any locking shift
3842         and single shift functions.  Assigned the coding-system (Lisp
3843         symbol) `japanese-iso-8bit' by default.
3844
3845    o coding-category-iso-7-else
3846
3847         The category for a coding system which has the same code range
3848         as ISO2022 of 7-bit environment but uses locking shift or
3849         single shift functions.  Assigned the coding-system (Lisp
3850         symbol) `iso-2022-7bit-lock' by default.
3851
3852    o coding-category-iso-8-else
3853
3854         The category for a coding system which has the same code range
3855         as ISO2022 of 8-bit environment but uses locking shift or
3856         single shift functions.  Assigned the coding-system (Lisp
3857         symbol) `iso-2022-8bit-ss2' by default.
3858
3859    o coding-category-big5
3860
3861         The category for a coding system which has the same code range
3862         as BIG5.  Assigned the coding-system (Lisp symbol)
3863         `cn-big5' by default.
3864
3865    o coding-category-utf-8
3866
3867         The category for a coding system which has the same code range
3868         as UTF-8 (cf. RFC2279).  Assigned the coding-system (Lisp
3869         symbol) `utf-8' by default.
3870
3871    o coding-category-utf-16-be
3872
3873         The category for a coding system in which a text has an
3874         Unicode signature (cf. Unicode Standard) in the order of BIG
3875         endian at the head.  Assigned the coding-system (Lisp symbol)
3876         `utf-16-be' by default.
3877
3878    o coding-category-utf-16-le
3879
3880         The category for a coding system in which a text has an
3881         Unicode signature (cf. Unicode Standard) in the order of
3882         LITTLE endian at the head.  Assigned the coding-system (Lisp
3883         symbol) `utf-16-le' by default.
3884
3885    o coding-category-ccl
3886
3887         The category for a coding system of which encoder/decoder is
3888         written in CCL programs.  The default value is nil, i.e., no
3889         coding system is assigned.
3890
3891    o coding-category-binary
3892
3893         The category for a coding system not categorized in any of the
3894         above.  Assigned the coding-system (Lisp symbol)
3895         `no-conversion' by default.
3896
3897    Each of them is a Lisp symbol and the value is an actual
3898    `coding-system's (this is also a Lisp symbol) assigned by a user.
3899    What Emacs does actually is to detect a category of coding system.
3900    Then, it uses a `coding-system' assigned to it.  If Emacs can't
3901    decide only one possible category, it selects a category of the
3902    highest priority.  Priorities of categories are also specified by a
3903    user in a Lisp variable `coding-category-list'.
3904
3905 */
3906
3907 static
3908 int ascii_skip_code[256];
3909
3910 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
3911    If it detects possible coding systems, return an integer in which
3912    appropriate flag bits are set.  Flag bits are defined by macros
3913    CODING_CATEGORY_MASK_XXX in `coding.h'.  If PRIORITIES is non-NULL,
3914    it should point the table `coding_priorities'.  In that case, only
3915    the flag bit for a coding system of the highest priority is set in
3916    the returned value.  If MULTIBYTEP is nonzero, 8-bit codes of the
3917    range 0x80..0x9F are in multibyte form.
3918
3919    How many ASCII characters are at the head is returned as *SKIP.  */
3920
3921 static int
3922 detect_coding_mask (source, src_bytes, priorities, skip, multibytep)
3923      unsigned char *source;
3924      int src_bytes, *priorities, *skip;
3925      int multibytep;
3926 {
3927   register unsigned char c;
3928   unsigned char *src = source, *src_end = source + src_bytes;
3929   unsigned int mask, utf16_examined_p, iso2022_examined_p;
3930   int i;
3931
3932   /* At first, skip all ASCII characters and control characters except
3933      for three ISO2022 specific control characters.  */
3934   ascii_skip_code[ISO_CODE_SO] = 0;
3935   ascii_skip_code[ISO_CODE_SI] = 0;
3936   ascii_skip_code[ISO_CODE_ESC] = 0;
3937
3938  label_loop_detect_coding:
3939   while (src < src_end && ascii_skip_code[*src]) src++;
3940   *skip = src - source;
3941
3942   if (src >= src_end)
3943     /* We found nothing other than ASCII.  There's nothing to do.  */
3944     return 0;
3945
3946   c = *src;
3947   /* The text seems to be encoded in some multilingual coding system.
3948      Now, try to find in which coding system the text is encoded.  */
3949   if (c < 0x80)
3950     {
3951       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
3952       /* C is an ISO2022 specific control code of C0.  */
3953       mask = detect_coding_iso2022 (src, src_end, multibytep);
3954       if (mask == 0)
3955         {
3956           /* No valid ISO2022 code follows C.  Try again.  */
3957           src++;
3958           if (c == ISO_CODE_ESC)
3959             ascii_skip_code[ISO_CODE_ESC] = 1;
3960           else
3961             ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
3962           goto label_loop_detect_coding;
3963         }
3964       if (priorities)
3965         {
3966           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3967             {
3968               if (mask & priorities[i])
3969                 return priorities[i];
3970             }
3971           return CODING_CATEGORY_MASK_RAW_TEXT;
3972         }
3973     }
3974   else
3975     {
3976       int try;
3977
3978       if (multibytep && c == LEADING_CODE_8_BIT_CONTROL)
3979         c = *src++ - 0x20;
3980
3981       if (c < 0xA0)
3982         {
3983           /* C is the first byte of SJIS character code,
3984              or a leading-code of Emacs' internal format (emacs-mule),
3985              or the first byte of UTF-16.  */
3986           try = (CODING_CATEGORY_MASK_SJIS
3987                   | CODING_CATEGORY_MASK_EMACS_MULE
3988                   | CODING_CATEGORY_MASK_UTF_16_BE
3989                   | CODING_CATEGORY_MASK_UTF_16_LE);
3990
3991           /* Or, if C is a special latin extra code,
3992              or is an ISO2022 specific control code of C1 (SS2 or SS3),
3993              or is an ISO2022 control-sequence-introducer (CSI),
3994              we should also consider the possibility of ISO2022 codings.  */
3995           if ((VECTORP (Vlatin_extra_code_table)
3996                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3997               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
3998               || (c == ISO_CODE_CSI
3999                   && (src < src_end
4000                       && (*src == ']'
4001                           || ((*src == '0' || *src == '1' || *src == '2')
4002                               && src + 1 < src_end
4003                               && src[1] == ']')))))
4004             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
4005                      | CODING_CATEGORY_MASK_ISO_8BIT);
4006         }
4007       else
4008         /* C is a character of ISO2022 in graphic plane right,
4009            or a SJIS's 1-byte character code (i.e. JISX0201),
4010            or the first byte of BIG5's 2-byte code,
4011            or the first byte of UTF-8/16.  */
4012         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
4013                 | CODING_CATEGORY_MASK_ISO_8BIT
4014                 | CODING_CATEGORY_MASK_SJIS
4015                 | CODING_CATEGORY_MASK_BIG5
4016                 | CODING_CATEGORY_MASK_UTF_8
4017                 | CODING_CATEGORY_MASK_UTF_16_BE
4018                 | CODING_CATEGORY_MASK_UTF_16_LE);
4019
4020       /* Or, we may have to consider the possibility of CCL.  */
4021       if (coding_system_table[CODING_CATEGORY_IDX_CCL]
4022           && (coding_system_table[CODING_CATEGORY_IDX_CCL]
4023               ->spec.ccl.valid_codes)[c])
4024         try |= CODING_CATEGORY_MASK_CCL;
4025
4026       mask = 0;
4027       utf16_examined_p = iso2022_examined_p = 0;
4028       if (priorities)
4029         {
4030           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
4031             {
4032               if (!iso2022_examined_p
4033                   && (priorities[i] & try & CODING_CATEGORY_MASK_ISO))
4034                 {
4035                   mask |= detect_coding_iso2022 (src, src_end);
4036                   iso2022_examined_p = 1;
4037                 }
4038               else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
4039                 mask |= detect_coding_sjis (src, src_end, multibytep);
4040               else if (priorities[i] & try & CODING_CATEGORY_MASK_UTF_8)
4041                 mask |= detect_coding_utf_8 (src, src_end, multibytep);
4042               else if (!utf16_examined_p
4043                        && (priorities[i] & try &
4044                            CODING_CATEGORY_MASK_UTF_16_BE_LE))
4045                 {
4046                   mask |= detect_coding_utf_16 (src, src_end, multibytep);
4047                   utf16_examined_p = 1;
4048                 }
4049               else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
4050                 mask |= detect_coding_big5 (src, src_end, multibytep);
4051               else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
4052                 mask |= detect_coding_emacs_mule (src, src_end, multibytep);
4053               else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
4054                 mask |= detect_coding_ccl (src, src_end, multibytep);
4055               else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
4056                 mask |= CODING_CATEGORY_MASK_RAW_TEXT;
4057               else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
4058                 mask |= CODING_CATEGORY_MASK_BINARY;
4059               if (mask & priorities[i])
4060                 return priorities[i];
4061             }
4062           return CODING_CATEGORY_MASK_RAW_TEXT;
4063         }
4064       if (try & CODING_CATEGORY_MASK_ISO)
4065         mask |= detect_coding_iso2022 (src, src_end, multibytep);
4066       if (try & CODING_CATEGORY_MASK_SJIS)
4067         mask |= detect_coding_sjis (src, src_end, multibytep);
4068       if (try & CODING_CATEGORY_MASK_BIG5)
4069         mask |= detect_coding_big5 (src, src_end, multibytep);
4070       if (try & CODING_CATEGORY_MASK_UTF_8)
4071         mask |= detect_coding_utf_8 (src, src_end, multibytep);
4072       if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE)
4073         mask |= detect_coding_utf_16 (src, src_end, multibytep);
4074       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
4075         mask |= detect_coding_emacs_mule (src, src_end, multibytep);
4076       if (try & CODING_CATEGORY_MASK_CCL)
4077         mask |= detect_coding_ccl (src, src_end, multibytep);
4078     }
4079   return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
4080 }
4081
4082 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
4083    The information of the detected coding system is set in CODING.  */
4084
4085 void
4086 detect_coding (coding, src, src_bytes)
4087      struct coding_system *coding;
4088      unsigned char *src;
4089      int src_bytes;
4090 {
4091   unsigned int idx;
4092   int skip, mask;
4093   Lisp_Object val;
4094
4095   val = Vcoding_category_list;
4096   mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip,
4097                              coding->src_multibyte);
4098   coding->heading_ascii = skip;
4099
4100   if (!mask) return;
4101
4102   /* We found a single coding system of the highest priority in MASK.  */
4103   idx = 0;
4104   while (mask && ! (mask & 1)) mask >>= 1, idx++;
4105   if (! mask)
4106     idx = CODING_CATEGORY_IDX_RAW_TEXT;
4107
4108   val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[idx])->value;
4109
4110   if (coding->eol_type != CODING_EOL_UNDECIDED)
4111     {
4112       Lisp_Object tmp;
4113
4114       tmp = Fget (val, Qeol_type);
4115       if (VECTORP (tmp))
4116         val = XVECTOR (tmp)->contents[coding->eol_type];
4117     }
4118
4119   /* Setup this new coding system while preserving some slots.  */
4120   {
4121     int src_multibyte = coding->src_multibyte;
4122     int dst_multibyte = coding->dst_multibyte;
4123
4124     setup_coding_system (val, coding);
4125     coding->src_multibyte = src_multibyte;
4126     coding->dst_multibyte = dst_multibyte;
4127     coding->heading_ascii = skip;
4128   }
4129 }
4130
4131 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
4132    SOURCE is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
4133    CODING_EOL_CR, and CODING_EOL_UNDECIDED.
4134
4135    How many non-eol characters are at the head is returned as *SKIP.  */
4136
4137 #define MAX_EOL_CHECK_COUNT 3
4138
4139 static int
4140 detect_eol_type (source, src_bytes, skip)
4141      unsigned char *source;
4142      int src_bytes, *skip;
4143 {
4144   unsigned char *src = source, *src_end = src + src_bytes;
4145   unsigned char c;
4146   int total = 0;                /* How many end-of-lines are found so far.  */
4147   int eol_type = CODING_EOL_UNDECIDED;
4148   int this_eol_type;
4149
4150   *skip = 0;
4151
4152   while (src < src_end && total < MAX_EOL_CHECK_COUNT)
4153     {
4154       c = *src++;
4155       if (c == '\n' || c == '\r')
4156         {
4157           if (*skip == 0)
4158             *skip = src - 1 - source;
4159           total++;
4160           if (c == '\n')
4161             this_eol_type = CODING_EOL_LF;
4162           else if (src >= src_end || *src != '\n')
4163             this_eol_type = CODING_EOL_CR;
4164           else
4165             this_eol_type = CODING_EOL_CRLF, src++;
4166
4167           if (eol_type == CODING_EOL_UNDECIDED)
4168             /* This is the first end-of-line.  */
4169             eol_type = this_eol_type;
4170           else if (eol_type != this_eol_type)
4171             {
4172               /* The found type is different from what found before.  */
4173               eol_type = CODING_EOL_INCONSISTENT;
4174               break;
4175             }
4176         }
4177     }
4178
4179   if (*skip == 0)
4180     *skip = src_end - source;
4181   return eol_type;
4182 }
4183
4184 /* Like detect_eol_type, but detect EOL type in 2-octet
4185    big-endian/little-endian format for coding systems utf-16-be and
4186    utf-16-le.  */
4187
4188 static int
4189 detect_eol_type_in_2_octet_form (source, src_bytes, skip, big_endian_p)
4190      unsigned char *source;
4191      int src_bytes, *skip;
4192 {
4193   unsigned char *src = source, *src_end = src + src_bytes;
4194   unsigned int c1, c2;
4195   int total = 0;                /* How many end-of-lines are found so far.  */
4196   int eol_type = CODING_EOL_UNDECIDED;
4197   int this_eol_type;
4198   int msb, lsb;
4199
4200   if (big_endian_p)
4201     msb = 0, lsb = 1;
4202   else
4203     msb = 1, lsb = 0;
4204
4205   *skip = 0;
4206
4207   while ((src + 1) < src_end && total < MAX_EOL_CHECK_COUNT)
4208     {
4209       c1 = (src[msb] << 8) | (src[lsb]);
4210       src += 2;
4211
4212       if (c1 == '\n' || c1 == '\r')
4213         {
4214           if (*skip == 0)
4215             *skip = src - 2 - source;
4216           total++;
4217           if (c1 == '\n')
4218             {
4219               this_eol_type = CODING_EOL_LF;
4220             }
4221           else
4222             {
4223               if ((src + 1) >= src_end)
4224                 {
4225                   this_eol_type = CODING_EOL_CR;
4226                 }
4227               else
4228                 {
4229                   c2 = (src[msb] << 8) | (src[lsb]);
4230                   if (c2 == '\n')
4231                     this_eol_type = CODING_EOL_CRLF, src += 2;
4232                   else
4233                     this_eol_type = CODING_EOL_CR;
4234                 }
4235             }
4236
4237           if (eol_type == CODING_EOL_UNDECIDED)
4238             /* This is the first end-of-line.  */
4239             eol_type = this_eol_type;
4240           else if (eol_type != this_eol_type)
4241             {
4242               /* The found type is different from what found before.  */
4243               eol_type = CODING_EOL_INCONSISTENT;
4244               break;
4245             }
4246         }
4247     }
4248
4249   if (*skip == 0)
4250     *skip = src_end - source;
4251   return eol_type;
4252 }
4253
4254 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
4255    is encoded.  If it detects an appropriate format of end-of-line, it
4256    sets the information in *CODING.  */
4257
4258 void
4259 detect_eol (coding, src, src_bytes)
4260      struct coding_system *coding;
4261      unsigned char *src;
4262      int src_bytes;
4263 {
4264   Lisp_Object val;
4265   int skip;
4266   int eol_type;
4267
4268   switch (coding->category_idx)
4269     {
4270     case CODING_CATEGORY_IDX_UTF_16_BE:
4271       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 1);
4272       break;
4273     case CODING_CATEGORY_IDX_UTF_16_LE:
4274       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 0);
4275       break;
4276     default:
4277       eol_type = detect_eol_type (src, src_bytes, &skip);
4278       break;
4279     }
4280
4281   if (coding->heading_ascii > skip)
4282     coding->heading_ascii = skip;
4283   else
4284     skip = coding->heading_ascii;
4285
4286   if (eol_type == CODING_EOL_UNDECIDED)
4287     return;
4288   if (eol_type == CODING_EOL_INCONSISTENT)
4289     {
4290 #if 0
4291       /* This code is suppressed until we find a better way to
4292          distinguish raw text file and binary file.  */
4293
4294       /* If we have already detected that the coding is raw-text, the
4295          coding should actually be no-conversion.  */
4296       if (coding->type == coding_type_raw_text)
4297         {
4298           setup_coding_system (Qno_conversion, coding);
4299           return;
4300         }
4301       /* Else, let's decode only text code anyway.  */
4302 #endif /* 0 */
4303       eol_type = CODING_EOL_LF;
4304     }
4305
4306   val = Fget (coding->symbol, Qeol_type);
4307   if (VECTORP (val) && XVECTOR (val)->size == 3)
4308     {
4309       int src_multibyte = coding->src_multibyte;
4310       int dst_multibyte = coding->dst_multibyte;
4311
4312       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
4313       coding->src_multibyte = src_multibyte;
4314       coding->dst_multibyte = dst_multibyte;
4315       coding->heading_ascii = skip;
4316     }
4317 }
4318
4319 #define CONVERSION_BUFFER_EXTRA_ROOM 256
4320
4321 #define DECODING_BUFFER_MAG(coding)                     \
4322   (coding->type == coding_type_iso2022                  \
4323    ? 3                                                  \
4324    : (coding->type == coding_type_ccl                   \
4325       ? coding->spec.ccl.decoder.buf_magnification      \
4326       : 2))
4327
4328 /* Return maximum size (bytes) of a buffer enough for decoding
4329    SRC_BYTES of text encoded in CODING.  */
4330
4331 int
4332 decoding_buffer_size (coding, src_bytes)
4333      struct coding_system *coding;
4334      int src_bytes;
4335 {
4336   return (src_bytes * DECODING_BUFFER_MAG (coding)
4337           + CONVERSION_BUFFER_EXTRA_ROOM);
4338 }
4339
4340 /* Return maximum size (bytes) of a buffer enough for encoding
4341    SRC_BYTES of text to CODING.  */
4342
4343 int
4344 encoding_buffer_size (coding, src_bytes)
4345      struct coding_system *coding;
4346      int src_bytes;
4347 {
4348   int magnification;
4349
4350   if (coding->type == coding_type_ccl)
4351     magnification = coding->spec.ccl.encoder.buf_magnification;
4352   else if (CODING_REQUIRE_ENCODING (coding))
4353     magnification = 3;
4354   else
4355     magnification = 1;
4356
4357   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
4358 }
4359
/* Working buffer for code conversion.  */
struct conversion_buffer
{
  int size;                     /* size of data.  */
  int on_stack;                 /* 1 if allocated by alloca.  */
  unsigned char *data;          /* the allocated memory.  */
};

/* Don't use alloca for allocating memory space larger than this, lest
   we overflow their stack.  The value is parenthesized so that it can
   be used safely inside any expression.  */
#define MAX_ALLOCA (16*1024)

/* Allocate LEN bytes of memory for BUF (struct conversion_buffer).
   Memory comes from alloca when LEN is less than MAX_ALLOCA, and from
   xmalloc otherwise; BUF.on_stack records which.  This has to stay a
   macro because alloca must run in the caller's frame.  BUF and LEN
   are evaluated more than once, so they must not have side
   effects.  */
#define allocate_conversion_buffer(buf, len)            \
  do {                                                  \
    if ((len) < MAX_ALLOCA)                             \
      {                                                 \
        (buf).data = (unsigned char *) alloca (len);    \
        (buf).on_stack = 1;                             \
      }                                                 \
    else                                                \
      {                                                 \
        (buf).data = (unsigned char *) xmalloc (len);   \
        (buf).on_stack = 0;                             \
      }                                                 \
    (buf).size = (len);                                 \
  } while (0)
4387
4388 /* Double the allocated memory for *BUF.  */
4389 static void
4390 extend_conversion_buffer (buf)
4391      struct conversion_buffer *buf;
4392 {
4393   if (buf->on_stack)
4394     {
4395       unsigned char *save = buf->data;
4396       buf->data = (unsigned char *) xmalloc (buf->size * 2);
4397       bcopy (save, buf->data, buf->size);
4398       buf->on_stack = 0;
4399     }
4400   else
4401     {
4402       buf->data = (unsigned char *) xrealloc (buf->data, buf->size * 2);
4403     }
4404   buf->size *= 2;
4405 }
4406
4407 /* Free the allocated memory for BUF if it is not on stack.  */
4408 static void
4409 free_conversion_buffer (buf)
4410      struct conversion_buffer *buf;
4411 {
4412   if (!buf->on_stack)
4413     xfree (buf->data);
4414 }
4415
/* Run the CCL program of CODING on SRC_BYTES bytes at SOURCE and store
   the result at DESTINATION.  The encoder program is used if ENCODEP
   is nonzero, the decoder program otherwise.

   On return CODING->consumed, CODING->produced, CODING->produced_char
   and CODING->result are set; the value returned is CODING->result.

   A DST_BYTES of zero is treated specially below: it is not adjusted
   for carryover bytes, and the multibyte fix-up then uses
   SOURCE + CODING->consumed - DESTINATION as the available size.

   When decoding, trailing 8-bit bytes that might be completed by the
   next call are kept in CODING->spec.ccl.eight_bit_carryover and are
   put back in front of the output of the next call.  */
int
ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
     struct coding_system *coding;
     unsigned char *source, *destination;
     int src_bytes, dst_bytes, encodep;
{
  struct ccl_program *ccl
    = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
  unsigned char *dst = destination;

  ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
  if (encodep)
    {
      /* On encoding, EOL format is converted within ccl_driver.  For
         that, set up the proper information in the structure CCL.  */
      ccl->eol_type = coding->eol_type;
      if (ccl->eol_type ==CODING_EOL_UNDECIDED)
        ccl->eol_type = CODING_EOL_LF;
      ccl->cr_consumed = coding->spec.ccl.cr_carryover;
    }
  ccl->multibyte = coding->src_multibyte;
  if (coding->spec.ccl.eight_bit_carryover[0] != 0)
    {
      /* Move carryover bytes (a NUL-terminated sequence saved by the
         previous call) to DESTINATION.  */
      unsigned char *p = coding->spec.ccl.eight_bit_carryover;
      while (*p)
        *dst++ = *p++;
      coding->spec.ccl.eight_bit_carryover[0] = 0;
      if (dst_bytes)
        dst_bytes -= dst - destination;
    }

  /* The produced count includes the carryover bytes copied above.  */
  coding->produced = (ccl_driver (ccl, source, dst, src_bytes, dst_bytes,
                                  &(coding->consumed))
                      + dst - destination);

  if (encodep)
    {
      /* The produced character count is taken to be the byte count.  */
      coding->produced_char = coding->produced;
      coding->spec.ccl.cr_carryover = ccl->cr_consumed;
    }
  else
    {
      /* On decoding, the destination should always be multibyte.
         But the CCL program might have generated an invalid
         multibyte sequence.  Here we make such a sequence valid as
         multibyte.  */
      int bytes
        = dst_bytes ? dst_bytes : source + coding->consumed - destination;

      if ((coding->consumed < src_bytes
           || !ccl->last_block)
          && coding->produced >= 1
          && destination[coding->produced - 1] >= 0x80)
        {
          /* We should not convert the trailing 8-bit codes to
             multibyte form even if they don't form a valid
             multibyte sequence.  They may form a valid sequence in
             the next call.  */
          int carryover = 0;

          /* Look back over at most three trailing 8-bit bytes for one
             in the range 0x80..0x9F; everything from that byte on is
             carried over.  NOTE(review): presumably 0x80..0x9F are the
             leading codes of Emacs' internal multibyte form -- confirm
             against the charset definitions.  */
          if (destination[coding->produced - 1] < 0xA0)
            carryover = 1;
          else if (coding->produced >= 2)
            {
              if (destination[coding->produced - 2] >= 0x80)
                {
                  if (destination[coding->produced - 2] < 0xA0)
                    carryover = 2;
                  else if (coding->produced >= 3
                           && destination[coding->produced - 3] >= 0x80
                           && destination[coding->produced - 3] < 0xA0)
                    carryover = 3;
                }
            }
          if (carryover > 0)
            {
              /* Save the bytes as a NUL-terminated sequence and drop
                 them from the output of this call.  */
              BCOPY_SHORT (destination + coding->produced - carryover,
                           coding->spec.ccl.eight_bit_carryover,
                           carryover);
              coding->spec.ccl.eight_bit_carryover[carryover] = 0;
              coding->produced -= carryover;
            }
        }
      coding->produced = str_as_multibyte (destination, bytes,
                                           coding->produced,
                                           &(coding->produced_char));
    }

  /* Translate the status of the CCL program into a conversion result.  */
  switch (ccl->status)
    {
    case CCL_STAT_SUSPEND_BY_SRC:
      coding->result = CODING_FINISH_INSUFFICIENT_SRC;
      break;
    case CCL_STAT_SUSPEND_BY_DST:
      coding->result = CODING_FINISH_INSUFFICIENT_DST;
      break;
    case CCL_STAT_QUIT:
    case CCL_STAT_INVALID_CMD:
      coding->result = CODING_FINISH_INTERRUPT;
      break;
    default:
      coding->result = CODING_FINISH_NORMAL;
      break;
    }
  return coding->result;
}
4523
4524 /* Decode EOL format of the text at PTR of BYTES length destructively
4525    according to CODING->eol_type.  This is called after the CCL
4526    program produced a decoded text at PTR.  If we do CRLF->LF
4527    conversion, update CODING->produced and CODING->produced_char.  */
4528
4529 static void
4530 decode_eol_post_ccl (coding, ptr, bytes)
4531      struct coding_system *coding;
4532      unsigned char *ptr;
4533      int bytes;
4534 {
4535   Lisp_Object val, saved_coding_symbol;
4536   unsigned char *pend = ptr + bytes;
4537   int dummy;
4538
4539   /* Remember the current coding system symbol.  We set it back when
4540      an inconsistent EOL is found so that `last-coding-system-used' is
4541      set to the coding system that doesn't specify EOL conversion.  */
4542   saved_coding_symbol = coding->symbol;
4543
4544   coding->spec.ccl.cr_carryover = 0;
4545   if (coding->eol_type == CODING_EOL_UNDECIDED)
4546     {
4547       /* Here, to avoid the call of setup_coding_system, we directly
4548          call detect_eol_type.  */
4549       coding->eol_type = detect_eol_type (ptr, bytes, &dummy);
4550       if (coding->eol_type == CODING_EOL_INCONSISTENT)
4551         coding->eol_type = CODING_EOL_LF;
4552       if (coding->eol_type != CODING_EOL_UNDECIDED)
4553         {
4554           val = Fget (coding->symbol, Qeol_type);
4555           if (VECTORP (val) && XVECTOR (val)->size == 3)
4556             coding->symbol = XVECTOR (val)->contents[coding->eol_type];
4557         }
4558       coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4559     }
4560
4561   if (coding->eol_type == CODING_EOL_LF
4562       || coding->eol_type == CODING_EOL_UNDECIDED)
4563     {
4564       /* We have nothing to do.  */
4565       ptr = pend;
4566     }
4567   else if (coding->eol_type == CODING_EOL_CRLF)
4568     {
4569       unsigned char *pstart = ptr, *p = ptr;
4570
4571       if (! (coding->mode & CODING_MODE_LAST_BLOCK)
4572           && *(pend - 1) == '\r')
4573         {
4574           /* If the last character is CR, we can't handle it here
4575              because LF will be in the not-yet-decoded source text.
4576              Recorded that the CR is not yet processed.  */
4577           coding->spec.ccl.cr_carryover = 1;
4578           coding->produced--;
4579           coding->produced_char--;
4580           pend--;
4581         }
4582       while (ptr < pend)
4583         {
4584           if (*ptr == '\r')
4585             {
4586               if (ptr + 1 < pend && *(ptr + 1) == '\n')
4587                 {
4588                   *p++ = '\n';
4589                   ptr += 2;
4590                 }
4591               else
4592                 {
4593                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4594                     goto undo_eol_conversion;
4595                   *p++ = *ptr++;
4596                 }
4597             }
4598           else if (*ptr == '\n'
4599                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4600             goto undo_eol_conversion;
4601           else
4602             *p++ = *ptr++;
4603           continue;
4604
4605         undo_eol_conversion:
4606           /* We have faced with inconsistent EOL format at PTR.
4607              Convert all LFs before PTR back to CRLFs.  */
4608           for (p--, ptr--; p >= pstart; p--)
4609             {
4610               if (*p == '\n')
4611                 *ptr-- = '\n', *ptr-- = '\r';
4612               else
4613                 *ptr-- = *p;
4614             }
4615           /*  If carryover is recorded, cancel it because we don't
4616               convert CRLF anymore.  */
4617           if (coding->spec.ccl.cr_carryover)
4618             {
4619               coding->spec.ccl.cr_carryover = 0;
4620               coding->produced++;
4621               coding->produced_char++;
4622               pend++;
4623             }
4624           p = ptr = pend;
4625           coding->eol_type = CODING_EOL_LF;
4626           coding->symbol = saved_coding_symbol;
4627         }
4628       if (p < pend)
4629         {
4630           /* As each two-byte sequence CRLF was converted to LF, (PEND
4631              - P) is the number of deleted characters.  */
4632           coding->produced -= pend - p;
4633           coding->produced_char -= pend - p;
4634         }
4635     }
4636   else                  /* i.e. coding->eol_type == CODING_EOL_CR */
4637     {
4638       unsigned char *p = ptr;
4639
4640       for (; ptr < pend; ptr++)
4641         {
4642           if (*ptr == '\r')
4643             *ptr = '\n';
4644           else if (*ptr == '\n'
4645                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4646             {
4647               for (; p < ptr; p++)
4648                 {
4649                   if (*p == '\n')
4650                     *p = '\r';
4651                 }
4652               ptr = pend;
4653               coding->eol_type = CODING_EOL_LF;
4654               coding->symbol = saved_coding_symbol;
4655             }
4656         }
4657     }
4658 }
4659
4660 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
4661    decoding, it may detect coding system and format of end-of-line if
4662    those are not yet decided.  The source should be unibyte, the
4663    result is multibyte if CODING->dst_multibyte is nonzero, else
4664    unibyte.  */
4665
4666 int
4667 decode_coding (coding, source, destination, src_bytes, dst_bytes)
4668      struct coding_system *coding;
4669      unsigned char *source, *destination;
4670      int src_bytes, dst_bytes;
4671 {
4672   if (coding->type == coding_type_undecided)
4673     detect_coding (coding, source, src_bytes);
4674
4675   if (coding->eol_type == CODING_EOL_UNDECIDED
4676       && coding->type != coding_type_ccl)
4677     detect_eol (coding, source, src_bytes);
4678
4679   coding->produced = coding->produced_char = 0;
4680   coding->consumed = coding->consumed_char = 0;
4681   coding->errors = 0;
4682   coding->result = CODING_FINISH_NORMAL;
4683
4684   switch (coding->type)
4685     {
4686     case coding_type_sjis:
4687       decode_coding_sjis_big5 (coding, source, destination,
4688                                src_bytes, dst_bytes, 1);
4689       break;
4690
4691     case coding_type_iso2022:
4692       decode_coding_iso2022 (coding, source, destination,
4693                              src_bytes, dst_bytes);
4694       break;
4695
4696     case coding_type_big5:
4697       decode_coding_sjis_big5 (coding, source, destination,
4698                                src_bytes, dst_bytes, 0);
4699       break;
4700
4701     case coding_type_emacs_mule:
4702       decode_coding_emacs_mule (coding, source, destination,
4703                                 src_bytes, dst_bytes);
4704       break;
4705
4706     case coding_type_ccl:
4707       if (coding->spec.ccl.cr_carryover)
4708         {
4709           /* Set the CR which is not processed by the previous call of
4710              decode_eol_post_ccl in DESTINATION.  */
4711           *destination = '\r';
4712           coding->produced++;
4713           coding->produced_char++;
4714           dst_bytes--;
4715         }
4716       ccl_coding_driver (coding, source,
4717                          destination + coding->spec.ccl.cr_carryover,
4718                          src_bytes, dst_bytes, 0);
4719       if (coding->eol_type != CODING_EOL_LF)
4720         decode_eol_post_ccl (coding, destination, coding->produced);
4721       break;
4722
4723     default:
4724       decode_eol (coding, source, destination, src_bytes, dst_bytes);
4725     }
4726
4727   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
4728       && coding->mode & CODING_MODE_LAST_BLOCK
4729       && coding->consumed == src_bytes)
4730     coding->result = CODING_FINISH_NORMAL;
4731
4732   if (coding->mode & CODING_MODE_LAST_BLOCK
4733       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4734     {
4735       unsigned char *src = source + coding->consumed;
4736       unsigned char *dst = destination + coding->produced;
4737
4738       src_bytes -= coding->consumed;
4739       coding->errors++;
4740       if (COMPOSING_P (coding))
4741         DECODE_COMPOSITION_END ('1');
4742       while (src_bytes--)
4743         {
4744           int c = *src++;
4745           dst += CHAR_STRING (c, dst);
4746           coding->produced_char++;
4747         }
4748       coding->consumed = coding->consumed_char = src - source;
4749       coding->produced = dst - destination;
4750       coding->result = CODING_FINISH_NORMAL;
4751     }
4752
4753   if (!coding->dst_multibyte)
4754     {
4755       coding->produced = str_as_unibyte (destination, coding->produced);
4756       coding->produced_char = coding->produced;
4757     }
4758
4759   return coding->result;
4760 }
4761
/* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  The
   multibyteness of the source is CODING->src_multibyte, the
   multibyteness of the result is always unibyte.

   Encode SRC_BYTES bytes at SOURCE into DESTINATION according to
   CODING.  The counters CODING->produced, produced_char, consumed,
   consumed_char and errors are reset on entry and describe this call
   on return.  The value returned is CODING->result.  */

int
encode_coding (coding, source, destination, src_bytes, dst_bytes)
     struct coding_system *coding;
     unsigned char *source, *destination;
     int src_bytes, dst_bytes;
{
  coding->produced = coding->produced_char = 0;
  coding->consumed = coding->consumed_char = 0;
  coding->errors = 0;
  coding->result = CODING_FINISH_NORMAL;

  /* Dispatch to the encoder for the type of CODING; types without a
     dedicated encoder only get their EOL format converted.  */
  switch (coding->type)
    {
    case coding_type_sjis:
      encode_coding_sjis_big5 (coding, source, destination,
                               src_bytes, dst_bytes, 1);
      break;

    case coding_type_iso2022:
      encode_coding_iso2022 (coding, source, destination,
                             src_bytes, dst_bytes);
      break;

    case coding_type_big5:
      encode_coding_sjis_big5 (coding, source, destination,
                               src_bytes, dst_bytes, 0);
      break;

    case coding_type_emacs_mule:
      encode_coding_emacs_mule (coding, source, destination,
                                src_bytes, dst_bytes);
      break;

    case coding_type_ccl:
      ccl_coding_driver (coding, source, destination,
                         src_bytes, dst_bytes, 1);
      break;

    default:
      encode_eol (coding, source, destination, src_bytes, dst_bytes);
    }

  if (coding->mode & CODING_MODE_LAST_BLOCK
      && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
    {
      /* The last block ends with source the encoder could not
         handle.  Finish the output and copy the rest of the source
         as is.
         NOTE(review): SRC and SRC_END are not referenced by name in
         this block; they are presumably used by the macros below --
         confirm before removing or renaming them.  */
      unsigned char *src = source + coding->consumed;
      unsigned char *src_end = src + src_bytes;
      unsigned char *dst = destination + coding->produced;

      if (coding->type == coding_type_iso2022)
        ENCODE_RESET_PLANE_AND_REGISTER;
      /* A composition is still open: emit ESC '1'.  */
      if (COMPOSING_P (coding))
        *dst++ = ISO_CODE_ESC, *dst++ = '1';
      if (coding->consumed < src_bytes)
        {
          int len = src_bytes - coding->consumed;

          /* Copy the unconsumed bytes; for a multibyte source they
             are converted in place by str_as_unibyte, which returns
             the resulting length.  */
          BCOPY_SHORT (source + coding->consumed, dst, len);
          if (coding->src_multibyte)
            len = str_as_unibyte (dst, len);
          dst += len;
          coding->consumed = src_bytes;
        }
      coding->produced = coding->produced_char = dst - destination;
      coding->result = CODING_FINISH_NORMAL;
    }

  /* Running out of source after consuming all of it is a normal
     end.  */
  if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
      && coding->consumed == src_bytes)
    coding->result = CODING_FINISH_NORMAL;

  return coding->result;
}
4839
4840 /* Scan text in the region between *BEG and *END (byte positions),
4841    skip characters which we don't have to decode by coding system
4842    CODING at the head and tail, then set *BEG and *END to the region
4843    of the text we actually have to convert.  The caller should move
4844    the gap out of the region in advance if the region is from a
4845    buffer.
4846
4847    If STR is not NULL, *BEG and *END are indices into STR.  */
4848
4849 static void
4850 shrink_decoding_region (beg, end, coding, str)
4851      int *beg, *end;
4852      struct coding_system *coding;
4853      unsigned char *str;
4854 {
4855   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
4856   int eol_conversion;
4857   Lisp_Object translation_table;
4858
4859   if (coding->type == coding_type_ccl
4860       || coding->type == coding_type_undecided
4861       || coding->eol_type != CODING_EOL_LF
4862       || !NILP (coding->post_read_conversion)
4863       || coding->composing != COMPOSITION_DISABLED)
4864     {
4865       /* We can't skip any data.  */
4866       return;
4867     }
4868   if (coding->type == coding_type_no_conversion
4869       || coding->type == coding_type_raw_text
4870       || coding->type == coding_type_emacs_mule)
4871     {
4872       /* We need no conversion, but don't have to skip any data here.
4873          Decoding routine handles them effectively anyway.  */
4874       return;
4875     }
4876
4877   translation_table = coding->translation_table_for_decode;
4878   if (NILP (translation_table) && !NILP (Venable_character_translation))
4879     translation_table = Vstandard_translation_table_for_decode;
4880   if (CHAR_TABLE_P (translation_table))
4881     {
4882       int i;
4883       for (i = 0; i < 128; i++)
4884         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
4885           break;
4886       if (i < 128)
4887         /* Some ASCII character should be translated.  We give up
4888            shrinking.  */
4889         return;
4890     }
4891
4892   if (coding->heading_ascii >= 0)
4893     /* Detection routine has already found how much we can skip at the
4894        head.  */
4895     *beg += coding->heading_ascii;
4896
4897   if (str)
4898     {
4899       begp_orig = begp = str + *beg;
4900       endp_orig = endp = str + *end;
4901     }
4902   else
4903     {
4904       begp_orig = begp = BYTE_POS_ADDR (*beg);
4905       endp_orig = endp = begp + *end - *beg;
4906     }
4907
4908   eol_conversion = (coding->eol_type == CODING_EOL_CR
4909                     || coding->eol_type == CODING_EOL_CRLF);
4910
4911   switch (coding->type)
4912     {
4913     case coding_type_sjis:
4914     case coding_type_big5:
4915       /* We can skip all ASCII characters at the head.  */
4916       if (coding->heading_ascii < 0)
4917         {
4918           if (eol_conversion)
4919             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
4920           else
4921             while (begp < endp && *begp < 0x80) begp++;
4922         }
4923       /* We can skip all ASCII characters at the tail except for the
4924          second byte of SJIS or BIG5 code.  */
4925       if (eol_conversion)
4926         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
4927       else
4928         while (begp < endp && endp[-1] < 0x80) endp--;
4929       /* Do not consider LF as ascii if preceded by CR, since that
4930          confuses eol decoding. */
4931       if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4932         endp++;
4933       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
4934         endp++;
4935       break;
4936
4937     case coding_type_iso2022:
4938       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
4939         /* We can't skip any data.  */
4940         break;
4941       if (coding->heading_ascii < 0)
4942         {
4943           /* We can skip all ASCII characters at the head except for a
4944              few control codes.  */
4945           while (begp < endp && (c = *begp) < 0x80
4946                  && c != ISO_CODE_CR && c != ISO_CODE_SO
4947                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
4948                  && (!eol_conversion || c != ISO_CODE_LF))
4949             begp++;
4950         }
4951       switch (coding->category_idx)
4952         {
4953         case CODING_CATEGORY_IDX_ISO_8_1:
4954         case CODING_CATEGORY_IDX_ISO_8_2:
4955           /* We can skip all ASCII characters at the tail.  */
4956           if (eol_conversion)
4957             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
4958           else
4959             while (begp < endp && endp[-1] < 0x80) endp--;
4960           /* Do not consider LF as ascii if preceded by CR, since that
4961              confuses eol decoding. */
4962           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4963             endp++;
4964           break;
4965
4966         case CODING_CATEGORY_IDX_ISO_7:
4967         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
4968           {
4969             /* We can skip all charactes at the tail except for 8-bit
4970                codes and ESC and the following 2-byte at the tail.  */
4971             unsigned char *eight_bit = NULL;
4972
4973             if (eol_conversion)
4974               while (begp < endp
4975                      && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
4976                 {
4977                   if (!eight_bit && c & 0x80) eight_bit = endp;
4978                   endp--;
4979                 }
4980             else
4981               while (begp < endp
4982                      && (c = endp[-1]) != ISO_CODE_ESC)
4983                 {
4984                   if (!eight_bit && c & 0x80) eight_bit = endp;
4985                   endp--;
4986                 }
4987             /* Do not consider LF as ascii if preceded by CR, since that
4988                confuses eol decoding. */
4989             if (begp < endp && endp < endp_orig
4990                 && endp[-1] == '\r' && endp[0] == '\n')
4991               endp++;
4992             if (begp < endp && endp[-1] == ISO_CODE_ESC)
4993               {
4994                 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
4995                   /* This is an ASCII designation sequence.  We can
4996                      surely skip the tail.  But, if we have
4997                      encountered an 8-bit code, skip only the codes
4998                      after that.  */
4999                   endp = eight_bit ? eight_bit : endp + 2;
5000                 else
5001                   /* Hmmm, we can't skip the tail.  */
5002                   endp = endp_orig;
5003               }
5004             else if (eight_bit)
5005               endp = eight_bit;
5006           }
5007         }
5008       break;
5009
5010     default:
5011       abort ();
5012     }
5013   *beg += begp - begp_orig;
5014   *end += endp - endp_orig;
5015   return;
5016 }
5017
5018 /* Like shrink_decoding_region but for encoding.  */
5019
5020 static void
5021 shrink_encoding_region (beg, end, coding, str)
5022      int *beg, *end;
5023      struct coding_system *coding;
5024      unsigned char *str;
5025 {
5026   unsigned char *begp_orig, *begp, *endp_orig, *endp;
5027   int eol_conversion;
5028   Lisp_Object translation_table;
5029
5030   if (coding->type == coding_type_ccl
5031       || coding->eol_type == CODING_EOL_CRLF
5032       || coding->eol_type == CODING_EOL_CR
5033       || coding->cmp_data && coding->cmp_data->used > 0)
5034     {
5035       /* We can't skip any data.  */
5036       return;
5037     }
5038   if (coding->type == coding_type_no_conversion
5039       || coding->type == coding_type_raw_text
5040       || coding->type == coding_type_emacs_mule
5041       || coding->type == coding_type_undecided)
5042     {
5043       /* We need no conversion, but don't have to skip any data here.
5044          Encoding routine handles them effectively anyway.  */
5045       return;
5046     }
5047
5048   translation_table = coding->translation_table_for_encode;
5049   if (NILP (translation_table) && !NILP (Venable_character_translation))
5050     translation_table = Vstandard_translation_table_for_encode;
5051   if (CHAR_TABLE_P (translation_table))
5052     {
5053       int i;
5054       for (i = 0; i < 128; i++)
5055         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
5056           break;
5057       if (i < 128)
5058         /* Some ASCII character should be tranlsated.  We give up
5059            shrinking.  */
5060         return;
5061     }
5062
5063   if (str)
5064     {
5065       begp_orig = begp = str + *beg;
5066       endp_orig = endp = str + *end;
5067     }
5068   else
5069     {
5070       begp_orig = begp = BYTE_POS_ADDR (*beg);
5071       endp_orig = endp = begp + *end - *beg;
5072     }
5073
5074   eol_conversion = (coding->eol_type == CODING_EOL_CR
5075                     || coding->eol_type == CODING_EOL_CRLF);
5076
5077   /* Here, we don't have to check coding->pre_write_conversion because
5078      the caller is expected to have handled it already.  */
5079   switch (coding->type)
5080     {
5081     case coding_type_iso2022:
5082       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
5083         /* We can't skip any data.  */
5084         break;
5085       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
5086         {
5087           unsigned char *bol = begp;
5088           while (begp < endp && *begp < 0x80)
5089             {
5090               begp++;
5091               if (begp[-1] == '\n')
5092                 bol = begp;
5093             }
5094           begp = bol;
5095           goto label_skip_tail;
5096         }
5097       /* fall down ... */
5098
5099     case coding_type_sjis:
5100     case coding_type_big5:
5101       /* We can skip all ASCII characters at the head and tail.  */
5102       if (eol_conversion)
5103         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
5104       else
5105         while (begp < endp && *begp < 0x80) begp++;
5106     label_skip_tail:
5107       if (eol_conversion)
5108         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
5109       else
5110         while (begp < endp && *(endp - 1) < 0x80) endp--;
5111       break;
5112
5113     default:
5114       abort ();
5115     }
5116
5117   *beg += begp - begp_orig;
5118   *end += endp - endp_orig;
5119   return;
5120 }
5121
/* Shrinking a conversion region has some overhead of its own, so it
   is not attempted unless the region is longer than this value.  */
static int shrink_conversion_region_threshhold = 1024;

/* If the region between *BEG and *END is longer than the threshold
   above, narrow it with shrink_encoding_region (when ENCODEP is
   nonzero) or shrink_decoding_region (when ENCODEP is zero).  BEG and
   END are pointers to the positions; CODING and STR are passed on
   unchanged.  */
#define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep)        \
  do {                                                                  \
    if (*(end) - *(beg) <= shrink_conversion_region_threshhold)         \
      break;                                                            \
    if (encodep)                                                        \
      shrink_encoding_region (beg, end, coding, str);                   \
    else                                                                \
      shrink_decoding_region (beg, end, coding, str);                   \
  } while (0)
5135
/* Clear the flag inhibit_pre_post_conversion and return Qnil.  DUMMY
   is ignored.
   NOTE(review): judging by the name, this is presumably installed as
   an unwind handler around code conversion so that the flag is reset
   even on a non-local exit -- confirm against the callers.  */
static Lisp_Object
code_convert_region_unwind (dummy)
     Lisp_Object dummy;
{
  inhibit_pre_post_conversion = 0;
  return Qnil;
}
5143
/* Store information about all compositions in the range FROM and TO
   of OBJ in memory blocks pointed by CODING->cmp_data.  OBJ is a
   buffer or a string, defaults to the current buffer.

   Nothing is done if composition handling is disabled for CODING.
   Positions are recorded relative to FROM.  On return, if any
   composition was found entirely within the range, CODING->cmp_data
   points to the first memory block and CODING->cmp_data_start is
   0.  */

void
coding_save_composition (coding, from, to, obj)
     struct coding_system *coding;
     int from, to;
     Lisp_Object obj;
{
  Lisp_Object prop;
  int start, end;

  if (coding->composing == COMPOSITION_DISABLED)
    return;
  if (!coding->cmp_data)
    coding_allocate_composition_data (coding, from);
  /* Give up if there is no composition, or if the first one found
     extends beyond TO.  */
  if (!find_composition (from, to, &start, &end, &prop, obj)
      || end > to)
    return;
  /* A composition that starts before FROM is not recorded; look for
     the next one after it.  */
  if (start < from
      && (!find_composition (end, to, &start, &end, &prop, obj)
          || end > to))
    return;
  coding->composing = COMPOSITION_NO;
  do
    {
      if (COMPOSITION_VALID_P (start, end, prop))
        {
          enum composition_method method = COMPOSITION_METHOD (prop);
          /* Get a new memory block if the current one may be too
             small for one more composition.  */
          if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
              >= COMPOSITION_DATA_SIZE)
            coding_allocate_composition_data (coding, from);
          /* For relative composition, we remember start and end
             positions, for the other compositions, we also remember
             components.  */
          CODING_ADD_COMPOSITION_START (coding, start - from, method);
          if (method != COMPOSITION_RELATIVE)
            {
              /* We must store all components of the composition.
                 They come as a list, a vector, a string, or a single
                 integer.  */
              Lisp_Object val, ch;

              val = COMPOSITION_COMPONENTS (prop);
              if (CONSP (val))
                while (CONSP (val))
                  {
                    ch = XCAR (val), val = XCDR (val);
                    CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
                  }
              else if (VECTORP (val) || STRINGP (val))
                {
                  int len = (VECTORP (val)
                             ? XVECTOR (val)->size : XSTRING (val)->size);
                  int i;
                  for (i = 0; i < len; i++)
                    {
                      ch = (STRINGP (val)
                            ? Faref (val, make_number (i))
                            : XVECTOR (val)->contents[i]);
                      CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
                    }
                }
              else              /* INTEGERP (val) */
                CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (val));
            }
          CODING_ADD_COMPOSITION_END (coding, end - from);
        }
      start = end;
    }
  /* Continue with the next composition as long as it lies entirely
     within the range.  */
  while (start < to
         && find_composition (start, to, &start, &end, &prop, obj)
         && end <= to);

  /* Make coding->cmp_data point to the first memory block.  */
  while (coding->cmp_data->prev)
    coding->cmp_data = coding->cmp_data->prev;
  coding->cmp_data_start = 0;
}
5222
5223 /* Reflect the saved information about compositions to OBJ.
5224    CODING->cmp_data points to a memory block for the information.  OBJ
5225    is a buffer or a string, defaults to the current buffer.

        The memory blocks are chained through their `prev' and `next'
        members, and each block holds `used' ints of records in `data'.
        As read by this function, a record DATA is laid out as:
          DATA[0]           length of the record in ints; also the step
                            to the next record
          DATA[1], DATA[2]  passed as the first two arguments of
                            compose_text (presumably the start and end
                            of the composed text -- confirm in the code
                            that writes the records)
          DATA[3]           the composition method
          DATA[4] ...       the DATA[0] - 4 components; not read when
                            the method is COMPOSITION_RELATIVE.

        A record whose component count is negative or does not fit in
        the local argument array is treated as invalid, and the rest of
        its block is ignored.  */
5226
5227 void
5228 coding_restore_composition (coding, obj)
5229      struct coding_system *coding;
5230      Lisp_Object obj;
5231 {
5232   struct composition_data *cmp_data = coding->cmp_data;
5233
5234   if (!cmp_data)
5235     return;
5236
       /* CODING->cmp_data may point into the middle of the chain; rewind
          to the first block.  */
5237   while (cmp_data->prev)
5238     cmp_data = cmp_data->prev;
5239
5240   while (cmp_data)
5241     {
5242       int i;
5243
           /* Walk the records of this block.  A non-positive record
              length ends the walk early.  */
5244       for (i = 0; i < cmp_data->used && cmp_data->data[i] > 0;
5245            i += cmp_data->data[i])
5246         {
5247           int *data = cmp_data->data + i;
5248           enum composition_method method = (enum composition_method) data[3];
5249           Lisp_Object components;
5250
5251           if (method == COMPOSITION_RELATIVE)
5252             components = Qnil;
5253           else
5254             {
5255               int len = data[0] - 4, j;
5256               Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5257
                   /* Never write past ARGS, and never hand a negative
                      count to Fstring or Fvector.  */
                   if (len < 0 || len > (int) (sizeof args / sizeof args[0]))
                     break;
5258               for (j = 0; j < len; j++)
5259                 args[j] = make_number (data[4 + j]);
                   /* Components become a string for
                      COMPOSITION_WITH_ALTCHARS, a vector for every other
                      non-relative method.  */
5260               components = (method == COMPOSITION_WITH_ALTCHARS
5261                             ? Fstring (len, args) : Fvector (len, args));
5262             }
5263           compose_text (data[1], data[2], components, Qnil, obj);
5264         }
5265       cmp_data = cmp_data->next;
5266     }
5267 }
5268
5269 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
5270    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
5271    coding system CODING, and return the status code of code conversion
5272    (currently, this value has no meaning).
5273
5274    How many characters (and bytes) are converted to how many
5275    characters (and bytes) are recorded in members of the structure
5276    CODING.
5277
5278    If REPLACE is nonzero, we do various things as if the original text
5279    is deleted and a new text is inserted.  See the comments in
5280    replace_range (insdel.c) to know what we are doing.
5281
5282    If REPLACE is zero, it is assumed that the source text is unibyte.
5283    Otherwise, it is assumed that the source text is multibyte.

        The conversion is done in place in the current buffer: the
        source text is temporarily counted as part of the gap, and the
        converted text is written at the start of the gap (see the
        memory diagrams in the main loop).  Any composition data set up
        in CODING for this call is released before returning.  */
5284
5285 int
5286 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
5287      int from, from_byte, to, to_byte, encodep, replace;
5288      struct coding_system *coding;
5289 {
5290   int len = to - from, len_byte = to_byte - from_byte;
5291   int require, inserted, inserted_byte;
5292   int head_skip, tail_skip, total_skip = 0;
5293   Lisp_Object saved_coding_symbol;
5294   int first = 1;
5295   unsigned char *src, *dst;
5296   Lisp_Object deletion;
5297   int orig_point = PT, orig_len = len;
5298   int prev_Z;
       /* NOTE(review): MULTIBYTE_P is not referenced by name in this
          function; confirm no macro used below relies on it before
          removing it.  */
5299   int multibyte_p = !NILP (current_buffer->enable_multibyte_characters);
5300
5301   deletion = Qnil;
5302   saved_coding_symbol = Qnil;
5303
       /* If point is strictly inside the region, move it to FROM now.
          ORIG_POINT is the position that is adjusted and restored at the
          end of this function.  */
5304   if (from < PT && PT < to)
5305     {
5306       TEMP_SET_PT_BOTH (from, from_byte);
5307       orig_point = from;
5308     }
5309
5310   if (replace)
5311     {
5312       int saved_from = from;
5313       int saved_inhibit_modification_hooks;
5314
           /* prepare_to_modify_buffer is given the address of FROM and may
              change it.  In that case, recompute TO and the byte
              positions from the unchanged character length.  */
5315       prepare_to_modify_buffer (from, to, &from);
5316       if (saved_from != from)
5317         {
5318           to = from + len;
5319           from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
5320           len_byte = to_byte - from_byte;
5321         }
5322
5323       /* The code conversion routine can not preserve text properties
5324          for now.  So, we must remove all text properties in the
5325          region.  Here, we must suppress all modification hooks.  */
5326       saved_inhibit_modification_hooks = inhibit_modification_hooks;
5327       inhibit_modification_hooks = 1;
5328       Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
5329       inhibit_modification_hooks = saved_inhibit_modification_hooks;
5330     }
5331
5332   if (! encodep && CODING_REQUIRE_DETECTION (coding))
5333     {
5334       /* We must detect encoding of text and eol format.  */
5335
           /* The detectors are given one contiguous address range, so the
              gap must not lie inside the region.  */
5336       if (from < GPT && to > GPT)
5337         move_gap_both (from, from_byte);
5338       if (coding->type == coding_type_undecided)
5339         {
5340           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
5341           if (coding->type == coding_type_undecided)
5342             {
5343               /* It seems that the text contains only ASCII, but we
5344                  should not leave it undecided because the deeper
5345                  decoding routine (decode_coding) tries to detect the
5346                  encodings again in vain.  */
5347               coding->type = coding_type_emacs_mule;
5348               coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
5349             }
5350         }
5351       if (coding->eol_type == CODING_EOL_UNDECIDED
5352           && coding->type != coding_type_ccl)
5353         {
               /* Remember the symbol; it is used below when an
                  inconsistent eol format is found while decoding.  */
5354           saved_coding_symbol = coding->symbol;
5355           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
5356           if (coding->eol_type == CODING_EOL_UNDECIDED)
5357             coding->eol_type = CODING_EOL_LF;
5358           /* We had better recover the original eol format if we
5359              encounter an inconsistent eol format while decoding.  */
5360           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
5361         }
5362     }
5363
5364   /* Now we convert the text.  */
5365
5366   /* For encoding, we must process pre-write-conversion in advance.  */
5367   if (! inhibit_pre_post_conversion
5368       && encodep
5369       && SYMBOLP (coding->pre_write_conversion)
5370       && ! NILP (Ffboundp (coding->pre_write_conversion)))
5371     {
5372       /* The function in pre-write-conversion may put a new text in a
5373          new buffer.  */
5374       struct buffer *prev = current_buffer;
5375       Lisp_Object new;
           /* NOTE(review): COUNT is not referenced below; the
              unwind-protect is discarded by decrementing specpdl_ptr
              directly.  */
5376       int count = specpdl_ptr - specpdl;
5377
5378       record_unwind_protect (code_convert_region_unwind, Qnil);
5379       /* We should not call any more pre-write/post-read-conversion
5380          functions while this pre-write-conversion is running.  */
5381       inhibit_pre_post_conversion = 1;
5382       call2 (coding->pre_write_conversion,
5383              make_number (from), make_number (to));
5384       inhibit_pre_post_conversion = 0;
5385       /* Discard the unwind protect.  */
5386       specpdl_ptr--;
5387
5388       if (current_buffer != prev)
5389         {
               /* The function left us in another buffer.  Take that
                  buffer's accessible text as the new source: substitute it
                  for the region in the original buffer, kill the other
                  buffer, and recompute every position and length that
                  depended on the old text.  */
5390           len = ZV - BEGV;
5391           new = Fcurrent_buffer ();
5392           set_buffer_internal_1 (prev);
5393           del_range_2 (from, from_byte, to, to_byte, 0);
5394           TEMP_SET_PT_BOTH (from, from_byte);
5395           insert_from_buffer (XBUFFER (new), 1, len, 0);
5396           Fkill_buffer (new);
5397           if (orig_point >= to)
5398             orig_point += len - orig_len;
5399           else if (orig_point > from)
5400             orig_point = from;
5401           orig_len = len;
5402           to = from + len;
5403           from_byte = CHAR_TO_BYTE (from);
5404           to_byte = CHAR_TO_BYTE (to);
5405           len_byte = to_byte - from_byte;
5406           TEMP_SET_PT_BOTH (from, from_byte);
5407         }
5408     }
5409
       /* Keep a copy of the text about to be replaced; it is handed to
          adjust_after_replace once the conversion is done.  */
5410   if (replace)
5411     deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
5412
       /* Set up composition data in CODING: saved from the text when
          encoding, freshly allocated when decoding.  Every exit path
          below must release it with coding_free_composition_data.  */
5413   if (coding->composing != COMPOSITION_DISABLED)
5414     {
5415       if (encodep)
5416         coding_save_composition (coding, from, to, Fcurrent_buffer ());
5417       else
5418         coding_allocate_composition_data (coding, from);
5419     }
5420
5421   /* Try to skip the heading and tailing ASCIIs.  */
5422   if (coding->type != coding_type_ccl)
5423     {
5424       int from_byte_orig = from_byte, to_byte_orig = to_byte;
5425
5426       if (from < GPT && GPT < to)
5427         move_gap_both (from, from_byte);
5428       SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
5429       if (from_byte == to_byte
5430           && (encodep || NILP (coding->post_read_conversion))
5431           && ! CODING_REQUIRE_FLUSHING (coding))
5432         {
               /* Nothing is left to convert: report the text as produced
                  unchanged.  */
5433           coding->produced = len_byte;
5434           coding->produced_char = len;
5435           if (!replace)
5436             /* We must record and adjust for this new text now.  */
5437             adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
               /* Release the composition data set up above, as the normal
                  exit path does after conversion; otherwise it would be
                  left allocated.  */
               coding_free_composition_data (coding);
5438           return 0;
5439         }
5440
           /* HEAD_SKIP and TAIL_SKIP are byte counts, yet they are applied
              to the character positions and lengths as well; presumably
              valid because the skipped text is ASCII, one byte per
              character.  */
5441       head_skip = from_byte - from_byte_orig;
5442       tail_skip = to_byte_orig - to_byte;
5443       total_skip = head_skip + tail_skip;
5444       from += head_skip;
5445       to -= tail_skip;
5446       len -= total_skip; len_byte -= total_skip;
5447     }
5448
5449   /* For conversion, we must put the gap before the text in addition to
5450      making the gap larger for efficient decoding.  The required gap
5451      size starts from 2000 which is the magic number used in make_gap.
5452      But, after one batch of conversion, it will be incremented if we
5453      find that it is not enough.  */
5454   require = 2000;
5455
5456   if (GAP_SIZE  < require)
5457     make_gap (require - GAP_SIZE);
5458   move_gap_both (from, from_byte);
5459
5460   inserted = inserted_byte = 0;
5461
       /* From here on the source text is counted as part of the gap
          rather than as buffer text (see the diagrams in the loop below).  */
5462   GAP_SIZE += len_byte;
5463   ZV -= len;
5464   Z -= len;
5465   ZV_BYTE -= len_byte;
5466   Z_BYTE -= len_byte;
5467
5468   if (GPT - BEG < BEG_UNCHANGED)
5469     BEG_UNCHANGED = GPT - BEG;
5470   if (Z - GPT < END_UNCHANGED)
5471     END_UNCHANGED = Z - GPT;
5472
5473   if (!encodep && coding->src_multibyte)
5474     {
5475       /* Decoding routines expects that the source text is unibyte.
5476          We must convert 8-bit characters of multibyte form to
5477          unibyte.  */
5478       int len_byte_orig = len_byte;
5479       len_byte = str_as_unibyte (GAP_END_ADDR - len_byte, len_byte);
           /* If the text got shorter, slide it so that it still ends at
              GAP_END_ADDR, where the loop below expects it.  */
5480       if (len_byte < len_byte_orig)
5481         safe_bcopy (GAP_END_ADDR - len_byte_orig, GAP_END_ADDR - len_byte,
5482                     len_byte);
5483       coding->src_multibyte = 0;
5484     }
5485
5486   for (;;)
5487     {
5488       int result;
5489
5490       /* The buffer memory is now:
5491          +--------+converted-text+---------+-------original-text-------+---+
5492          |<-from->|<--inserted-->|---------|<--------len_byte--------->|---|
5493                   |<---------------------- GAP ----------------------->|  */
5494       src = GAP_END_ADDR - len_byte;
5495       dst = GPT_ADDR + inserted_byte;
5496
5497       if (encodep)
5498         result = encode_coding (coding, src, dst, len_byte, 0);
5499       else
5500         result = decode_coding (coding, src, dst, len_byte, 0);
5501
5502       /* The buffer memory is now:
5503          +--------+-------converted-text----+--+------original-text----+---+
5504          |<-from->|<-inserted->|<-produced->|--|<-(len_byte-consumed)->|---|
5505                   |<---------------------- GAP ----------------------->|  */
5506
5507       inserted += coding->produced_char;
5508       inserted_byte += coding->produced;
5509       len_byte -= coding->consumed;
5510
5511       if (result == CODING_FINISH_INSUFFICIENT_CMP)
5512         {
               /* Get more room for composition data and resume; SRC and
                  DST are recomputed at the top of the loop.  */
5513           coding_allocate_composition_data (coding, from + inserted);
5514           continue;
5515         }
5516
5517       src += coding->consumed;
5518       dst += coding->produced;
5519
5520       if (result == CODING_FINISH_NORMAL)
5521         {
5522           src += len_byte;
5523           break;
5524         }
5525       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
5526         {
5527           unsigned char *pend = dst, *p = pend - inserted_byte;
5528           Lisp_Object eol_type;
5529
5530           /* Encode LFs back to the original eol format (CR or CRLF).  */
5531           if (coding->eol_type == CODING_EOL_CR)
5532             {
5533               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
5534             }
5535           else
5536             {
5537               int count = 0;
5538
                   /* Each LF grows into CRLF, so COUNT more bytes are
                      needed between DST and SRC.  */
5539               while (p < pend) if (*p++ == '\n') count++;
5540               if (src - dst < count)
5541                 {
5542                   /* We don't have sufficient room for encoding LFs
5543                      back to CRLF.  We must record converted and
5544                      not-yet-converted text back to the buffer
5545                      content, enlarge the gap, then record them out of
5546                      the buffer contents again.  */
5547                   int add = len_byte + inserted_byte;
5548
5549                   GAP_SIZE -= add;
5550                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5551                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
5552                   make_gap (count - GAP_SIZE);
5553                   GAP_SIZE += add;
5554                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5555                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5556                   /* Don't forget to update SRC, DST, and PEND.  */
5557                   src = GAP_END_ADDR - len_byte;
5558                   dst = GPT_ADDR + inserted_byte;
5559                   pend = dst;
5560                 }
5561               inserted += count;
5562               inserted_byte += count;
5563               coding->produced += count;
                   /* Expand in place, copying backward from the end so
                      that no unread byte is overwritten.  */
5564               p = dst = pend + count;
5565               while (count)
5566                 {
5567                   *--p = *--pend;
5568                   if (*p == '\n') count--, *--p = '\r';
5569                 }
5570             }
5571
5572           /* Suppress eol-format conversion in the further conversion.  */
5573           coding->eol_type = CODING_EOL_LF;
5574
5575           /* Set the coding system symbol to that for Unix-like EOL.  */
5576           eol_type = Fget (saved_coding_symbol, Qeol_type);
5577           if (VECTORP (eol_type)
5578               && XVECTOR (eol_type)->size == 3
5579               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
5580             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
5581           else
5582             coding->symbol = saved_coding_symbol;
5583
5584           continue;
5585         }
5586       if (len_byte <= 0)
5587         {
               /* All source bytes are consumed.  A CCL coding system gets
                  one more pass with CODING_MODE_LAST_BLOCK set before we
                  stop.  */
5588           if (coding->type != coding_type_ccl
5589               || coding->mode & CODING_MODE_LAST_BLOCK)
5590             break;
5591           coding->mode |= CODING_MODE_LAST_BLOCK;
5592           continue;
5593         }
5594       if (result == CODING_FINISH_INSUFFICIENT_SRC)
5595         {
5596           /* The source text ends in invalid codes.  Let's just
5597              make them valid buffer contents, and finish conversion.  */
5598           inserted += len_byte;
5599           inserted_byte += len_byte;
5600           while (len_byte--)
5601             *dst++ = *src++;
5602           break;
5603         }
5604       if (result == CODING_FINISH_INTERRUPT)
5605         {
5606           /* The conversion procedure was interrupted by a user.  */
5607           break;
5608         }
5609       /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST  */
5610       if (coding->consumed < 1)
5611         {
5612           /* It's quite strange to require more memory without
5613              consuming any bytes.  Perhaps CCL program bug.  */
5614           break;
5615         }
5616       if (first)
5617         {
5618           /* We have just done the first batch of conversion which was
5619              stopped because of insufficient gap.  Let's reconsider the
5620              required gap size (i.e. SRC - DST) now.
5621
5622              We have converted ORIG bytes (== coding->consumed) into
5623              NEW bytes (coding->produced).  To convert the remaining
5624              LEN bytes, we may need REQUIRE bytes of gap, where:
5625                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
5626                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
5627              Here, we are sure that NEW >= ORIG.  */
5628           float ratio = coding->produced - coding->consumed;
5629           ratio /= coding->consumed;
5630           require = len_byte * ratio;
5631           first = 0;
5632         }
5633       if ((src - dst) < (require + 2000))
5634         {
5635           /* See the comment above the previous call of make_gap.  */
5636           int add = len_byte + inserted_byte;
5637
5638           GAP_SIZE -= add;
5639           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5640           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5641           make_gap (require + 2000);
5642           GAP_SIZE += add;
5643           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5644           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5645         }
5646     }
5647   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
5648
5649   if (encodep && coding->dst_multibyte)
5650     {
5651       /* The output is unibyte.  We must convert 8-bit characters to
5652          multibyte form.  */
5653       if (inserted_byte * 2 > GAP_SIZE)
5654         {
               /* Same bookkeeping dance as around the make_gap calls in
                  the loop above, with only the converted text involved.  */
5655           GAP_SIZE -= inserted_byte;
5656           ZV += inserted_byte; Z += inserted_byte;
5657           ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte;
5658           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5659           make_gap (inserted_byte - GAP_SIZE);
5660           GAP_SIZE += inserted_byte;
5661           ZV -= inserted_byte; Z -= inserted_byte;
5662           ZV_BYTE -= inserted_byte; Z_BYTE -= inserted_byte;
5663           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5664         }
5665       inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte);
5666     }
5667
5668   /* If we have shrunk the conversion area, adjust it now.  */
5669   if (total_skip > 0)
5670     {
           /* Bring the skipped tail right after the converted text, then
              widen the region again so that it covers the skipped head
              and tail.  */
5671       if (tail_skip > 0)
5672         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
5673       inserted += total_skip; inserted_byte += total_skip;
5674       GAP_SIZE += total_skip;
5675       GPT -= head_skip; GPT_BYTE -= head_skip;
5676       ZV -= total_skip; ZV_BYTE -= total_skip;
5677       Z -= total_skip; Z_BYTE -= total_skip;
5678       from -= head_skip; from_byte -= head_skip;
5679       to += tail_skip; to_byte += tail_skip;
5680     }
5681
       /* INSERTED is recomputed from the change in Z, i.e. from what
          adjust_after_replace actually recorded.  */
5682   prev_Z = Z;
5683   adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
5684   inserted = Z - prev_Z;
5685
5686   if (!encodep && coding->cmp_data && coding->cmp_data->used)
5687     coding_restore_composition (coding, Fcurrent_buffer ());
5688   coding_free_composition_data (coding);
5689
5690   if (! inhibit_pre_post_conversion
5691       && ! encodep && ! NILP (coding->post_read_conversion))
5692     {
5693       Lisp_Object val;
           /* NOTE(review): COUNT is not referenced below; the
              unwind-protect is discarded by decrementing specpdl_ptr
              directly.  */
5694       int count = specpdl_ptr - specpdl;
5695
5696       if (from != PT)
5697         TEMP_SET_PT_BOTH (from, from_byte);
5698       prev_Z = Z;
5699       record_unwind_protect (code_convert_region_unwind, Qnil);
5700       /* We should not call any more pre-write/post-read-conversion
5701          functions while this post-read-conversion is running.  */
5702       inhibit_pre_post_conversion = 1;
5703       val = call1 (coding->post_read_conversion, make_number (inserted));
5704       inhibit_pre_post_conversion = 0;
5705       /* Discard the unwind protect.  */
5706       specpdl_ptr--;
           /* The returned number is only type-checked; the new length is
              taken from the change in Z.  */
5707       CHECK_NUMBER (val, 0);
5708       inserted += Z - prev_Z;
5709     }
5710
       /* Restore point: shifted by the change in length if it was at or
          after the end of the original text, otherwise put at FROM.  */
5711   if (orig_point >= from)
5712     {
5713       if (orig_point >= from + orig_len)
5714         orig_point += inserted - orig_len;
5715       else
5716         orig_point = from;
5717       TEMP_SET_PT (orig_point);
5718     }
5719
5720   if (replace)
5721     {
5722       signal_after_change (from, to - from, inserted);
5723       update_compositions (from, from + inserted, CHECK_BORDER);
5724     }
5725
       /* Report the totals for the whole region, skipped ASCII head and
          tail included.  */
5726   {
5727     coding->consumed = to_byte - from_byte;
5728     coding->consumed_char = to - from;
5729     coding->produced = inserted_byte;
5730     coding->produced_char = inserted;
5731   }
5732
5733   return 0;
5734 }
5735
     /* Run a conversion function of CODING on the text of STR and return
        the resulting text as a string.

        If ENCODEP is nonzero, CODING->pre_write_conversion is called with
        the bounds BEG and Z of a work buffer holding STR.  Otherwise
        CODING->post_read_conversion is called with point at BEG and the
        length Z - BEG as its argument.  The callers in this file check
        that the function is an fbound symbol before calling here; this
        function itself does not.

        The work buffer is set up with the name " *code-converting-work*"
        and given the same multibyteness as STR.  The buffer that was
        current on entry is restored through the unwind-protect stack.  */
5736 Lisp_Object
5737 run_pre_post_conversion_on_str (str, coding, encodep)
5738      Lisp_Object str;
5739      struct coding_system *coding;
5740      int encodep;
5741 {
       /* Depth of the specpdl stack on entry; unbind_to below unwinds
          back to it.  */
5742   int count = specpdl_ptr - specpdl;
5743   struct gcpro gcpro1;
5744   int multibyte = STRING_MULTIBYTE (str);
5745
5746   record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
5747   record_unwind_protect (code_convert_region_unwind, Qnil);
       /* STR stays GC-protected until its contents have been inserted
          into the work buffer.  */
5748   GCPRO1 (str);
5749   temp_output_buffer_setup (" *code-converting-work*");
5750   set_buffer_internal (XBUFFER (Vstandard_output));
5751   /* We must insert the contents of STR as is without
5752      unibyte<->multibyte conversion.  For that, we adjust the
5753      multibyteness of the working buffer to that of STR.  */
5754   Ferase_buffer ();
5755   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
5756   insert_from_string (str, 0, 0,
5757                       XSTRING (str)->size, STRING_BYTES (XSTRING (str)), 0);
5758   UNGCPRO;
       /* Flag that a conversion function is running; code_convert_region
          skips its own pre-write/post-read calls while this is set.  */
5759   inhibit_pre_post_conversion = 1;
5760   if (encodep)
5761     call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
5762   else
5763     {
5764       TEMP_SET_PT_BOTH (BEG, BEG_BYTE);
5765       call1 (coding->post_read_conversion, make_number (Z - BEG));
5766     }
5767   inhibit_pre_post_conversion = 0;
       /* The result is the whole text of whichever buffer is current
          after the call.  */
5768   str = make_buffer_string (BEG, Z, 1);
5769   return unbind_to (count, str);
5770 }
5771
     /* Decode the string STR by coding system CODING and return the
        resulting string.

        If no decoding turns out to be necessary, the result is STR itself
        when NOCOPY is nonzero, otherwise a copy of it.  NOCOPY is also
        forced on once STR has been replaced by the result of
        Fstring_as_unibyte or Fstring_as_multibyte.

        The totals of consumed and produced bytes and characters are
        stored in CODING.  If CODING->post_read_conversion is an fbound
        symbol, it is run on the decoded string before returning.
        NOTE(review): unlike code_convert_region, this does not test
        inhibit_pre_post_conversion first -- confirm that is intended.  */
5772 Lisp_Object
5773 decode_coding_string (str, coding, nocopy)
5774      Lisp_Object str;
5775      struct coding_system *coding;
5776      int nocopy;
5777 {
5778   int len;
5779   struct conversion_buffer buf;
5780   int from, to_byte;
5781   struct gcpro gcpro1;
5782   Lisp_Object saved_coding_symbol;
5783   int result;
5784   int require_decoding;
       /* Number of leading plus trailing bytes of STR that are copied to
          the result without going through the decoder.  */
5785   int shrinked_bytes = 0;
5786   Lisp_Object newstr;
5787   int consumed, consumed_char, produced, produced_char;
5788
5789   from = 0;
5790   to_byte = STRING_BYTES (XSTRING (str));
5791
5792   saved_coding_symbol = Qnil;
5793   coding->src_multibyte = STRING_MULTIBYTE (str);
5794   coding->dst_multibyte = 1;
5795   if (CODING_REQUIRE_DETECTION (coding))
5796     {
5797       /* See the comments in code_convert_region.  */
5798       if (coding->type == coding_type_undecided)
5799         {
5800           detect_coding (coding, XSTRING (str)->data, to_byte);
5801           if (coding->type == coding_type_undecided)
5802             coding->type = coding_type_emacs_mule;
5803         }
5804       if (coding->eol_type == CODING_EOL_UNDECIDED
5805           && coding->type != coding_type_ccl)
5806         {
5807           saved_coding_symbol = coding->symbol;
5808           detect_eol (coding, XSTRING (str)->data, to_byte);
5809           if (coding->eol_type == CODING_EOL_UNDECIDED)
5810             coding->eol_type = CODING_EOL_LF;
5811           /* We had better recover the original eol format if we
5812              encounter an inconsistent eol format while decoding.  */
5813           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
5814         }
5815     }
5816
       /* These two coding types yield a unibyte result.  */
5817   if (coding->type == coding_type_no_conversion
5818       || coding->type == coding_type_raw_text)
5819     coding->dst_multibyte = 0;
5820
5821   require_decoding = CODING_REQUIRE_DECODING (coding);
5822
5823   if (STRING_MULTIBYTE (str))
5824     {
5825       /* Decoding routines expect the source text to be unibyte.  */
5826       str = Fstring_as_unibyte (str);
5827       to_byte = STRING_BYTES (XSTRING (str));
5828       nocopy = 1;
5829       coding->src_multibyte = 0;
5830     }
5831
5832   /* Try to skip the heading and tailing ASCIIs.  */
5833   if (require_decoding && coding->type != coding_type_ccl)
5834     {
5835       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
5836                                 0);
           /* Nothing but skippable text: no decoding is needed after
              all.  */
5837       if (from == to_byte)
5838         require_decoding = 0;
5839       shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
5840     }
5841
5842   if (!require_decoding)
5843     {
5844       coding->consumed = STRING_BYTES (XSTRING (str));
5845       coding->consumed_char = XSTRING (str)->size;
5846       if (coding->dst_multibyte)
5847         {
5848           str = Fstring_as_multibyte (str);
5849           nocopy = 1;
5850         }
5851       coding->produced = STRING_BYTES (XSTRING (str));
5852       coding->produced_char = XSTRING (str)->size;
5853       return (nocopy ? str : Fcopy_sequence (str));
5854     }
5855
5856   if (coding->composing != COMPOSITION_DISABLED)
5857     coding_allocate_composition_data (coding, from);
5858   len = decoding_buffer_size (coding, to_byte - from);
5859   allocate_conversion_buffer (buf, len);
5860
       /* Decode in batches into BUF, accumulating the per-batch counts
          reported in CODING, until the decoder finishes normally or
          stops for lack of source without having consumed anything.  */
5861   consumed = consumed_char = produced = produced_char = 0;
5862   while (1)
5863     {
5864       result = decode_coding (coding, XSTRING (str)->data + from + consumed,
5865                               buf.data + produced, to_byte - from - consumed,
5866                               buf.size - produced);
5867       consumed += coding->consumed;
5868       consumed_char += coding->consumed_char;
5869       produced += coding->produced;
5870       produced_char += coding->produced_char;
5871       if (result == CODING_FINISH_NORMAL
5872           || (result == CODING_FINISH_INSUFFICIENT_SRC
5873               && coding->consumed == 0))
5874         break;
5875       if (result == CODING_FINISH_INSUFFICIENT_CMP)
5876         coding_allocate_composition_data (coding, from + produced_char);
5877       else if (result == CODING_FINISH_INSUFFICIENT_DST)
5878         extend_conversion_buffer (&buf);
5879       else if (result == CODING_FINISH_INCONSISTENT_EOL)
5880         {
5881           /* Recover the original EOL format.  */
5882           if (coding->eol_type == CODING_EOL_CR)
5883             {
5884               unsigned char *p;
5885               for (p = buf.data; p < buf.data + produced; p++)
5886                 if (*p == '\n') *p = '\r';
5887             }
5888           else if (coding->eol_type == CODING_EOL_CRLF)
5889             {
5890               int num_eol = 0;
5891               unsigned char *p0, *p1;
5892               for (p0 = buf.data, p1 = p0 + produced; p0 < p1; p0++)
5893                 if (*p0 == '\n') num_eol++;
                   /* NOTE(review): the buffer is extended only once here;
                      whether that always makes room for NUM_EOL more
                      bytes depends on extend_conversion_buffer --
                      confirm.  */
5894               if (produced + num_eol >= buf.size)
5895                 extend_conversion_buffer (&buf);
                   /* Turn each LF back into CRLF in place, copying
                      backward from the end of the output.  */
5896               for (p0 = buf.data + produced, p1 = p0 + num_eol; p0 > buf.data;)
5897                 {
5898                   *--p1 = *--p0;
5899                   if (*p0 == '\n') *--p1 = '\r';
5900                 }
5901               produced += num_eol;
5902               produced_char += num_eol;
5903             }
               /* No eol conversion from here on.  */
5904           coding->eol_type = CODING_EOL_LF;
5905           coding->symbol = saved_coding_symbol;
5906         }
5907     }
5908
5909   coding->consumed = consumed;
5910   coding->consumed_char = consumed_char;
5911   coding->produced = produced;
5912   coding->produced_char = produced_char;
5913
       /* Assemble the result: skipped head, decoded text, skipped
          tail.  */
5914   if (coding->dst_multibyte)
5915     newstr = make_uninit_multibyte_string (produced_char + shrinked_bytes,
5916                                            produced + shrinked_bytes);
5917   else
5918     newstr = make_uninit_string (produced + shrinked_bytes);
5919   if (from > 0)
5920     bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
5921   bcopy (buf.data, XSTRING (newstr)->data + from, produced);
5922   if (shrinked_bytes > from)
5923     bcopy (XSTRING (str)->data + to_byte,
5924            XSTRING (newstr)->data + from + produced,
5925            shrinked_bytes - from);
5926   free_conversion_buffer (&buf);
5927
5928   if (coding->cmp_data && coding->cmp_data->used)
5929     coding_restore_composition (coding, newstr);
5930   coding_free_composition_data (coding);
5931
5932   if (SYMBOLP (coding->post_read_conversion)
5933       && !NILP (Ffboundp (coding->post_read_conversion)))
5934     newstr = run_pre_post_conversion_on_str (newstr, coding, 0);
5935
5936   return newstr;
5937 }
5938
5939 Lisp_Object
5940 encode_coding_string (str, coding, nocopy)
5941      Lisp_Object str;
5942      struct coding_system *coding;
5943      int nocopy;
5944 {
5945   int len;
5946   struct conversion_buffer buf;
5947   int from, to, to_byte;
5948   int result;
5949   int shrinked_bytes = 0;
5950   Lisp_Object newstr;
5951   int consumed, consumed_char, produced, produced_char;
5952
5953   if (SYMBOLP (coding->pre_write_conversion)
5954       && !NILP (Ffboundp (coding->pre_write_conversion)))
5955     str = run_pre_post_conversion_on_str (str, coding, 1);
5956
5957   from = 0;
5958   to = XSTRING (str)->size;
5959   to_byte = STRING_BYTES (XSTRING (str));
5960
5961   /* Encoding routines determine the multibyteness of the source text
5962      by coding->src_multibyte.  */
5963   coding->src_multibyte = STRING_MULTIBYTE (str);
5964   coding->dst_multibyte = 0;
5965   if (! CODING_REQUIRE_ENCODING (coding))
5966     {
5967       coding->consumed = STRING_BYTES (XSTRING (str));
5968       coding->consumed_char = XSTRING (str)->size;
5969       if (STRING_MULTIBYTE (str))
5970         {
5971           str = Fstring_as_unibyte (str);
5972           nocopy = 1;
5973         }
5974       coding->produced = STRING_BYTES (XSTRING (str));
5975       coding->produced_char = XSTRING (str)->size;
5976       return (nocopy ? str : Fcopy_sequence (str));
5977     }
5978
5979   if (coding->composing != COMPOSITION_DISABLED)
5980     coding_save_composition (coding, from, to, str);
5981
5982   /* Try to skip the heading and tailing ASCIIs.  */
5983   if (coding->type != coding_type_ccl)
5984     {
5985       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
5986                                 1);
5987       if (from == to_byte)
5988         return (nocopy ? str : Fcopy_sequence (str));
5989       shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
5990     }
5991
5992   len = encoding_buffer_size (coding, to_byte - from);
5993   allocate_conversion_buffer (buf, len);
5994
5995   consumed = consumed_char = produced = produced_char = 0;
5996   while (1)
5997     {
5998       result = encode_coding (coding, XSTRING (str)->data + from + consumed,
5999                               buf.data + produced, to_byte - from - consumed,
6000                               buf.size - produced);
6001       consumed += coding->consumed;
6002       consumed_char += coding->consumed_char;
6003       produced += coding->produced;
6004       produced_char += coding->produced_char;
6005       if (result == CODING_FINISH_NORMAL
6006           || (result == CODING_FINISH_INSUFFICIENT_SRC
6007               && coding->consumed == 0))
6008         break;
6009       /* Now result should be CODING_FINISH_INSUFFICIENT_DST.  */
6010       extend_conversion_buffer (&buf);
6011     }
6012
6013   coding->consumed = consumed;
6014   coding->consumed_char = consumed_char;
6015   coding->produced = produced;
6016   coding->produced_char = produced_char;
6017
6018   newstr = make_uninit_string (produced + shrinked_bytes);
6019   if (from > 0)
6020     bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
6021   bcopy (buf.data, XSTRING (newstr)->data + from, produced);
6022   if (shrinked_bytes > from)
6023     bcopy (XSTRING (str)->data + to_byte,
6024            XSTRING (newstr)->data + from + produced,
6025            shrinked_bytes - from);
6026
6027   free_conversion_buffer (&buf);
6028   coding_free_composition_data (coding);
6029
6030   return newstr;
6031 }
6032
6033 \f
6034 #ifdef emacs
6035 /*** 8. Emacs Lisp library functions ***/
6036
6037 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6038   "Return t if OBJECT is nil or a coding-system.\n\
6039 See the documentation of `make-coding-system' for information\n\
6040 about coding-system objects.")
6041   (obj)
6042      Lisp_Object obj;
6043 {
       /* nil is always accepted.  */
6044   if (NILP (obj))
6045     return Qt;
       /* Anything else must be a symbol.  */
6046   if (!SYMBOLP (obj))
6047     return Qnil;
6048   /* Get coding-spec vector for OBJ.  */
6049   obj = Fget (obj, Qcoding_system);
       /* OBJ names a coding system if that property is a vector of
          exactly five elements.  */
6050   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
6051           ? Qt : Qnil);
6052 }
6053
6054 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6055        Sread_non_nil_coding_system, 1, 1, 0,
6056   "Read a coding system from the minibuffer, prompting with string PROMPT.")
6057   (prompt)
6058      Lisp_Object prompt;
6059 {
6060   Lisp_Object val;
6061   do
6062     {
6063       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6064                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
6065     }
6066   while (XSTRING (val)->size == 0);
6067   return (Fintern (val, Qnil));
6068 }
6069
6070 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
6071   "Read a coding system from the minibuffer, prompting with string PROMPT.\n\
6072 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.")
6073   (prompt, default_coding_system)
6074      Lisp_Object prompt, default_coding_system;
6075 {
6076   Lisp_Object val;
6077   if (SYMBOLP (default_coding_system))
6078     XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
6079   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6080                           Qt, Qnil, Qcoding_system_history,
6081                           default_coding_system, Qnil);
6082   return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
6083 }
6084
6085 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6086        1, 1, 0,
6087   "Check validity of CODING-SYSTEM.\n\
6088 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.\n\
6089 It is valid if it is a symbol with a non-nil `coding-system' property.\n\
6090 The value of property should be a vector of length 5.")
6091   (coding_system)
6092      Lisp_Object coding_system;
6093 {
6094   CHECK_SYMBOL (coding_system, 0);
6095   if (!NILP (Fcoding_system_p (coding_system)))
6096     return coding_system;
6097   while (1)
6098     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
6099 }
6100 \f
6101 Lisp_Object
6102 detect_coding_system (src, src_bytes, highest, multibytep)
6103      unsigned char *src;
6104      int src_bytes, highest;
6105      int multibytep;
6106 {
6107   int coding_mask, eol_type;
6108   Lisp_Object val, tmp;
6109   int dummy;
6110
6111   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy, multibytep);
6112   eol_type  = detect_eol_type (src, src_bytes, &dummy);
6113   if (eol_type == CODING_EOL_INCONSISTENT)
6114     eol_type = CODING_EOL_UNDECIDED;
6115
6116   if (!coding_mask)
6117     {
6118       val = Qundecided;
6119       if (eol_type != CODING_EOL_UNDECIDED)
6120         {
6121           Lisp_Object val2;
6122           val2 = Fget (Qundecided, Qeol_type);
6123           if (VECTORP (val2))
6124             val = XVECTOR (val2)->contents[eol_type];
6125         }
6126       return (highest ? val : Fcons (val, Qnil));
6127     }
6128
6129   /* At first, gather possible coding systems in VAL.  */
6130   val = Qnil;
6131   for (tmp = Vcoding_category_list; CONSP (tmp); tmp = XCDR (tmp))
6132     {
6133       Lisp_Object category_val, category_index;
6134
6135       category_index = Fget (XCAR (tmp), Qcoding_category_index);
6136       category_val = Fsymbol_value (XCAR (tmp));
6137       if (!NILP (category_val)
6138           && NATNUMP (category_index)
6139           && (coding_mask & (1 << XFASTINT (category_index))))
6140         {
6141           val = Fcons (category_val, val);
6142           if (highest)
6143             break;
6144         }
6145     }
6146   if (!highest)
6147     val = Fnreverse (val);
6148
6149   /* Then, replace the elements with subsidiary coding systems.  */
6150   for (tmp = val; CONSP (tmp); tmp = XCDR (tmp))
6151     {
6152       if (eol_type != CODING_EOL_UNDECIDED
6153           && eol_type != CODING_EOL_INCONSISTENT)
6154         {
6155           Lisp_Object eol;
6156           eol = Fget (XCAR (tmp), Qeol_type);
6157           if (VECTORP (eol))
6158             XCAR (tmp) = XVECTOR (eol)->contents[eol_type];
6159         }
6160     }
6161   return (highest ? XCAR (val) : val);
6162 }
6163
6164 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6165        2, 3, 0,
6166   "Detect coding system of the text in the region between START and END.\n\
6167 Return a list of possible coding systems ordered by priority.\n\
6168 \n\
6169 If only ASCII characters are found, it returns a list of single element\n\
6170 `undecided' or its subsidiary coding system according to a detected\n\
6171 end-of-line format.\n\
6172 \n\
6173 If optional argument HIGHEST is non-nil, return the coding system of\n\
6174 highest priority.")
6175   (start, end, highest)
6176      Lisp_Object start, end, highest;
6177 {
6178   int from, to;
6179   int from_byte, to_byte;
6180
6181   CHECK_NUMBER_COERCE_MARKER (start, 0);
6182   CHECK_NUMBER_COERCE_MARKER (end, 1);
6183
6184   validate_region (&start, &end);
6185   from = XINT (start), to = XINT (end);
6186   from_byte = CHAR_TO_BYTE (from);
6187   to_byte = CHAR_TO_BYTE (to);
6188
6189   if (from < GPT && to >= GPT)
6190     move_gap_both (to, to_byte);
6191
6192   return detect_coding_system (BYTE_POS_ADDR (from_byte),
6193                                to_byte - from_byte,
6194                                !NILP (highest),
6195                                !NILP (current_buffer
6196                                       ->enable_multibyte_characters));
6197 }
6198
6199 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6200        1, 2, 0,
6201   "Detect coding system of the text in STRING.\n\
6202 Return a list of possible coding systems ordered by priority.\n\
6203 \n\
6204 If only ASCII characters are found, it returns a list of single element\n\
6205 `undecided' or its subsidiary coding system according to a detected\n\
6206 end-of-line format.\n\
6207 \n\
6208 If optional argument HIGHEST is non-nil, return the coding system of\n\
6209 highest priority.")
6210   (string, highest)
6211      Lisp_Object string, highest;
6212 {
6213   CHECK_STRING (string, 0);
6214
6215   return detect_coding_system (XSTRING (string)->data,
6216                                STRING_BYTES (XSTRING (string)),
6217                                !NILP (highest),
6218                                STRING_MULTIBYTE (string));
6219 }
6220
6221 /* Return an intersection of lists L1 and L2.  */
6222
6223 static Lisp_Object
6224 intersection (l1, l2)
6225      Lisp_Object l1, l2;
6226 {
6227   Lisp_Object val;
6228
6229   for (val = Qnil; CONSP (l1); l1 = XCDR (l1))
6230     {
6231       if (!NILP (Fmemq (XCAR (l1), l2)))
6232         val = Fcons (XCAR (l1), val);
6233     }
6234   return val;
6235 }
6236
6237
6238 /*  Subroutine for Fsafe_coding_systems_region_internal.
6239
6240     Return a list of coding systems that safely encode the multibyte
6241     text between P and PEND.  SAFE_CODINGS, if non-nil, is a list of
6242     possible coding systems.  If it is nil, it means that we have not
6243     yet found any coding systems.
6244
6245     WORK_TABLE is a copy of the char-table Vchar_coding_system_table.  An
6246     element of WORK_TABLE is set to t once the element is looked up.
6247
6248     If a non-ASCII single byte char is found, set
6249     *single_byte_char_found to 1.  */
6250
6251 static Lisp_Object
6252 find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
6253      unsigned char *p, *pend;
6254      Lisp_Object safe_codings, work_table;
6255      int *single_byte_char_found;
6256 {
6257   int c, len, idx;
6258   Lisp_Object val;
6259
6260   while (p < pend)
6261     {
6262       c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
6263       p += len;
6264       if (ASCII_BYTE_P (c))
6265         /* We can ignore ASCII characters here.  */
6266         continue;
6267       if (SINGLE_BYTE_CHAR_P (c))
6268         *single_byte_char_found = 1;
6269       if (NILP (safe_codings))
6270         continue;
6271       /* Check the safe coding systems for C.  */
6272       val = char_table_ref_and_index (work_table, c, &idx);
6273       if (EQ (val, Qt))
6274         /* This element was already checked.  Ignore it.  */
6275         continue;
6276       /* Remember that we checked this element.  */
6277       CHAR_TABLE_SET (work_table, make_number (idx), Qt);
6278
6279       /* If there are some safe coding systems for C and we have
6280          already found the other set of coding systems for the
6281          different characters, get the intersection of them.  */
6282       if (!EQ (safe_codings, Qt) && !NILP (val))
6283         val = intersection (safe_codings, val);
6284       safe_codings = val;
6285     }
6286   return safe_codings;
6287 }
6288
6289
6290 /* Return a list of coding systems that safely encode the text between
6291    START and END.  If the text contains only ASCII or is unibyte,
6292    return t.  */
6293
6294 DEFUN ("find-coding-systems-region-internal",
6295        Ffind_coding_systems_region_internal,
6296        Sfind_coding_systems_region_internal, 2, 2, 0,
6297   "Internal use only.")
6298   (start, end)
6299      Lisp_Object start, end;
6300 {
6301   Lisp_Object work_table, safe_codings;
6302   int non_ascii_p = 0;
6303   int single_byte_char_found = 0;
6304   unsigned char *p1, *p1end, *p2, *p2end, *p;
6305
6306   if (STRINGP (start))
6307     {
6308       if (!STRING_MULTIBYTE (start))
6309         return Qt;
6310       p1 = XSTRING (start)->data, p1end = p1 + STRING_BYTES (XSTRING (start));
6311       p2 = p2end = p1end;
6312       if (XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
6313         non_ascii_p = 1;
6314     }
6315   else
6316     {
6317       int from, to, stop;
6318
6319       CHECK_NUMBER_COERCE_MARKER (start, 0);
6320       CHECK_NUMBER_COERCE_MARKER (end, 1);
6321       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6322         args_out_of_range (start, end);
6323       if (NILP (current_buffer->enable_multibyte_characters))
6324         return Qt;
6325       from = CHAR_TO_BYTE (XINT (start));
6326       to = CHAR_TO_BYTE (XINT (end));
6327       stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to;
6328       p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from);
6329       if (stop == to)
6330         p2 = p2end = p1end;
6331       else
6332         p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop);
6333       if (XINT (end) - XINT (start) != to - from)
6334         non_ascii_p = 1;
6335     }
6336
6337   if (!non_ascii_p)
6338     {
6339       /* We are sure that the text contains no multibyte character.
6340          Check if it contains eight-bit-graphic.  */
6341       p = p1;
6342       for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++);
6343       if (p == p1end)
6344         {
6345           for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);
6346           if (p == p2end)
6347             return Qt;
6348         }
6349     }
6350
6351   /* The text contains non-ASCII characters.  */
6352   work_table = Fcopy_sequence (Vchar_coding_system_table);
6353   safe_codings = find_safe_codings (p1, p1end, Qt, work_table,
6354                                     &single_byte_char_found);
6355   if (p2 < p2end)
6356     safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
6357                                       &single_byte_char_found);
6358
6359   if (!single_byte_char_found)
6360     {
6361       /* Append generic coding systems.  */
6362       Lisp_Object args[2];
6363       args[0] = safe_codings;
6364       args[1] = Fchar_table_extra_slot (Vchar_coding_system_table,
6365                                         make_number (0));
6366       safe_codings = Fappend (2, args);
6367     }
6368   else
6369     safe_codings = Fcons (Qraw_text,
6370                           Fcons (Qemacs_mule,
6371                                  Fcons (Qno_conversion, safe_codings)));
6372   return safe_codings;
6373 }
6374
6375
6376 Lisp_Object
6377 code_convert_region1 (start, end, coding_system, encodep)
6378      Lisp_Object start, end, coding_system;
6379      int encodep;
6380 {
6381   struct coding_system coding;
6382   int from, to;
6383
6384   CHECK_NUMBER_COERCE_MARKER (start, 0);
6385   CHECK_NUMBER_COERCE_MARKER (end, 1);
6386   CHECK_SYMBOL (coding_system, 2);
6387
6388   validate_region (&start, &end);
6389   from = XFASTINT (start);
6390   to = XFASTINT (end);
6391
6392   if (NILP (coding_system))
6393     return make_number (to - from);
6394
6395   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6396     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
6397
6398   coding.mode |= CODING_MODE_LAST_BLOCK;
6399   coding.src_multibyte = coding.dst_multibyte
6400     = !NILP (current_buffer->enable_multibyte_characters);
6401   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
6402                        &coding, encodep, 1);
6403   Vlast_coding_system_used = coding.symbol;
6404   return make_number (coding.produced_char);
6405 }
6406
6407 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
6408        3, 3, "r\nzCoding system: ",
6409   "Decode the current region by specified coding system.\n\
6410 When called from a program, takes three arguments:\n\
6411 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
6412 This function sets `last-coding-system-used' to the precise coding system\n\
6413 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
6414 not fully specified.)\n\
6415 It returns the length of the decoded text.")
6416   (start, end, coding_system)
6417      Lisp_Object start, end, coding_system;
6418 {
6419   return code_convert_region1 (start, end, coding_system, 0);
6420 }
6421
6422 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
6423        3, 3, "r\nzCoding system: ",
6424   "Encode the current region by specified coding system.\n\
6425 When called from a program, takes three arguments:\n\
6426 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
6427 This function sets `last-coding-system-used' to the precise coding system\n\
6428 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
6429 not fully specified.)\n\
6430 It returns the length of the encoded text.")
6431   (start, end, coding_system)
6432      Lisp_Object start, end, coding_system;
6433 {
6434   return code_convert_region1 (start, end, coding_system, 1);
6435 }
6436
6437 Lisp_Object
6438 code_convert_string1 (string, coding_system, nocopy, encodep)
6439      Lisp_Object string, coding_system, nocopy;
6440      int encodep;
6441 {
6442   struct coding_system coding;
6443
6444   CHECK_STRING (string, 0);
6445   CHECK_SYMBOL (coding_system, 1);
6446
6447   if (NILP (coding_system))
6448     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
6449
6450   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6451     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
6452
6453   coding.mode |= CODING_MODE_LAST_BLOCK;
6454   string = (encodep
6455             ? encode_coding_string (string, &coding, !NILP (nocopy))
6456             : decode_coding_string (string, &coding, !NILP (nocopy)));
6457   Vlast_coding_system_used = coding.symbol;
6458
6459   return string;
6460 }
6461
6462 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
6463        2, 3, 0,
6464   "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
6465 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
6466 if the decoding operation is trivial.\n\
6467 This function sets `last-coding-system-used' to the precise coding system\n\
6468 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
6469 not fully specified.)")
6470   (string, coding_system, nocopy)
6471      Lisp_Object string, coding_system, nocopy;
6472 {
6473   return code_convert_string1 (string, coding_system, nocopy, 0);
6474 }
6475
6476 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
6477        2, 3, 0,
6478   "Encode STRING to CODING-SYSTEM, and return the result.\n\
6479 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
6480 if the encoding operation is trivial.\n\
6481 This function sets `last-coding-system-used' to the precise coding system\n\
6482 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
6483 not fully specified.)")
6484   (string, coding_system, nocopy)
6485      Lisp_Object string, coding_system, nocopy;
6486 {
6487   return code_convert_string1 (string, coding_system, nocopy, 1);
6488 }
6489
6490 /* Encode or decode STRING according to CODING_SYSTEM.
6491    Do not set Vlast_coding_system_used.
6492
6493    This function is called only from macros DECODE_FILE and
6494    ENCODE_FILE, thus we ignore character composition.  */
6495
6496 Lisp_Object
6497 code_convert_string_norecord (string, coding_system, encodep)
6498      Lisp_Object string, coding_system;
6499      int encodep;
6500 {
6501   struct coding_system coding;
6502
6503   CHECK_STRING (string, 0);
6504   CHECK_SYMBOL (coding_system, 1);
6505
6506   if (NILP (coding_system))
6507     return string;
6508
6509   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6510     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
6511
6512   coding.composing = COMPOSITION_DISABLED;
6513   coding.mode |= CODING_MODE_LAST_BLOCK;
6514   return (encodep
6515           ? encode_coding_string (string, &coding, 1)
6516           : decode_coding_string (string, &coding, 1));
6517 }
6518 \f
6519 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
6520   "Decode a Japanese character which has CODE in shift_jis encoding.\n\
6521 Return the corresponding character.")
6522   (code)
6523      Lisp_Object code;
6524 {
6525   unsigned char c1, c2, s1, s2;
6526   Lisp_Object val;
6527
6528   CHECK_NUMBER (code, 0);
6529   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
6530   if (s1 == 0)
6531     {
6532       if (s2 < 0x80)
6533         XSETFASTINT (val, s2);
6534       else if (s2 >= 0xA0 || s2 <= 0xDF)
6535         XSETFASTINT (val, MAKE_CHAR (charset_katakana_jisx0201, s2, 0));
6536       else
6537         error ("Invalid Shift JIS code: %x", XFASTINT (code));
6538     }
6539   else
6540     {
6541       if ((s1 < 0x80 || s1 > 0x9F && s1 < 0xE0 || s1 > 0xEF)
6542           || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC))
6543         error ("Invalid Shift JIS code: %x", XFASTINT (code));
6544       DECODE_SJIS (s1, s2, c1, c2);
6545       XSETFASTINT (val, MAKE_CHAR (charset_jisx0208, c1, c2));
6546     }
6547   return val;
6548 }
6549
6550 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
6551   "Encode a Japanese character CHAR to shift_jis encoding.\n\
6552 Return the corresponding code in SJIS.")
6553   (ch)
6554      Lisp_Object ch;
6555 {
6556   int charset, c1, c2, s1, s2;
6557   Lisp_Object val;
6558
6559   CHECK_NUMBER (ch, 0);
6560   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
6561   if (charset == CHARSET_ASCII)
6562     {
6563       val = ch;
6564     }
6565   else if (charset == charset_jisx0208
6566            && c1 > 0x20 && c1 < 0x7F && c2 > 0x20 && c2 < 0x7F)
6567     {
6568       ENCODE_SJIS (c1, c2, s1, s2);
6569       XSETFASTINT (val, (s1 << 8) | s2);
6570     }
6571   else if (charset == charset_katakana_jisx0201
6572            && c1 > 0x20 && c2 < 0xE0)
6573     {
6574       XSETFASTINT (val, c1 | 0x80);
6575     }
6576   else
6577     error ("Can't encode to shift_jis: %d", XFASTINT (ch));
6578   return val;
6579 }
6580
6581 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
6582   "Decode a Big5 character which has CODE in BIG5 coding system.\n\
6583 Return the corresponding character.")
6584   (code)
6585      Lisp_Object code;
6586 {
6587   int charset;
6588   unsigned char b1, b2, c1, c2;
6589   Lisp_Object val;
6590
6591   CHECK_NUMBER (code, 0);
6592   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
6593   if (b1 == 0)
6594     {
6595       if (b2 >= 0x80)
6596         error ("Invalid BIG5 code: %x", XFASTINT (code));
6597       val = code;
6598     }
6599   else
6600     {
6601       if ((b1 < 0xA1 || b1 > 0xFE)
6602           || (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE))
6603         error ("Invalid BIG5 code: %x", XFASTINT (code));
6604       DECODE_BIG5 (b1, b2, charset, c1, c2);
6605       XSETFASTINT (val, MAKE_CHAR (charset, c1, c2));
6606     }
6607   return val;
6608 }
6609
6610 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
6611   "Encode the Big5 character CHAR to BIG5 coding system.\n\
6612 Return the corresponding character code in Big5.")
6613   (ch)
6614      Lisp_Object ch;
6615 {
6616   int charset, c1, c2, b1, b2;
6617   Lisp_Object val;
6618
6619   CHECK_NUMBER (ch, 0);
6620   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
6621   if (charset == CHARSET_ASCII)
6622     {
6623       val = ch;
6624     }
6625   else if ((charset == charset_big5_1
6626             && (XFASTINT (ch) >= 0x250a1 && XFASTINT (ch) <= 0x271ec))
6627            || (charset == charset_big5_2
6628                && XFASTINT (ch) >= 0x290a1 && XFASTINT (ch) <= 0x2bdb2))
6629     {
6630       ENCODE_BIG5 (charset, c1, c2, b1, b2);
6631       XSETFASTINT (val, (b1 << 8) | b2);
6632     }
6633   else
6634     error ("Can't encode to Big5: %d", XFASTINT (ch));
6635   return val;
6636 }
6637 \f
6638 DEFUN ("set-terminal-coding-system-internal",
6639        Fset_terminal_coding_system_internal,
6640        Sset_terminal_coding_system_internal, 1, 1, 0, "")
6641   (coding_system)
6642      Lisp_Object coding_system;
6643 {
6644   CHECK_SYMBOL (coding_system, 0);
6645   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
6646   /* We had better not send unsafe characters to terminal.  */
6647   terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
6648   /* Characer composition should be disabled.  */
6649   terminal_coding.composing = COMPOSITION_DISABLED;
6650   terminal_coding.src_multibyte = 1;
6651   terminal_coding.dst_multibyte = 0;
6652   return Qnil;
6653 }
6654
6655 DEFUN ("set-safe-terminal-coding-system-internal",
6656        Fset_safe_terminal_coding_system_internal,
6657        Sset_safe_terminal_coding_system_internal, 1, 1, 0, "")
6658   (coding_system)
6659      Lisp_Object coding_system;
6660 {
6661   CHECK_SYMBOL (coding_system, 0);
6662   setup_coding_system (Fcheck_coding_system (coding_system),
6663                        &safe_terminal_coding);
6664   /* Characer composition should be disabled.  */
6665   safe_terminal_coding.composing = COMPOSITION_DISABLED;
6666   safe_terminal_coding.src_multibyte = 1;
6667   safe_terminal_coding.dst_multibyte = 0;
6668   return Qnil;
6669 }
6670
6671 DEFUN ("terminal-coding-system",
6672        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
6673   "Return coding system specified for terminal output.")
6674   ()
6675 {
6676   return terminal_coding.symbol;
6677 }
6678
6679 DEFUN ("set-keyboard-coding-system-internal",
6680        Fset_keyboard_coding_system_internal,
6681        Sset_keyboard_coding_system_internal, 1, 1, 0, "")
6682   (coding_system)
6683      Lisp_Object coding_system;
6684 {
6685   CHECK_SYMBOL (coding_system, 0);
6686   setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
6687   /* Characer composition should be disabled.  */
6688   keyboard_coding.composing = COMPOSITION_DISABLED;
6689   return Qnil;
6690 }
6691
6692 DEFUN ("keyboard-coding-system",
6693        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
6694   "Return coding system specified for decoding keyboard input.")
6695   ()
6696 {
6697   return keyboard_coding.symbol;
6698 }
6699
6700 \f
6701 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
6702        Sfind_operation_coding_system,  1, MANY, 0,
6703   "Choose a coding system for an operation based on the target name.\n\
6704 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).\n\
6705 DECODING-SYSTEM is the coding system to use for decoding\n\
6706 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
6707 for encoding (in case OPERATION does encoding).\n\
6708 \n\
6709 The first argument OPERATION specifies an I/O primitive:\n\
6710   For file I/O, `insert-file-contents' or `write-region'.\n\
6711   For process I/O, `call-process', `call-process-region', or `start-process'.\n\
6712   For network I/O, `open-network-stream'.\n\
6713 \n\
6714 The remaining arguments should be the same arguments that were passed\n\
6715 to the primitive.  Depending on which primitive, one of those arguments\n\
6716 is selected as the TARGET.  For example, if OPERATION does file I/O,\n\
6717 whichever argument specifies the file name is TARGET.\n\
6718 \n\
6719 TARGET has a meaning which depends on OPERATION:\n\
6720   For file I/O, TARGET is a file name.\n\
6721   For process I/O, TARGET is a process name.\n\
6722   For network I/O, TARGET is a service name or a port number\n\
6723 \n\
6724 This function looks up what specified for TARGET in,\n\
6725 `file-coding-system-alist', `process-coding-system-alist',\n\
6726 or `network-coding-system-alist' depending on OPERATION.\n\
6727 They may specify a coding system, a cons of coding systems,\n\
6728 or a function symbol to call.\n\
6729 In the last case, we call the function with one argument,\n\
6730 which is a list of all the arguments given to this function.")
6731   (nargs, args)
6732      int nargs;
6733      Lisp_Object *args;
6734 {
6735   Lisp_Object operation, target_idx, target, val;
6736   register Lisp_Object chain;
6737
6738   if (nargs < 2)
6739     error ("Too few arguments");
6740   operation = args[0];
6741   if (!SYMBOLP (operation)
6742       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
6743     error ("Invalid first arguement");
6744   if (nargs < 1 + XINT (target_idx))
6745     error ("Too few arguments for operation: %s",
6746            XSYMBOL (operation)->name->data);
6747   target = args[XINT (target_idx) + 1];
6748   if (!(STRINGP (target)
6749         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
6750     error ("Invalid %dth argument", XINT (target_idx) + 1);
6751
6752   chain = ((EQ (operation, Qinsert_file_contents)
6753             || EQ (operation, Qwrite_region))
6754            ? Vfile_coding_system_alist
6755            : (EQ (operation, Qopen_network_stream)
6756               ? Vnetwork_coding_system_alist
6757               : Vprocess_coding_system_alist));
6758   if (NILP (chain))
6759     return Qnil;
6760
6761   for (; CONSP (chain); chain = XCDR (chain))
6762     {
6763       Lisp_Object elt;
6764       elt = XCAR (chain);
6765
6766       if (CONSP (elt)
6767           && ((STRINGP (target)
6768                && STRINGP (XCAR (elt))
6769                && fast_string_match (XCAR (elt), target) >= 0)
6770               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
6771         {
6772           val = XCDR (elt);
6773           /* Here, if VAL is both a valid coding system and a valid
6774              function symbol, we return VAL as a coding system.  */
6775           if (CONSP (val))
6776             return val;
6777           if (! SYMBOLP (val))
6778             return Qnil;
6779           if (! NILP (Fcoding_system_p (val)))
6780             return Fcons (val, val);
6781           if (! NILP (Ffboundp (val)))
6782             {
6783               val = call1 (val, Flist (nargs, args));
6784               if (CONSP (val))
6785                 return val;
6786               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
6787                 return Fcons (val, val);
6788             }
6789           return Qnil;
6790         }
6791     }
6792   return Qnil;
6793 }
6794
6795 DEFUN ("update-coding-systems-internal",  Fupdate_coding_systems_internal,
6796        Supdate_coding_systems_internal, 0, 0, 0,
6797   "Update internal database for ISO2022 and CCL based coding systems.\n\
6798 When values of any coding categories are changed, you must\n\
6799 call this function")
6800   ()
6801 {
6802   int i;
6803
6804   for (i = CODING_CATEGORY_IDX_EMACS_MULE; i < CODING_CATEGORY_IDX_MAX; i++)
6805     {
6806       Lisp_Object val;
6807
6808       val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value;
6809       if (!NILP (val))
6810         {
6811           if (! coding_system_table[i])
6812             coding_system_table[i] = ((struct coding_system *)
6813                                       xmalloc (sizeof (struct coding_system)));
6814           setup_coding_system (val, coding_system_table[i]);
6815         }
6816       else if (coding_system_table[i])
6817         {
6818           xfree (coding_system_table[i]);
6819           coding_system_table[i] = NULL;
6820         }
6821     }
6822
6823   return Qnil;
6824 }
6825
6826 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
6827        Sset_coding_priority_internal, 0, 0, 0,
6828   "Update internal database for the current value of `coding-category-list'.\n\
6829 This function is internal use only.")
6830   ()
6831 {
6832   int i = 0, idx;
6833   Lisp_Object val;
6834
6835   val = Vcoding_category_list;
6836
6837   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
6838     {
6839       if (! SYMBOLP (XCAR (val)))
6840         break;
6841       idx = XFASTINT (Fget (XCAR (val), Qcoding_category_index));
6842       if (idx >= CODING_CATEGORY_IDX_MAX)
6843         break;
6844       coding_priorities[i++] = (1 << idx);
6845       val = XCDR (val);
6846     }
6847   /* If coding-category-list is valid and contains all coding
6848      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
6849      the following code saves Emacs from crashing.  */
6850   while (i < CODING_CATEGORY_IDX_MAX)
6851     coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
6852
6853   return Qnil;
6854 }
6855
6856 #endif /* emacs */
6857
6858 \f
6859 /*** 9. Post-amble ***/
6860
6861 void
6862 init_coding_once ()
6863 {
6864   int i;
6865
6866   /* Emacs' internal format specific initialize routine.  */
6867   for (i = 0; i <= 0x20; i++)
6868     emacs_code_class[i] = EMACS_control_code;
6869   emacs_code_class[0x0A] = EMACS_linefeed_code;
6870   emacs_code_class[0x0D] = EMACS_carriage_return_code;
6871   for (i = 0x21 ; i < 0x7F; i++)
6872     emacs_code_class[i] = EMACS_ascii_code;
6873   emacs_code_class[0x7F] = EMACS_control_code;
6874   for (i = 0x80; i < 0xFF; i++)
6875     emacs_code_class[i] = EMACS_invalid_code;
6876   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
6877   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
6878   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
6879   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
6880
6881   /* ISO2022 specific initialize routine.  */
6882   for (i = 0; i < 0x20; i++)
6883     iso_code_class[i] = ISO_control_0;
6884   for (i = 0x21; i < 0x7F; i++)
6885     iso_code_class[i] = ISO_graphic_plane_0;
6886   for (i = 0x80; i < 0xA0; i++)
6887     iso_code_class[i] = ISO_control_1;
6888   for (i = 0xA1; i < 0xFF; i++)
6889     iso_code_class[i] = ISO_graphic_plane_1;
6890   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
6891   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
6892   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
6893   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
6894   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
6895   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
6896   iso_code_class[ISO_CODE_ESC] = ISO_escape;
6897   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
6898   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
6899   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
6900
6901   setup_coding_system (Qnil, &keyboard_coding);
6902   setup_coding_system (Qnil, &terminal_coding);
6903   setup_coding_system (Qnil, &safe_terminal_coding);
6904   setup_coding_system (Qnil, &default_buffer_file_coding);
6905
6906   bzero (coding_system_table, sizeof coding_system_table);
6907
6908   bzero (ascii_skip_code, sizeof ascii_skip_code);
6909   for (i = 0; i < 128; i++)
6910     ascii_skip_code[i] = 1;
6911
6912 #if defined (MSDOS) || defined (WINDOWSNT)
6913   system_eol_type = CODING_EOL_CRLF;
6914 #else
6915   system_eol_type = CODING_EOL_LF;
6916 #endif
6917
6918   inhibit_pre_post_conversion = 0;
6919 }
6920
6921 #ifdef emacs
6922
6923 void
6924 syms_of_coding ()
6925 {
6926   Qtarget_idx = intern ("target-idx");
6927   staticpro (&Qtarget_idx);
6928
6929   Qcoding_system_history = intern ("coding-system-history");
6930   staticpro (&Qcoding_system_history);
6931   Fset (Qcoding_system_history, Qnil);
6932
6933   /* Target FILENAME is the first argument.  */
6934   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
6935   /* Target FILENAME is the third argument.  */
6936   Fput (Qwrite_region, Qtarget_idx, make_number (2));
6937
6938   Qcall_process = intern ("call-process");
6939   staticpro (&Qcall_process);
6940   /* Target PROGRAM is the first argument.  */
6941   Fput (Qcall_process, Qtarget_idx, make_number (0));
6942
6943   Qcall_process_region = intern ("call-process-region");
6944   staticpro (&Qcall_process_region);
6945   /* Target PROGRAM is the third argument.  */
6946   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
6947
6948   Qstart_process = intern ("start-process");
6949   staticpro (&Qstart_process);
6950   /* Target PROGRAM is the third argument.  */
6951   Fput (Qstart_process, Qtarget_idx, make_number (2));
6952
6953   Qopen_network_stream = intern ("open-network-stream");
6954   staticpro (&Qopen_network_stream);
6955   /* Target SERVICE is the fourth argument.  */
6956   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
6957
6958   Qcoding_system = intern ("coding-system");
6959   staticpro (&Qcoding_system);
6960
6961   Qeol_type = intern ("eol-type");
6962   staticpro (&Qeol_type);
6963
6964   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
6965   staticpro (&Qbuffer_file_coding_system);
6966
6967   Qpost_read_conversion = intern ("post-read-conversion");
6968   staticpro (&Qpost_read_conversion);
6969
6970   Qpre_write_conversion = intern ("pre-write-conversion");
6971   staticpro (&Qpre_write_conversion);
6972
6973   Qno_conversion = intern ("no-conversion");
6974   staticpro (&Qno_conversion);
6975
6976   Qundecided = intern ("undecided");
6977   staticpro (&Qundecided);
6978
6979   Qcoding_system_p = intern ("coding-system-p");
6980   staticpro (&Qcoding_system_p);
6981
6982   Qcoding_system_error = intern ("coding-system-error");
6983   staticpro (&Qcoding_system_error);
6984
6985   Fput (Qcoding_system_error, Qerror_conditions,
6986         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
6987   Fput (Qcoding_system_error, Qerror_message,
6988         build_string ("Invalid coding system"));
6989
6990   Qcoding_category = intern ("coding-category");
6991   staticpro (&Qcoding_category);
6992   Qcoding_category_index = intern ("coding-category-index");
6993   staticpro (&Qcoding_category_index);
6994
6995   Vcoding_category_table
6996     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
6997   staticpro (&Vcoding_category_table);
6998   {
6999     int i;
7000     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
7001       {
7002         XVECTOR (Vcoding_category_table)->contents[i]
7003           = intern (coding_category_name[i]);
7004         Fput (XVECTOR (Vcoding_category_table)->contents[i],
7005               Qcoding_category_index, make_number (i));
7006       }
7007   }
7008
7009   Qtranslation_table = intern ("translation-table");
7010   staticpro (&Qtranslation_table);
7011   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
7012
7013   Qtranslation_table_id = intern ("translation-table-id");
7014   staticpro (&Qtranslation_table_id);
7015
7016   Qtranslation_table_for_decode = intern ("translation-table-for-decode");
7017   staticpro (&Qtranslation_table_for_decode);
7018
7019   Qtranslation_table_for_encode = intern ("translation-table-for-encode");
7020   staticpro (&Qtranslation_table_for_encode);
7021
7022   Qsafe_chars = intern ("safe-chars");
7023   staticpro (&Qsafe_chars);
7024
7025   Qchar_coding_system = intern ("char-coding-system");
7026   staticpro (&Qchar_coding_system);
7027
7028   /* Intern this now in case it isn't already done.
7029      Setting this variable twice is harmless.
7030      But don't staticpro it here--that is done in alloc.c.  */
7031   Qchar_table_extra_slots = intern ("char-table-extra-slots");
7032   Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
7033   Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (1));
7034
7035   Qvalid_codes = intern ("valid-codes");
7036   staticpro (&Qvalid_codes);
7037
7038   Qemacs_mule = intern ("emacs-mule");
7039   staticpro (&Qemacs_mule);
7040
7041   Qraw_text = intern ("raw-text");
7042   staticpro (&Qraw_text);
7043
7044   defsubr (&Scoding_system_p);
7045   defsubr (&Sread_coding_system);
7046   defsubr (&Sread_non_nil_coding_system);
7047   defsubr (&Scheck_coding_system);
7048   defsubr (&Sdetect_coding_region);
7049   defsubr (&Sdetect_coding_string);
7050   defsubr (&Sfind_coding_systems_region_internal);
7051   defsubr (&Sdecode_coding_region);
7052   defsubr (&Sencode_coding_region);
7053   defsubr (&Sdecode_coding_string);
7054   defsubr (&Sencode_coding_string);
7055   defsubr (&Sdecode_sjis_char);
7056   defsubr (&Sencode_sjis_char);
7057   defsubr (&Sdecode_big5_char);
7058   defsubr (&Sencode_big5_char);
7059   defsubr (&Sset_terminal_coding_system_internal);
7060   defsubr (&Sset_safe_terminal_coding_system_internal);
7061   defsubr (&Sterminal_coding_system);
7062   defsubr (&Sset_keyboard_coding_system_internal);
7063   defsubr (&Skeyboard_coding_system);
7064   defsubr (&Sfind_operation_coding_system);
7065   defsubr (&Supdate_coding_systems_internal);
7066   defsubr (&Sset_coding_priority_internal);
7067
7068   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
7069     "List of coding systems.\n\
7070 \n\
7071 Do not alter the value of this variable manually.  This variable should be\n\
7072 updated by the functions `make-coding-system' and\n\
7073 `define-coding-system-alias'.");
7074   Vcoding_system_list = Qnil;
7075
7076   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
7077     "Alist of coding system names.\n\
7078 Each element is one element list of coding system name.\n\
7079 This variable is given to `completing-read' as TABLE argument.\n\
7080 \n\
7081 Do not alter the value of this variable manually.  This variable should be\n\
7082 updated by the functions `make-coding-system' and\n\
7083 `define-coding-system-alias'.");
7084   Vcoding_system_alist = Qnil;
7085
7086   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
7087     "List of coding-categories (symbols) ordered by priority.");
7088   {
7089     int i;
7090
7091     Vcoding_category_list = Qnil;
7092     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
7093       Vcoding_category_list
7094         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
7095                  Vcoding_category_list);
7096   }
7097
7098   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
7099     "Specify the coding system for read operations.\n\
7100 It is useful to bind this variable with `let', but do not set it globally.\n\
7101 If the value is a coding system, it is used for decoding on read operation.\n\
7102 If not, an appropriate element is used from one of the coding system alists:\n\
7103 There are three such tables, `file-coding-system-alist',\n\
7104 `process-coding-system-alist', and `network-coding-system-alist'.");
7105   Vcoding_system_for_read = Qnil;
7106
7107   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
7108     "Specify the coding system for write operations.\n\
7109 Programs bind this variable with `let', but you should not set it globally.\n\
7110 If the value is a coding system, it is used for encoding of output,\n\
7111 when writing it to a file and when sending it to a file or subprocess.\n\
7112 \n\
7113 If this does not specify a coding system, an appropriate element\n\
7114 is used from one of the coding system alists:\n\
7115 There are three such tables, `file-coding-system-alist',\n\
7116 `process-coding-system-alist', and `network-coding-system-alist'.\n\
7117 For output to files, if the above procedure does not specify a coding system,\n\
7118 the value of `buffer-file-coding-system' is used.");
7119   Vcoding_system_for_write = Qnil;
7120
7121   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
7122     "Coding system used in the latest file or process I/O.");
7123   Vlast_coding_system_used = Qnil;
7124
7125   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
7126     "*Non-nil means always inhibit code conversion of end-of-line format.\n\
7127 See info node `Coding Systems' and info node `Text and Binary' concerning\n\
7128 such conversion.");
7129   inhibit_eol_conversion = 0;
7130
7131   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
7132     "Non-nil means process buffer inherits coding system of process output.\n\
7133 Bind it to t if the process output is to be treated as if it were a file\n\
7134 read from some filesystem.");
7135   inherit_process_coding_system = 0;
7136
7137   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
7138     "Alist to decide a coding system to use for a file I/O operation.\n\
7139 The format is ((PATTERN . VAL) ...),\n\
7140 where PATTERN is a regular expression matching a file name,\n\
7141 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
7142 If VAL is a coding system, it is used for both decoding and encoding\n\
7143 the file contents.\n\
7144 If VAL is a cons of coding systems, the car part is used for decoding,\n\
7145 and the cdr part is used for encoding.\n\
7146 If VAL is a function symbol, the function must return a coding system\n\
7147 or a cons of coding systems which are used as above.\n\
7148 \n\
7149 See also the function `find-operation-coding-system'\n\
7150 and the variable `auto-coding-alist'.");
7151   Vfile_coding_system_alist = Qnil;
7152
7153   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
7154     "Alist to decide a coding system to use for a process I/O operation.\n\
7155 The format is ((PATTERN . VAL) ...),\n\
7156 where PATTERN is a regular expression matching a program name,\n\
7157 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
7158 If VAL is a coding system, it is used for both decoding what received\n\
7159 from the program and encoding what sent to the program.\n\
7160 If VAL is a cons of coding systems, the car part is used for decoding,\n\
7161 and the cdr part is used for encoding.\n\
7162 If VAL is a function symbol, the function must return a coding system\n\
7163 or a cons of coding systems which are used as above.\n\
7164 \n\
7165 See also the function `find-operation-coding-system'.");
7166   Vprocess_coding_system_alist = Qnil;
7167
7168   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
7169     "Alist to decide a coding system to use for a network I/O operation.\n\
7170 The format is ((PATTERN . VAL) ...),\n\
7171 where PATTERN is a regular expression matching a network service name\n\
7172 or is a port number to connect to,\n\
7173 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
7174 If VAL is a coding system, it is used for both decoding what received\n\
7175 from the network stream and encoding what sent to the network stream.\n\
7176 If VAL is a cons of coding systems, the car part is used for decoding,\n\
7177 and the cdr part is used for encoding.\n\
7178 If VAL is a function symbol, the function must return a coding system\n\
7179 or a cons of coding systems which are used as above.\n\
7180 \n\
7181 See also the function `find-operation-coding-system'.");
7182   Vnetwork_coding_system_alist = Qnil;
7183
7184   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
7185     "Coding system to use with system messages.");
7186   Vlocale_coding_system = Qnil;
7187
7188   /* The eol mnemonics are reset in startup.el system-dependently.  */
7189   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
7190     "*String displayed in mode line for UNIX-like (LF) end-of-line format.");
7191   eol_mnemonic_unix = build_string (":");
7192
7193   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
7194     "*String displayed in mode line for DOS-like (CRLF) end-of-line format.");
7195   eol_mnemonic_dos = build_string ("\\");
7196
7197   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
7198     "*String displayed in mode line for MAC-like (CR) end-of-line format.");
7199   eol_mnemonic_mac = build_string ("/");
7200
7201   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
7202     "*String displayed in mode line when end-of-line format is not yet determined.");
7203   eol_mnemonic_undecided = build_string (":");
7204
7205   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
7206     "*Non-nil enables character translation while encoding and decoding.");
7207   Venable_character_translation = Qt;
7208
7209   DEFVAR_LISP ("standard-translation-table-for-decode",
7210     &Vstandard_translation_table_for_decode,
7211     "Table for translating characters while decoding.");
7212   Vstandard_translation_table_for_decode = Qnil;
7213
7214   DEFVAR_LISP ("standard-translation-table-for-encode",
7215     &Vstandard_translation_table_for_encode,
7216     "Table for translationg characters while encoding.");
7217   Vstandard_translation_table_for_encode = Qnil;
7218
7219   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
7220     "Alist of charsets vs revision numbers.\n\
7221 While encoding, if a charset (car part of an element) is found,\n\
7222 designate it with the escape sequence identifing revision (cdr part of the element).");
7223   Vcharset_revision_alist = Qnil;
7224
7225   DEFVAR_LISP ("default-process-coding-system",
7226                &Vdefault_process_coding_system,
7227     "Cons of coding systems used for process I/O by default.\n\
7228 The car part is used for decoding a process output,\n\
7229 the cdr part is used for encoding a text to be sent to a process.");
7230   Vdefault_process_coding_system = Qnil;
7231
7232   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
7233     "Table of extra Latin codes in the range 128..159 (inclusive).\n\
7234 This is a vector of length 256.\n\
7235 If Nth element is non-nil, the existence of code N in a file\n\
7236 \(or output of subprocess) doesn't prevent it to be detected as\n\
7237 a coding system of ISO 2022 variant which has a flag\n\
7238 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file\n\
7239 or reading output of a subprocess.\n\
7240 Only 128th through 159th elements has a meaning.");
7241   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
7242
7243   DEFVAR_LISP ("select-safe-coding-system-function",
7244                &Vselect_safe_coding_system_function,
7245     "Function to call to select safe coding system for encoding a text.\n\
7246 \n\
7247 If set, this function is called to force a user to select a proper\n\
7248 coding system which can encode the text in the case that a default\n\
7249 coding system used in each operation can't encode the text.\n\
7250 \n\
7251 The default value is `select-safe-coding-system' (which see).");
7252   Vselect_safe_coding_system_function = Qnil;
7253
7254   DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table,
7255     "Char-table containing safe coding systems of each characters.\n\
7256 Each element doesn't include such generic coding systems that can\n\
7257 encode any characters.   They are in the first extra slot.");
7258   Vchar_coding_system_table = Fmake_char_table (Qchar_coding_system, Qnil);
7259
7260   DEFVAR_BOOL ("inhibit-iso-escape-detection",
7261                &inhibit_iso_escape_detection,
7262     "If non-nil, Emacs ignores ISO2022's escape sequence on code detection.\n\
7263 \n\
7264 By default, on reading a file, Emacs tries to detect how the text is\n\
7265 encoded.  This code detection is sensitive to escape sequences.  If\n\
7266 the sequence is valid as ISO2022, the code is determined as one of\n\
7267 the ISO2022 encodings, and the file is decoded by the corresponding\n\
7268 coding system (e.g. `iso-2022-7bit').\n\
7269 \n\
7270 However, there may be a case that you want to read escape sequences in\n\
7271 a file as is.  In such a case, you can set this variable to non-nil.\n\
7272 Then, as the code detection ignores any escape sequences, no file is\n\
7273 detected as encoded in some ISO2022 encoding.  The result is that all\n\
7274 escape sequences become visible in a buffer.\n\
7275 \n\
7276 The default value is nil, and it is strongly recommended not to change\n\
7277 it.  That is because many Emacs Lisp source files that contain\n\
7278 non-ASCII characters are encoded by the coding system `iso-2022-7bit'\n\
7279 in Emacs's distribution, and they won't be decoded correctly on\n\
7280 reading if you suppress escape sequence detection.\n\
7281 \n\
7282 The other way to read escape sequences in a file without decoding is\n\
7283 to explicitly specify some coding system that doesn't use ISO2022's\n\
7284 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].");
7285   inhibit_iso_escape_detection = 0;
7286 }
7287
7288 char *
7289 emacs_strerror (error_number)
7290      int error_number;
7291 {
7292   char *str;
7293
7294   synchronize_system_messages_locale ();
7295   str = strerror (error_number);
7296
7297   if (! NILP (Vlocale_coding_system))
7298     {
7299       Lisp_Object dec = code_convert_string_norecord (build_string (str),
7300                                                       Vlocale_coding_system,
7301                                                       0);
7302       str = (char *) XSTRING (dec)->data;
7303     }
7304
7305   return str;
7306 }
7307
7308 #endif /* emacs */
7309