src/coding.c

   1 /* Coding system handler (conversion, detection, etc.).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /*** TABLE OF CONTENTS ***
  23
  24   0. General comments
  25   1. Preamble
  26   2. Emacs' internal format (emacs-mule) handlers
  27   3. ISO2022 handlers
  28   4. Shift-JIS and BIG5 handlers
  29   5. CCL handlers
  30   6. End-of-line handlers
  31   7. C library functions
  32   8. Emacs Lisp library functions
  33   9. Post-amble
  34
  35 */
  36
  37 /*** 0. General comments ***/
  38
  39
  40 /*** GENERAL NOTE on CODING SYSTEM ***
  41
  42   Coding system is an encoding mechanism of one or more character
  43   sets.  Here's a list of coding systems which Emacs can handle.  When
  44   we say "decode", it means converting some other coding system to
  45   Emacs' internal format (emacs-mule), and when we say "encode",
  46   it means converting the coding system emacs-mule to some other
  47   coding system.
  48
  49   0. Emacs' internal format (emacs-mule)
  50
  51   Emacs itself holds a multi-lingual character in a buffer and a string
  52   in a special format.  Details are described in section 2.
  53
  54   1. ISO2022
  55
  56   The most famous coding system for multiple character sets.  X's
  57   Compound Text, various EUCs (Extended Unix Code), and coding
  58   systems used in Internet communication such as ISO-2022-JP are
  59   all variants of ISO2022.  Details are described in section 3.
  60
  61   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  62
  63   A coding system to encode character sets: ASCII, JISX0201, and
  64   JISX0208.  Widely used for PC's in Japan.  Details are described in
  65   section 4.
  66
  67   3. BIG5
  68
  69   A coding system to encode character sets: ASCII and Big5.  Widely
  70   used by Chinese (mainly in Taiwan and Hong Kong).  Details are
  71   described in section 4.  In this file, when we write "BIG5"
  72   (all uppercase), we mean the coding system, and when we write
  73   "Big5" (capitalized), we mean the character set.
  74
  75   4. Raw text
  76
  77   A coding system for a text containing random 8-bit code.  Emacs does
  78   no code conversion on such a text except for end-of-line format.
  79
  80   5. Other
  81
  82   If a user wants to read/write a text encoded in a coding system not
  83   listed above, he can supply a decoder and an encoder for it in CCL
  84   (Code Conversion Language) programs.  Emacs executes the CCL program
  85   while reading/writing.
  86
  87   Emacs represents a coding system by a Lisp symbol that has a property
  88   `coding-system'.  But, before actually using the coding system, the
  89   information about it is set in a structure of type `struct
  90   coding_system' for rapid processing.  See section 6 for more details.
  91
  92 */
  93
  94 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  95
  96   How end-of-line of a text is encoded depends on a system.  For
  97   instance, Unix's format is just one byte of `line-feed' code,
  98   whereas DOS's format is two-byte sequence of `carriage-return' and
  99   `line-feed' codes.  MacOS's format is usually one byte of
 100   `carriage-return'.
 101
 102   Since text characters encoding and end-of-line encoding are
 103   independent, any coding system described above can take
 104   any format of end-of-line.  So, Emacs has information of format of
 105   end-of-line in each coding-system.  See section 6 for more details.
 106
 107 */
 108
 109 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 110
 111   These functions check if a text between SRC and SRC_END is encoded
 112   in the coding system category XXX.  Each returns an integer value in
 113   which appropriate flag bits for the category XXX are set.  The flag
 114   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 115   template of these functions.  */
 116 #if 0
 117 int
 118 detect_coding_emacs_mule (src, src_end)
 119      unsigned char *src, *src_end;
 120 {
 121   ...
 122 }
 123 #endif
 124
 125 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 126
 127   These functions decode SRC_BYTES length of unibyte text at SOURCE
 128   encoded in CODING to Emacs' internal format.  The resulting
 129   multibyte text goes to a place pointed to by DESTINATION, the length
 130   of which should not exceed DST_BYTES.
 131
 132   These functions set the information of original and decoded texts in
 133   the members produced, produced_char, consumed, and consumed_char of
 134   the structure *CODING.  They also set the member result to one of
 135   CODING_FINISH_XXX indicating how the decoding finished.
 136
 137   DST_BYTES zero means that source area and destination area are
 138   overlapped, which means that we can produce a decoded text until it
 139   reaches the head of not-yet-decoded source text.
 140
 141   Below is a template of these functions.  */
 142 #if 0
 143 static void
 144 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 145      struct coding_system *coding;
 146      unsigned char *source, *destination;
 147      int src_bytes, dst_bytes;
 148 {
 149   ...
 150 }
 151 #endif
 152
 153 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 154
 155   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 156   internal multibyte format to CODING.  The resulting unibyte text
 157   goes to a place pointed to by DESTINATION, the length of which
 158   should not exceed DST_BYTES.
 159
 160   These functions set the information of original and encoded texts in
 161   the members produced, produced_char, consumed, and consumed_char of
 162   the structure *CODING.  They also set the member result to one of
 163   CODING_FINISH_XXX indicating how the encoding finished.
 164
 165   DST_BYTES zero means that source area and destination area are
 166   overlapped, which means that we can produce an encoded text until it
 167   reaches the head of not-yet-encoded source text.
 168
 169   Below is a template of these functions.  */
 170 #if 0
 171 static void
 172 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 173      struct coding_system *coding;
 174      unsigned char *source, *destination;
 175      int src_bytes, dst_bytes;
 176 {
 177   ...
 178 }
 179 #endif
 180
 181 /*** COMMONLY USED MACROS ***/
 182
 183 /* The following two macros ONE_MORE_BYTE and TWO_MORE_BYTES safely
 184    get one and two bytes from the source text respectively, advancing
 185    `src' past them.  If there are not enough bytes in the source, they
 186    set `coding->result' to CODING_FINISH_INSUFFICIENT_SRC and jump to
 187    `label_end_of_loop' without advancing `src'.  The caller should set
 188    variables `coding', `src' and `src_end' to appropriate pointers in
 189    advance.  These macros are called from decoding routines
        `decode_coding_XXX', thus it is assumed that the source text is
        unibyte.  */
 190
 191 #define ONE_MORE_BYTE(c1)                                       \
 192   do {                                                          \
 193     if (src >= src_end)                                         \
 194       {                                                         \
 195         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 196         goto label_end_of_loop;                                 \
 197       }                                                         \
 198     c1 = *src++;                                                \
 199   } while (0)
 200
     /* `src + 1 >= src_end' means that fewer than two bytes remain; in
        that case neither byte is consumed.  */
 201 #define TWO_MORE_BYTES(c1, c2)                                  \
 202   do {                                                          \
 203     if (src + 1 >= src_end)                                     \
 204       {                                                         \
 205         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 206         goto label_end_of_loop;                                 \
 207       }                                                         \
 208     c1 = *src++;                                                \
 209     c2 = *src++;                                                \
 210   } while (0)
 211
 212
 213 /* Set C to the next character at the source text pointed by `src',
 214    and advance `src' past it.  If there are no more bytes in the
 215    source, set `coding->result' to CODING_FINISH_INSUFFICIENT_SRC and
 216    jump to `label_end_of_loop'.  The caller should set variables
 217    `coding', `src', `src_end', and `translation_table' to appropriate
 218    values in advance.  If `translation_table' is non-nil, C is the
 219    result of passing the character through translate_char.  This
 220    macro is used in encoding routines `encode_coding_XXX', thus it
 221    assumes that the source text is in multibyte form except for 8-bit
        characters.  8-bit characters are in multibyte form if
        coding->src_multibyte is nonzero, else they are represented by a
        single byte.  */
 222
 223 #define ONE_MORE_CHAR(c)                                        \
 224   do {                                                          \
 225     int len = src_end - src;                                    \
 226     int bytes;                                                  \
 227     if (len <= 0)                                               \
 228       {                                                         \
 229         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 230         goto label_end_of_loop;                                 \
 231       }                                                         \
 232     if (coding->src_multibyte                                   \
 233         || UNIBYTE_STR_AS_MULTIBYTE_P (src, len, bytes))        \
 234       c = STRING_CHAR_AND_LENGTH (src, len, bytes);             \
 235     else                                                        \
 236       c = *src, bytes = 1;                                      \
 237     if (!NILP (translation_table))                              \
 238       c = translate_char (translation_table, c, -1, 0, 0);      \
 239     src += bytes;                                               \
 240   } while (0)
 241
 242
 243 /* Produce a multibyte form of character C to `dst'.  If there's not
 244    enough space at `dst', set `coding->result' to
        CODING_FINISH_INSUFFICIENT_DST and jump to `label_end_of_loop'.
        The space limit is `dst_end' when `dst_bytes' is nonzero, else
        `src'.
 245
 246    If we are now in the middle of composition sequence, the decoded
 247    character may be ALTCHAR (for the current composition).  In that
 248    case, the character goes to coding->cmp_data->data instead of
 249    `dst'.
        More precisely: C is written to `dst' (and counted in
        coding->produced_char) unless a composition other than
        COMPOSITION_RELATIVE or COMPOSITION_WITH_RULE is in progress, and
        C is recorded with CODING_ADD_COMPOSITION_COMPONENT whenever a
        composition other than COMPOSITION_RELATIVE is in progress.
 250
 251    This macro is used in decoding routines.  */
 252
 253 #define EMIT_CHAR(c)                                                    \
 254   do {                                                                  \
 255     if (! COMPOSING_P (coding)                                          \
 256         || coding->composing == COMPOSITION_RELATIVE                    \
 257         || coding->composing == COMPOSITION_WITH_RULE)                  \
 258       {                                                                 \
 259         int bytes = CHAR_BYTES (c);                                     \
 260         if ((dst + bytes) > (dst_bytes ? dst_end : src))                \
 261           {                                                             \
 262             coding->result = CODING_FINISH_INSUFFICIENT_DST;            \
 263             goto label_end_of_loop;                                     \
 264           }                                                             \
 265         dst += CHAR_STRING (c, dst);                                    \
 266         coding->produced_char++;                                        \
 267       }                                                                 \
 268                                                                         \
 269     if (COMPOSING_P (coding)                                            \
 270         && coding->composing != COMPOSITION_RELATIVE)                   \
 271       {                                                                 \
 272         CODING_ADD_COMPOSITION_COMPONENT (coding, c);                   \
 273         coding->composition_rule_follows                                \
 274           = coding->composing != COMPOSITION_WITH_ALTCHARS;             \
 275       }                                                                 \
 276   } while (0)
 277
 278
     /* Store the single byte C at `dst' and advance `dst'.  If there is
        no room (the limit is `dst_end' when `dst_bytes' is nonzero, else
        `src'), set `coding->result' to CODING_FINISH_INSUFFICIENT_DST and
        jump to `label_end_of_loop' instead.  The caller must provide
        `coding', `src', `dst', `dst_end', `dst_bytes' and the label.  */
 279 #define EMIT_ONE_BYTE(c)                                        \
 280   do {                                                          \
 281     if (dst >= (dst_bytes ? dst_end : src))                     \
 282       {                                                         \
 283         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 284         goto label_end_of_loop;                                 \
 285       }                                                         \
 286     *dst++ = c;                                                 \
 287   } while (0)
 288
     /* Likewise, but store the two bytes C1 and C2.  Nothing is stored
        unless both bytes fit.  */
 289 #define EMIT_TWO_BYTES(c1, c2)                                  \
 290   do {                                                          \
 291     if (dst + 2 > (dst_bytes ? dst_end : src))                  \
 292       {                                                         \
 293         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 294         goto label_end_of_loop;                                 \
 295       }                                                         \
 296     *dst++ = c1, *dst++ = c2;                                   \
 297   } while (0)
 298
     /* Likewise, but copy the bytes from FROM up to (not including) TO.
        Nothing is copied unless all of them fit.  FROM is incremented
        until it equals TO, so it must be a modifiable pointer variable.  */
 299 #define EMIT_BYTES(from, to)                                    \
 300   do {                                                          \
 301     if (dst + (to - from) > (dst_bytes ? dst_end : src))        \
 302       {                                                         \
 303         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 304         goto label_end_of_loop;                                 \
 305       }                                                         \
 306     while (from < to)                                           \
 307       *dst++ = *from++;                                         \
 308   } while (0)
 309
 310 \f
 311 /*** 1. Preamble ***/
 312
 313 #ifdef emacs
 314 #include <config.h>
 315 #endif
 316
 317 #include <stdio.h>
 318
 319 #ifdef emacs
 320
 321 #include "lisp.h"
 322 #include "buffer.h"
 323 #include "charset.h"
 324 #include "composite.h"
 325 #include "ccl.h"
 326 #include "coding.h"
 327 #include "window.h"
 328
 329 #else  /* not emacs */
 330
 331 #include "mulelib.h"
 332
 333 #endif /* not emacs */
 334
     /* Lisp symbols used by the coding system code.  Of these, this part
        of the file uses Qcoding_system and Qsafe_chars (as property names
        in coding_safe_chars).  NOTE(review): where the symbols are
        interned is not visible in this part of the file.  */
 335 Lisp_Object Qcoding_system, Qeol_type;
 336 Lisp_Object Qbuffer_file_coding_system;
 337 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 338 Lisp_Object Qno_conversion, Qundecided;
 339 Lisp_Object Qcoding_system_history;
 340 Lisp_Object Qsafe_chars;
 341 Lisp_Object Qvalid_codes;
 342
     /* Declared `extern': presumably these two are defined in another
        file.  */
 343 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 344 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 345 Lisp_Object Qstart_process, Qopen_network_stream;
 346 Lisp_Object Qtarget_idx;
 347
 348 Lisp_Object Vselect_safe_coding_system_function;
 349
 350 /* Mnemonic string for each format of end-of-line.  */
 351 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 352 /* Mnemonic string to indicate format of end-of-line is not yet
 353    decided.  */
 354 Lisp_Object eol_mnemonic_undecided;
 355
 356 /* Format of end-of-line decided by system.  This is CODING_EOL_LF on
 357    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
 358 int system_eol_type;
 359
 360 #ifdef emacs
 361
     /* NOTE(review): presumably the values of the Lisp variables
        `coding-system-list' and `coding-system-alist'; their DEFVARs are
        not visible in this part of the file.  */
 362 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 363
 364 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 365
 366 /* Coding system emacs-mule and raw-text are for converting only
 367    end-of-line format.  */
 368 Lisp_Object Qemacs_mule, Qraw_text;
 369
 370 /* Coding-systems are handed between Emacs Lisp programs and C internal
 371    routines by the following three variables.  */
 372 /* Coding-system for reading files and receiving data from process.  */
 373 Lisp_Object Vcoding_system_for_read;
 374 /* Coding-system for writing files and sending data to process.  */
 375 Lisp_Object Vcoding_system_for_write;
 376 /* Coding-system actually used in the latest I/O.  */
 377 Lisp_Object Vlast_coding_system_used;
 378
 379 /* A vector of length 256 which contains information about special
 380    Latin codes (especially for dealing with Microsoft codes).  */
 381 Lisp_Object Vlatin_extra_code_table;
 382
 383 /* Flag to inhibit code conversion of end-of-line format.  */
 384 int inhibit_eol_conversion;
 385
 386 /* Flag to inhibit ISO2022 escape sequence detection.  */
 387 int inhibit_iso_escape_detection;
 388
 389 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 390 int inherit_process_coding_system;
 391
 392 /* Coding system to be used to encode text for terminal display.  */
 393 struct coding_system terminal_coding;
 394
 395 /* Coding system to be used to encode text for terminal display when
 396    terminal coding system is nil.  */
 397 struct coding_system safe_terminal_coding;
 398
 399 /* Coding system of what is sent from terminal keyboard.  */
 400 struct coding_system keyboard_coding;
 401
 402 /* Default coding system to be used to write a file.  */
 403 struct coding_system default_buffer_file_coding;
 404
     /* NOTE(review): judging by the names, alists that select coding
        systems for file, process and network I/O respectively; their
        format and users are not visible in this part of the file.  */
 405 Lisp_Object Vfile_coding_system_alist;
 406 Lisp_Object Vprocess_coding_system_alist;
 407 Lisp_Object Vnetwork_coding_system_alist;
 408
 409 Lisp_Object Vlocale_coding_system;
 410
 411 #endif /* emacs */
 412
 413 Lisp_Object Qcoding_category, Qcoding_category_index;
 414
 415 /* List of symbols `coding-category-xxx' ordered by priority.  */
 416 Lisp_Object Vcoding_category_list;
 417
 418 /* Table of coding categories (Lisp symbols).  */
 419 Lisp_Object Vcoding_category_table;
 420
 421 /* Table of names of the symbol for each coding-category.  The 15
        names below initialize the first 15 of the
        CODING_CATEGORY_IDX_MAX elements.  NOTE(review): the order
        presumably has to match the coding category indices declared in
        `coding.h' -- confirm before reordering or adding entries.  */
 422 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
 423   "coding-category-emacs-mule",
 424   "coding-category-sjis",
 425   "coding-category-iso-7",
 426   "coding-category-iso-7-tight",
 427   "coding-category-iso-8-1",
 428   "coding-category-iso-8-2",
 429   "coding-category-iso-7-else",
 430   "coding-category-iso-8-else",
 431   "coding-category-ccl",
 432   "coding-category-big5",
 433   "coding-category-utf-8",
 434   "coding-category-utf-16-be",
 435   "coding-category-utf-16-le",
 436   "coding-category-raw-text",
 437   "coding-category-binary"
 438 };
 439
 440 /* Table of pointers to coding systems corresponding to each coding
 441    category.  */
 442 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
 443
 444 /* Table of coding category masks.  Nth element is a mask for a coding
 445    category of which priority is Nth.  */
 446 static
 447 int coding_priorities[CODING_CATEGORY_IDX_MAX];
 448
 449 /* Flag to tell if we look up translation table on character code
 450    conversion.  */
 451 Lisp_Object Venable_character_translation;
 452 /* Standard translation table to look up on decoding (reading).  */
 453 Lisp_Object Vstandard_translation_table_for_decode;
 454 /* Standard translation table to look up on encoding (writing).  */
 455 Lisp_Object Vstandard_translation_table_for_encode;
 456
 457 Lisp_Object Qtranslation_table;
 458 Lisp_Object Qtranslation_table_id;
 459 Lisp_Object Qtranslation_table_for_decode;
 460 Lisp_Object Qtranslation_table_for_encode;
 461
 462 /* Alist of charsets vs revision number.  */
 463 Lisp_Object Vcharset_revision_alist;
 464
 465 /* Default coding systems used for process I/O.  */
 466 Lisp_Object Vdefault_process_coding_system;
 467
 468 /* Global flag to tell that we can't call post-read-conversion and
 469    pre-write-conversion functions.  Usually the value is zero, but it
 470    is set to 1 temporarily while such functions are running.  This is
 471    to avoid infinite recursive call.  */
 472 static int inhibit_pre_post_conversion;
 473
 474 /* Char-table containing safe coding systems of each character.  */
 475 Lisp_Object Vchar_coding_system_table;
 476 Lisp_Object Qchar_coding_system;
 477
 478 /* Return the `safe-chars' property of coding system CODING if that
 479    property is a char-table; otherwise return Qt.  Don't check
        validity of CODING: the `coding-system' property of
        CODING->symbol is assumed to be a vector whose element 3 is the
        property list that is searched for `safe-chars'.  */
 480
 481 Lisp_Object
 482 coding_safe_chars (coding)
 483      struct coding_system *coding;
 484 {
 485   Lisp_Object coding_spec, plist, safe_chars;
 486
 487   coding_spec = Fget (coding->symbol, Qcoding_system);
 488   plist = XVECTOR (coding_spec)->contents[3];
     /* Look the property up in PLIST rather than fetching element 3 of
        the vector a second time.  */
 489   safe_chars = Fplist_get (plist, Qsafe_chars);
 490   return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt);
 491 }
 492
     /* Nonzero if character C is safe according to SAFE_CHARS, which is
        either Qt (every character is safe) or a char-table whose entry
        for C is non-nil when C is safe.  coding_safe_chars returns a
        value of this form.  */
 493 #define CODING_SAFE_CHAR_P(safe_chars, c) \
 494   (EQ (safe_chars, Qt) || !NILP (CHAR_TABLE_REF (safe_chars, c)))
 495
 496 \f
 497 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 498
 499 /* Emacs' internal format for encoding multiple character sets is a
 500    kind of multi-byte encoding, i.e. characters are encoded by
 501    variable-length sequences of one-byte codes.
 502
 503    ASCII characters and control characters (e.g. `tab', `newline') are
 504    represented by one-byte sequences which are their ASCII codes, in
 505    the range 0x00 through 0x7F.
 506
 507    8-bit characters of the range 0x80..0x9F are represented by
 508    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
 509    code + 0x20).
 510
 511    8-bit characters of the range 0xA0..0xFF are represented by
 512    one-byte sequences which are their 8-bit code.
 513
 514    The other characters are represented by a sequence of `base
 515    leading-code', optional `extended leading-code', and one or two
 516    `position-code's.  The length of the sequence is determined by the
 517    base leading-code.  Leading-code takes the range 0x80 through 0x9F,
 518    whereas extended leading-code and position-code take the range 0xA0
 519    through 0xFF.  See `charset.h' for more details about leading-code
 520    and position-code.
 521
 522    --- CODE RANGE of Emacs' internal format ---
 523    character set        range
 524    -------------        -----
 525    ascii                0x00..0x7F
 526    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
 527    eight-bit-graphic    0xA0..0xFF
 528    ELSE                 0x81..0x9F + [0xA0..0xFF]+
 529    ---------------------------------------------
 530
 531   */
 532
     /* Table with one entry per byte value (256 entries).
        NOTE(review): presumably gives the class of each byte in Emacs'
        internal format; the enum and the code that fills the table are
        not visible in this part of the file.  */
 533 enum emacs_code_class_type emacs_code_class[256];
 534
 535 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 536    Check if a text is encoded in Emacs' internal format.  If it is,
 537    return CODING_CATEGORY_MASK_EMACS_MULE, else return 0.
        The text between SRC and SRC_END is rejected (0 is returned) as
        soon as an ESC, SI or SO code is found, or a byte in the range
        0x81..0x9F does not start a sequence that
        UNIBYTE_STR_AS_MULTIBYTE_P accepts.  If the end of the text is
        reached without such a finding, the mask is returned.  */
 538
 539 int
 540 detect_coding_emacs_mule (src, src_end)
 541       unsigned char *src, *src_end;
 542 {
 543   unsigned char c;
     /* Nonzero while scanning the bytes that follow the old composite
        character leading code 0x80.  */
 544   int composing = 0;
 545   /* Dummy for ONE_MORE_BYTE, which stores into coding->result when the
        source is exhausted.  The stored value is never read here.  */
 546   struct coding_system dummy_coding;
 547   struct coding_system *coding = &dummy_coding;
 548
 549   while (1)
 550     {
 551       ONE_MORE_BYTE (c);
 552
 553       if (composing)
 554         {
               /* Adjust the byte before the checks below: a byte less
                  than 0xA0 ends the composite sequence and is checked
                  as is; 0xA0 is followed by a byte of which only the
                  low 7 bits are checked; any other byte is checked
                  after subtracting 0x20.  */
 555           if (c < 0xA0)
 556             composing = 0;
 557           else if (c == 0xA0)
 558             {
 559               ONE_MORE_BYTE (c);
 560               c &= 0x7F;
 561             }
 562           else
 563             c -= 0x20;
 564         }
 565
 566       if (c < 0x20)
 567         {
               /* These control codes make the text be rejected as
                  Emacs' internal format.  */
 568           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
 569             return 0;
 570         }
 571       else if (c >= 0x80 && c < 0xA0)
 572         {
 573           if (c == 0x80)
 574             /* Old leading code for a composite character.  */
 575             composing = 1;
 576           else
 577             {
                   /* C was read from SRC - 1.  Validate the sequence
                      starting there, and skip all of it (BYTES bytes)
                      if it is accepted.  */
 578               unsigned char *src_base = src - 1;
 579               int bytes;
 580
 581               if (!UNIBYTE_STR_AS_MULTIBYTE_P (src_base, src_end - src_base,
 582                                                bytes))
 583                 return 0;
 584               src = src_base + bytes;
 585             }
 586         }
 587     }
     /* ONE_MORE_BYTE jumps here when the source is exhausted; nothing
        in the text contradicted Emacs' internal format.  */
 588  label_end_of_loop:
 589   return CODING_CATEGORY_MASK_EMACS_MULE;
 590 }
 591
 592
 593 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
        Each byte sequence that UNIBYTE_STR_AS_MULTIBYTE_P accepts is
        copied as is; any other byte is converted to its multibyte form
        with CHAR_STRING.  CR and LF are handled according to
        CODING->eol_type: with CODING_EOL_CR a CR is decoded to LF, with
        CODING_EOL_CRLF a CR LF pair is decoded to LF, and otherwise a CR
        is copied unchanged.  If CODING_MODE_INHIBIT_INCONSISTENT_EOL is
        set in CODING->mode, an end-of-line sequence that contradicts
        eol_type stops decoding with CODING_FINISH_INCONSISTENT_EOL.
        On return, CODING->consumed covers only the characters that were
        completely decoded.  */
 594
 595 static void
 596 decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
 597      struct coding_system *coding;
 598      unsigned char *source, *destination;
 599      int src_bytes, dst_bytes;
 600 {
 601   unsigned char *src = source;
 602   unsigned char *src_end = source + src_bytes;
 603   unsigned char *dst = destination;
 604   unsigned char *dst_end = destination + dst_bytes;
 605   /* SRC_BASE remembers the start position in source in each loop.
 606      The loop will be exited when there's not enough source code, or
 607      when there's not enough destination area to produce a
 608      character.  */
 609   unsigned char *src_base;
 610
 611   coding->produced_char = 0;
 612   while ((src_base = src) < src_end)
 613     {
 614       unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p;
 615       int bytes;
 616
 617       if (*src == '\r')
 618         {
               /* Start with the CR itself, so that C is defined (and the
                  CR is copied unchanged) when eol_type is neither
                  CODING_EOL_CR nor CODING_EOL_CRLF.  */
 619           int c = *src++;
 620
 622           if (coding->eol_type == CODING_EOL_CR)
 623             c = '\n';
 624           else if (coding->eol_type == CODING_EOL_CRLF)
 625             {
 626               ONE_MORE_BYTE (c);
 627               if (c != '\n')
 628                 {
 629                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
 630                     {
 631                       coding->result = CODING_FINISH_INCONSISTENT_EOL;
 632                       goto label_end_of_loop;
 633                     }
                       /* A CR not followed by LF: give back the byte
                          just read and produce the CR itself.  */
 634                   src--;
 635                   c = '\r';
 636                 }
 637             }
               /* Make sure there is room for one byte.  As in
                  EMIT_ONE_BYTE, SRC has already been advanced past the
                  consumed source bytes.  */
               if (dst >= (dst_bytes ? dst_end : src))
                 {
                   coding->result = CODING_FINISH_INSUFFICIENT_DST;
                   goto label_end_of_loop;
                 }
 638           *dst++ = c;
 639           coding->produced_char++;
 640           continue;
 641         }
 642       else if (*src == '\n')
 643         {
 644           if ((coding->eol_type == CODING_EOL_CR
 645                || coding->eol_type == CODING_EOL_CRLF)
 646               && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
 647             {
 648               coding->result = CODING_FINISH_INCONSISTENT_EOL;
 649               goto label_end_of_loop;
 650             }
               /* Consume the LF first, so that the space check below
                  sees SRC past it (see the CR case above).  */
               src++;
               if (dst >= (dst_bytes ? dst_end : src))
                 {
                   coding->result = CODING_FINISH_INSUFFICIENT_DST;
                   goto label_end_of_loop;
                 }
 651           *dst++ = '\n';
 652           coding->produced_char++;
 653           continue;
 654         }
 655       else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
 656         {
               /* Already a valid multibyte sequence of BYTES bytes:
                  copy it directly from the source.  */
 657           p = src;
 658           src += bytes;
 659         }
 660       else
 661         {
               /* Convert the single byte to its multibyte form in TMP.  */
 662           bytes = CHAR_STRING (*src, tmp);
 663           p = tmp;
 664           src++;
 665         }
           /* Note that `>=' is used here, so this path always leaves at
              least one byte of the destination area unused.  */
 666       if (dst + bytes >= (dst_bytes ? dst_end : src))
 667         {
 668           coding->result = CODING_FINISH_INSUFFICIENT_DST;
 669           break;
 670         }
 671       while (bytes--) *dst++ = *p++;
 672       coding->produced_char++;
 673     }
 674  label_end_of_loop:
     /* SRC_BASE, not SRC, is used so that a character that could not be
        decoded completely is not counted as consumed.  */
 675   coding->consumed = coding->consumed_char = src_base - source;
 676   coding->produced = dst - destination;
 677 }
 678
     /* Encoding to Emacs' internal format is done entirely by
        encode_eol, which is called with the same arguments.  */
 679 #define encode_coding_emacs_mule(coding, source, destination, src_bytes, dst_bytes) \
 680   encode_eol (coding, source, destination, src_bytes, dst_bytes)
 681
 682
 683 \f
 684 /*** 3. ISO2022 handlers ***/
 685
 686 /* The following note describes the coding system ISO2022 briefly.
 687    Since the intention of this note is to help understand the
 688    functions in this file, some parts are NOT ACCURATE or OVERLY
 689    SIMPLIFIED.  For thorough understanding, please refer to the
 690    original document of ISO2022.
 691
 692    ISO2022 provides many mechanisms to encode several character sets
 693    in 7-bit and 8-bit environments.  For 7-bit environments, all text
 694    is encoded using bytes less than 128.  This may make the encoded
 695    text a little bit longer, but the text passes more easily through
 696    several gateways, some of which strip off MSB (Most Significant Bit).
 697
 698    There are two kinds of character sets: control character set and
 699    graphic character set.  The former contains control characters such
 700    as `newline' and `escape' to provide control functions (control
 701    functions are also provided by escape sequences).  The latter
 702    contains graphic characters such as 'A' and '-'.  Emacs recognizes
 703    two control character sets and many graphic character sets.
 704
 705    Graphic character sets are classified into one of the following
 706    four classes, according to the number of bytes (DIMENSION) and
 707    number of characters in one dimension (CHARS) of the set:
 708    - DIMENSION1_CHARS94
 709    - DIMENSION1_CHARS96
 710    - DIMENSION2_CHARS94
 711    - DIMENSION2_CHARS96
 712
 713    In addition, each character set is assigned an identification tag,
 714    unique for each set, called "final character" (denoted as <F>
 715    hereafter).  The <F> of each character set is decided by ECMA(*)
 716    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
 717    (0x30..0x3F are for private use only).
 718
 719    Note (*): ECMA = European Computer Manufacturers Association
 720
 721    Here are examples of graphic character set [NAME(<F>)]:
 722         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
 723         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
 724         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
 725         o DIMENSION2_CHARS96 -- none for the moment
 726
 727    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
 728         C0 [0x00..0x1F] -- control character plane 0
 729         GL [0x20..0x7F] -- graphic character plane 0
 730         C1 [0x80..0x9F] -- control character plane 1
 731         GR [0xA0..0xFF] -- graphic character plane 1
 732
 733    A control character set is directly designated and invoked to C0 or
 734    C1 by an escape sequence.  The most common case is that:
 735    - ISO646's  control character set is designated/invoked to C0, and
 736    - ISO6429's control character set is designated/invoked to C1,
 737    and usually these designations/invocations are omitted in encoded
 738    text.  In a 7-bit environment, only C0 can be used, and a control
 739    character for C1 is encoded by an appropriate escape sequence to
 740    fit into the environment.  All control characters for C1 are
 741    defined to have corresponding escape sequences.
 742
 743    A graphic character set is at first designated to one of four
 744    graphic registers (G0 through G3), then these graphic registers are
 745    invoked to GL or GR.  These designations and invocations can be
 746    done independently.  The most common case is that G0 is invoked to
 747    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
 748    these invocations and designations are omitted in encoded text.
 749    In a 7-bit environment, only GL can be used.
 750
 751    When a graphic character set of CHARS94 is invoked to GL, codes
 752    0x20 and 0x7F of the GL area work as control characters SPACE and
 753    DEL respectively; when such a set is invoked to GR, codes 0xA0 and
 754    0xFF of the GR area should not be used.
 755
 756    There are two ways of invocation: locking-shift and single-shift.
 757    With locking-shift, the invocation lasts until the next different
 758    invocation, whereas with single-shift, the invocation affects the
 759    following character only and doesn't affect the locking-shift
 760    state.  Invocations are done by the following control characters or
 761    escape sequences:
 762
 763    ----------------------------------------------------------------------
 764    abbrev  function                  cntrl escape seq   description
 765    ----------------------------------------------------------------------
 766    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
 767    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
 768    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
 769    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
 770    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
 771    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
 772    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
 773    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
 774    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
 775    ----------------------------------------------------------------------
 776    (*) These are not used by any known coding system.
 777
 778    Control characters for these functions are defined by macros
 779    ISO_CODE_XXX in `coding.h'.
 780
 781    Designations are done by the following escape sequences:
 782    ----------------------------------------------------------------------
 783    escape sequence      description
 784    ----------------------------------------------------------------------
 785    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
 786    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
 787    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
 788    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
 789    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
 790    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
 791    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
 792    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
 793    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
 794    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
 795    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
 796    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
 797    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
 798    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
 799    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
 800    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
 801    ----------------------------------------------------------------------
 802
 803    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
 804    of dimension 1, chars 94, and final character <F>, etc...
 805
 806    Note (*): Although these designations are not allowed in ISO2022,
 807    Emacs accepts them on decoding, and produces them on encoding
 808    CHARS96 character sets in a coding system which is characterized as
 809    7-bit environment, non-locking-shift, and non-single-shift.
 810
 811    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
 812    '(' can be omitted.  We refer to this as "short-form" hereafter.
 813
 814    Now you may notice that there are a lot of ways for encoding the
 815    same multilingual text in ISO2022.  Actually, there exist many
 816    coding systems such as Compound Text (used in X11's inter-client
 817    communication), ISO-2022-JP (used in Japanese internet), ISO-2022-KR
 818    (used in Korean internet), EUC (Extended UNIX Code, used in Asian
 819    localized platforms), and all of these are variants of ISO2022.
 820
 821    In addition to the above, Emacs handles two more kinds of escape
 822    sequences: ISO6429's direction specification and Emacs' private
 823    sequence for specifying character composition.
 824
 825    ISO6429's direction specification takes the following form:
 826         o CSI ']'      -- end of the current direction
 827         o CSI '0' ']'  -- end of the current direction
 828         o CSI '1' ']'  -- start of left-to-right text
 829         o CSI '2' ']'  -- start of right-to-left text
 830    The control character CSI (0x9B: control sequence introducer) is
 831    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
 832
 833    Character composition specification takes the following form:
 834         o ESC '0' -- start relative composition
 835         o ESC '1' -- end composition
 836         o ESC '2' -- start rule-base composition (*)
 837         o ESC '3' -- start relative composition with alternate chars  (**)
 838         o ESC '4' -- start rule-base composition with alternate chars  (**)
 839   Since these are not standard escape sequences of any ISO standard,
 840   their use with these meanings is restricted to Emacs only.
 841
 842   (*) This form is used only in Emacs 20.5 and the older versions,
 843   but the newer versions can safely decode it.
 844   (**) This form is used only in Emacs 21.1 and the newer versions,
 845   and the older versions can't decode it.
 846
 847   Here's a list of example usages of these composition escape
 848   sequences (categorized by `enum composition_method').
 849
 850   COMPOSITION_RELATIVE:
 851         ESC 0 CHAR [ CHAR ] ESC 1
 852   COMPOSITION_WITH_RULE:
 853         ESC 2 CHAR [ RULE CHAR ] ESC 1
 854   COMPOSITION_WITH_ALTCHARS:
 855         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
 856   COMPOSITION_WITH_RULE_ALTCHARS:
 857         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
 858
     /* Code class of each of the 256 possible byte values, indexed by
        the byte itself.  */
 859 enum iso_code_class_type iso_code_class[256];
 860
     /* Nonzero if a coding system is assigned to coding category IDX,
        and either CHARSET is CHARSET_ASCII or C satisfies
        CODING_SAFE_CHAR_P for that coding system's safe chars, and the
        coding system has a requested designation for CHARSET.

        Side effect: when the category is assigned and CHARSET is not
        ASCII, this assigns to a variable named `safe_chars', which must
        be in scope at the point of use.  IDX is evaluated several times.
        CHARSET and C are parenthesized so that any expression may be
        passed for them.  */
 861 #define CHARSET_OK(idx, charset, c)                                     \
 862   (coding_system_table[idx]                                             \
 863    && ((charset) == CHARSET_ASCII                                       \
 864        || (safe_chars = coding_safe_chars (coding_system_table[idx]),   \
 865            CODING_SAFE_CHAR_P (safe_chars, (c))))                       \
 866    && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \
 867                                               (charset))                \
 868        != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
 869
     /* Nonzero if a coding system is assigned to coding category IDX and
        its initial designation for register 1 is non-negative.  The null
        check mirrors the one in CHARSET_OK: an unassigned category fails
        the test instead of being dereferenced.  */
 870 #define SHIFT_OUT_OK(idx)                                               \
 871   (coding_system_table[idx]                                             \
        && (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) \
            >= 0))
 872
 873 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 874    Check if the text in the byte range SRC..SRC_END is encoded in
 875    ISO2022.  If it is, return an integer in which any of these flag
        bits are set as appropriate:
 876         CODING_CATEGORY_MASK_ISO_7
 877         CODING_CATEGORY_MASK_ISO_7_TIGHT
 878         CODING_CATEGORY_MASK_ISO_8_1
 879         CODING_CATEGORY_MASK_ISO_8_2
 880         CODING_CATEGORY_MASK_ISO_7_ELSE
 881         CODING_CATEGORY_MASK_ISO_8_ELSE
 882    If a code which should never appear in ISO2022 is found,
 883    return 0.

        While scanning, MASK holds the categories not yet ruled out and
        MASK_FOUND those for which supporting evidence was seen; the
        result is MASK & MASK_FOUND.  Escape sequences, SO, SI, CSI and
        single shifts contribute nothing to detection when
        `inhibit_iso_escape_detection' is nonzero.  */
 884

     /* Nonzero if a coding system is assigned to coding category IDX and
        has FLAG set in its `flags'.  An unassigned (null) category never
        matches, as in CHARSET_OK.  */
     #define ISO_CATEGORY_FLAG_P(idx, flag)          \
       (coding_system_table[idx]                     \
        && (coding_system_table[idx]->flags & (flag)))

 885 int
 886 detect_coding_iso2022 (src, src_end)
 887      unsigned char *src, *src_end;
 888 {
 889   int mask = CODING_CATEGORY_MASK_ISO;
 890   int mask_found = 0;
 891   int reg[4], shift_out = 0, single_shifting = 0;
 892   int c, c1, charset;
 893   /* Dummy for ONE_MORE_BYTE.  */
 894   struct coding_system dummy_coding;
 895   struct coding_system *coding = &dummy_coding;
 896   Lisp_Object safe_chars;
 897
 898   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
 899   while (mask && src < src_end)
 900     {
 901       ONE_MORE_BYTE (c);
 902       switch (c)
 903         {
 904         case ISO_CODE_ESC:
 905           if (inhibit_iso_escape_detection)
 906             break;
 907           single_shifting = 0;
 908           ONE_MORE_BYTE (c);
 909           if (c >= '(' && c <= '/')
 910             {
 911               /* Designation sequence for a charset of dimension 1.  */
 912               ONE_MORE_BYTE (c1);
 913               if (c1 < ' ' || c1 >= 0x80
 914                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
 915                 /* Invalid designation sequence.  Just ignore.  */
 916                 break;
 917               reg[(c - '(') % 4] = charset;
 918             }
 919           else if (c == '$')
 920             {
 921               /* Designation sequence for a charset of dimension 2.  */
 922               ONE_MORE_BYTE (c);
 923               if (c >= '@' && c <= 'B')
 924                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
 925                 reg[0] = charset = iso_charset_table[1][0][c];
 926               else if (c >= '(' && c <= '/')
 927                 {
 928                   ONE_MORE_BYTE (c1);
 929                   if (c1 < ' ' || c1 >= 0x80
 930                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
 931                     /* Invalid designation sequence.  Just ignore.  */
 932                     break;
 933                   reg[(c - '(') % 4] = charset;
 934                 }
 935               else
 936                 /* Invalid designation sequence.  Just ignore.  */
 937                 break;
 938             }
 939           else if (c == 'N' || c == 'O')
 940             {
 941               /* ESC <Fe> for SS2 or SS3.  */
 942               mask &= CODING_CATEGORY_MASK_ISO_7_ELSE;
 943               break;
 944             }
 945           else if (c >= '0' && c <= '4')
 946             {
 947               /* ESC <Fp> for start/end composition.  */
 948               mask_found |= CODING_CATEGORY_MASK_ISO;
 949               break;
 950             }
 951           else
 952             /* Invalid escape sequence.  Just ignore.  */
 953             break;
 954
 955           /* We found a valid designation sequence for CHARSET.  */
 956           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
 957           c = MAKE_CHAR (charset, 0, 0);
 958           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset, c))
 959             mask_found |= CODING_CATEGORY_MASK_ISO_7;
 960           else
 961             mask &= ~CODING_CATEGORY_MASK_ISO_7;
 962           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset, c))
 963             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
 964           else
 965             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
 966           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset, c))
 967             mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
 968           else
 969             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
 970           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset, c))
 971             mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
 972           else
 973             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
 974           break;
 975
 976         case ISO_CODE_SO:
 977           if (inhibit_iso_escape_detection)
 978             break;
 979           single_shifting = 0;
 980           if (shift_out == 0
 981               && (reg[1] >= 0
 982                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
 983                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
 984             {
 985               /* Locking shift out.  */
 986               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 987               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
                   /* Remember the shift so that the matching SI below
                      is recognized.  The mask updates are idempotent,
                      so this does not change the result.  */
                   shift_out = 1;
 988             }
 989           break;
 990
 991         case ISO_CODE_SI:
 992           if (inhibit_iso_escape_detection)
 993             break;
 994           single_shifting = 0;
 995           if (shift_out == 1)
 996             {
 997               /* Locking shift in.  */
 998               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 999               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
                   shift_out = 0;
1000             }
1001           break;
1002
1003         case ISO_CODE_CSI:
1004           single_shifting = 0;
               /* Fall through: CSI shares the mask handling below.  */
1005         case ISO_CODE_SS2:
1006         case ISO_CODE_SS3:
1007           {
1008             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
1009
1010             if (inhibit_iso_escape_detection)
1011               break;
1012             if (c != ISO_CODE_CSI)
1013               {
1014                 if (ISO_CATEGORY_FLAG_P (CODING_CATEGORY_IDX_ISO_8_1,
1015                                          CODING_FLAG_ISO_SINGLE_SHIFT))
1016                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1017                 if (ISO_CATEGORY_FLAG_P (CODING_CATEGORY_IDX_ISO_8_2,
1018                                          CODING_FLAG_ISO_SINGLE_SHIFT))
1019                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1020                 single_shifting = 1;
1021               }
1022             if (VECTORP (Vlatin_extra_code_table)
1023                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1024               {
1025                 if (ISO_CATEGORY_FLAG_P (CODING_CATEGORY_IDX_ISO_8_1,
1026                                          CODING_FLAG_ISO_LATIN_EXTRA))
1027                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1028                 if (ISO_CATEGORY_FLAG_P (CODING_CATEGORY_IDX_ISO_8_2,
1029                                          CODING_FLAG_ISO_LATIN_EXTRA))
1030                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1031               }
1032             mask &= newmask;
1033             mask_found |= newmask;
1034           }
1035           break;
1036
1037         default:
1038           if (c < 0x80)
1039             {
1040               single_shifting = 0;
1041               break;
1042             }
1043           else if (c < 0xA0)
1044             {
1045               single_shifting = 0;
1046               if (VECTORP (Vlatin_extra_code_table)
1047                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1048                 {
1049                   int newmask = 0;
1050
1051                   if (ISO_CATEGORY_FLAG_P (CODING_CATEGORY_IDX_ISO_8_1,
1052                                            CODING_FLAG_ISO_LATIN_EXTRA))
1053                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1054                   if (ISO_CATEGORY_FLAG_P (CODING_CATEGORY_IDX_ISO_8_2,
1055                                            CODING_FLAG_ISO_LATIN_EXTRA))
1056                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1057                   mask &= newmask;
1058                   mask_found |= newmask;
1059                 }
1060               else
1061                 return 0;
1062             }
1063           else
1064             {
1065               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
1066                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
1067               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
1068               /* Check the length of succeeding codes of the range
1069                  0xA0..0xFF.  If the byte length is odd, we exclude
1070                  CODING_CATEGORY_MASK_ISO_8_2.  We can check this only
1071                  when we are not single shifting.  */
1072               if (!single_shifting
1073                   && mask & CODING_CATEGORY_MASK_ISO_8_2)
1074                 {
1075                   int i = 1;
                       /* Nonzero once the byte that terminated the run
                          has been read from SRC.  */
                       int extra_byte_read = 0;

1076                   while (src < src_end)
1077                     {
1078                       ONE_MORE_BYTE (c);
1079                       if (c < 0xA0)
                             {
                               extra_byte_read = 1;
1080                           break;
                             }
1081                       i++;
1082                     }
1083
1084                   if (i & 1 && src < src_end)
1085                     mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
1086                   else
1087                     mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
                       /* The terminating byte is not part of the run:
                          push it back so that the main loop classifies
                          it (it may be ESC, SO/SI, or an invalid C1
                          code) instead of silently dropping it.  This
                          is done after the parity test above, which
                          looks at SRC as it was left by the scan.  */
                       if (extra_byte_read)
                         src--;
1088                 }
1089             }
1090           break;
1091         }
1092     }
1093  label_end_of_loop:
1094   return (mask & mask_found);
1095 }
1096
1097 /* Yield the character code for charset CHARSET with 1st position
1098    code C1 and 2nd position code C2.  When the variable
1099    `translation_table' is non-nil, yield what translate_char gives
1100    for them with that table; otherwise yield plain MAKE_CHAR.  */
1101
1102 #define DECODE_ISO_CHARACTER(charset, c1, c2)                    \
1103   (!NILP (translation_table)                                     \
1104    ? translate_char (translation_table, -1, charset, c1, c2)     \
1105    : MAKE_CHAR (charset, c1, c2))
1106
1107 /* Set designation state into CODING.

        Designate to graphic register REG the charset that
        ISO_CHARSET_TABLE gives for DIMENSION, CHARS and FINAL_CHAR.
        The designation is stored when the charset is non-negative and
        either CODING's requested designation for it equals REG or its
        MAKE_CHAR (charset, 0, 0) character satisfies CODING_SAFE_CHAR_P.
        If CODING_MODE_DIRECTION is set in coding->mode and the charset
        has a reverse charset, the reverse charset is stored instead.

        Control jumps to `label_invalid_code' in three cases:
          - FINAL_CHAR is below '0' or not below 128;
          - the charset is rejected; REG is then recorded in
            last_invalid_designation_register;
          - ASCII is designated to register 0 while
            last_invalid_designation_register is 0 (that field is
            reset to -1 first).

        The expansion site must provide the variables `coding' and
        `safe_chars' and the label `label_invalid_code'.  */
1108 #define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
1109   do {                                                                     \
1110     int charset, c;                                                        \
1111                                                                            \
1112     if (final_char < '0' || final_char >= 128)                             \
1113       goto label_invalid_code;                                             \
1114     charset = ISO_CHARSET_TABLE (make_number (dimension),                  \
1115                                  make_number (chars),                      \
1116                                  make_number (final_char));                \
1117     c = MAKE_CHAR (charset, 0, 0);                                         \
1118     if (charset >= 0                                                       \
1119         && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
1120             || CODING_SAFE_CHAR_P (safe_chars, c)))                        \
1121       {                                                                    \
1122         if (coding->spec.iso2022.last_invalid_designation_register == 0    \
1123             && reg == 0                                                    \
1124             && charset == CHARSET_ASCII)                                   \
1125           {                                                                \
1126             /* We should insert this designation sequence as is so         \
1127                that it is surely written back to a file.  */               \
1128             coding->spec.iso2022.last_invalid_designation_register = -1;   \
1129             goto label_invalid_code;                                       \
1130           }                                                                \
1131         coding->spec.iso2022.last_invalid_designation_register = -1;       \
1132         if ((coding->mode & CODING_MODE_DIRECTION)                         \
1133             && CHARSET_REVERSE_CHARSET (charset) >= 0)                     \
1134           charset = CHARSET_REVERSE_CHARSET (charset);                     \
1135         CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;               \
1136       }                                                                    \
1137     else                                                                   \
1138       {                                                                    \
1139         coding->spec.iso2022.last_invalid_designation_register = reg;      \
1140         goto label_invalid_code;                                           \
1141       }                                                                    \
1142   } while (0)
1143
1144 /* Append a fresh, empty composition_data block to CODING's chain and
1145    make it CODING's current block.  CHAR_OFFSET is stored in the block.  */
1146
1147 void
1148 coding_allocate_composition_data (coding, char_offset)
1149      struct coding_system *coding;
1150      int char_offset;
1151 {
1152   struct composition_data *tail = coding->cmp_data, *block;
1153
1154   block = (struct composition_data *) xmalloc (sizeof *block);
1155   block->used = 0;
1156   block->char_offset = char_offset;
1157   block->next = NULL;
1158   block->prev = tail;
1159   if (tail != NULL)
1160     tail->next = block;
1161   coding->cmp_data_start = 0;
1162   coding->cmp_data = block;
1163 }
1164
1165 /* Record the starting position START and METHOD of one composition.

        Four slots are taken at the current end of
        coding->cmp_data->data:
          [0]  -1 for now (CODING_ADD_COMPOSITION_END stores the
               length of the record here),
          [1]  char_offset of the block plus START,
          [2]  not written here (CODING_ADD_COMPOSITION_END stores the
               ending position here),
          [3]  METHOD converted to int.
        coding->cmp_data_start is set to the index of slot [0].

        The arguments are parenthesized so that any expression may be
        passed.  CODING is evaluated more than once, and the arguments
        must not refer to variables named `cmp_data' or `data', which
        the expansion declares locally.  */
1166
1167 #define CODING_ADD_COMPOSITION_START(coding, start, method)     \
1168   do {                                                          \
1169     struct composition_data *cmp_data = (coding)->cmp_data;     \
1170     int *data = cmp_data->data + cmp_data->used;                \
1171     (coding)->cmp_data_start = cmp_data->used;                  \
1172     data[0] = -1;                                               \
1173     data[1] = cmp_data->char_offset + (start);                  \
1174     data[3] = (int) (method);                                   \
1175     cmp_data->used += 4;                                        \
1176   } while (0)
1177
1178 /* Record the ending position END of the current composition.

        In the record that starts at index coding->cmp_data_start of
        coding->cmp_data->data, slot [0] receives the number of slots
        used since that index and slot [2] receives the block's
        char_offset plus END.

        The arguments are parenthesized so that any expression may be
        passed.  CODING is evaluated more than once, and the arguments
        must not refer to variables named `cmp_data' or `data', which
        the expansion declares locally.  */
1179
1180 #define CODING_ADD_COMPOSITION_END(coding, end)                 \
1181   do {                                                          \
1182     struct composition_data *cmp_data = (coding)->cmp_data;     \
1183     int *data = cmp_data->data + (coding)->cmp_data_start;      \
1184     data[0] = cmp_data->used - (coding)->cmp_data_start;        \
1185     data[2] = cmp_data->char_offset + (end);                    \
1186   } while (0)
1187
1188 /* Record one COMPONENT (alternate character or composition rule).
        It is stored in the next free slot of coding->cmp_data->data and
        `used' is advanced by one; the value of the expression is the
        stored component.  The arguments are parenthesized so that any
        expression may be passed; CODING is evaluated twice.  */
1189
1190 #define CODING_ADD_COMPOSITION_COMPONENT(coding, component)     \
1191   ((coding)->cmp_data->data[(coding)->cmp_data->used++] = (component))
1192
1193 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
        C1 is the byte that followed ESC.

          - If coding->composing is COMPOSITION_DISABLED, store ESC and
            the low 7 bits of C1 at DST and add 2 to
            coding->produced_char.
          - Otherwise, if !COMPOSING_P (coding), begin a composition
            whose method is selected by C1 ('0', '2', '3', anything
            else) and record its start.  If coding->cmp_data is missing
            or lacks room, set coding->result to
            CODING_FINISH_INSUFFICIENT_CMP and jump to
            `label_end_of_loop' instead.
          - Otherwise, if the current method is one of the two ALTCHARS
            methods, switch it to COMPOSITION_RELATIVE.

        The expansion site must provide the variables `coding' and `dst'
        and the label `label_end_of_loop'.  */
1194
1195 #define DECODE_COMPOSITION_START(c1)                                       \
1196   do {                                                                     \
1197     if (coding->composing == COMPOSITION_DISABLED)                         \
1198       {                                                                    \
1199         *dst++ = ISO_CODE_ESC;                                             \
1200         *dst++ = c1 & 0x7f;                                                \
1201         coding->produced_char += 2;                                        \
1202       }                                                                    \
1203     else if (!COMPOSING_P (coding))                                        \
1204       {                                                                    \
1205         /* This is surely the start of a composition.  We must be sure     \
1206            that coding->cmp_data has enough space to store the             \
1207            information about the composition.  If not, terminate the       \
1208            current decoding loop, allocate one more memory block for       \
1209            coding->cmp_data in the caller, then start the decoding         \
1210            loop again.  We can't allocate memory here directly because     \
1211            it may cause buffer/string relocation.  */                      \
1212         if (!coding->cmp_data                                              \
1213             || (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \
1214                 >= COMPOSITION_DATA_SIZE))                                 \
1215           {                                                                \
1216             coding->result = CODING_FINISH_INSUFFICIENT_CMP;               \
1217             goto label_end_of_loop;                                        \
1218           }                                                                \
1219         coding->composing = (c1 == '0' ? COMPOSITION_RELATIVE              \
1220                              : c1 == '2' ? COMPOSITION_WITH_RULE           \
1221                              : c1 == '3' ? COMPOSITION_WITH_ALTCHARS       \
1222                              : COMPOSITION_WITH_RULE_ALTCHARS);            \
1223         CODING_ADD_COMPOSITION_START (coding, coding->produced_char,       \
1224                                       coding->composing);                  \
1225         coding->composition_rule_follows = 0;                              \
1226       }                                                                    \
1227     else                                                                   \
1228       {                                                                    \
1229         /* We are already handling a composition.  If the method is        \
1230            the following two, the codes following the current escape       \
1231            sequence are actual characters stored in a buffer.  */          \
1232         if (coding->composing == COMPOSITION_WITH_ALTCHARS                 \
1233             || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)        \
1234           {                                                                \
1235             coding->composing = COMPOSITION_RELATIVE;                      \
1236             coding->composition_rule_follows = 0;                          \
1237           }                                                                \
1238       }                                                                    \
1239   } while (0)
1240
1241 /* Finish a composition on ESC 1, or pass ESC C1 through if disabled.  */
1242
1243 #define DECODE_COMPOSITION_END(c1)                                      \
1244   do {                                                                  \
1245     if (coding->composing != COMPOSITION_DISABLED)                      \
1246       {                                                                 \
1247         CODING_ADD_COMPOSITION_END (coding, coding->produced_char);     \
1248         coding->composing = COMPOSITION_NO;                             \
1249       }                                                                 \
1250     else                                                                \
1251       {                                                                 \
1252         *dst++ = ISO_CODE_ESC;                                          \
1253         *dst++ = c1;                                                    \
1254         coding->produced_char += 2;                                     \
1255       }                                                                 \
1256   } while (0)
1257
1258 /* Decode a composition rule from the byte C1 (and maybe one more byte
1259    from SRC) and store one encoded composition rule in
1260    coding->cmp_data.

        C1 must be a modifiable variable: 32 is subtracted from it in
        place.  If the result is below 81 (old format), it is split into
        C1 / 9 and C1 % 9, each of which is replaced by 10 when it
        equals 4, and the pair is encoded with COMPOSITION_ENCODE_RULE.
        If the result is below 93 (new format), one more byte is read
        with ONE_MORE_BYTE into the variable `c2' of the expansion site
        and the rule is encoded from C1 - 81 and C2 - 32.  For any other
        value, rule 0 is stored.  In every case
        coding->composition_rule_follows is then reset to 0.

        NOTE(review): ONE_MORE_BYTE is defined outside this section; per
        the comment in decode_coding_iso2022 it exits the decoding loop
        when the source is exhausted, in which case nothing is stored.  */
1261
1262 #define DECODE_COMPOSITION_RULE(c1)                                     \
1263   do {                                                                  \
1264     int rule = 0;                                                       \
1265     (c1) -= 32;                                                         \
1266     if (c1 < 81)                /* old format (before ver.21) */        \
1267       {                                                                 \
1268         int gref = (c1) / 9;                                            \
1269         int nref = (c1) % 9;                                            \
1270         if (gref == 4) gref = 10;                                       \
1271         if (nref == 4) nref = 10;                                       \
1272         rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
1273       }                                                                 \
1274     else if (c1 < 93)           /* new format (after ver.21) */         \
1275       {                                                                 \
1276         ONE_MORE_BYTE (c2);                                             \
1277         rule = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);              \
1278       }                                                                 \
1279     CODING_ADD_COMPOSITION_COMPONENT (coding, rule);                    \
1280     coding->composition_rule_follows = 0;                               \
1281   } while (0)
1282
1283
1284 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1285
1286 static void
1287 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1288      struct coding_system *coding;
1289      unsigned char *source, *destination;
1290      int src_bytes, dst_bytes;
1291 {
1292   unsigned char *src = source;
1293   unsigned char *src_end = source + src_bytes;
1294   unsigned char *dst = destination;
1295   unsigned char *dst_end = destination + dst_bytes;
1296   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1297   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1298   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1299   /* SRC_BASE remembers the start position in source in each loop.
1300      The loop will be exited when there's not enough source code
1301      (within macro ONE_MORE_BYTE), or when there's not enough
1302      destination area to produce a character (within macro
1303      EMIT_CHAR).  */
1304   unsigned char *src_base;
1305   int c, charset;
1306   Lisp_Object translation_table;
1307   Lisp_Object safe_chars;
1308
1309   safe_chars = coding_safe_chars (coding);
1310
1311   if (NILP (Venable_character_translation))
1312     translation_table = Qnil;
1313   else
1314     {
1315       translation_table = coding->translation_table_for_decode;
1316       if (NILP (translation_table))
1317         translation_table = Vstandard_translation_table_for_decode;
1318     }
1319
1320   coding->result = CODING_FINISH_NORMAL;
1321
1322   while (1)
1323     {
1324       int c1, c2;
1325
1326       src_base = src;
1327       ONE_MORE_BYTE (c1);
1328
1329       /* We produce no character or one character.  */
1330       switch (iso_code_class [c1])
1331         {
1332         case ISO_0x20_or_0x7F:
1333           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1334             {
1335               DECODE_COMPOSITION_RULE (c1);
1336               continue;
1337             }
1338           if (charset0 < 0 || CHARSET_CHARS (charset0) == 94)
1339             {
1340               /* This is SPACE or DEL.  */
1341               charset = CHARSET_ASCII;
1342               break;
1343             }
1344           /* This is a graphic character, we fall down ...  */
1345
1346         case ISO_graphic_plane_0:
1347           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1348             {
1349               DECODE_COMPOSITION_RULE (c1);
1350               continue;
1351             }
1352           charset = charset0;
1353           break;
1354
1355         case ISO_0xA0_or_0xFF:
1356           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1357               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1358             goto label_invalid_code;
1359           /* This is a graphic character, we fall down ... */
1360
1361         case ISO_graphic_plane_1:
1362           if (charset1 < 0)
1363             goto label_invalid_code;
1364           charset = charset1;
1365           break;
1366
1367         case ISO_control_0:
1368           if (COMPOSING_P (coding))
1369             DECODE_COMPOSITION_END ('1');
1370
1371           /* All ISO2022 control characters in this class have the
1372              same representation in Emacs internal format.  */
1373           if (c1 == '\n'
1374               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1375               && (coding->eol_type == CODING_EOL_CR
1376                   || coding->eol_type == CODING_EOL_CRLF))
1377             {
1378               coding->result = CODING_FINISH_INCONSISTENT_EOL;
1379               goto label_end_of_loop;
1380             }
1381           charset = CHARSET_ASCII;
1382           break;
1383
1384         case ISO_control_1:
1385           if (COMPOSING_P (coding))
1386             DECODE_COMPOSITION_END ('1');
1387           goto label_invalid_code;
1388
1389         case ISO_carriage_return:
1390           if (COMPOSING_P (coding))
1391             DECODE_COMPOSITION_END ('1');
1392
1393           if (coding->eol_type == CODING_EOL_CR)
1394             c1 = '\n';
1395           else if (coding->eol_type == CODING_EOL_CRLF)
1396             {
1397               ONE_MORE_BYTE (c1);
1398               if (c1 != ISO_CODE_LF)
1399                 {
1400                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1401                     {
1402                       coding->result = CODING_FINISH_INCONSISTENT_EOL;
1403                       goto label_end_of_loop;
1404                     }
1405                   src--;
1406                   c1 = '\r';
1407                 }
1408             }
1409           charset = CHARSET_ASCII;
1410           break;
1411
1412         case ISO_shift_out:
1413           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1414               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1415             goto label_invalid_code;
1416           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1417           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1418           continue;
1419
1420         case ISO_shift_in:
1421           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1422             goto label_invalid_code;
1423           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1424           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1425           continue;
1426
1427         case ISO_single_shift_2_7:
1428         case ISO_single_shift_2:
1429           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1430             goto label_invalid_code;
1431           /* SS2 is handled as an escape sequence of ESC 'N' */
1432           c1 = 'N';
1433           goto label_escape_sequence;
1434
1435         case ISO_single_shift_3:
1436           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1437             goto label_invalid_code;
1438           /* SS3 is handled as an escape sequence of ESC 'O' */
1439           c1 = 'O';
1440           goto label_escape_sequence;
1441
1442         case ISO_control_sequence_introducer:
1443           /* CSI is handled as an escape sequence of ESC '[' ...  */
1444           c1 = '[';
1445           goto label_escape_sequence;
1446
1447         case ISO_escape:
1448           ONE_MORE_BYTE (c1);
1449         label_escape_sequence:
1450           /* Escape sequences handled by Emacs are invocation,
1451              designation, direction specification, and character
1452              composition specification.  */
1453           switch (c1)
1454             {
1455             case '&':           /* revision of following character set */
1456               ONE_MORE_BYTE (c1);
1457               if (!(c1 >= '@' && c1 <= '~'))
1458                 goto label_invalid_code;
1459               ONE_MORE_BYTE (c1);
1460               if (c1 != ISO_CODE_ESC)
1461                 goto label_invalid_code;
1462               ONE_MORE_BYTE (c1);
1463               goto label_escape_sequence;
1464
1465             case '$':           /* designation of 2-byte character set */
1466               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1467                 goto label_invalid_code;
1468               ONE_MORE_BYTE (c1);
1469               if (c1 >= '@' && c1 <= 'B')
1470                 {       /* designation of JISX0208.1978, GB2312.1980,
1471                            or JISX0208.1980 */
1472                   DECODE_DESIGNATION (0, 2, 94, c1);
1473                 }
1474               else if (c1 >= 0x28 && c1 <= 0x2B)
1475                 {       /* designation of DIMENSION2_CHARS94 character set */
1476                   ONE_MORE_BYTE (c2);
1477                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1478                 }
1479               else if (c1 >= 0x2C && c1 <= 0x2F)
1480                 {       /* designation of DIMENSION2_CHARS96 character set */
1481                   ONE_MORE_BYTE (c2);
1482                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1483                 }
1484               else
1485                 goto label_invalid_code;
1486               /* We must update these variables now.  */
1487               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1488               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1489               continue;
1490
1491             case 'n':           /* invocation of locking-shift-2 */
1492               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1493                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1494                 goto label_invalid_code;
1495               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1496               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1497               continue;
1498
1499             case 'o':           /* invocation of locking-shift-3 */
1500               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1501                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1502                 goto label_invalid_code;
1503               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
1504               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1505               continue;
1506
1507             case 'N':           /* invocation of single-shift-2 */
1508               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1509                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1510                 goto label_invalid_code;
1511               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
1512               ONE_MORE_BYTE (c1);
1513               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1514                 goto label_invalid_code;
1515               break;
1516
1517             case 'O':           /* invocation of single-shift-3 */
1518               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1519                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1520                 goto label_invalid_code;
1521               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
1522               ONE_MORE_BYTE (c1);
1523               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1524                 goto label_invalid_code;
1525               break;
1526
1527             case '0': case '2': case '3': case '4': /* start composition */
1528               DECODE_COMPOSITION_START (c1);
1529               continue;
1530
1531             case '1':           /* end composition */
1532               DECODE_COMPOSITION_END (c1);
1533               continue;
1534
1535             case '[':           /* specification of direction */
1536               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
1537                 goto label_invalid_code;
1538               /* For the moment, nested direction is not supported.
1539                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
1540                  left-to-right, and nonzero means right-to-left.  */
1541               ONE_MORE_BYTE (c1);
1542               switch (c1)
1543                 {
1544                 case ']':       /* end of the current direction */
1545                   coding->mode &= ~CODING_MODE_DIRECTION;
1546
1547                 case '0':       /* end of the current direction */
1548                 case '1':       /* start of left-to-right direction */
1549                   ONE_MORE_BYTE (c1);
1550                   if (c1 == ']')
1551                     coding->mode &= ~CODING_MODE_DIRECTION;
1552                   else
1553                     goto label_invalid_code;
1554                   break;
1555
1556                 case '2':       /* start of right-to-left direction */
1557                   ONE_MORE_BYTE (c1);
1558                   if (c1 == ']')
1559                     coding->mode |= CODING_MODE_DIRECTION;
1560                   else
1561                     goto label_invalid_code;
1562                   break;
1563
1564                 default:
1565                   goto label_invalid_code;
1566                 }
1567               continue;
1568
1569             default:
1570               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1571                 goto label_invalid_code;
1572               if (c1 >= 0x28 && c1 <= 0x2B)
1573                 {       /* designation of DIMENSION1_CHARS94 character set */
1574                   ONE_MORE_BYTE (c2);
1575                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
1576                 }
1577               else if (c1 >= 0x2C && c1 <= 0x2F)
1578                 {       /* designation of DIMENSION1_CHARS96 character set */
1579                   ONE_MORE_BYTE (c2);
1580                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
1581                 }
1582               else
1583                 goto label_invalid_code;
1584               /* We must update these variables now.  */
1585               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1586               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1587               continue;
1588             }
1589         }
1590
1591       /* Now we know CHARSET and 1st position code C1 of a character.
1592          Produce a multibyte sequence for that character while getting
1593          2nd position code C2 if necessary.  */
1594       if (CHARSET_DIMENSION (charset) == 2)
1595         {
1596           ONE_MORE_BYTE (c2);
1597           if (c1 < 0x80 ? c2 < 0x20 || c2 >= 0x80 : c2 < 0xA0)
1598             /* C2 is not in a valid range.  */
1599             goto label_invalid_code;
1600         }
1601       c = DECODE_ISO_CHARACTER (charset, c1, c2);
1602       EMIT_CHAR (c);
1603       continue;
1604
1605     label_invalid_code:
1606       coding->errors++;
1607       if (COMPOSING_P (coding))
1608         DECODE_COMPOSITION_END ('1');
1609       src = src_base;
1610       c = *src++;
1611       EMIT_CHAR (c);
1612     }
1613
1614  label_end_of_loop:
1615   coding->consumed = coding->consumed_char = src_base - source;
1616   coding->produced = dst - destination;
1617   return;
1618 }
1619
1620
1621 /* ISO2022 encoding stuff.  */
1622
1623 /*
1624    Just saying "ISO2022" is not enough when encoding; we have to
1625    specify more details.  In Emacs, each coding system of ISO2022
1626    variant has the following specifications:
1627         1. Initial designation to G0 thru G3.
1628         2. Allows short-form designation?
1629         3. ASCII should be designated to G0 before control characters?
1630         4. ASCII should be designated to G0 at end of line?
1631         5. 7-bit environment or 8-bit environment?
1632         6. Use locking-shift?
1633         7. Use single-shift?
1634    And the following two are only for Japanese:
1635         8. Use JISX0201-1976-Roman in place of ASCII?
1636         9. Use JISX0208-1978 in place of JISX0208-1983?
1637    These specifications are encoded in `coding->flags' as flag bits
1638    defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
1639    details.
1640 */
1641
1642 /* Emit at DST the escape sequence designating CHARSET to graphic register
1643    REG, advance DST, and record the designation in CODING.  The short form
1644    (no intermediate char) is used only for a 2-dimension 94-char set with
1645    final char '@', 'A', or 'B' going to REG 0 when CODING allows it.  */
1646
1647 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
1648   do {                                                                  \
1649     unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset);        \
1650     char *intermediate_char_94 = "()*+";   /* indexed by REG */         \
1651     char *intermediate_char_96 = ",-./";   /* indexed by REG */         \
1652     int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset);    \
1653     /* A revision below 255 is announced first: ESC & <'@' + rev>.  */  \
1654     if (revision < 255)                                                 \
1655       {                                                                 \
1656         *dst++ = ISO_CODE_ESC;                                          \
1657         *dst++ = '&';                                                   \
1658         *dst++ = '@' + revision;                                        \
1659       }                                                                 \
1660     *dst++ = ISO_CODE_ESC;                                              \
1661     if (CHARSET_DIMENSION (charset) == 1)                               \
1662       {   /* 1-dimension: ESC <intermediate> <final> */                 \
1663         if (CHARSET_CHARS (charset) == 94)                              \
1664           *dst++ = (unsigned char) (intermediate_char_94[reg]);         \
1665         else                                                            \
1666           *dst++ = (unsigned char) (intermediate_char_96[reg]);         \
1667       }                                                                 \
1668     else                                                                \
1669       {   /* Otherwise: ESC $ [<intermediate>] <final> */               \
1670         *dst++ = '$';                                                   \
1671         if (CHARSET_CHARS (charset) == 94)                              \
1672           {   /* The short form omits the intermediate char.  */        \
1673             if (! (coding->flags & CODING_FLAG_ISO_SHORT_FORM)          \
1674                 || reg != 0                                             \
1675                 || final_char < '@' || final_char > 'B')                \
1676               *dst++ = (unsigned char) (intermediate_char_94[reg]);     \
1677           }                                                             \
1678         else                                                            \
1679           *dst++ = (unsigned char) (intermediate_char_96[reg]);         \
1680       }                                                                 \
1681     *dst++ = final_char;                                                \
1682     CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;                \
1683   } while (0)
1684
1685 /* The following two macros emit ISO2022 single-shift-2 and single-shift-3
1686    at DST: `ESC N' / `ESC O' when CODING has the 7-bit flag, otherwise the
1687    code ISO_CODE_SS2 / ISO_CODE_SS3.  Both set CODING's single-shifting flag.  */
1688
1689 #define ENCODE_SINGLE_SHIFT_2                           \
1690   do {                                                  \
1691     if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
1692       *dst++ = ISO_CODE_ESC, *dst++ = 'N';              \
1693     else                                                \
1694       *dst++ = ISO_CODE_SS2;                            \
1695     CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
1696   } while (0)
1697
1698 #define ENCODE_SINGLE_SHIFT_3                           \
1699   do {                                                  \
1700     if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
1701       *dst++ = ISO_CODE_ESC, *dst++ = 'O';              \
1702     else                                                \
1703       *dst++ = ISO_CODE_SS3;                            \
1704     CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
1705   } while (0)
1706
1707 /* The following four macros emit at DST the ISO2022 locking shifts SI,
1708    SO, `ESC n' (locking-shift-2), and `ESC o' (locking-shift-3), and record
1709    in CODING that graphic register 0, 1, 2, or 3 is now on plane 0.  */
1710
1711 #define ENCODE_SHIFT_IN                         \
1712   do {                                          \
1713     *dst++ = ISO_CODE_SI;                       \
1714     CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; \
1715   } while (0)
1716
1717 #define ENCODE_SHIFT_OUT                        \
1718   do {                                          \
1719     *dst++ = ISO_CODE_SO;                       \
1720     CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; \
1721   } while (0)
1722
1723 #define ENCODE_LOCKING_SHIFT_2                  \
1724   do {                                          \
1725     *dst++ = ISO_CODE_ESC, *dst++ = 'n';        \
1726     CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; \
1727   } while (0)
1728
1729 #define ENCODE_LOCKING_SHIFT_3                  \
1730   do {                                          \
1731     *dst++ = ISO_CODE_ESC, *dst++ = 'o';        \
1732     CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; \
1733   } while (0)
1734
1735 /* Emit at DST the byte for position-code C1 of a character in the
1736    DIMENSION1 character set CHARSET.  If CHARSET is not yet usable,
1737    designation and invocation sequences are emitted in advance.  */
1738
1739 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
1740   do {                                                                  \
1741     if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
1742       {   /* A single shift is pending: emit C1 and clear the flag.  */ \
1743         if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
1744           *dst++ = c1 & 0x7F;                                           \
1745         else                                                            \
1746           *dst++ = c1 | 0x80;                                           \
1747         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
1748         break;                                                          \
1749       }                                                                 \
1750     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))      \
1751       {   /* CHARSET is on graphic plane 0: emit with bit 8 clear.  */  \
1752         *dst++ = c1 & 0x7F;                                             \
1753         break;                                                          \
1754       }                                                                 \
1755     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))      \
1756       {   /* CHARSET is on graphic plane 1: emit with bit 8 set.  */    \
1757         *dst++ = c1 | 0x80;                                             \
1758         break;                                                          \
1759       }                                                                 \
1760     else                                                                \
1761       /* Since CHARSET is not yet invoked to any graphic planes, we     \
1762          must invoke it, or, at first, designate it to some graphic     \
1763          register.  Then repeat the loop to actually produce the        \
1764          character.  */                                                 \
1765       dst = encode_invocation_designation (charset, coding, dst);       \
1766   } while (1)
1767
1768 /* Emit at DST the two bytes for position-codes C1 and C2 of a character
1769    in the DIMENSION2 character set CHARSET.  If CHARSET is not yet usable,
1770    designation and invocation codes are emitted in advance.  */
1771
1772 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
1773   do {                                                                  \
1774     if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
1775       {   /* A single shift is pending: emit both and clear the flag.  */ \
1776         if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
1777           *dst++ = c1 & 0x7F, *dst++ = c2 & 0x7F;                       \
1778         else                                                            \
1779           *dst++ = c1 | 0x80, *dst++ = c2 | 0x80;                       \
1780         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
1781         break;                                                          \
1782       }                                                                 \
1783     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))      \
1784       {   /* CHARSET is on graphic plane 0: emit with bit 8 clear.  */  \
1785         *dst++ = c1 & 0x7F, *dst++= c2 & 0x7F;                          \
1786         break;                                                          \
1787       }                                                                 \
1788     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))      \
1789       {   /* CHARSET is on graphic plane 1: emit with bit 8 set.  */    \
1790         *dst++ = c1 | 0x80, *dst++= c2 | 0x80;                          \
1791         break;                                                          \
1792       }                                                                 \
1793     else                                                                \
1794       /* Since CHARSET is not yet invoked to any graphic planes, we     \
1795          must invoke it, or, at first, designate it to some graphic     \
1796          register.  Then repeat the loop to actually produce the        \
1797          character.  */                                                 \
1798       dst = encode_invocation_designation (charset, coding, dst);       \
1799   } while (1)
1800
1801 #define ENCODE_ISO_CHARACTER(c)                                 \
1802   do {   /* Encode character C at DST, advancing DST.  */       \
1803     int charset, c1, c2;                                        \
1804     /* Split C into CHARSET and position codes C1, C2.  */      \
1805     SPLIT_CHAR (c, charset, c1, c2);                            \
1806     if (CHARSET_DEFINED_P (charset))                            \
1807       {                                                         \
1808         if (CHARSET_DIMENSION (charset) == 1)                   \
1809           {   /* USE_ROMAN: ASCII goes out as latin-jisx0201.  */ \
1810             if (charset == CHARSET_ASCII                        \
1811                 && coding->flags & CODING_FLAG_ISO_USE_ROMAN)   \
1812               charset = charset_latin_jisx0201;                 \
1813             ENCODE_ISO_CHARACTER_DIMENSION1 (charset, c1);      \
1814           }                                                     \
1815         else                                                    \
1816           {   /* USE_OLDJIS: jisx0208 goes out as jisx0208-1978.  */ \
1817             if (charset == charset_jisx0208                     \
1818                 && coding->flags & CODING_FLAG_ISO_USE_OLDJIS)  \
1819               charset = charset_jisx0208_1978;                  \
1820             ENCODE_ISO_CHARACTER_DIMENSION2 (charset, c1, c2);  \
1821           }                                                     \
1822       }                                                         \
1823     else                                                        \
1824       {   /* Undefined charset: copy C1, and C2 if >= 0, as is.  */ \
1825         *dst++ = c1;                                            \
1826         if (c2 >= 0)                                            \
1827           *dst++ = c2;                                          \
1828       }                                                         \
1829   } while (0)
1830
1831
1832 /* Instead of encoding character C, produce one `?', or two for wide C.  */
1833
1834 #define ENCODE_UNSAFE_CHARACTER(c)                                      \
1835   do {   /* Emitted again when the width of C's charset is > 1.  */     \
1836     ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION);       \
1837     if (CHARSET_WIDTH (CHAR_CHARSET (c)) > 1)                           \
1838       ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION);     \
1839   } while (0)
1840
1841
1842 /* Produce at the place pointed by DST whatever designation and invocation
1843    codes are needed to use CHARSET.  The element `spec.iso2022' of *CODING
1844    is updated to match.  Return the advanced DST.  */
1845
1846 unsigned char *
1847 encode_invocation_designation (charset, coding, dst)
1848      int charset;
1849      struct coding_system *coding;
1850      unsigned char *dst;
1851 {
1852   int reg;                      /* graphic register number */
1853
1854   /* Look for a graphic register CHARSET is already designated to.  */
1855   for (reg = 0; reg < 4; reg++)
1856     if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
1857       break;
1858
1859   if (reg >= 4)
1860     {
1861       /* CHARSET is not yet designated to any graphic registers.  */
1862       /* Use the register CHARSET requests, if it requests one.  */
1863       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1864       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
1865         /* Since CHARSET requests no special designation, designate it
1866            to graphic register 0.  */
1867         reg = 0;
1868
1869       ENCODE_DESIGNATION (charset, reg, coding);
1870     }
1871
1872   if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
1873       && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
1874     {
1875       /* REG is invoked to neither graphic plane 0 nor 1.  Invoke it by
1876          a locking shift, or for REG 2 and 3 a single shift if allowed.  */
1877       switch (reg)
1878         {
1879         case 0:                 /* graphic register 0: shift-in */
1880           ENCODE_SHIFT_IN;
1881           break;
1882
1883         case 1:                 /* graphic register 1: shift-out */
1884           ENCODE_SHIFT_OUT;
1885           break;
1886
1887         case 2:                 /* graphic register 2 */
1888           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1889             ENCODE_SINGLE_SHIFT_2;
1890           else
1891             ENCODE_LOCKING_SHIFT_2;
1892           break;
1893
1894         case 3:                 /* graphic register 3 */
1895           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1896             ENCODE_SINGLE_SHIFT_3;
1897           else
1898             ENCODE_LOCKING_SHIFT_3;
1899           break;
1900         }
1901     }
1902
1903   return dst;
1904 }
1905
1906 /* Produce at DST the 2-byte code for encoded composition rule RULE.  */
1907
1908 #define ENCODE_COMPOSITION_RULE(rule)           \
1909   do {   /* Bytes: 32 + 81 + GREF, 32 + NREF.  */ \
1910     int gref, nref;                             \
1911     COMPOSITION_DECODE_RULE (rule, gref, nref); \
1912     *dst++ = 32 + 81 + gref;                    \
1913     *dst++ = 32 + nref;                         \
1914   } while (0)
1915
1916 /* Produce codes for indicating the start of a composition sequence:
1917    ESC 0 for COMPOSITION_RELATIVE, ESC 3 for COMPOSITION_WITH_ALTCHARS,
1918    else ESC 4.  DATA is an array of integers describing the composition;
1919    DATA[3] is stored in CODING->composing.  See coding.h for its format.  */
1920
1921 #define ENCODE_COMPOSITION_START(coding, data)                          \
1922   do {                                                                  \
1923     coding->composing = data[3];                                        \
1924     *dst++ = ISO_CODE_ESC;                                              \
1925     if (coding->composing == COMPOSITION_RELATIVE)                      \
1926       *dst++ = '0';                                                     \
1927     else                                                                \
1928       {   /* Index 4 past the start -- presumably past a header.  */    \
1929         *dst++ = (coding->composing == COMPOSITION_WITH_ALTCHARS        \
1930                   ? '3' : '4');                                         \
1931         coding->cmp_data_index = coding->cmp_data_start + 4;            \
1932         coding->composition_rule_follows = 0;                           \
1933       }                                                                 \
1934   } while (0)
1935
1936 /* Produce ESC 1, indicating the end of the current composition.  */
1937
1938 #define ENCODE_COMPOSITION_END(coding, data)                    \
1939   do {   /* DATA[0] presumably is this entry's length.  */      \
1940     *dst++ = ISO_CODE_ESC;                                      \
1941     *dst++ = '1';                                               \
1942     coding->cmp_data_start += data[0];                          \
1943     coding->composing = COMPOSITION_NO;                         \
1944     if (coding->cmp_data_start == coding->cmp_data->used        \
1945         && coding->cmp_data->next)                              \
1946       {   /* This cmp_data is used up; go on to the next.  */   \
1947         coding->cmp_data = coding->cmp_data->next;              \
1948         coding->cmp_data_start = 0;                             \
1949       }                                                         \
1950   } while (0)
1951
1952 /* Produce composition start sequence ESC 0 and mark CODING as composing
1953    relatively.  Here the sequence does not start a new composition; it
1954    means the components (alternate chars and composition rules) have just
1955    been produced and the actual text of the composition follows in SRC.  */
1956
1957 #define ENCODE_COMPOSITION_FAKE_START(coding)   \
1958   do {                                          \
1959     *dst++ = ISO_CODE_ESC;                      \
1960     *dst++ = '0';                               \
1961     coding->composing = COMPOSITION_RELATIVE;   \
1962   } while (0)
1963
1964 /* The following three macros emit at DST the codes indicating the
1965    direction of text: a CSI followed by `2 ]' (R2L) or `0 ]' (L2R).  */
1966 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
1967   do {   /* With the 7-bit flag, ESC [ replaces CSI.  */ \
1968     if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
1969       *dst++ = ISO_CODE_ESC, *dst++ = '[';              \
1970     else                                                \
1971       *dst++ = ISO_CODE_CSI;                            \
1972   } while (0)
1973
1974 #define ENCODE_DIRECTION_R2L    /* statement form, like the other ENCODE_* */ \
1975   do { ENCODE_CONTROL_SEQUENCE_INTRODUCER; *dst++ = '2'; *dst++ = ']'; } while (0)
1976
1977 #define ENCODE_DIRECTION_L2R    /* statement form, like the other ENCODE_* */ \
1978   do { ENCODE_CONTROL_SEQUENCE_INTRODUCER; *dst++ = '0'; *dst++ = ']'; } while (0)
1979
1980 /* Produce codes to restore the initial state: SI unless plane 0 invokes
1981    register 0, then the initial designation of each changed register.  */
1982 #define ENCODE_RESET_PLANE_AND_REGISTER                                     \
1983   do {                                                                      \
1984     int reg;   /* graphic register number */                                \
1985     if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                        \
1986       ENCODE_SHIFT_IN;                                                      \
1987     for (reg = 0; reg < 4; reg++)   /* negative initial values: skipped */  \
1988       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0            \
1989           && (CODING_SPEC_ISO_DESIGNATION (coding, reg)                     \
1990               != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg)))        \
1991         ENCODE_DESIGNATION                                                  \
1992           (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
1993   } while (0)
1994
1995 /* Scan the line starting at SRC (up to a newline) and produce at DST a
1996    designation sequence for the first charset found requesting each
1997    graphic register, unless already designated.  Return updated DST.
1998    If the current block ends before any end-of-line, we may fail to
1999    find all the necessary designations.  */
2000
2001 static unsigned char *
2002 encode_designation_at_bol (coding, translation_table, src, src_end, dst)
2003      struct coding_system *coding;
2004      Lisp_Object translation_table;
2005      unsigned char *src, *src_end, *dst;
2006 {
2007   int charset, c, found = 0, reg;
2008   /* Table of charsets to be designated to each graphic register.  */
2009   int r[4];
2010
2011   for (reg = 0; reg < 4; reg++)
2012     r[reg] = -1;
2013
2014   while (found < 4)             /* stop once every register has a charset */
2015     {
2016       ONE_MORE_CHAR (c);        /* presumably uses SRC_END, TRANSLATION_TABLE */
2017       if (c == '\n')
2018         break;
2019
2020       charset = CHAR_CHARSET (c);
2021       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
2022       if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
2023         {
2024           found++;
2025           r[reg] = charset;
2026         }
2027     }
2028
2029  label_end_of_loop:             /* NOTE(review): presumably the ONE_MORE_CHAR exit */
2030   if (found)
2031     {
2032       for (reg = 0; reg < 4; reg++)
2033         if (r[reg] >= 0
2034             && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
2035           ENCODE_DESIGNATION (r[reg], reg, coding);
2036     }
2037
2038   return dst;
2039 }
2040
/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".

   Encode the SRC_BYTES bytes of text at SOURCE according to the
   ISO2022 settings in CODING and store the result at DESTINATION.
   On return, CODING->consumed is the number of source bytes handled,
   CODING->consumed_char the number of source characters handled, and
   CODING->produced and CODING->produced_char the number of bytes
   written.  CODING->errors is reset to zero and incremented for each
   single-byte non-ASCII character that is copied through as is.  */

static void
encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
     struct coding_system *coding;
     unsigned char *source, *destination;
     int src_bytes, dst_bytes;
{
  unsigned char *src = source;
  unsigned char *src_end = source + src_bytes;
  unsigned char *dst = destination;
  unsigned char *dst_end = destination + dst_bytes;
  /* Since the maximum bytes produced by each loop is 20, we subtract 19
     from DST_END to assure overflow checking is necessary only at the
     head of loop.  */
  unsigned char *adjusted_dst_end = dst_end - 19;
  /* SRC_BASE remembers the start position in source in each loop.
     The loop will be exited when there's not enough source text to
     analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
     there's not enough destination area to produce encoded codes
     (within macro EMIT_BYTES).  */
  unsigned char *src_base;
  int c;
  Lisp_Object translation_table;
  Lisp_Object safe_chars;

  safe_chars = coding_safe_chars (coding);

  /* Pick the translation table: none when character translation is
     disabled, else the one of CODING, else the standard one.  */
  if (NILP (Venable_character_translation))
    translation_table = Qnil;
  else
    {
      translation_table = coding->translation_table_for_encode;
      if (NILP (translation_table))
        translation_table = Vstandard_translation_table_for_encode;
    }

  coding->consumed_char = 0;
  coding->errors = 0;
  while (1)
    {
      src_base = src;

      /* The only overflow check of the iteration.  When DST_BYTES is
         zero the limit is taken relative to SRC instead of DST_END.
         NOTE(review): presumably source and destination then share
         one buffer -- confirm against the general notes.  */
      if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19)))
        {
          coding->result = CODING_FINISH_INSUFFICIENT_DST;
          break;
        }

      if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
          && CODING_SPEC_ISO_BOL (coding))
        {
          /* We have to produce designation sequences if any now.  */
          dst = encode_designation_at_bol (coding, translation_table,
                                           src, src_end, dst);
          CODING_SPEC_ISO_BOL (coding) = 0;
        }

      /* Check composition start and end.  */
      if (coding->composing != COMPOSITION_DISABLED
          && coding->cmp_data_start < coding->cmp_data->used)
        {
          struct composition_data *cmp_data = coding->cmp_data;
          int *data = cmp_data->data + coding->cmp_data_start;
          int this_pos = cmp_data->char_offset + coding->consumed_char;

          if (coding->composing == COMPOSITION_RELATIVE)
            {
              if (this_pos == data[2])
                {
                  ENCODE_COMPOSITION_END (coding, data);
                  /* Reload both pointers after the macro.
                     NOTE(review): presumably it advances the
                     composition data position -- confirm.  */
                  cmp_data = coding->cmp_data;
                  data = cmp_data->data + coding->cmp_data_start;
                }
            }
          else if (COMPOSING_P (coding))
            {
              /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHARS  */
              if (coding->cmp_data_index == coding->cmp_data_start + data[0])
                /* We have consumed components of the composition.
                   What follows in SRC is the composition's base
                   text.  */
                ENCODE_COMPOSITION_FAKE_START (coding);
              else
                {
                  /* This C shadows the outer C; it is the next
                     component (a character or a rule) taken from the
                     composition data, not from SRC.  */
                  int c = cmp_data->data[coding->cmp_data_index++];
                  if (coding->composition_rule_follows)
                    {
                      ENCODE_COMPOSITION_RULE (c);
                      coding->composition_rule_follows = 0;
                    }
                  else
                    {
                      if (coding->flags & CODING_FLAG_ISO_SAFE
                          && ! CODING_SAFE_CHAR_P (safe_chars, c))
                        ENCODE_UNSAFE_CHARACTER (c);
                      else
                        ENCODE_ISO_CHARACTER (c);
                      if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
                        coding->composition_rule_follows = 1;
                    }
                  /* Nothing was read from SRC, so skip the
                     consumed_char increment at the loop bottom.  */
                  continue;
                }
            }
          if (!COMPOSING_P (coding))
            {
              if (this_pos == data[1])
                {
                  ENCODE_COMPOSITION_START (coding, data);
                  continue;
                }
            }
        }

      ONE_MORE_CHAR (c);

      /* Now encode the character C.  */
      if (c < 0x20 || c == 0x7F)
        {
          if (c == '\r')
            {
              if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
                {
                  if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
                    ENCODE_RESET_PLANE_AND_REGISTER;
                  *dst++ = c;
                  /* NOTE: this path leaves consumed_char untouched.  */
                  continue;
                }
              /* fall down to treat '\r' as '\n' ...  */
              c = '\n';
            }
          if (c == '\n')
            {
              if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
                ENCODE_RESET_PLANE_AND_REGISTER;
              /* Forget the current designations without emitting
                 anything: copy the initial table over the current
                 one.  */
              if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
                bcopy (coding->spec.iso2022.initial_designation,
                       coding->spec.iso2022.current_designation,
                       sizeof coding->spec.iso2022.initial_designation);
              /* Write the end-of-line in the format of CODING; an
                 undecided format is written as LF.  */
              if (coding->eol_type == CODING_EOL_LF
                  || coding->eol_type == CODING_EOL_UNDECIDED)
                *dst++ = ISO_CODE_LF;
              else if (coding->eol_type == CODING_EOL_CRLF)
                *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
              else
                *dst++ = ISO_CODE_CR;
              CODING_SPEC_ISO_BOL (coding) = 1;
            }
          else
            {
              if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
                ENCODE_RESET_PLANE_AND_REGISTER;
              *dst++ = c;
            }
        }
      else if (ASCII_BYTE_P (c))
        ENCODE_ISO_CHARACTER (c);
      else if (SINGLE_BYTE_CHAR_P (c))
        {
          /* A single-byte non-ASCII character is copied as is and
             counted as an error.  */
          *dst++ = c;
          coding->errors++;
        }
      else if (coding->flags & CODING_FLAG_ISO_SAFE
               && ! CODING_SAFE_CHAR_P (safe_chars, c))
        ENCODE_UNSAFE_CHARACTER (c);
      else
        ENCODE_ISO_CHARACTER (c);

      coding->consumed_char++;
    }

 label_end_of_loop:
  /* SRC_BASE, not SRC, marks the consumed text: a character whose
     processing was cut short is not counted.  */
  coding->consumed = src_base - source;
  coding->produced = coding->produced_char = dst - destination;
}
2216
2217 \f
2218 /*** 4. SJIS and BIG5 handlers ***/
2219
2220 /* Although SJIS and BIG5 are not ISO's coding system, they are used
2221    quite widely.  So, for the moment, Emacs supports them in the bare
2222    C code.  But, in the future, they may be supported only by CCL.  */
2223
/* SJIS is a coding system encoding three character sets: ASCII, right
   half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
   as is.  A character of charset katakana-jisx0201 is encoded by
   "position-code + 0x80".  A character of charset japanese-jisx0208
   is encoded in two bytes, but the two position-codes are divided and
   shifted so that they fit in the ranges below.
2230
2231    --- CODE RANGE of SJIS ---
2232    (character set)      (range)
2233    ASCII                0x00 .. 0x7F
   KATAKANA-JISX0201    0xA1 .. 0xDF
2235    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
2236             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
2237    -------------------------------
2238
2239 */
2240
/* BIG5 is a coding system encoding two character sets: ASCII and
   Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
   character set and is encoded in two bytes.
2244
2245    --- CODE RANGE of BIG5 ---
2246    (character set)      (range)
2247    ASCII                0x00 .. 0x7F
2248    Big5 (1st byte)      0xA1 .. 0xFE
2249         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
2250    --------------------------
2251
2252    Since the number of characters in Big5 is larger than maximum
2253    characters in Emacs' charset (96x96), it can't be handled as one
2254    charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
2255    and `charset-big5-2'.  Both are DIMENSION2 and CHARS94.  The former
2256    contains frequently used characters and the latter contains less
2257    frequently used characters.  */
2258
/* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
   are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
   C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
   format.  CHARSET is `charset_big5_1' or `charset_big5_2'.

   Every argument is parenthesized in the expansions so that
   expression arguments bind as the caller expects.  Arguments are
   still evaluated more than once, so they must not have side
   effects.  All inputs are read before any output is stored, so a
   caller may pass the same variables for inputs and outputs.  */

/* Number of Big5 characters which have the same code in 1st byte.  */
#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)

/* Set CHARSET, C1 and C2 from the BIG5 bytes B1 and B2.  A second
   byte below 0x7F belongs to the 0x40.. range; any other second byte
   is taken to be in the 0xA1.. range, which follows it directly in
   the linear index TEMP.  */
#define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = (((b1) - 0xA1) * BIG5_SAME_ROW                                  \
         + (b2) - ((b2) < 0x7F ? 0x40 : 0x62));                         \
    if ((b1) < 0xC9)                                                    \
      (charset) = charset_big5_1;                                       \
    else                                                                \
      {                                                                 \
        (charset) = charset_big5_2;                                     \
        temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                          \
      }                                                                 \
    (c1) = temp / (0xFF - 0xA1) + 0x21;                                 \
    (c2) = temp % (0xFF - 0xA1) + 0x21;                                 \
  } while (0)

/* The inverse of DECODE_BIG5: set the BIG5 bytes B1 and B2 from
   CHARSET, C1 and C2.  */
#define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);                  \
    if ((charset) == charset_big5_2)                                    \
      temp += BIG5_SAME_ROW * (0xC9 - 0xA1);                            \
    (b1) = temp / BIG5_SAME_ROW + 0xA1;                                 \
    (b2) = temp % BIG5_SAME_ROW;                                        \
    (b2) += (b2) < 0x3F ? 0x40 : 0x62;                                  \
  } while (0)
2291
2292 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2293    Check if a text is encoded in SJIS.  If it is, return
2294    CODING_CATEGORY_MASK_SJIS, else return 0.  */
2295
2296 int
2297 detect_coding_sjis (src, src_end)
2298      unsigned char *src, *src_end;
2299 {
2300   int c;
2301   /* Dummy for ONE_MORE_BYTE.  */
2302   struct coding_system dummy_coding;
2303   struct coding_system *coding = &dummy_coding;
2304
2305   while (1)
2306     {
2307       ONE_MORE_BYTE (c);
2308       if (c >= 0x81)
2309         {
2310           if (c <= 0x9F || (c >= 0xE0 && c <= 0xEF))
2311             {
2312               ONE_MORE_BYTE (c);
2313               if (c < 0x40 || c == 0x7F || c > 0xFC)
2314                 return 0;
2315             }
2316           else if (c > 0xDF)
2317             return 0;
2318         }
2319     }
2320  label_end_of_loop:
2321   return CODING_CATEGORY_MASK_SJIS;
2322 }
2323
2324 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2325    Check if a text is encoded in BIG5.  If it is, return
2326    CODING_CATEGORY_MASK_BIG5, else return 0.  */
2327
2328 int
2329 detect_coding_big5 (src, src_end)
2330      unsigned char *src, *src_end;
2331 {
2332   int c;
2333   /* Dummy for ONE_MORE_BYTE.  */
2334   struct coding_system dummy_coding;
2335   struct coding_system *coding = &dummy_coding;
2336
2337   while (1)
2338     {
2339       ONE_MORE_BYTE (c);
2340       if (c >= 0xA1)
2341         {
2342           ONE_MORE_BYTE (c);
2343           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
2344             return 0;
2345         }
2346     }
2347  label_end_of_loop:
2348   return CODING_CATEGORY_MASK_BIG5;
2349 }
2350
2351 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2352    Check if a text is encoded in UTF-8.  If it is, return
2353    CODING_CATEGORY_MASK_UTF_8, else return 0.  */
2354
2355 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
2356 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
2357 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
2358 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
2359 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
2360 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
2361 #define UTF_8_6_OCTET_LEADING_P(c) (((c) & 0xFE) == 0xFC)
2362
2363 int
2364 detect_coding_utf_8 (src, src_end)
2365      unsigned char *src, *src_end;
2366 {
2367   unsigned char c;
2368   int seq_maybe_bytes;
2369   /* Dummy for ONE_MORE_BYTE.  */
2370   struct coding_system dummy_coding;
2371   struct coding_system *coding = &dummy_coding;
2372
2373   while (1)
2374     {
2375       ONE_MORE_BYTE (c);
2376       if (UTF_8_1_OCTET_P (c))
2377         continue;
2378       else if (UTF_8_2_OCTET_LEADING_P (c))
2379         seq_maybe_bytes = 1;
2380       else if (UTF_8_3_OCTET_LEADING_P (c))
2381         seq_maybe_bytes = 2;
2382       else if (UTF_8_4_OCTET_LEADING_P (c))
2383         seq_maybe_bytes = 3;
2384       else if (UTF_8_5_OCTET_LEADING_P (c))
2385         seq_maybe_bytes = 4;
2386       else if (UTF_8_6_OCTET_LEADING_P (c))
2387         seq_maybe_bytes = 5;
2388       else
2389         return 0;
2390
2391       do
2392         {
2393           ONE_MORE_BYTE (c);
2394           if (!UTF_8_EXTRA_OCTET_P (c))
2395             return 0;
2396           seq_maybe_bytes--;
2397         }
2398       while (seq_maybe_bytes > 0);
2399     }
2400
2401  label_end_of_loop:
2402   return CODING_CATEGORY_MASK_UTF_8;
2403 }
2404
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
   Check if a text is encoded in UTF-16 by looking at its first two
   bytes.  If they are 0xFF 0xFE, return
   CODING_CATEGORY_MASK_UTF_16_LE; if they are 0xFE 0xFF, return
   CODING_CATEGORY_MASK_UTF_16_BE; else return 0.  */
2410
/* Nonzero if the 16-bit value VAL is 0xFFFE or 0xFFFF.  */
#define UTF_16_INVALID_P(val)   \
  (((val) == 0xFFFE)            \
   || ((val) == 0xFFFF))

/* Nonzero if VAL is a high (leading) surrogate, 0xD800..0xDBFF.  The
   range is selected by the top six bits, so the mask is 0xFC00;
   masking with the range base itself would also match values outside
   the range, such as 0xFFFF.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* Nonzero if VAL is a low (trailing) surrogate, 0xDC00..0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)
2420
2421 int
2422 detect_coding_utf_16 (src, src_end)
2423      unsigned char *src, *src_end;
2424 {
2425   unsigned char c1, c2;
2426   /* Dummy for TWO_MORE_BYTES.  */
2427   struct coding_system dummy_coding;
2428   struct coding_system *coding = &dummy_coding;
2429
2430   TWO_MORE_BYTES (c1, c2);
2431
2432   if ((c1 == 0xFF) && (c2 == 0xFE))
2433     return CODING_CATEGORY_MASK_UTF_16_LE;
2434   else if ((c1 == 0xFE) && (c2 == 0xFF))
2435     return CODING_CATEGORY_MASK_UTF_16_BE;
2436
2437  label_end_of_loop:
2438   return 0;
2439 }
2440
2441 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2442    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
2443
2444 static void
2445 decode_coding_sjis_big5 (coding, source, destination,
2446                          src_bytes, dst_bytes, sjis_p)
2447      struct coding_system *coding;
2448      unsigned char *source, *destination;
2449      int src_bytes, dst_bytes;
2450      int sjis_p;
2451 {
2452   unsigned char *src = source;
2453   unsigned char *src_end = source + src_bytes;
2454   unsigned char *dst = destination;
2455   unsigned char *dst_end = destination + dst_bytes;
2456   /* SRC_BASE remembers the start position in source in each loop.
2457      The loop will be exited when there's not enough source code
2458      (within macro ONE_MORE_BYTE), or when there's not enough
2459      destination area to produce a character (within macro
2460      EMIT_CHAR).  */
2461   unsigned char *src_base;
2462   Lisp_Object translation_table;
2463
2464   if (NILP (Venable_character_translation))
2465     translation_table = Qnil;
2466   else
2467     {
2468       translation_table = coding->translation_table_for_decode;
2469       if (NILP (translation_table))
2470         translation_table = Vstandard_translation_table_for_decode;
2471     }
2472
2473   coding->produced_char = 0;
2474   while (1)
2475     {
2476       int c, charset, c1, c2;
2477
2478       src_base = src;
2479       ONE_MORE_BYTE (c1);
2480
2481       if (c1 < 0x80)
2482         {
2483           charset = CHARSET_ASCII;
2484           if (c1 < 0x20)
2485             {
2486               if (c1 == '\r')
2487                 {
2488                   if (coding->eol_type == CODING_EOL_CRLF)
2489                     {
2490                       ONE_MORE_BYTE (c2);
2491                       if (c2 == '\n')
2492                         c1 = c2;
2493                       else if (coding->mode
2494                                & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2495                         {
2496                           coding->result = CODING_FINISH_INCONSISTENT_EOL;
2497                           goto label_end_of_loop;
2498                         }
2499                       else
2500                         /* To process C2 again, SRC is subtracted by 1.  */
2501                         src--;
2502                     }
2503                   else if (coding->eol_type == CODING_EOL_CR)
2504                     c1 = '\n';
2505                 }
2506               else if (c1 == '\n'
2507                        && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2508                        && (coding->eol_type == CODING_EOL_CR
2509                            || coding->eol_type == CODING_EOL_CRLF))
2510                 {
2511                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
2512                   goto label_end_of_loop;
2513                 }
2514             }
2515         }
2516       else
2517         {
2518           if (sjis_p)
2519             {
2520               if (c1 >= 0xF0)
2521                 goto label_invalid_code;
2522               if (c1 < 0xA0 || c1 >= 0xE0)
2523                 {
2524                   /* SJIS -> JISX0208 */
2525                   ONE_MORE_BYTE (c2);
2526                   if (c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
2527                     goto label_invalid_code;
2528                   DECODE_SJIS (c1, c2, c1, c2);
2529                   charset = charset_jisx0208;
2530                 }
2531               else
2532                 /* SJIS -> JISX0201-Kana */
2533                 charset = charset_katakana_jisx0201;
2534             }
2535           else
2536             {
2537               /* BIG5 -> Big5 */
2538               if (c1 < 0xA1 || c1 > 0xFE)
2539                 goto label_invalid_code;
2540               ONE_MORE_BYTE (c2);
2541               if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE)
2542                 goto label_invalid_code;
2543               DECODE_BIG5 (c1, c2, charset, c1, c2);
2544             }
2545         }
2546
2547       c = DECODE_ISO_CHARACTER (charset, c1, c2);
2548       EMIT_CHAR (c);
2549       continue;
2550
2551     label_invalid_code:
2552       coding->errors++;
2553       src = src_base;
2554       c = *src++;
2555       EMIT_CHAR (c);
2556     }
2557
2558  label_end_of_loop:
2559   coding->consumed = coding->consumed_char = src_base - source;
2560   coding->produced = dst - destination;
2561   return;
2562 }
2563
2564 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
2565    This function can encode charsets `ascii', `katakana-jisx0201',
2566    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
2567    are sure that all these charsets are registered as official charset
2568    (i.e. do not have extended leading-codes).  Characters of other
2569    charsets are produced without any encoding.  If SJIS_P is 1, encode
2570    SJIS text, else encode BIG5 text.  */
2571
2572 static void
2573 encode_coding_sjis_big5 (coding, source, destination,
2574                          src_bytes, dst_bytes, sjis_p)
2575      struct coding_system *coding;
2576      unsigned char *source, *destination;
2577      int src_bytes, dst_bytes;
2578      int sjis_p;
2579 {
2580   unsigned char *src = source;
2581   unsigned char *src_end = source + src_bytes;
2582   unsigned char *dst = destination;
2583   unsigned char *dst_end = destination + dst_bytes;
2584   /* SRC_BASE remembers the start position in source in each loop.
2585      The loop will be exited when there's not enough source text to
2586      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2587      there's not enough destination area to produce encoded codes
2588      (within macro EMIT_BYTES).  */
2589   unsigned char *src_base;
2590   Lisp_Object translation_table;
2591
2592   if (NILP (Venable_character_translation))
2593     translation_table = Qnil;
2594   else
2595     {
2596       translation_table = coding->translation_table_for_encode;
2597       if (NILP (translation_table))
2598         translation_table = Vstandard_translation_table_for_encode;
2599     }
2600
2601   while (1)
2602     {
2603       int c, charset, c1, c2;
2604
2605       src_base = src;
2606       ONE_MORE_CHAR (c);
2607
2608       /* Now encode the character C.  */
2609       if (SINGLE_BYTE_CHAR_P (c))
2610         {
2611           switch (c)
2612             {
2613             case '\r':
2614               if (!coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
2615                 {
2616                   EMIT_ONE_BYTE (c);
2617                   break;
2618                 }
2619               c = '\n';
2620             case '\n':
2621               if (coding->eol_type == CODING_EOL_CRLF)
2622                 {
2623                   EMIT_TWO_BYTES ('\r', c);
2624                   break;
2625                 }
2626               else if (coding->eol_type == CODING_EOL_CR)
2627                 c = '\r';
2628             default:
2629               EMIT_ONE_BYTE (c);
2630             }
2631         }
2632       else
2633         {
2634           SPLIT_CHAR (c, charset, c1, c2);
2635           if (sjis_p)
2636             {
2637               if (charset == charset_jisx0208
2638                   || charset == charset_jisx0208_1978)
2639                 {
2640                   ENCODE_SJIS (c1, c2, c1, c2);
2641                   EMIT_TWO_BYTES (c1, c2);
2642                 }
2643               else if (charset == charset_katakana_jisx0201)
2644                 EMIT_ONE_BYTE (c1 | 0x80);
2645               else if (charset == charset_latin_jisx0201)
2646                 EMIT_ONE_BYTE (c1);
2647               else
2648                 /* There's no way other than producing the internal
2649                    codes as is.  */
2650                 EMIT_BYTES (src_base, src);
2651             }
2652           else
2653             {
2654               if (charset == charset_big5_1 || charset == charset_big5_2)
2655                 {
2656                   ENCODE_BIG5 (charset, c1, c2, c1, c2);
2657                   EMIT_TWO_BYTES (c1, c2);
2658                 }
2659               else
2660                 /* There's no way other than producing the internal
2661                    codes as is.  */
2662                 EMIT_BYTES (src_base, src);
2663             }
2664         }
2665       coding->consumed_char++;
2666     }
2667
2668  label_end_of_loop:
2669   coding->consumed = src_base - source;
2670   coding->produced = coding->produced_char = dst - destination;
2671 }
2672
2673 \f
2674 /*** 5. CCL handlers ***/
2675
2676 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2677    Check if a text is encoded in a coding system of which
2678    encoder/decoder are written in CCL program.  If it is, return
2679    CODING_CATEGORY_MASK_CCL, else return 0.  */
2680
2681 int
2682 detect_coding_ccl (src, src_end)
2683      unsigned char *src, *src_end;
2684 {
2685   unsigned char *valid;
2686   int c;
2687   /* Dummy for ONE_MORE_BYTE.  */
2688   struct coding_system dummy_coding;
2689   struct coding_system *coding = &dummy_coding;
2690
2691   /* No coding system is assigned to coding-category-ccl.  */
2692   if (!coding_system_table[CODING_CATEGORY_IDX_CCL])
2693     return 0;
2694
2695   valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
2696   while (1)
2697     {
2698       ONE_MORE_BYTE (c);
2699       if (! valid[c])
2700         return 0;
2701     }
2702  label_end_of_loop:
2703   return CODING_CATEGORY_MASK_CCL;
2704 }
2705
2706 \f
2707 /*** 6. End-of-line handlers ***/
2708
2709 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2710
2711 static void
2712 decode_eol (coding, source, destination, src_bytes, dst_bytes)
2713      struct coding_system *coding;
2714      unsigned char *source, *destination;
2715      int src_bytes, dst_bytes;
2716 {
2717   unsigned char *src = source;
2718   unsigned char *dst = destination;
2719   unsigned char *src_end = src + src_bytes;
2720   unsigned char *dst_end = dst + dst_bytes;
2721   Lisp_Object translation_table;
2722   /* SRC_BASE remembers the start position in source in each loop.
2723      The loop will be exited when there's not enough source code
2724      (within macro ONE_MORE_BYTE), or when there's not enough
2725      destination area to produce a character (within macro
2726      EMIT_CHAR).  */
2727   unsigned char *src_base;
2728   int c;
2729
2730   translation_table = Qnil;
2731   switch (coding->eol_type)
2732     {
2733     case CODING_EOL_CRLF:
2734       while (1)
2735         {
2736           src_base = src;
2737           ONE_MORE_BYTE (c);
2738           if (c == '\r')
2739             {
2740               ONE_MORE_BYTE (c);
2741               if (c != '\n')
2742                 {
2743                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2744                     {
2745                       coding->result = CODING_FINISH_INCONSISTENT_EOL;
2746                       goto label_end_of_loop;
2747                     }
2748                   src--;
2749                   c = '\r';
2750                 }
2751             }
2752           else if (c == '\n'
2753                    && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
2754             {
2755               coding->result = CODING_FINISH_INCONSISTENT_EOL;
2756               goto label_end_of_loop;
2757             }
2758           EMIT_CHAR (c);
2759         }
2760       break;
2761
2762     case CODING_EOL_CR:
2763       while (1)
2764         {
2765           src_base = src;
2766           ONE_MORE_BYTE (c);
2767           if (c == '\n')
2768             {
2769               if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2770                 {
2771                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
2772                   goto label_end_of_loop;
2773                 }
2774             }
2775           else if (c == '\r')
2776             c = '\n';
2777           EMIT_CHAR (c);
2778         }
2779       break;
2780
2781     default:                    /* no need for EOL handling */
2782       while (1)
2783         {
2784           src_base = src;
2785           ONE_MORE_BYTE (c);
2786           EMIT_CHAR (c);
2787         }
2788     }
2789
2790  label_end_of_loop:
2791   coding->consumed = coding->consumed_char = src_base - source;
2792   coding->produced = dst - destination;
2793   return;
2794 }
2795
2796 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
2797    format of end-of-line according to `coding->eol_type'.  It also
2798    convert multibyte form 8-bit characers to unibyte if
2799    CODING->src_multibyte is nonzero.  If `coding->mode &
2800    CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code '\r' in source text
2801    also means end-of-line.  */
2802
2803 static void
2804 encode_eol (coding, source, destination, src_bytes, dst_bytes)
2805      struct coding_system *coding;
2806      unsigned char *source, *destination;
2807      int src_bytes, dst_bytes;
2808 {
2809   unsigned char *src = source;
2810   unsigned char *dst = destination;
2811   unsigned char *src_end = src + src_bytes;
2812   unsigned char *dst_end = dst + dst_bytes;
2813   Lisp_Object translation_table;
2814   /* SRC_BASE remembers the start position in source in each loop.
2815      The loop will be exited when there's not enough source text to
2816      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2817      there's not enough destination area to produce encoded codes
2818      (within macro EMIT_BYTES).  */
2819   unsigned char *src_base;
2820   int c;
2821   int selective_display = coding->mode & CODING_MODE_SELECTIVE_DISPLAY;
2822
2823   translation_table = Qnil;
2824   if (coding->src_multibyte
2825       && *(src_end - 1) == LEADING_CODE_8_BIT_CONTROL)
2826     {
2827       src_end--;
2828       src_bytes--;
2829       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
2830     }
2831
2832   if (coding->eol_type == CODING_EOL_CRLF)
2833     {
2834       while (src < src_end)
2835         {
2836           src_base = src;
2837           c = *src++;
2838           if (c >= 0x20)
2839             EMIT_ONE_BYTE (c);
2840           else if (c == '\n' || (c == '\r' && selective_display))
2841             EMIT_TWO_BYTES ('\r', '\n');
2842           else
2843             EMIT_ONE_BYTE (c);
2844         }
2845       src_base = src;
2846     label_end_of_loop:
2847       ;
2848     }
2849   else
2850     {
2851       if (!dst_bytes || src_bytes <= dst_bytes)
2852         {
2853           safe_bcopy (src, dst, src_bytes);
2854           src_base = src_end;
2855           dst += src_bytes;
2856         }
2857       else
2858         {
2859           if (coding->src_multibyte
2860               && *(src + dst_bytes - 1) == LEADING_CODE_8_BIT_CONTROL)
2861             dst_bytes--;
2862           safe_bcopy (src, dst, dst_bytes);
2863           src_base = src + dst_bytes;
2864           dst = destination + dst_bytes;
2865           coding->result = CODING_FINISH_INSUFFICIENT_DST;
2866         }
2867       if (coding->eol_type == CODING_EOL_CR)
2868         {
2869           for (src = destination; src < dst; src++)
2870             if (*src == '\n') *src = '\r';
2871         }
2872       else if (selective_display)
2873         {
2874           for (src = destination; src < dst; src++)
2875             if (*src == '\r') *src = '\n';
2876         }
2877     }
2878   if (coding->src_multibyte)
2879     dst = destination + str_as_unibyte (destination, dst - destination);
2880
2881   coding->consumed = src_base - source;
2882   coding->produced = dst - destination;
2883   coding->produced_char = coding->produced;
2884 }
2885
2886 \f
2887 /*** 7. C library functions ***/
2888
2889 /* In Emacs Lisp, coding system is represented by a Lisp symbol which
2890    has a property `coding-system'.  The value of this property is a
2891    vector of length 5 (called as coding-vector).  Among elements of
2892    this vector, the first (element[0]) and the fifth (element[4])
2893    carry important information for decoding/encoding.  Before
2894    decoding/encoding, this information should be set in fields of a
2895    structure of type `coding_system'.
2896
2897    A value of property `coding-system' can be a symbol of another
2898    subsidiary coding-system.  In that case, Emacs gets coding-vector
2899    from that symbol.
2900
2901    `element[0]' contains information to be set in `coding->type'.  The
2902    value and its meaning is as follows:
2903
2904    0 -- coding_type_emacs_mule
2905    1 -- coding_type_sjis
2906    2 -- coding_type_iso2022
2907    3 -- coding_type_big5
2908    4 -- coding_type_ccl encoder/decoder written in CCL;  5 -- coding_type_raw_text
2909    nil -- coding_type_no_conversion
2910    t -- coding_type_undecided (automatic conversion on decoding,
2911                                no-conversion on encoding)
2912
2913    `element[4]' contains information to be set in `coding->flags' and
2914    `coding->spec'.  The meaning varies by `coding->type'.
2915
2916    If `coding->type' is `coding_type_iso2022', element[4] is a vector
2917    of length 32 (of which the first 17 sub-elements are used now).
2918    Meanings of these sub-elements are:
2919
2920    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
2921         If the value is an integer of valid charset, the charset is
2922         assumed to be designated to graphic register N initially.
2923
2924         If the value is minus, it is a minus value of charset which
2925         reserves graphic register N, which means that the charset is
2926         not designated initially but should be designated to graphic
2927         register N just before encoding a character in that charset.
2928
2929         If the value is nil, graphic register N is never used on
2930         encoding.
2931
2932    sub-element[N] where N is 4 through 16: to be set in `coding->flags'
2933         Each value takes t or nil.  See the section ISO2022 of
2934         `coding.h' for more information.
2935
2936    If `coding->type' is `coding_type_big5', element[4] is t to denote
2937    BIG5-ETen or nil to denote BIG5-HKU.
2938
2939    If `coding->type' takes the other value, element[4] is ignored.
2940
2941    Emacs Lisp's coding system also carries information about format of
2942    end-of-line in a value of property `eol-type'.  If the value is
2943    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
2944    means CODING_EOL_CR.  If it is not integer, it should be a vector
2945    of subsidiary coding systems of which property `eol-type' has one
2946    of above values.
2947
2948 */
2949
2950 /* Extract information for decoding/encoding from the symbol
2951    CODING_SYSTEM and set it in CODING.  If CODING_SYSTEM is invalid
2952    (nil, or with malformed properties), CODING is set up so that no
2953    conversion is necessary and -1 is returned; otherwise return 0.  */
2954
2955 int
2956 setup_coding_system (coding_system, coding)
2957      Lisp_Object coding_system;
2958      struct coding_system *coding;
2959 {
2960   Lisp_Object coding_spec, coding_type, eol_type, plist;
2961   Lisp_Object val;
2962   int i;
2963
2964   /* Initialize some fields required for all kinds of coding systems.  */
2965   coding->symbol = coding_system;
2966   coding->common_flags = 0;
2967   coding->mode = 0;
2968   coding->heading_ascii = -1;
2969   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
2970   coding->composing = COMPOSITION_DISABLED;
2971   coding->cmp_data = NULL;
2972
2973   if (NILP (coding_system))
2974     goto label_invalid_coding_system;
2975
2976   coding_spec = Fget (coding_system, Qcoding_system);  /* The coding-vector.  */
2977
2978   if (!VECTORP (coding_spec)
2979       || XVECTOR (coding_spec)->size != 5
2980       || !CONSP (XVECTOR (coding_spec)->contents[3]))
2981     goto label_invalid_coding_system;
2982
2983   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
2984   if (VECTORP (eol_type))
2985     {
2986       coding->eol_type = CODING_EOL_UNDECIDED;
2987       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
2988     }
2989   else if (XFASTINT (eol_type) == 1)
2990     {
2991       coding->eol_type = CODING_EOL_CRLF;
2992       coding->common_flags
2993         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2994     }
2995   else if (XFASTINT (eol_type) == 2)
2996     {
2997       coding->eol_type = CODING_EOL_CR;
2998       coding->common_flags
2999         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3000     }
3001   else
3002     coding->eol_type = CODING_EOL_LF;
3003
3004   coding_type = XVECTOR (coding_spec)->contents[0];
3005   /* Try short cut: t means undecided, any other symbol no-conversion.  */
3006   if (SYMBOLP (coding_type))
3007     {
3008       if (EQ (coding_type, Qt))
3009         {
3010           coding->type = coding_type_undecided;
3011           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
3012         }
3013       else
3014         coding->type = coding_type_no_conversion;
3015       return 0;  /* NOTE(review): category_idx is not set on this path.  */
3016     }
3017
3018   /* Get values of coding system properties:
3019      `post-read-conversion', `pre-write-conversion',
3020      `translation-table-for-decode', `translation-table-for-encode'.  */
3021   plist = XVECTOR (coding_spec)->contents[3];
3022   /* Pre & post conversion functions should be disabled if
3023      inhibit_pre_post_conversion is nonzero.  This is the case when a code
3024      conversion function is called while those functions are running.  */
3025   if (! inhibit_pre_post_conversion)
3026     {
3027       coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
3028       coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
3029     }
3030   val = Fplist_get (plist, Qtranslation_table_for_decode);
3031   if (SYMBOLP (val))
3032     val = Fget (val, Qtranslation_table_for_decode);
3033   coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
3034   val = Fplist_get (plist, Qtranslation_table_for_encode);
3035   if (SYMBOLP (val))
3036     val = Fget (val, Qtranslation_table_for_encode);
3037   coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
3038   val = Fplist_get (plist, Qcoding_category);  /* Category is mandatory.  */
3039   if (!NILP (val))
3040     {
3041       val = Fget (val, Qcoding_category_index);
3042       if (INTEGERP (val))
3043         coding->category_idx = XINT (val);
3044       else
3045         goto label_invalid_coding_system;
3046     }
3047   else
3048     goto label_invalid_coding_system;
3049
3050   /* If the coding system has non-nil `composition' property, enable
3051      composition handling.  */
3052   val = Fplist_get (plist, Qcomposition);
3053   if (!NILP (val))
3054     coding->composing = COMPOSITION_NO;
3055
3056   switch (XFASTINT (coding_type))
3057     {
3058     case 0:
3059       coding->type = coding_type_emacs_mule;
3060       if (!NILP (coding->post_read_conversion))
3061         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
3062       if (!NILP (coding->pre_write_conversion))
3063         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
3064       break;
3065
3066     case 1:
3067       coding->type = coding_type_sjis;
3068       coding->common_flags
3069         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3070       break;
3071
3072     case 2:
3073       coding->type = coding_type_iso2022;
3074       coding->common_flags
3075         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3076       {
3077         Lisp_Object val, temp;
3078         Lisp_Object *flags;
3079         int i, charset, reg_bits = 0;
3080
3081         val = XVECTOR (coding_spec)->contents[4];
3082
3083         if (!VECTORP (val) || XVECTOR (val)->size != 32)
3084           goto label_invalid_coding_system;
3085
3086         flags = XVECTOR (val)->contents;
3087         coding->flags
3088           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
3089              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
3090              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
3091              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
3092              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
3093              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
3094              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
3095              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
3096              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
3097              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
3098              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
3099              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
3100              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
3101              );
3102
3103         /* Invoke graphic register 0 to plane 0.  */
3104         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
3105         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
3106         CODING_SPEC_ISO_INVOCATION (coding, 1)
3107           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
3108         /* Not single shifting at first.  */
3109         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
3110         /* Beginning of buffer should also be regarded as bol. */
3111         CODING_SPEC_ISO_BOL (coding) = 1;
3112
3113         for (charset = 0; charset <= MAX_CHARSET; charset++)
3114           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
3115         val = Vcharset_revision_alist;
3116         while (CONSP (val))
3117           {
3118             charset = get_charset_id (Fcar_safe (XCAR (val)));
3119             if (charset >= 0
3120                 && (temp = Fcdr_safe (XCAR (val)), INTEGERP (temp))
3121                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
3122               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
3123             val = XCDR (val);
3124           }
3125
3126         /* Check FLAGS[REG] (REG = 0, 1, 2, 3) and decide designations.
3127            FLAGS[REG] can be one of below:
3128                 integer CHARSET: CHARSET occupies register REG,
3129                 t: designate nothing to REG initially, but can be used
3130                   by any charsets,
3131                 list of integer, nil, or t: designate the first
3132                   element (if integer) to REG initially, the remaining
3133                   elements (if integer) are designated to REG on request,
3134                   if an element is t, REG can be used by any charsets,
3135                 nil: REG is never used.  */
3136         for (charset = 0; charset <= MAX_CHARSET; charset++)
3137           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3138             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
3139         for (i = 0; i < 4; i++)
3140           {
3141             if (INTEGERP (flags[i])
3142                 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
3143                 || (charset = get_charset_id (flags[i])) >= 0)
3144               {
3145                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3146                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
3147               }
3148             else if (EQ (flags[i], Qt))
3149               {
3150                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3151                 reg_bits |= 1 << i;
3152                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3153               }
3154             else if (CONSP (flags[i]))
3155               {
3156                 Lisp_Object tail;
3157                 tail = flags[i];
3158
3159                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3160                 if (INTEGERP (XCAR (tail))
3161                     && (charset = XINT (XCAR (tail)),
3162                         CHARSET_VALID_P (charset))
3163                     || (charset = get_charset_id (XCAR (tail))) >= 0)
3164                   {
3165                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3166                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
3167                   }
3168                 else
3169                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3170                 tail = XCDR (tail);
3171                 while (CONSP (tail))
3172                   {
3173                     if (INTEGERP (XCAR (tail))
3174                         && (charset = XINT (XCAR (tail)),
3175                             CHARSET_VALID_P (charset))
3176                         || (charset = get_charset_id (XCAR (tail))) >= 0)
3177                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3178                         = i;
3179                     else if (EQ (XCAR (tail), Qt))
3180                       reg_bits |= 1 << i;
3181                     tail = XCDR (tail);
3182                   }
3183               }
3184             else
3185               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3186
3187             CODING_SPEC_ISO_DESIGNATION (coding, i)
3188               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
3189           }
3190
3191         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
3192           {
3193             /* REG 1 can be used only by locking shift in 7-bit env.  */
3194             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
3195               reg_bits &= ~2;
3196             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3197               /* Without any shifting, only REG 0 and 1 can be used.  */
3198               reg_bits &= 3;
3199           }
3200
3201         if (reg_bits)
3202           for (charset = 0; charset <= MAX_CHARSET; charset++)
3203             {
3204               if (CHARSET_VALID_P (charset)
3205                   && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3206                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
3207                 {
3208                   /* There exist some default graphic registers to be
3209                      used by CHARSET.  */
3210
3211                   /* We had better avoid designating a charset of
3212                      CHARS96 to REG 0 as far as possible.  */
3213                   if (CHARSET_CHARS (charset) == 96)
3214                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3215                       = (reg_bits & 2
3216                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3217                   else
3218                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3219                       = (reg_bits & 1
3220                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3221                 }
3222             }
3223       }
3224       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3225       coding->spec.iso2022.last_invalid_designation_register = -1;
3226       break;
3227
3228     case 3:
3229       coding->type = coding_type_big5;
3230       coding->common_flags
3231         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3232       coding->flags
3233         = (NILP (XVECTOR (coding_spec)->contents[4])
3234            ? CODING_FLAG_BIG5_HKU
3235            : CODING_FLAG_BIG5_ETEN);
3236       break;
3237
3238     case 4:
3239       coding->type = coding_type_ccl;
3240       coding->common_flags
3241         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3242       {
3243         val = XVECTOR (coding_spec)->contents[4];
3244         if (! CONSP (val)
3245             || setup_ccl_program (&(coding->spec.ccl.decoder),
3246                                   XCAR (val)) < 0
3247             || setup_ccl_program (&(coding->spec.ccl.encoder),
3248                                   XCDR (val)) < 0)
3249           goto label_invalid_coding_system;
3250
3251         bzero (coding->spec.ccl.valid_codes, 256);
3252         val = Fplist_get (plist, Qvalid_codes);
3253         if (CONSP (val))
3254           {
3255             Lisp_Object this;
3256
3257             for (; CONSP (val); val = XCDR (val))
3258               {
3259                 this = XCAR (val);
3260                 if (INTEGERP (this)
3261                     && XINT (this) >= 0 && XINT (this) < 256)
3262                   coding->spec.ccl.valid_codes[XINT (this)] = 1;
3263                 else if (CONSP (this)
3264                          && INTEGERP (XCAR (this))
3265                          && INTEGERP (XCDR (this)))
3266                   {
3267                     int start = XINT (XCAR (this));
3268                     int end = XINT (XCDR (this));
3269
3270                     if (start >= 0 && start <= end && end < 256)
3271                       while (start <= end)
3272                         coding->spec.ccl.valid_codes[start++] = 1;
3273                   }
3274               }
3275           }
3276       }
3277       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3278       coding->spec.ccl.cr_carryover = 0;
3279       break;
3280
3281     case 5:
3282       coding->type = coding_type_raw_text;
3283       break;
3284
3285     default:
3286       goto label_invalid_coding_system;
3287     }
3288   return 0;
3289
3290  label_invalid_coding_system:  /* Fall back to no-conversion with LF eol.  */
3291   coding->type = coding_type_no_conversion;
3292   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3293   coding->common_flags = 0;
3294   coding->eol_type = CODING_EOL_LF;
3295   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3296   return -1;
3297 }
3298
3299 /* Release every block of composition information chained to CODING.  */
3300
3301 void
3302 coding_free_composition_data (coding)
3303      struct coding_system *coding;
3304 {
3305   struct composition_data *block = coding->cmp_data, *following;
3306
3307   if (block == NULL)
3308     return;
3309   /* The blocks form a chain: walk back to its first block, then
3310      release them front to back.  */
3311   for (; block->prev != NULL; block = block->prev)
3312     continue;
3313   for (; block != NULL; block = following)
3314     {
3315       /* Fetch the link before the block holding it is released.  */
3316       following = block->next;
3317       xfree (block);
3318     }
3319   coding->cmp_data = NULL;
3320 }
3321
3322 /* Store POS in the `char_offset' member of coding->cmp_data and of
3323    every block reachable from it through `next'.  */
3324
3325 void
3326 coding_adjust_composition_offset (coding, pos)
3327      struct coding_system *coding;
3328      int pos;
3329 {
3330   struct composition_data *block = coding->cmp_data;
3331
3332   while (block != NULL)
3333     block->char_offset = pos, block = block->next;
3334 }
3335
3336 /* Re-initialize CODING as raw-text, or as the subsidiary of raw-text
3337    that matches the eol_type already stored in CODING.  CODING must
3338    have been set up for some coding system beforehand.  Nothing is
3339    done if CODING is already of type raw-text.  */
3340
3341 void
3342 setup_raw_text_coding_system (coding)
3343      struct coding_system *coding;
3344 {
3345   Lisp_Object eol_variants;
3346
3347   if (coding->type == coding_type_raw_text)
3348     return;
3349
3350   coding->type = coding_type_raw_text;
3351   coding->symbol = Qraw_text;
3352   if (coding->eol_type != CODING_EOL_UNDECIDED)
3353     {
3354       /* Use the `eol-type' property of raw-text only when it is a
3355          vector of three subsidiaries; it is indexed by eol_type.  */
3356       eol_variants = Fget (Qraw_text, Qeol_type);
3357       if (VECTORP (eol_variants) && XVECTOR (eol_variants)->size == 3)
3358         coding->symbol
3359           = XVECTOR (eol_variants)->contents[coding->eol_type];
3360     }
3361   setup_coding_system (coding->symbol, coding);
3362 }
3363
3364 /* Emacs has a mechanism to automatically detect a coding system if it
3365    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
3366    it's impossible to distinguish some coding systems accurately
3367    because they use the same range of codes.  So, at first, coding
3368    systems are categorized as follows:
3369
3370    o coding-category-emacs-mule
3371
3372         The category for a coding system which has the same code range
3373         as Emacs' internal format.  Assigned the coding-system (Lisp
3374         symbol) `emacs-mule' by default.
3375
3376    o coding-category-sjis
3377
3378         The category for a coding system which has the same code range
3379         as SJIS.  Assigned the coding-system (Lisp
3380         symbol) `japanese-shift-jis' by default.
3381
3382    o coding-category-iso-7
3383
3384         The category for a coding system which has the same code range
3385         as ISO2022 of 7-bit environment.  This doesn't use any locking
3386         shift and single shift functions.  This can encode/decode all
3387         charsets.  Assigned the coding-system (Lisp symbol)
3388         `iso-2022-7bit' by default.
3389
3390    o coding-category-iso-7-tight
3391
3392         Same as coding-category-iso-7 except that this can
3393         encode/decode only the specified charsets.
3394
3395    o coding-category-iso-8-1
3396
3397         The category for a coding system which has the same code range
3398         as ISO2022 of 8-bit environment and graphic plane 1 used only
3399         for DIMENSION1 charset.  This doesn't use any locking shift
3400         and single shift functions.  Assigned the coding-system (Lisp
3401         symbol) `iso-latin-1' by default.
3402
3403    o coding-category-iso-8-2
3404
3405         The category for a coding system which has the same code range
3406         as ISO2022 of 8-bit environment and graphic plane 1 used only
3407         for DIMENSION2 charset.  This doesn't use any locking shift
3408         and single shift functions.  Assigned the coding-system (Lisp
3409         symbol) `japanese-iso-8bit' by default.
3410
3411    o coding-category-iso-7-else
3412
3413         The category for a coding system which has the same code range
3414         as ISO2022 of 7-bit environment but uses locking shift or
3415         single shift functions.  Assigned the coding-system (Lisp
3416         symbol) `iso-2022-7bit-lock' by default.
3417
3418    o coding-category-iso-8-else
3419
3420         The category for a coding system which has the same code range
3421         as ISO2022 of 8-bit environment but uses locking shift or
3422         single shift functions.  Assigned the coding-system (Lisp
3423         symbol) `iso-2022-8bit-ss2' by default.
3424
3425    o coding-category-big5
3426
3427         The category for a coding system which has the same code range
3428         as BIG5.  Assigned the coding-system (Lisp symbol)
3429         `cn-big5' by default.
3430
3431    o coding-category-utf-8
3432
3433         The category for a coding system which has the same code range
3434         as UTF-8 (cf. RFC2279).  Assigned the coding-system (Lisp
3435         symbol) `utf-8' by default.
3436
3437    o coding-category-utf-16-be
3438
3439         The category for a coding system in which a text has an
3440         Unicode signature (cf. Unicode Standard) in the order of BIG
3441         endian at the head.  Assigned the coding-system (Lisp symbol)
3442         `utf-16-be' by default.
3443
3444    o coding-category-utf-16-le
3445
3446         The category for a coding system in which a text has an
3447         Unicode signature (cf. Unicode Standard) in the order of
3448         LITTLE endian at the head.  Assigned the coding-system (Lisp
3449         symbol) `utf-16-le' by default.
3450
3451    o coding-category-ccl
3452
3453         The category for a coding system of which encoder/decoder is
3454         written in CCL programs.  The default value is nil, i.e., no
3455         coding system is assigned.
3456
3457    o coding-category-binary
3458
3459         The category for a coding system not categorized in any of the
3460         above.  Assigned the coding-system (Lisp symbol)
3461         `no-conversion' by default.
3462
3463    Each of them is a Lisp symbol and the value is an actual
3464    `coding-system's (this is also a Lisp symbol) assigned by a user.
3465    What Emacs does actually is to detect a category of coding system.
3466    Then, it uses a `coding-system' assigned to it.  If Emacs can't
3467    decide only one possible category, it selects a category of the
3468    highest priority.  Priorities of categories are also specified by a
3469    user in a Lisp variable `coding-category-list'.
3470
3471 */
3472
3473 static
3474 int ascii_skip_code[256];  /* Nonzero for bytes detect_coding_mask skips as ASCII.  */
3475
3476 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
3477    Return 0 if nothing other than ASCII is found.  Otherwise return an
3478    integer in which flag bits CODING_CATEGORY_MASK_XXX (see `coding.h')
3479    are set for the possible categories.  If PRIORITIES is non-NULL, it
3480    should point to the table `coding_priorities', and only the entry
3481    of the highest priority that matches is returned (raw-text if none
3482    does); if it is NULL, the raw-text and binary bits are always set.
3483
3484    The number of leading bytes skipped as ASCII is stored in *SKIP.  */
3485
3486 static int
3487 detect_coding_mask (source, src_bytes, priorities, skip)
3488      unsigned char *source;
3489      int src_bytes, *priorities, *skip;
3490 {
3491   register unsigned char c;
3492   unsigned char *src = source, *src_end = source + src_bytes;
3493   unsigned int mask, utf16_examined_p, iso2022_examined_p;
3494   int i;
3495
3496   /* At first, skip all ASCII characters and control characters except
3497      for three ISO2022 specific control characters.  */
3498   ascii_skip_code[ISO_CODE_SO] = 0;
3499   ascii_skip_code[ISO_CODE_SI] = 0;
3500   ascii_skip_code[ISO_CODE_ESC] = 0;
3501
3502  label_loop_detect_coding:
3503   while (src < src_end && ascii_skip_code[*src]) src++;
3504   *skip = src - source;
3505
3506   if (src >= src_end)
3507     /* We found nothing other than ASCII.  There's nothing to do.  */
3508     return 0;
3509
3510   c = *src;
3511   /* The text seems to be encoded in some multilingual coding system.
3512      Now, try to find in which coding system the text is encoded.  */
3513   if (c < 0x80)
3514     {
3515       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
3516       /* C is an ISO2022 specific control code of C0.  */
3517       mask = detect_coding_iso2022 (src, src_end);
3518       if (mask == 0)
3519         {
3520           /* No valid ISO2022 code follows C.  Skip such codes and try again.  */
3521           src++;
3522           if (c == ISO_CODE_ESC)
3523             ascii_skip_code[ISO_CODE_ESC] = 1;
3524           else
3525             ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
3526           goto label_loop_detect_coding;
3527         }
3528       if (priorities)
3529         {
3530           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3531             {
3532               if (mask & priorities[i])
3533                 return priorities[i];
3534             }
3535           return CODING_CATEGORY_MASK_RAW_TEXT;
3536         }
3537     }
3538   else
3539     {
3540       int try;
3541
3542       if (c < 0xA0)
3543         {
3544           /* C is the first byte of SJIS character code,
3545              or a leading-code of Emacs' internal format (emacs-mule),
3546              or the first byte of UTF-16.  */
3547           try = (CODING_CATEGORY_MASK_SJIS
3548                   | CODING_CATEGORY_MASK_EMACS_MULE
3549                   | CODING_CATEGORY_MASK_UTF_16_BE
3550                   | CODING_CATEGORY_MASK_UTF_16_LE);
3551
3552           /* Or, if C is a special latin extra code, or an ISO2022 C1
3553              control code (SS2 or SS3), or a control-sequence-introducer
3554              (CSI) followed by `]', `0]', `1]' or `2]' (SRC still points
3555              at C), we should also consider ISO2022 codings.  */
3556           if ((VECTORP (Vlatin_extra_code_table)
3557                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3558               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
3559               || (c == ISO_CODE_CSI
3560                   && (src + 1 < src_end
3561                       && (src[1] == ']'
3562                           || ((src[1] == '0' || src[1] == '1' || src[1] == '2')
3563                               && src + 2 < src_end
3564                               && src[2] == ']')))))
3565             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
3566                      | CODING_CATEGORY_MASK_ISO_8BIT);
3567         }
3568       else
3569         /* C is a character of ISO2022 in graphic plane right,
3570            or a SJIS's 1-byte character code (i.e. JISX0201),
3571            or the first byte of BIG5's 2-byte code,
3572            or the first byte of UTF-8/16.  */
3573         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
3574                 | CODING_CATEGORY_MASK_ISO_8BIT
3575                 | CODING_CATEGORY_MASK_SJIS
3576                 | CODING_CATEGORY_MASK_BIG5
3577                 | CODING_CATEGORY_MASK_UTF_8
3578                 | CODING_CATEGORY_MASK_UTF_16_BE
3579                 | CODING_CATEGORY_MASK_UTF_16_LE);
3580
3581       /* Or, we may have to consider the possibility of CCL.  */
3582       if (coding_system_table[CODING_CATEGORY_IDX_CCL]
3583           && (coding_system_table[CODING_CATEGORY_IDX_CCL]
3584               ->spec.ccl.valid_codes)[c])
3585         try |= CODING_CATEGORY_MASK_CCL;
3586
3587       mask = 0;
3588       utf16_examined_p = iso2022_examined_p = 0;
3589       if (priorities)
3590         {
3591           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3592             {
3593               if (!iso2022_examined_p
3594                   && (priorities[i] & try & CODING_CATEGORY_MASK_ISO))
3595                 {
3596                   mask |= detect_coding_iso2022 (src, src_end);
3597                   iso2022_examined_p = 1;
3598                 }
3599               else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
3600                 mask |= detect_coding_sjis (src, src_end);
3601               else if (priorities[i] & try & CODING_CATEGORY_MASK_UTF_8)
3602                 mask |= detect_coding_utf_8 (src, src_end);
3603               else if (!utf16_examined_p
3604                        && (priorities[i] & try &
3605                            CODING_CATEGORY_MASK_UTF_16_BE_LE))
3606                 {
3607                   mask |= detect_coding_utf_16 (src, src_end);
3608                   utf16_examined_p = 1;
3609                 }
3610               else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
3611                 mask |= detect_coding_big5 (src, src_end);
3612               else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
3613                 mask |= detect_coding_emacs_mule (src, src_end);
3614               else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
3615                 mask |= detect_coding_ccl (src, src_end);
3616               else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
3617                 mask |= CODING_CATEGORY_MASK_RAW_TEXT;
3618               else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
3619                 mask |= CODING_CATEGORY_MASK_BINARY;
3620               if (mask & priorities[i])
3621                 return priorities[i];
3622             }
3623           return CODING_CATEGORY_MASK_RAW_TEXT;
3624         }
3625       if (try & CODING_CATEGORY_MASK_ISO)
3626         mask |= detect_coding_iso2022 (src, src_end);
3627       if (try & CODING_CATEGORY_MASK_SJIS)
3628         mask |= detect_coding_sjis (src, src_end);
3629       if (try & CODING_CATEGORY_MASK_BIG5)
3630         mask |= detect_coding_big5 (src, src_end);
3631       if (try & CODING_CATEGORY_MASK_UTF_8)
3632         mask |= detect_coding_utf_8 (src, src_end);
3633       if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE)
3634         mask |= detect_coding_utf_16 (src, src_end);
3635       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
3636         mask |= detect_coding_emacs_mule (src, src_end);
3637       if (try & CODING_CATEGORY_MASK_CCL)
3638         mask |= detect_coding_ccl (src, src_end);
3639     }
3640   return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
3641 }
3642
3643 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
3644    The information of the detected coding system is set in CODING.  */
3645
3646 void
3647 detect_coding (coding, src, src_bytes)
3648      struct coding_system *coding;
3649      unsigned char *src;
3650      int src_bytes;
3651 {
3652   unsigned int idx;
3653   int skip, mask, i;
3654   Lisp_Object val;
3655
3656   val = Vcoding_category_list;
3657   mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip);
3658   coding->heading_ascii = skip;
3659
3660   if (!mask) return;
3661
3662   /* We found a single coding system of the highest priority in MASK.  */
3663   idx = 0;
3664   while (mask && ! (mask & 1)) mask >>= 1, idx++;
3665   if (! mask)
3666     idx = CODING_CATEGORY_IDX_RAW_TEXT;
3667
3668   val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[idx])->value;
3669
3670   if (coding->eol_type != CODING_EOL_UNDECIDED)
3671     {
3672       Lisp_Object tmp;
3673
3674       tmp = Fget (val, Qeol_type);
3675       if (VECTORP (tmp))
3676         val = XVECTOR (tmp)->contents[coding->eol_type];
3677     }
3678
3679   /* Setup this new coding system while preserving some slots.  */
3680   {
3681     int src_multibyte = coding->src_multibyte;
3682     int dst_multibyte = coding->dst_multibyte;
3683
3684     setup_coding_system (val, coding);
3685     coding->src_multibyte = src_multibyte;
3686     coding->dst_multibyte = dst_multibyte;
3687     coding->heading_ascii = skip;
3688   }
3689 }
3690
3691 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
3692    SOURCE is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
3693    CODING_EOL_CR, and CODING_EOL_UNDECIDED.
3694
3695    How many non-eol characters are at the head is returned as *SKIP.  */
3696
3697 #define MAX_EOL_CHECK_COUNT 3
3698
3699 static int
3700 detect_eol_type (source, src_bytes, skip)
3701      unsigned char *source;
3702      int src_bytes, *skip;
3703 {
3704   unsigned char *src = source, *src_end = src + src_bytes;
3705   unsigned char c;
3706   int total = 0;                /* How many end-of-lines are found so far.  */
3707   int eol_type = CODING_EOL_UNDECIDED;
3708   int this_eol_type;
3709
3710   *skip = 0;
3711
3712   while (src < src_end && total < MAX_EOL_CHECK_COUNT)
3713     {
3714       c = *src++;
3715       if (c == '\n' || c == '\r')
3716         {
3717           if (*skip == 0)
3718             *skip = src - 1 - source;
3719           total++;
3720           if (c == '\n')
3721             this_eol_type = CODING_EOL_LF;
3722           else if (src >= src_end || *src != '\n')
3723             this_eol_type = CODING_EOL_CR;
3724           else
3725             this_eol_type = CODING_EOL_CRLF, src++;
3726
3727           if (eol_type == CODING_EOL_UNDECIDED)
3728             /* This is the first end-of-line.  */
3729             eol_type = this_eol_type;
3730           else if (eol_type != this_eol_type)
3731             {
3732               /* The found type is different from what found before.  */
3733               eol_type = CODING_EOL_INCONSISTENT;
3734               break;
3735             }
3736         }
3737     }
3738
3739   if (*skip == 0)
3740     *skip = src_end - source;
3741   return eol_type;
3742 }
3743
3744 /* Like detect_eol_type, but detect EOL type in 2-octet
3745    big-endian/little-endian format for coding systems utf-16-be and
3746    utf-16-le.  */
3747
3748 static int
3749 detect_eol_type_in_2_octet_form (source, src_bytes, skip, big_endian_p)
3750      unsigned char *source;
3751      int src_bytes, *skip;
3752 {
3753   unsigned char *src = source, *src_end = src + src_bytes;
3754   unsigned int c1, c2;
3755   int total = 0;                /* How many end-of-lines are found so far.  */
3756   int eol_type = CODING_EOL_UNDECIDED;
3757   int this_eol_type;
3758   int msb, lsb;
3759
3760   if (big_endian_p)
3761     msb = 0, lsb = 1;
3762   else
3763     msb = 1, lsb = 0;
3764
3765   *skip = 0;
3766
3767   while ((src + 1) < src_end && total < MAX_EOL_CHECK_COUNT)
3768     {
3769       c1 = (src[msb] << 8) | (src[lsb]);
3770       src += 2;
3771
3772       if (c1 == '\n' || c1 == '\r')
3773         {
3774           if (*skip == 0)
3775             *skip = src - 2 - source;
3776           total++;
3777           if (c1 == '\n')
3778             {
3779               this_eol_type = CODING_EOL_LF;
3780             }
3781           else
3782             {
3783               if ((src + 1) >= src_end)
3784                 {
3785                   this_eol_type = CODING_EOL_CR;
3786                 }
3787               else
3788                 {
3789                   c2 = (src[msb] << 8) | (src[lsb]);
3790                   if (c2 == '\n')
3791                     this_eol_type = CODING_EOL_CRLF, src += 2;
3792                   else
3793                     this_eol_type = CODING_EOL_CR;
3794                 }
3795             }
3796
3797           if (eol_type == CODING_EOL_UNDECIDED)
3798             /* This is the first end-of-line.  */
3799             eol_type = this_eol_type;
3800           else if (eol_type != this_eol_type)
3801             {
3802               /* The found type is different from what found before.  */
3803               eol_type = CODING_EOL_INCONSISTENT;
3804               break;
3805             }
3806         }
3807     }
3808
3809   if (*skip == 0)
3810     *skip = src_end - source;
3811   return eol_type;
3812 }
3813
3814 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
3815    is encoded.  If it detects an appropriate format of end-of-line, it
3816    sets the information in *CODING.  */
3817
3818 void
3819 detect_eol (coding, src, src_bytes)
3820      struct coding_system *coding;
3821      unsigned char *src;
3822      int src_bytes;
3823 {
3824   Lisp_Object val;
3825   int skip;
3826   int eol_type;
3827
3828   switch (coding->category_idx)
3829     {
3830     case CODING_CATEGORY_IDX_UTF_16_BE:
3831       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 1);
3832       break;
3833     case CODING_CATEGORY_IDX_UTF_16_LE:
3834       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 0);
3835       break;
3836     default:
3837       eol_type = detect_eol_type (src, src_bytes, &skip);
3838       break;
3839     }
3840
3841   if (coding->heading_ascii > skip)
3842     coding->heading_ascii = skip;
3843   else
3844     skip = coding->heading_ascii;
3845
3846   if (eol_type == CODING_EOL_UNDECIDED)
3847     return;
3848   if (eol_type == CODING_EOL_INCONSISTENT)
3849     {
3850 #if 0
3851       /* This code is suppressed until we find a better way to
3852          distinguish raw text file and binary file.  */
3853
3854       /* If we have already detected that the coding is raw-text, the
3855          coding should actually be no-conversion.  */
3856       if (coding->type == coding_type_raw_text)
3857         {
3858           setup_coding_system (Qno_conversion, coding);
3859           return;
3860         }
3861       /* Else, let's decode only text code anyway.  */
3862 #endif /* 0 */
3863       eol_type = CODING_EOL_LF;
3864     }
3865
3866   val = Fget (coding->symbol, Qeol_type);
3867   if (VECTORP (val) && XVECTOR (val)->size == 3)
3868     {
3869       int src_multibyte = coding->src_multibyte;
3870       int dst_multibyte = coding->dst_multibyte;
3871
3872       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
3873       coding->src_multibyte = src_multibyte;
3874       coding->dst_multibyte = dst_multibyte;
3875       coding->heading_ascii = skip;
3876     }
3877 }
3878
/* Number of extra bytes added to every conversion buffer size
   computed below.  */
#define CONVERSION_BUFFER_EXTRA_ROOM 256

/* Factor by which the source size is multiplied to get the size of a
   decoding buffer for CODING (a pointer to struct coding_system): 3
   for ISO2022, the magnification recorded in the CCL decoder for CCL
   based coding systems, 2 otherwise.  The argument is parenthesized
   so that any pointer expression may be passed; note that it is
   evaluated more than once.  */
#define DECODING_BUFFER_MAG(coding)                             \
  ((coding)->type == coding_type_iso2022                        \
   ? 3                                                          \
   : ((coding)->type == coding_type_ccl                         \
      ? (coding)->spec.ccl.decoder.buf_magnification            \
      : 2))
3887
3888 /* Return maximum size (bytes) of a buffer enough for decoding
3889    SRC_BYTES of text encoded in CODING.  */
3890
3891 int
3892 decoding_buffer_size (coding, src_bytes)
3893      struct coding_system *coding;
3894      int src_bytes;
3895 {
3896   return (src_bytes * DECODING_BUFFER_MAG (coding)
3897           + CONVERSION_BUFFER_EXTRA_ROOM);
3898 }
3899
3900 /* Return maximum size (bytes) of a buffer enough for encoding
3901    SRC_BYTES of text to CODING.  */
3902
3903 int
3904 encoding_buffer_size (coding, src_bytes)
3905      struct coding_system *coding;
3906      int src_bytes;
3907 {
3908   int magnification;
3909
3910   if (coding->type == coding_type_ccl)
3911     magnification = coding->spec.ccl.encoder.buf_magnification;
3912   else if (CODING_REQUIRE_ENCODING (coding))
3913     magnification = 3;
3914   else
3915     magnification = 1;
3916
3917   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
3918 }
3919
/* Scratch buffer used while converting text.  It is set up by
   allocate_conversion_buffer, grown by extend_conversion_buffer and
   released by free_conversion_buffer.  */
struct conversion_buffer
{
  /* Number of bytes available at DATA.  */
  int size;
  /* 1 if DATA was obtained from alloca, 0 if it came from xmalloc.  */
  int on_stack;
  /* The storage itself.  */
  unsigned char *data;
};
3927
/* Don't use alloca for allocating memory space larger than this, lest
   we overflow their stack.  The value is parenthesized so that the
   macro can be used safely inside any arithmetic expression.  */
#define MAX_ALLOCA (16*1024)

/* Allocate LEN bytes of memory for BUF (struct conversion_buffer, not
   a pointer to it).  Requests below MAX_ALLOCA are served by alloca,
   anything else by xmalloc; BUF.on_stack records which one was used
   and BUF.size is set to LEN.

   LEN is evaluated more than once, so it must not have side
   effects.  */
#define allocate_conversion_buffer(buf, len)            \
  do {                                                  \
    if ((len) < MAX_ALLOCA)                             \
      {                                                 \
        (buf).data = (unsigned char *) alloca (len);    \
        (buf).on_stack = 1;                             \
      }                                                 \
    else                                                \
      {                                                 \
        (buf).data = (unsigned char *) xmalloc (len);   \
        (buf).on_stack = 0;                             \
      }                                                 \
    (buf).size = (len);                                 \
  } while (0)
3947
3948 /* Double the allocated memory for *BUF.  */
3949 static void
3950 extend_conversion_buffer (buf)
3951      struct conversion_buffer *buf;
3952 {
3953   if (buf->on_stack)
3954     {
3955       unsigned char *save = buf->data;
3956       buf->data = (unsigned char *) xmalloc (buf->size * 2);
3957       bcopy (save, buf->data, buf->size);
3958       buf->on_stack = 0;
3959     }
3960   else
3961     {
3962       buf->data = (unsigned char *) xrealloc (buf->data, buf->size * 2);
3963     }
3964   buf->size *= 2;
3965 }
3966
3967 /* Free the allocated memory for BUF if it is not on stack.  */
3968 static void
3969 free_conversion_buffer (buf)
3970      struct conversion_buffer *buf;
3971 {
3972   if (!buf->on_stack)
3973     xfree (buf->data);
3974 }
3975
3976 int
3977 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
3978      struct coding_system *coding;
3979      unsigned char *source, *destination;
3980      int src_bytes, dst_bytes, encodep;
3981 {
3982   struct ccl_program *ccl
3983     = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
3984   int result;
3985
3986   ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
3987   if (encodep)
3988     ccl->eol_type = coding->eol_type;
3989   ccl->multibyte = coding->src_multibyte;
3990   coding->produced = ccl_driver (ccl, source, destination,
3991                                  src_bytes, dst_bytes, &(coding->consumed));
3992   if (encodep)
3993     coding->produced_char = coding->produced;
3994   else
3995     {
3996       int bytes
3997         = dst_bytes ? dst_bytes : source + coding->consumed - destination;
3998       coding->produced = str_as_multibyte (destination, bytes,
3999                                            coding->produced,
4000                                            &(coding->produced_char));
4001     }
4002
4003   switch (ccl->status)
4004     {
4005     case CCL_STAT_SUSPEND_BY_SRC:
4006       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
4007       break;
4008     case CCL_STAT_SUSPEND_BY_DST:
4009       coding->result = CODING_FINISH_INSUFFICIENT_DST;
4010       break;
4011     case CCL_STAT_QUIT:
4012     case CCL_STAT_INVALID_CMD:
4013       coding->result = CODING_FINISH_INTERRUPT;
4014       break;
4015     default:
4016       coding->result = CODING_FINISH_NORMAL;
4017       break;
4018     }
4019   return coding->result;
4020 }
4021
4022 /* Decode EOL format of the text at PTR of BYTES length destructively
4023    according to CODING->eol_type.  This is called after the CCL
4024    program produced a decoded text at PTR.  If we do CRLF->LF
4025    conversion, update CODING->produced and CODING->produced_char.  */
4026
4027 static void
4028 decode_eol_post_ccl (coding, ptr, bytes)
4029      struct coding_system *coding;
4030      unsigned char *ptr;
4031      int bytes;
4032 {
4033   Lisp_Object val, saved_coding_symbol;
4034   unsigned char *pend = ptr + bytes;
4035   int dummy;
4036
4037   /* Remember the current coding system symbol.  We set it back when
4038      an inconsistent EOL is found so that `last-coding-system-used' is
4039      set to the coding system that doesn't specify EOL conversion.  */
4040   saved_coding_symbol = coding->symbol;
4041
4042   coding->spec.ccl.cr_carryover = 0;
4043   if (coding->eol_type == CODING_EOL_UNDECIDED)
4044     {
4045       /* Here, to avoid the call of setup_coding_system, we directly
4046          call detect_eol_type.  */
4047       coding->eol_type = detect_eol_type (ptr, bytes, &dummy);
4048       if (coding->eol_type == CODING_EOL_INCONSISTENT)
4049         coding->eol_type = CODING_EOL_LF;
4050       if (coding->eol_type != CODING_EOL_UNDECIDED)
4051         {
4052           val = Fget (coding->symbol, Qeol_type);
4053           if (VECTORP (val) && XVECTOR (val)->size == 3)
4054             coding->symbol = XVECTOR (val)->contents[coding->eol_type];
4055         }
4056       coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4057     }
4058
4059   if (coding->eol_type == CODING_EOL_LF
4060       || coding->eol_type == CODING_EOL_UNDECIDED)
4061     {
4062       /* We have nothing to do.  */
4063       ptr = pend;
4064     }
4065   else if (coding->eol_type == CODING_EOL_CRLF)
4066     {
4067       unsigned char *pstart = ptr, *p = ptr;
4068
4069       if (! (coding->mode & CODING_MODE_LAST_BLOCK)
4070           && *(pend - 1) == '\r')
4071         {
4072           /* If the last character is CR, we can't handle it here
4073              because LF will be in the not-yet-decoded source text.
4074              Recorded that the CR is not yet processed.  */
4075           coding->spec.ccl.cr_carryover = 1;
4076           coding->produced--;
4077           coding->produced_char--;
4078           pend--;
4079         }
4080       while (ptr < pend)
4081         {
4082           if (*ptr == '\r')
4083             {
4084               if (ptr + 1 < pend && *(ptr + 1) == '\n')
4085                 {
4086                   *p++ = '\n';
4087                   ptr += 2;
4088                 }
4089               else
4090                 {
4091                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4092                     goto undo_eol_conversion;
4093                   *p++ = *ptr++;
4094                 }
4095             }
4096           else if (*ptr == '\n'
4097                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4098             goto undo_eol_conversion;
4099           else
4100             *p++ = *ptr++;
4101           continue;
4102
4103         undo_eol_conversion:
4104           /* We have faced with inconsistent EOL format at PTR.
4105              Convert all LFs before PTR back to CRLFs.  */
4106           for (p--, ptr--; p >= pstart; p--)
4107             {
4108               if (*p == '\n')
4109                 *ptr-- = '\n', *ptr-- = '\r';
4110               else
4111                 *ptr-- = *p;
4112             }
4113           /*  If carryover is recorded, cancel it because we don't
4114               convert CRLF anymore.  */
4115           if (coding->spec.ccl.cr_carryover)
4116             {
4117               coding->spec.ccl.cr_carryover = 0;
4118               coding->produced++;
4119               coding->produced_char++;
4120               pend++;
4121             }
4122           p = ptr = pend;
4123           coding->eol_type = CODING_EOL_LF;
4124           coding->symbol = saved_coding_symbol;
4125         }
4126       if (p < pend)
4127         {
4128           /* As each two-byte sequence CRLF was converted to LF, (PEND
4129              - P) is the number of deleted characters.  */
4130           coding->produced -= pend - p;
4131           coding->produced_char -= pend - p;
4132         }
4133     }
4134   else                  /* i.e. coding->eol_type == CODING_EOL_CR */
4135     {
4136       unsigned char *p = ptr;
4137
4138       for (; ptr < pend; ptr++)
4139         {
4140           if (*ptr == '\r')
4141             *ptr = '\n';
4142           else if (*ptr == '\n'
4143                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4144             {
4145               for (; p < ptr; p++)
4146                 {
4147                   if (*p == '\n')
4148                     *p = '\r';
4149                 }
4150               ptr = pend;
4151               coding->eol_type = CODING_EOL_LF;
4152               coding->symbol = saved_coding_symbol;
4153             }
4154         }
4155     }
4156 }
4157
4158 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
4159    decoding, it may detect coding system and format of end-of-line if
4160    those are not yet decided.  The source should be unibyte, the
4161    result is multibyte if CODING->dst_multibyte is nonzero, else
4162    unibyte.  */
4163
4164 int
4165 decode_coding (coding, source, destination, src_bytes, dst_bytes)
4166      struct coding_system *coding;
4167      unsigned char *source, *destination;
4168      int src_bytes, dst_bytes;
4169 {
4170   if (coding->type == coding_type_undecided)
4171     detect_coding (coding, source, src_bytes);
4172
4173   if (coding->eol_type == CODING_EOL_UNDECIDED
4174       && coding->type != coding_type_ccl)
4175     detect_eol (coding, source, src_bytes);
4176
4177   coding->produced = coding->produced_char = 0;
4178   coding->consumed = coding->consumed_char = 0;
4179   coding->errors = 0;
4180   coding->result = CODING_FINISH_NORMAL;
4181
4182   switch (coding->type)
4183     {
4184     case coding_type_sjis:
4185       decode_coding_sjis_big5 (coding, source, destination,
4186                                src_bytes, dst_bytes, 1);
4187       break;
4188
4189     case coding_type_iso2022:
4190       decode_coding_iso2022 (coding, source, destination,
4191                              src_bytes, dst_bytes);
4192       break;
4193
4194     case coding_type_big5:
4195       decode_coding_sjis_big5 (coding, source, destination,
4196                                src_bytes, dst_bytes, 0);
4197       break;
4198
4199     case coding_type_emacs_mule:
4200       decode_coding_emacs_mule (coding, source, destination,
4201                                 src_bytes, dst_bytes);
4202       break;
4203
4204     case coding_type_ccl:
4205       if (coding->spec.ccl.cr_carryover)
4206         {
4207           /* Set the CR which is not processed by the previous call of
4208              decode_eol_post_ccl in DESTINATION.  */
4209           *destination = '\r';
4210           coding->produced++;
4211           coding->produced_char++;
4212           dst_bytes--;
4213         }
4214       ccl_coding_driver (coding, source,
4215                          destination + coding->spec.ccl.cr_carryover,
4216                          src_bytes, dst_bytes, 0);
4217       if (coding->eol_type != CODING_EOL_LF)
4218         decode_eol_post_ccl (coding, destination, coding->produced);
4219       break;
4220
4221     default:
4222       decode_eol (coding, source, destination, src_bytes, dst_bytes);
4223     }
4224
4225   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
4226       && coding->mode & CODING_MODE_LAST_BLOCK
4227       && coding->consumed == src_bytes)
4228     coding->result = CODING_FINISH_NORMAL;
4229
4230   if (coding->mode & CODING_MODE_LAST_BLOCK
4231       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4232     {
4233       unsigned char *src = source + coding->consumed;
4234       unsigned char *dst = destination + coding->produced;
4235
4236       src_bytes -= coding->consumed;
4237       coding->errors++;
4238       if (COMPOSING_P (coding))
4239         DECODE_COMPOSITION_END ('1');
4240       while (src_bytes--)
4241         {
4242           int c = *src++;
4243           dst += CHAR_STRING (c, dst);
4244           coding->produced_char++;
4245         }
4246       coding->consumed = coding->consumed_char = src - source;
4247       coding->produced = dst - destination;
4248       coding->result = CODING_FINISH_NORMAL;
4249     }
4250
4251   if (!coding->dst_multibyte)
4252     {
4253       coding->produced = str_as_unibyte (destination, coding->produced);
4254       coding->produced_char = coding->produced;
4255     }
4256
4257   return coding->result;
4258 }
4259
4260 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  The
4261    multibyteness of the source is CODING->src_multibyte, the
4262    multibyteness of the result is always unibyte.  */
4263
4264 int
4265 encode_coding (coding, source, destination, src_bytes, dst_bytes)
4266      struct coding_system *coding;
4267      unsigned char *source, *destination;
4268      int src_bytes, dst_bytes;
4269 {
4270   coding->produced = coding->produced_char = 0;
4271   coding->consumed = coding->consumed_char = 0;
4272   coding->errors = 0;
4273   coding->result = CODING_FINISH_NORMAL;
4274
4275   switch (coding->type)
4276     {
4277     case coding_type_sjis:
4278       encode_coding_sjis_big5 (coding, source, destination,
4279                                src_bytes, dst_bytes, 1);
4280       break;
4281
4282     case coding_type_iso2022:
4283       encode_coding_iso2022 (coding, source, destination,
4284                              src_bytes, dst_bytes);
4285       break;
4286
4287     case coding_type_big5:
4288       encode_coding_sjis_big5 (coding, source, destination,
4289                                src_bytes, dst_bytes, 0);
4290       break;
4291
4292     case coding_type_emacs_mule:
4293       encode_coding_emacs_mule (coding, source, destination,
4294                                 src_bytes, dst_bytes);
4295       break;
4296
4297     case coding_type_ccl:
4298       ccl_coding_driver (coding, source, destination,
4299                          src_bytes, dst_bytes, 1);
4300       break;
4301
4302     default:
4303       encode_eol (coding, source, destination, src_bytes, dst_bytes);
4304     }
4305
4306   if (coding->mode & CODING_MODE_LAST_BLOCK
4307       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4308     {
4309       unsigned char *src = source + coding->consumed;
4310       unsigned char *src_end = src + src_bytes;
4311       unsigned char *dst = destination + coding->produced;
4312
4313       if (coding->type == coding_type_iso2022)
4314         ENCODE_RESET_PLANE_AND_REGISTER;
4315       if (COMPOSING_P (coding))
4316         *dst++ = ISO_CODE_ESC, *dst++ = '1';
4317       if (coding->consumed < src_bytes)
4318         {
4319           int len = src_bytes - coding->consumed;
4320
4321           BCOPY_SHORT (source + coding->consumed, dst, len);
4322           if (coding->src_multibyte)
4323             len = str_as_unibyte (dst, len);
4324           dst += len;
4325           coding->consumed = src_bytes;
4326         }
4327       coding->produced = coding->produced_char = dst - destination;
4328       coding->result = CODING_FINISH_NORMAL;
4329     }
4330
4331   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
4332       && coding->consumed == src_bytes)
4333     coding->result = CODING_FINISH_NORMAL;
4334
4335   return coding->result;
4336 }
4337
4338 /* Scan text in the region between *BEG and *END (byte positions),
4339    skip characters which we don't have to decode by coding system
4340    CODING at the head and tail, then set *BEG and *END to the region
4341    of the text we actually have to convert.  The caller should move
4342    the gap out of the region in advance if the region is from a
4343    buffer.
4344
4345    If STR is not NULL, *BEG and *END are indices into STR.  */
4346
4347 static void
4348 shrink_decoding_region (beg, end, coding, str)
4349      int *beg, *end;
4350      struct coding_system *coding;
4351      unsigned char *str;
4352 {
4353   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
4354   int eol_conversion;
4355   Lisp_Object translation_table;
4356
4357   if (coding->type == coding_type_ccl
4358       || coding->type == coding_type_undecided
4359       || coding->eol_type != CODING_EOL_LF
4360       || !NILP (coding->post_read_conversion)
4361       || coding->composing != COMPOSITION_DISABLED)
4362     {
4363       /* We can't skip any data.  */
4364       return;
4365     }
4366   if (coding->type == coding_type_no_conversion
4367       || coding->type == coding_type_raw_text
4368       || coding->type == coding_type_emacs_mule)
4369     {
4370       /* We need no conversion, but don't have to skip any data here.
4371          Decoding routine handles them effectively anyway.  */
4372       return;
4373     }
4374
4375   translation_table = coding->translation_table_for_decode;
4376   if (NILP (translation_table) && !NILP (Venable_character_translation))
4377     translation_table = Vstandard_translation_table_for_decode;
4378   if (CHAR_TABLE_P (translation_table))
4379     {
4380       int i;
4381       for (i = 0; i < 128; i++)
4382         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
4383           break;
4384       if (i < 128)
4385         /* Some ASCII character should be translated.  We give up
4386            shrinking.  */
4387         return;
4388     }
4389
4390   if (coding->heading_ascii >= 0)
4391     /* Detection routine has already found how much we can skip at the
4392        head.  */
4393     *beg += coding->heading_ascii;
4394
4395   if (str)
4396     {
4397       begp_orig = begp = str + *beg;
4398       endp_orig = endp = str + *end;
4399     }
4400   else
4401     {
4402       begp_orig = begp = BYTE_POS_ADDR (*beg);
4403       endp_orig = endp = begp + *end - *beg;
4404     }
4405
4406   eol_conversion = (coding->eol_type == CODING_EOL_CR
4407                     || coding->eol_type == CODING_EOL_CRLF);
4408
4409   switch (coding->type)
4410     {
4411     case coding_type_sjis:
4412     case coding_type_big5:
4413       /* We can skip all ASCII characters at the head.  */
4414       if (coding->heading_ascii < 0)
4415         {
4416           if (eol_conversion)
4417             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
4418           else
4419             while (begp < endp && *begp < 0x80) begp++;
4420         }
4421       /* We can skip all ASCII characters at the tail except for the
4422          second byte of SJIS or BIG5 code.  */
4423       if (eol_conversion)
4424         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
4425       else
4426         while (begp < endp && endp[-1] < 0x80) endp--;
4427       /* Do not consider LF as ascii if preceded by CR, since that
4428          confuses eol decoding. */
4429       if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4430         endp++;
4431       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
4432         endp++;
4433       break;
4434
4435     case coding_type_iso2022:
4436       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
4437         /* We can't skip any data.  */
4438         break;
4439       if (coding->heading_ascii < 0)
4440         {
4441           /* We can skip all ASCII characters at the head except for a
4442              few control codes.  */
4443           while (begp < endp && (c = *begp) < 0x80
4444                  && c != ISO_CODE_CR && c != ISO_CODE_SO
4445                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
4446                  && (!eol_conversion || c != ISO_CODE_LF))
4447             begp++;
4448         }
4449       switch (coding->category_idx)
4450         {
4451         case CODING_CATEGORY_IDX_ISO_8_1:
4452         case CODING_CATEGORY_IDX_ISO_8_2:
4453           /* We can skip all ASCII characters at the tail.  */
4454           if (eol_conversion)
4455             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
4456           else
4457             while (begp < endp && endp[-1] < 0x80) endp--;
4458           /* Do not consider LF as ascii if preceded by CR, since that
4459              confuses eol decoding. */
4460           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4461             endp++;
4462           break;
4463
4464         case CODING_CATEGORY_IDX_ISO_7:
4465         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
4466           {
4467             /* We can skip all charactes at the tail except for 8-bit
4468                codes and ESC and the following 2-byte at the tail.  */
4469             unsigned char *eight_bit = NULL;
4470
4471             if (eol_conversion)
4472               while (begp < endp
4473                      && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
4474                 {
4475                   if (!eight_bit && c & 0x80) eight_bit = endp;
4476                   endp--;
4477                 }
4478             else
4479               while (begp < endp
4480                      && (c = endp[-1]) != ISO_CODE_ESC)
4481                 {
4482                   if (!eight_bit && c & 0x80) eight_bit = endp;
4483                   endp--;
4484                 }
4485             /* Do not consider LF as ascii if preceded by CR, since that
4486                confuses eol decoding. */
4487             if (begp < endp && endp < endp_orig
4488                 && endp[-1] == '\r' && endp[0] == '\n')
4489               endp++;
4490             if (begp < endp && endp[-1] == ISO_CODE_ESC)
4491               {
4492                 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
4493                   /* This is an ASCII designation sequence.  We can
4494                      surely skip the tail.  But, if we have
4495                      encountered an 8-bit code, skip only the codes
4496                      after that.  */
4497                   endp = eight_bit ? eight_bit : endp + 2;
4498                 else
4499                   /* Hmmm, we can't skip the tail.  */
4500                   endp = endp_orig;
4501               }
4502             else if (eight_bit)
4503               endp = eight_bit;
4504           }
4505         }
4506       break;
4507
4508     default:
4509       abort ();
4510     }
4511   *beg += begp - begp_orig;
4512   *end += endp - endp_orig;
4513   return;
4514 }
4515
4516 /* Like shrink_decoding_region but for encoding.  */
4517
4518 static void
4519 shrink_encoding_region (beg, end, coding, str)
4520      int *beg, *end;
4521      struct coding_system *coding;
4522      unsigned char *str;
4523 {
4524   unsigned char *begp_orig, *begp, *endp_orig, *endp;
4525   int eol_conversion;
4526   Lisp_Object translation_table;
4527
4528   if (coding->type == coding_type_ccl
4529       || coding->eol_type == CODING_EOL_CRLF
4530       || coding->eol_type == CODING_EOL_CR
4531       || coding->cmp_data && coding->cmp_data->used > 0)
4532     {
4533       /* We can't skip any data.  */
4534       return;
4535     }
4536   if (coding->type == coding_type_no_conversion
4537       || coding->type == coding_type_raw_text
4538       || coding->type == coding_type_emacs_mule
4539       || coding->type == coding_type_undecided)
4540     {
4541       /* We need no conversion, but don't have to skip any data here.
4542          Encoding routine handles them effectively anyway.  */
4543       return;
4544     }
4545
4546   translation_table = coding->translation_table_for_encode;
4547   if (NILP (translation_table) && !NILP (Venable_character_translation))
4548     translation_table = Vstandard_translation_table_for_encode;
4549   if (CHAR_TABLE_P (translation_table))
4550     {
4551       int i;
4552       for (i = 0; i < 128; i++)
4553         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
4554           break;
4555       if (i < 128)
4556         /* Some ASCII character should be tranlsated.  We give up
4557            shrinking.  */
4558         return;
4559     }
4560
4561   if (str)
4562     {
4563       begp_orig = begp = str + *beg;
4564       endp_orig = endp = str + *end;
4565     }
4566   else
4567     {
4568       begp_orig = begp = BYTE_POS_ADDR (*beg);
4569       endp_orig = endp = begp + *end - *beg;
4570     }
4571
4572   eol_conversion = (coding->eol_type == CODING_EOL_CR
4573                     || coding->eol_type == CODING_EOL_CRLF);
4574
4575   /* Here, we don't have to check coding->pre_write_conversion because
4576      the caller is expected to have handled it already.  */
4577   switch (coding->type)
4578     {
4579     case coding_type_iso2022:
4580       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
4581         /* We can't skip any data.  */
4582         break;
4583       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
4584         {
4585           unsigned char *bol = begp;
4586           while (begp < endp && *begp < 0x80)
4587             {
4588               begp++;
4589               if (begp[-1] == '\n')
4590                 bol = begp;
4591             }
4592           begp = bol;
4593           goto label_skip_tail;
4594         }
4595       /* fall down ... */
4596
4597     case coding_type_sjis:
4598     case coding_type_big5:
4599       /* We can skip all ASCII characters at the head and tail.  */
4600       if (eol_conversion)
4601         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
4602       else
4603         while (begp < endp && *begp < 0x80) begp++;
4604     label_skip_tail:
4605       if (eol_conversion)
4606         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
4607       else
4608         while (begp < endp && *(endp - 1) < 0x80) endp--;
4609       break;
4610
4611     default:
4612       abort ();
4613     }
4614
4615   *beg += begp - begp_orig;
4616   *end += endp - endp_orig;
4617   return;
4618 }
4619
/* Shrinking a conversion region has some overhead of its own, so it
   is attempted only when the region is longer than this value.  */
static int shrink_conversion_region_threshhold = 1024;

/* Try to narrow the conversion region delimited by *BEG and *END.
   Regions not longer than shrink_conversion_region_threshhold are
   left alone.  Otherwise the work is handed to
   shrink_encoding_region when ENCODEP is nonzero, and to
   shrink_decoding_region when it is zero.  */
#define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep)        \
  do {                                                                  \
    if (*(end) - *(beg) <= shrink_conversion_region_threshhold)         \
      break;                                                            \
    if (encodep)                                                        \
      shrink_encoding_region (beg, end, coding, str);                   \
    else                                                                \
      shrink_decoding_region (beg, end, coding, str);                   \
  } while (0)
4633
/* Handler registered with record_unwind_protect around the calls of
   pre-write-conversion and post-read-conversion functions.  It clears
   inhibit_pre_post_conversion again, so the flag does not stay set
   when the protected code is left through the unwind mechanism.
   DUMMY is ignored.  Always returns Qnil.  */
static Lisp_Object
code_convert_region_unwind (dummy)
     Lisp_Object dummy;
{
  inhibit_pre_post_conversion = 0;
  return Qnil;
}
4641
4642 /* Store information about all compositions in the range FROM and TO
4643    of OBJ in memory blocks pointed by CODING->cmp_data.  OBJ is a
4644    buffer or a string, defaults to the current buffer.  */
4645
4646 void
4647 coding_save_composition (coding, from, to, obj)
4648      struct coding_system *coding;
4649      int from, to;
4650      Lisp_Object obj;
4651 {
4652   Lisp_Object prop;
4653   int start, end;
4654
4655   if (coding->composing == COMPOSITION_DISABLED)
4656     return;
4657   if (!coding->cmp_data)
4658     coding_allocate_composition_data (coding, from);
4659   if (!find_composition (from, to, &start, &end, &prop, obj)
4660       || end > to)
4661     return;
4662   if (start < from
4663       && (!find_composition (end, to, &start, &end, &prop, obj)
4664           || end > to))
4665     return;
4666   coding->composing = COMPOSITION_NO;
4667   do
4668     {
4669       if (COMPOSITION_VALID_P (start, end, prop))
4670         {
4671           enum composition_method method = COMPOSITION_METHOD (prop);
4672           if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
4673               >= COMPOSITION_DATA_SIZE)
4674             coding_allocate_composition_data (coding, from);
4675           /* For relative composition, we remember start and end
4676              positions, for the other compositions, we also remember
4677              components.  */
4678           CODING_ADD_COMPOSITION_START (coding, start - from, method);
4679           if (method != COMPOSITION_RELATIVE)
4680             {
4681               /* We must store a*/
4682               Lisp_Object val, ch;
4683
4684               val = COMPOSITION_COMPONENTS (prop);
4685               if (CONSP (val))
4686                 while (CONSP (val))
4687                   {
4688                     ch = XCAR (val), val = XCDR (val);
4689                     CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
4690                   }
4691               else if (VECTORP (val) || STRINGP (val))
4692                 {
4693                   int len = (VECTORP (val)
4694                              ? XVECTOR (val)->size : XSTRING (val)->size);
4695                   int i;
4696                   for (i = 0; i < len; i++)
4697                     {
4698                       ch = (STRINGP (val)
4699                             ? Faref (val, make_number (i))
4700                             : XVECTOR (val)->contents[i]);
4701                       CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
4702                     }
4703                 }
4704               else              /* INTEGERP (val) */
4705                 CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (val));
4706             }
4707           CODING_ADD_COMPOSITION_END (coding, end - from);
4708         }
4709       start = end;
4710     }
4711   while (start < to
4712          && find_composition (start, to, &start, &end, &prop, obj)
4713          && end <= to);
4714
4715   /* Make coding->cmp_data point to the first memory block.  */
4716   while (coding->cmp_data->prev)
4717     coding->cmp_data = coding->cmp_data->prev;
4718   coding->cmp_data_start = 0;
4719 }
4720
4721 /* Reflect the saved information about compositions to OBJ.
4722    CODING->cmp_data points to a memory block for the informaiton.  OBJ
4723    is a buffer or a string, defaults to the current buffer.  */
4724
4725 void
4726 coding_restore_composition (coding, obj)
4727      struct coding_system *coding;
4728      Lisp_Object obj;
4729 {
4730   struct composition_data *cmp_data = coding->cmp_data;
4731
4732   if (!cmp_data)
4733     return;
4734
4735   while (cmp_data->prev)
4736     cmp_data = cmp_data->prev;
4737
4738   while (cmp_data)
4739     {
4740       int i;
4741
4742       for (i = 0; i < cmp_data->used && cmp_data->data[i] > 0;
4743            i += cmp_data->data[i])
4744         {
4745           int *data = cmp_data->data + i;
4746           enum composition_method method = (enum composition_method) data[3];
4747           Lisp_Object components;
4748
4749           if (method == COMPOSITION_RELATIVE)
4750             components = Qnil;
4751           else
4752             {
4753               int len = data[0] - 4, j;
4754               Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
4755
4756               for (j = 0; j < len; j++)
4757                 args[j] = make_number (data[4 + j]);
4758               components = (method == COMPOSITION_WITH_ALTCHARS
4759                             ? Fstring (len, args) : Fvector (len, args));
4760             }
4761           compose_text (data[1], data[2], components, Qnil, obj);
4762         }
4763       cmp_data = cmp_data->next;
4764     }
4765 }
4766
4767 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
4768    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
4769    coding system CODING, and return the status code of code conversion
4770    (currently, this value has no meaning).
4771
4772    How many characters (and bytes) are converted to how many
4773    characters (and bytes) are recorded in members of the structure
4774    CODING.
4775
4776    If REPLACE is nonzero, we do various things as if the original text
4777    is deleted and a new text is inserted.  See the comments in
4778    replace_range (insdel.c) to know what we are doing.
4779
4780    If REPLACE is zero, it is assumed that the source text is unibyte.
4781    Otherwize, it is assumed that the source text is multibyte.  */
4782
4783 int
4784 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
4785      int from, from_byte, to, to_byte, encodep, replace;
4786      struct coding_system *coding;
4787 {
4788   int len = to - from, len_byte = to_byte - from_byte;
4789   int require, inserted, inserted_byte;
4790   int head_skip, tail_skip, total_skip = 0;
4791   Lisp_Object saved_coding_symbol;
4792   int first = 1;
4793   unsigned char *src, *dst;
4794   Lisp_Object deletion;
4795   int orig_point = PT, orig_len = len;
4796   int prev_Z;
4797   int multibyte_p = !NILP (current_buffer->enable_multibyte_characters);
4798
4799   coding->src_multibyte = replace && multibyte_p;
4800   coding->dst_multibyte = multibyte_p;
4801
4802   deletion = Qnil;
4803   saved_coding_symbol = Qnil;
4804
4805   if (from < PT && PT < to)
4806     {
4807       TEMP_SET_PT_BOTH (from, from_byte);
4808       orig_point = from;
4809     }
4810
4811   if (replace)
4812     {
4813       int saved_from = from;
4814       int saved_inhibit_modification_hooks;
4815
4816       prepare_to_modify_buffer (from, to, &from);
4817       if (saved_from != from)
4818         {
4819           to = from + len;
4820           from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
4821           len_byte = to_byte - from_byte;
4822         }
4823
4824       /* The code conversion routine can not preserve text properties
4825          for now.  So, we must remove all text properties in the
4826          region.  Here, we must suppress all modification hooks.  */
4827       saved_inhibit_modification_hooks = inhibit_modification_hooks;
4828       inhibit_modification_hooks = 1;
4829       Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
4830       inhibit_modification_hooks = saved_inhibit_modification_hooks;
4831     }
4832
4833   if (! encodep && CODING_REQUIRE_DETECTION (coding))
4834     {
4835       /* We must detect encoding of text and eol format.  */
4836
4837       if (from < GPT && to > GPT)
4838         move_gap_both (from, from_byte);
4839       if (coding->type == coding_type_undecided)
4840         {
4841           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
4842           if (coding->type == coding_type_undecided)
4843             {
4844               /* It seems that the text contains only ASCII, but we
4845                  should not leave it undecided because the deeper
4846                  decoding routine (decode_coding) tries to detect the
4847                  encodings again in vain.  */
4848               coding->type = coding_type_emacs_mule;
4849               coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
4850             }
4851         }
4852       if (coding->eol_type == CODING_EOL_UNDECIDED
4853           && coding->type != coding_type_ccl)
4854         {
4855           saved_coding_symbol = coding->symbol;
4856           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
4857           if (coding->eol_type == CODING_EOL_UNDECIDED)
4858             coding->eol_type = CODING_EOL_LF;
4859           /* We had better recover the original eol format if we
4860              encounter an inconsitent eol format while decoding.  */
4861           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4862         }
4863     }
4864
4865   /* Now we convert the text.  */
4866
4867   /* For encoding, we must process pre-write-conversion in advance.  */
4868   if (! inhibit_pre_post_conversion
4869       && encodep
4870       && SYMBOLP (coding->pre_write_conversion)
4871       && ! NILP (Ffboundp (coding->pre_write_conversion)))
4872     {
4873       /* The function in pre-write-conversion may put a new text in a
4874          new buffer.  */
4875       struct buffer *prev = current_buffer;
4876       Lisp_Object new;
4877       int count = specpdl_ptr - specpdl;
4878
4879       record_unwind_protect (code_convert_region_unwind, Qnil);
4880       /* We should not call any more pre-write/post-read-conversion
4881          functions while this pre-write-conversion is running.  */
4882       inhibit_pre_post_conversion = 1;
4883       call2 (coding->pre_write_conversion,
4884              make_number (from), make_number (to));
4885       inhibit_pre_post_conversion = 0;
4886       /* Discard the unwind protect.  */
4887       specpdl_ptr--;
4888
4889       if (current_buffer != prev)
4890         {
4891           len = ZV - BEGV;
4892           new = Fcurrent_buffer ();
4893           set_buffer_internal_1 (prev);
4894           del_range_2 (from, from_byte, to, to_byte, 0);
4895           TEMP_SET_PT_BOTH (from, from_byte);
4896           insert_from_buffer (XBUFFER (new), 1, len, 0);
4897           Fkill_buffer (new);
4898           if (orig_point >= to)
4899             orig_point += len - orig_len;
4900           else if (orig_point > from)
4901             orig_point = from;
4902           orig_len = len;
4903           to = from + len;
4904           from_byte = CHAR_TO_BYTE (from);
4905           to_byte = CHAR_TO_BYTE (to);
4906           len_byte = to_byte - from_byte;
4907           TEMP_SET_PT_BOTH (from, from_byte);
4908         }
4909     }
4910
4911   if (replace)
4912     deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
4913
4914   if (coding->composing != COMPOSITION_DISABLED)
4915     {
4916       if (encodep)
4917         coding_save_composition (coding, from, to, Fcurrent_buffer ());
4918       else
4919         coding_allocate_composition_data (coding, from);
4920     }
4921
4922   /* Try to skip the heading and tailing ASCIIs.  */
4923   if (coding->type != coding_type_ccl)
4924     {
4925       int from_byte_orig = from_byte, to_byte_orig = to_byte;
4926
4927       if (from < GPT && GPT < to)
4928         move_gap_both (from, from_byte);
4929       SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
4930       if (from_byte == to_byte
4931           && (encodep || NILP (coding->post_read_conversion))
4932           && ! CODING_REQUIRE_FLUSHING (coding))
4933         {
4934           coding->produced = len_byte;
4935           coding->produced_char = len;
4936           if (!replace)
4937             /* We must record and adjust for this new text now.  */
4938             adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
4939           return 0;
4940         }
4941
4942       head_skip = from_byte - from_byte_orig;
4943       tail_skip = to_byte_orig - to_byte;
4944       total_skip = head_skip + tail_skip;
4945       from += head_skip;
4946       to -= tail_skip;
4947       len -= total_skip; len_byte -= total_skip;
4948     }
4949
4950   /* For converion, we must put the gap before the text in addition to
4951      making the gap larger for efficient decoding.  The required gap
4952      size starts from 2000 which is the magic number used in make_gap.
4953      But, after one batch of conversion, it will be incremented if we
4954      find that it is not enough .  */
4955   require = 2000;
4956
4957   if (GAP_SIZE  < require)
4958     make_gap (require - GAP_SIZE);
4959   move_gap_both (from, from_byte);
4960
4961   inserted = inserted_byte = 0;
4962
4963   GAP_SIZE += len_byte;
4964   ZV -= len;
4965   Z -= len;
4966   ZV_BYTE -= len_byte;
4967   Z_BYTE -= len_byte;
4968
4969   if (GPT - BEG < BEG_UNCHANGED)
4970     BEG_UNCHANGED = GPT - BEG;
4971   if (Z - GPT < END_UNCHANGED)
4972     END_UNCHANGED = Z - GPT;
4973
4974   if (!encodep && coding->src_multibyte)
4975     {
4976       /* Decoding routines expects that the source text is unibyte.
4977          We must convert 8-bit characters of multibyte form to
4978          unibyte.  */
4979       int len_byte_orig = len_byte;
4980       len_byte = str_as_unibyte (GAP_END_ADDR - len_byte, len_byte);
4981       if (len_byte < len_byte_orig)
4982         safe_bcopy (GAP_END_ADDR - len_byte_orig, GAP_END_ADDR - len_byte,
4983                     len_byte);
4984       coding->src_multibyte = 0;
4985     }
4986
4987   for (;;)
4988     {
4989       int result;
4990
4991       /* The buffer memory is now:
4992          +--------+converted-text+---------+-------original-text-------+---+
4993          |<-from->|<--inserted-->|---------|<--------len_byte--------->|---|
4994                   |<---------------------- GAP ----------------------->|  */
4995       src = GAP_END_ADDR - len_byte;
4996       dst = GPT_ADDR + inserted_byte;
4997
4998       if (encodep)
4999         result = encode_coding (coding, src, dst, len_byte, 0);
5000       else
5001         result = decode_coding (coding, src, dst, len_byte, 0);
5002
5003       /* The buffer memory is now:
5004          +--------+-------converted-text----+--+------original-text----+---+
5005          |<-from->|<-inserted->|<-produced->|--|<-(len_byte-consumed)->|---|
5006                   |<---------------------- GAP ----------------------->|  */
5007
5008       inserted += coding->produced_char;
5009       inserted_byte += coding->produced;
5010       len_byte -= coding->consumed;
5011
5012       if (result == CODING_FINISH_INSUFFICIENT_CMP)
5013         {
5014           coding_allocate_composition_data (coding, from + inserted);
5015           continue;
5016         }
5017
5018       src += coding->consumed;
5019       dst += coding->produced;
5020
5021       if (result == CODING_FINISH_NORMAL)
5022         {
5023           src += len_byte;
5024           break;
5025         }
5026       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
5027         {
5028           unsigned char *pend = dst, *p = pend - inserted_byte;
5029           Lisp_Object eol_type;
5030
5031           /* Encode LFs back to the original eol format (CR or CRLF).  */
5032           if (coding->eol_type == CODING_EOL_CR)
5033             {
5034               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
5035             }
5036           else
5037             {
5038               int count = 0;
5039
5040               while (p < pend) if (*p++ == '\n') count++;
5041               if (src - dst < count)
5042                 {
5043                   /* We don't have sufficient room for encoding LFs
5044                      back to CRLF.  We must record converted and
5045                      not-yet-converted text back to the buffer
5046                      content, enlarge the gap, then record them out of
5047                      the buffer contents again.  */
5048                   int add = len_byte + inserted_byte;
5049
5050                   GAP_SIZE -= add;
5051                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5052                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
5053                   make_gap (count - GAP_SIZE);
5054                   GAP_SIZE += add;
5055                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5056                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5057                   /* Don't forget to update SRC, DST, and PEND.  */
5058                   src = GAP_END_ADDR - len_byte;
5059                   dst = GPT_ADDR + inserted_byte;
5060                   pend = dst;
5061                 }
5062               inserted += count;
5063               inserted_byte += count;
5064               coding->produced += count;
5065               p = dst = pend + count;
5066               while (count)
5067                 {
5068                   *--p = *--pend;
5069                   if (*p == '\n') count--, *--p = '\r';
5070                 }
5071             }
5072
5073           /* Suppress eol-format conversion in the further conversion.  */
5074           coding->eol_type = CODING_EOL_LF;
5075
5076           /* Set the coding system symbol to that for Unix-like EOL.  */
5077           eol_type = Fget (saved_coding_symbol, Qeol_type);
5078           if (VECTORP (eol_type)
5079               && XVECTOR (eol_type)->size == 3
5080               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
5081             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
5082           else
5083             coding->symbol = saved_coding_symbol;
5084
5085           continue;
5086         }
5087       if (len_byte <= 0)
5088         {
5089           if (coding->type != coding_type_ccl
5090               || coding->mode & CODING_MODE_LAST_BLOCK)
5091             break;
5092           coding->mode |= CODING_MODE_LAST_BLOCK;
5093           continue;
5094         }
5095       if (result == CODING_FINISH_INSUFFICIENT_SRC)
5096         {
5097           /* The source text ends in invalid codes.  Let's just
5098              make them valid buffer contents, and finish conversion.  */
5099           inserted += len_byte;
5100           inserted_byte += len_byte;
5101           while (len_byte--)
5102             *dst++ = *src++;
5103           break;
5104         }
5105       if (result == CODING_FINISH_INTERRUPT)
5106         {
5107           /* The conversion procedure was interrupted by a user.  */
5108           break;
5109         }
5110       /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST  */
5111       if (coding->consumed < 1)
5112         {
5113           /* It's quite strange to require more memory without
5114              consuming any bytes.  Perhaps CCL program bug.  */
5115           break;
5116         }
5117       if (first)
5118         {
5119           /* We have just done the first batch of conversion which was
5120              stoped because of insufficient gap.  Let's reconsider the
5121              required gap size (i.e. SRT - DST) now.
5122
5123              We have converted ORIG bytes (== coding->consumed) into
5124              NEW bytes (coding->produced).  To convert the remaining
5125              LEN bytes, we may need REQUIRE bytes of gap, where:
5126                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
5127                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
5128              Here, we are sure that NEW >= ORIG.  */
5129           float ratio = coding->produced - coding->consumed;
5130           ratio /= coding->consumed;
5131           require = len_byte * ratio;
5132           first = 0;
5133         }
5134       if ((src - dst) < (require + 2000))
5135         {
5136           /* See the comment above the previous call of make_gap.  */
5137           int add = len_byte + inserted_byte;
5138
5139           GAP_SIZE -= add;
5140           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5141           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5142           make_gap (require + 2000);
5143           GAP_SIZE += add;
5144           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5145           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5146         }
5147     }
5148   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
5149
5150   if (encodep && coding->dst_multibyte)
5151     {
5152       /* The output is unibyte.  We must convert 8-bit characters to
5153          multibyte form.  */
5154       if (inserted_byte * 2 > GAP_SIZE)
5155         {
5156           GAP_SIZE -= inserted_byte;
5157           ZV += inserted_byte; Z += inserted_byte;
5158           ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte;
5159           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5160           make_gap (inserted_byte - GAP_SIZE);
5161           GAP_SIZE += inserted_byte;
5162           ZV -= inserted_byte; Z -= inserted_byte;
5163           ZV_BYTE -= inserted_byte; Z_BYTE -= inserted_byte;
5164           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5165         }
5166       inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte);
5167     }
5168
5169   /* If we have shrinked the conversion area, adjust it now.  */
5170   if (total_skip > 0)
5171     {
5172       if (tail_skip > 0)
5173         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
5174       inserted += total_skip; inserted_byte += total_skip;
5175       GAP_SIZE += total_skip;
5176       GPT -= head_skip; GPT_BYTE -= head_skip;
5177       ZV -= total_skip; ZV_BYTE -= total_skip;
5178       Z -= total_skip; Z_BYTE -= total_skip;
5179       from -= head_skip; from_byte -= head_skip;
5180       to += tail_skip; to_byte += tail_skip;
5181     }
5182
5183   prev_Z = Z;
5184   adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
5185   inserted = Z - prev_Z;
5186
5187   if (!encodep && coding->cmp_data && coding->cmp_data->used)
5188     coding_restore_composition (coding, Fcurrent_buffer ());
5189   coding_free_composition_data (coding);
5190
5191   if (! inhibit_pre_post_conversion
5192       && ! encodep && ! NILP (coding->post_read_conversion))
5193     {
5194       Lisp_Object val;
5195       int count = specpdl_ptr - specpdl;
5196
5197       if (from != PT)
5198         TEMP_SET_PT_BOTH (from, from_byte);
5199       prev_Z = Z;
5200       record_unwind_protect (code_convert_region_unwind, Qnil);
5201       /* We should not call any more pre-write/post-read-conversion
5202          functions while this post-read-conversion is running.  */
5203       inhibit_pre_post_conversion = 1;
5204       val = call1 (coding->post_read_conversion, make_number (inserted));
5205       inhibit_pre_post_conversion = 0;
5206       /* Discard the unwind protect.  */
5207       specpdl_ptr--;
5208       CHECK_NUMBER (val, 0);
5209       inserted += Z - prev_Z;
5210     }
5211
5212   if (orig_point >= from)
5213     {
5214       if (orig_point >= from + orig_len)
5215         orig_point += inserted - orig_len;
5216       else
5217         orig_point = from;
5218       TEMP_SET_PT (orig_point);
5219     }
5220
5221   if (replace)
5222     {
5223       signal_after_change (from, to - from, inserted);
5224       update_compositions (from, from + inserted, CHECK_BORDER);
5225     }
5226
5227   {
5228     coding->consumed = to_byte - from_byte;
5229     coding->consumed_char = to - from;
5230     coding->produced = inserted_byte;
5231     coding->produced_char = inserted;
5232   }
5233
5234   return 0;
5235 }
5236
5237 Lisp_Object
5238 run_pre_post_conversion_on_str (str, coding, encodep)
5239      Lisp_Object str;
5240      struct coding_system *coding;
5241      int encodep;
5242 {
5243   int count = specpdl_ptr - specpdl;
5244   struct gcpro gcpro1;
5245   struct buffer *prev = current_buffer;
5246   int multibyte = STRING_MULTIBYTE (str);
5247
5248   record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
5249   record_unwind_protect (code_convert_region_unwind, Qnil);
5250   GCPRO1 (str);
5251   temp_output_buffer_setup (" *code-converting-work*");
5252   set_buffer_internal (XBUFFER (Vstandard_output));
5253   /* We must insert the contents of STR as is without
5254      unibyte<->multibyte conversion.  For that, we adjust the
5255      multibyteness of the working buffer to that of STR.  */
5256   Ferase_buffer ();
5257   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
5258   insert_from_string (str, 0, 0,
5259                       XSTRING (str)->size, STRING_BYTES (XSTRING (str)), 0);
5260   UNGCPRO;
5261   inhibit_pre_post_conversion = 1;
5262   if (encodep)
5263     call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
5264   else
5265     {
5266       TEMP_SET_PT_BOTH (BEG, BEG_BYTE);
5267       call1 (coding->post_read_conversion, make_number (Z - BEG));
5268     }
5269   inhibit_pre_post_conversion = 0;
5270   str = make_buffer_string (BEG, Z, 1);
5271   return unbind_to (count, str);
5272 }
5273
5274 Lisp_Object
5275 decode_coding_string (str, coding, nocopy)
5276      Lisp_Object str;
5277      struct coding_system *coding;
5278      int nocopy;
5279 {
5280   int len;
5281   struct conversion_buffer buf;
5282   int from, to, to_byte;
5283   struct gcpro gcpro1;
5284   Lisp_Object saved_coding_symbol;
5285   int result;
5286   int require_decoding;
5287   int shrinked_bytes = 0;
5288   Lisp_Object newstr;
5289   int consumed, consumed_char, produced, produced_char;
5290
5291   from = 0;
5292   to = XSTRING (str)->size;
5293   to_byte = STRING_BYTES (XSTRING (str));
5294
5295   saved_coding_symbol = Qnil;
5296   if (CODING_REQUIRE_DETECTION (coding))
5297     {
5298       /* See the comments in code_convert_region.  */
5299       if (coding->type == coding_type_undecided)
5300         {
5301           detect_coding (coding, XSTRING (str)->data, to_byte);
5302           if (coding->type == coding_type_undecided)
5303             coding->type = coding_type_emacs_mule;
5304         }
5305       if (coding->eol_type == CODING_EOL_UNDECIDED
5306           && coding->type != coding_type_ccl)
5307         {
5308           saved_coding_symbol = coding->symbol;
5309           detect_eol (coding, XSTRING (str)->data, to_byte);
5310           if (coding->eol_type == CODING_EOL_UNDECIDED)
5311             coding->eol_type = CODING_EOL_LF;
5312           /* We had better recover the original eol format if we
5313              encounter an inconsitent eol format while decoding.  */
5314           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
5315         }
5316     }
5317
5318   coding->src_multibyte = 0;
5319   coding->dst_multibyte = (coding->type != coding_type_no_conversion
5320                            && coding->type != coding_type_raw_text);
5321   require_decoding = CODING_REQUIRE_DECODING (coding);
5322
5323   if (STRING_MULTIBYTE (str))
5324     {
5325       /* Decoding routines expect the source text to be unibyte.  */
5326       str = Fstring_as_unibyte (str);
5327       to_byte = STRING_BYTES (XSTRING (str));
5328       nocopy = 1;
5329     }
5330
5331   /* Try to skip the heading and tailing ASCIIs.  */
5332   if (require_decoding && coding->type != coding_type_ccl)
5333     {
5334       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
5335                                 0);
5336       if (from == to_byte)
5337         require_decoding = 0;
5338       shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
5339     }
5340
5341   if (!require_decoding)
5342     {
5343       coding->consumed = STRING_BYTES (XSTRING (str));
5344       coding->consumed_char = XSTRING (str)->size;
5345       if (coding->dst_multibyte)
5346         {
5347           str = Fstring_as_multibyte (str);
5348           nocopy = 1;
5349         }
5350       coding->produced = STRING_BYTES (XSTRING (str));
5351       coding->produced_char = XSTRING (str)->size;
5352       return (nocopy ? str : Fcopy_sequence (str));
5353     }
5354
5355   if (coding->composing != COMPOSITION_DISABLED)
5356     coding_allocate_composition_data (coding, from);
5357   len = decoding_buffer_size (coding, to_byte - from);
5358   allocate_conversion_buffer (buf, len);
5359
5360   consumed = consumed_char = produced = produced_char = 0;
5361   while (1)
5362     {
5363       result = decode_coding (coding, XSTRING (str)->data + from + consumed,
5364                               buf.data + produced, to_byte - from - consumed,
5365                               buf.size - produced);
5366       consumed += coding->consumed;
5367       consumed_char += coding->consumed_char;
5368       produced += coding->produced;
5369       produced_char += coding->produced_char;
5370       if (result == CODING_FINISH_NORMAL
5371           || (result == CODING_FINISH_INSUFFICIENT_SRC
5372               && coding->consumed == 0))
5373         break;
5374       if (result == CODING_FINISH_INSUFFICIENT_CMP)
5375         coding_allocate_composition_data (coding, from + produced_char);
5376       else if (result == CODING_FINISH_INSUFFICIENT_DST)
5377         extend_conversion_buffer (&buf);
5378       else if (result == CODING_FINISH_INCONSISTENT_EOL)
5379         {
5380           /* Recover the original EOL format.  */
5381           if (coding->eol_type == CODING_EOL_CR)
5382             {
5383               unsigned char *p;
5384               for (p = buf.data; p < buf.data + produced; p++)
5385                 if (*p == '\n') *p = '\r';
5386             }
5387           else if (coding->eol_type == CODING_EOL_CRLF)
5388             {
5389               int num_eol = 0;
5390               unsigned char *p0, *p1;
5391               for (p0 = buf.data, p1 = p0 + produced; p0 < p1; p0++)
5392                 if (*p0 == '\n') num_eol++;
5393               if (produced + num_eol >= buf.size)
5394                 extend_conversion_buffer (&buf);
5395               for (p0 = buf.data + produced, p1 = p0 + num_eol; p0 > buf.data;)
5396                 {
5397                   *--p1 = *--p0;
5398                   if (*p0 == '\n') *--p1 = '\r';
5399                 }
5400               produced += num_eol;
5401               produced_char += num_eol;
5402             }
5403           coding->eol_type = CODING_EOL_LF;
5404           coding->symbol = saved_coding_symbol;
5405         }
5406     }
5407
5408   coding->consumed = consumed;
5409   coding->consumed_char = consumed_char;
5410   coding->produced = produced;
5411   coding->produced_char = produced_char;
5412
5413   if (coding->dst_multibyte)
5414     newstr = make_uninit_multibyte_string (produced_char + shrinked_bytes,
5415                                            produced + shrinked_bytes);
5416   else
5417     newstr = make_uninit_string (produced + shrinked_bytes);
5418   if (from > 0)
5419     bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
5420   bcopy (buf.data, XSTRING (newstr)->data + from, produced);
5421   if (shrinked_bytes > from)
5422     bcopy (XSTRING (str)->data + to_byte,
5423            XSTRING (newstr)->data + from + produced,
5424            shrinked_bytes - from);
5425   free_conversion_buffer (&buf);
5426
5427   if (coding->cmp_data && coding->cmp_data->used)
5428     coding_restore_composition (coding, newstr);
5429   coding_free_composition_data (coding);
5430
5431   if (SYMBOLP (coding->post_read_conversion)
5432       && !NILP (Ffboundp (coding->post_read_conversion)))
5433     newstr = run_pre_post_conversion_on_str (newstr, coding, 0);
5434
5435   return newstr;
5436 }
5437
5438 Lisp_Object
5439 encode_coding_string (str, coding, nocopy)
5440      Lisp_Object str;
5441      struct coding_system *coding;
5442      int nocopy;
5443 {
5444   int len;
5445   struct conversion_buffer buf;
5446   int from, to, to_byte;
5447   struct gcpro gcpro1;
5448   Lisp_Object saved_coding_symbol;
5449   int result;
5450   int shrinked_bytes = 0;
5451   Lisp_Object newstr;
5452   int consumed, consumed_char, produced, produced_char;
5453
5454   if (SYMBOLP (coding->pre_write_conversion)
5455       && !NILP (Ffboundp (coding->pre_write_conversion)))
5456     str = run_pre_post_conversion_on_str (str, coding, 1);
5457
5458   from = 0;
5459   to = XSTRING (str)->size;
5460   to_byte = STRING_BYTES (XSTRING (str));
5461
5462   saved_coding_symbol = Qnil;
5463
5464   /* Encoding routines determine the multibyteness of the source text
5465      by coding->src_multibyte.  */
5466   coding->src_multibyte = STRING_MULTIBYTE (str);
5467   coding->dst_multibyte = 0;
5468   if (! CODING_REQUIRE_ENCODING (coding))
5469     {
5470       coding->consumed = STRING_BYTES (XSTRING (str));
5471       coding->consumed_char = XSTRING (str)->size;
5472       if (STRING_MULTIBYTE (str))
5473         {
5474           str = Fstring_as_unibyte (str);
5475           nocopy = 1;
5476         }
5477       coding->produced = STRING_BYTES (XSTRING (str));
5478       coding->produced_char = XSTRING (str)->size;
5479       return (nocopy ? str : Fcopy_sequence (str));
5480     }
5481
5482   if (coding->composing != COMPOSITION_DISABLED)
5483     coding_save_composition (coding, from, to, str);
5484
5485   /* Try to skip the heading and tailing ASCIIs.  */
5486   if (coding->type != coding_type_ccl)
5487     {
5488       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
5489                                 1);
5490       if (from == to_byte)
5491         return (nocopy ? str : Fcopy_sequence (str));
5492       shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
5493     }
5494
5495   len = encoding_buffer_size (coding, to_byte - from);
5496   allocate_conversion_buffer (buf, len);
5497
5498   consumed = consumed_char = produced = produced_char = 0;
5499   while (1)
5500     {
5501       result = encode_coding (coding, XSTRING (str)->data + from + consumed,
5502                               buf.data + produced, to_byte - from - consumed,
5503                               buf.size - produced);
5504       consumed += coding->consumed;
5505       consumed_char += coding->consumed_char;
5506       produced += coding->produced;
5507       produced_char += coding->produced_char;
5508       if (result == CODING_FINISH_NORMAL
5509           || (result == CODING_FINISH_INSUFFICIENT_SRC
5510               && coding->consumed == 0))
5511         break;
5512       /* Now result should be CODING_FINISH_INSUFFICIENT_DST.  */
5513       extend_conversion_buffer (&buf);
5514     }
5515
5516   coding->consumed = consumed;
5517   coding->consumed_char = consumed_char;
5518   coding->produced = produced;
5519   coding->produced_char = produced_char;
5520
5521   newstr = make_uninit_string (produced + shrinked_bytes);
5522   if (from > 0)
5523     bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
5524   bcopy (buf.data, XSTRING (newstr)->data + from, produced);
5525   if (shrinked_bytes > from)
5526     bcopy (XSTRING (str)->data + to_byte,
5527            XSTRING (newstr)->data + from + produced,
5528            shrinked_bytes - from);
5529
5530   free_conversion_buffer (&buf);
5531   coding_free_composition_data (coding);
5532
5533   return newstr;
5534 }
5535
5536 \f
5537 #ifdef emacs
5538 /*** 8. Emacs Lisp library functions ***/
5539
5540 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
5541   "Return t if OBJECT is nil or a coding-system.\n\
5542 See the documentation of `make-coding-system' for information\n\
5543 about coding-system objects.")
5544   (obj)
5545      Lisp_Object obj;
5546 {
5547   if (NILP (obj))
5548     return Qt;
5549   if (!SYMBOLP (obj))
5550     return Qnil;
5551   /* Get coding-spec vector for OBJ.  */
5552   obj = Fget (obj, Qcoding_system);
5553   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
5554           ? Qt : Qnil);
5555 }
5556
5557 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
5558        Sread_non_nil_coding_system, 1, 1, 0,
5559   "Read a coding system from the minibuffer, prompting with string PROMPT.")
5560   (prompt)
5561      Lisp_Object prompt;
5562 {
5563   Lisp_Object val;
5564   do
5565     {
5566       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
5567                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
5568     }
5569   while (XSTRING (val)->size == 0);
5570   return (Fintern (val, Qnil));
5571 }
5572
5573 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
5574   "Read a coding system from the minibuffer, prompting with string PROMPT.\n\
5575 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.")
5576   (prompt, default_coding_system)
5577      Lisp_Object prompt, default_coding_system;
5578 {
5579   Lisp_Object val;
5580   if (SYMBOLP (default_coding_system))
5581     XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
5582   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
5583                           Qt, Qnil, Qcoding_system_history,
5584                           default_coding_system, Qnil);
5585   return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
5586 }
5587
5588 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
5589        1, 1, 0,
5590   "Check validity of CODING-SYSTEM.\n\
5591 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.\n\
5592 It is valid if it is a symbol with a non-nil `coding-system' property.\n\
5593 The value of property should be a vector of length 5.")
5594   (coding_system)
5595      Lisp_Object coding_system;
5596 {
5597   CHECK_SYMBOL (coding_system, 0);
5598   if (!NILP (Fcoding_system_p (coding_system)))
5599     return coding_system;
5600   while (1)
5601     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
5602 }
5603 \f
5604 Lisp_Object
5605 detect_coding_system (src, src_bytes, highest)
5606      unsigned char *src;
5607      int src_bytes, highest;
5608 {
5609   int coding_mask, eol_type;
5610   Lisp_Object val, tmp;
5611   int dummy;
5612
5613   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy);
5614   eol_type  = detect_eol_type (src, src_bytes, &dummy);
5615   if (eol_type == CODING_EOL_INCONSISTENT)
5616     eol_type = CODING_EOL_UNDECIDED;
5617
5618   if (!coding_mask)
5619     {
5620       val = Qundecided;
5621       if (eol_type != CODING_EOL_UNDECIDED)
5622         {
5623           Lisp_Object val2;
5624           val2 = Fget (Qundecided, Qeol_type);
5625           if (VECTORP (val2))
5626             val = XVECTOR (val2)->contents[eol_type];
5627         }
5628       return (highest ? val : Fcons (val, Qnil));
5629     }
5630
5631   /* At first, gather possible coding systems in VAL.  */
5632   val = Qnil;
5633   for (tmp = Vcoding_category_list; CONSP (tmp); tmp = XCDR (tmp))
5634     {
5635       Lisp_Object category_val, category_index;
5636
5637       category_index = Fget (XCAR (tmp), Qcoding_category_index);
5638       category_val = Fsymbol_value (XCAR (tmp));
5639       if (!NILP (category_val)
5640           && NATNUMP (category_index)
5641           && (coding_mask & (1 << XFASTINT (category_index))))
5642         {
5643           val = Fcons (category_val, val);
5644           if (highest)
5645             break;
5646         }
5647     }
5648   if (!highest)
5649     val = Fnreverse (val);
5650
5651   /* Then, replace the elements with subsidiary coding systems.  */
5652   for (tmp = val; CONSP (tmp); tmp = XCDR (tmp))
5653     {
5654       if (eol_type != CODING_EOL_UNDECIDED
5655           && eol_type != CODING_EOL_INCONSISTENT)
5656         {
5657           Lisp_Object eol;
5658           eol = Fget (XCAR (tmp), Qeol_type);
5659           if (VECTORP (eol))
5660             XCAR (tmp) = XVECTOR (eol)->contents[eol_type];
5661         }
5662     }
5663   return (highest ? XCAR (val) : val);
5664 }
5665
5666 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
5667        2, 3, 0,
5668   "Detect coding system of the text in the region between START and END.\n\
5669 Return a list of possible coding systems ordered by priority.\n\
5670 \n\
5671 If only ASCII characters are found, it returns a list of single element\n\
5672 `undecided' or its subsidiary coding system according to a detected\n\
5673 end-of-line format.\n\
5674 \n\
5675 If optional argument HIGHEST is non-nil, return the coding system of\n\
5676 highest priority.")
5677   (start, end, highest)
5678      Lisp_Object start, end, highest;
5679 {
5680   int from, to;
5681   int from_byte, to_byte;
5682
5683   CHECK_NUMBER_COERCE_MARKER (start, 0);
5684   CHECK_NUMBER_COERCE_MARKER (end, 1);
5685
5686   validate_region (&start, &end);
5687   from = XINT (start), to = XINT (end);
5688   from_byte = CHAR_TO_BYTE (from);
5689   to_byte = CHAR_TO_BYTE (to);
5690
5691   if (from < GPT && to >= GPT)
5692     move_gap_both (to, to_byte);
5693
5694   return detect_coding_system (BYTE_POS_ADDR (from_byte),
5695                                to_byte - from_byte,
5696                                !NILP (highest));
5697 }
5698
5699 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
5700        1, 2, 0,
5701   "Detect coding system of the text in STRING.\n\
5702 Return a list of possible coding systems ordered by priority.\n\
5703 \n\
5704 If only ASCII characters are found, it returns a list of single element\n\
5705 `undecided' or its subsidiary coding system according to a detected\n\
5706 end-of-line format.\n\
5707 \n\
5708 If optional argument HIGHEST is non-nil, return the coding system of\n\
5709 highest priority.")
5710   (string, highest)
5711      Lisp_Object string, highest;
5712 {
5713   CHECK_STRING (string, 0);
5714
5715   return detect_coding_system (XSTRING (string)->data,
5716                                STRING_BYTES (XSTRING (string)),
5717                                !NILP (highest));
5718 }
5719
5720 /* Return an intersection of lists L1 and L2.  */
5721
5722 static Lisp_Object
5723 intersection (l1, l2)
5724      Lisp_Object l1, l2;
5725 {
5726   Lisp_Object val;
5727
5728   for (val = Qnil; CONSP (l1); l1 = XCDR (l1))
5729     {
5730       if (!NILP (Fmemq (XCAR (l1), l2)))
5731         val = Fcons (XCAR (l1), val);
5732     }
5733   return val;
5734 }
5735
5736
5737 /*  Subroutine for Fsafe_coding_systems_region_internal.
5738
5739     Return a list of coding systems that safely encode the multibyte
5740     text between P and PEND.  SAFE_CODINGS, if non-nil, is a list of
5741     possible coding systems.  If it is nil, it means that we have not
5742     yet found any coding systems.
5743
5744     WORK_TABLE is a copy of the char-table Vchar_coding_system_table.  An
5745     element of WORK_TABLE is set to t once the element is looked up.
5746
5747     If a non-ASCII single byte char is found, set
5748     *single_byte_char_found to 1.  */
5749
5750 static Lisp_Object
5751 find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
5752      unsigned char *p, *pend;
5753      Lisp_Object safe_codings, work_table;
5754      int *single_byte_char_found;
5755 {
5756   int c, len, idx;
5757   Lisp_Object val;
5758
5759   while (p < pend)
5760     {
5761       c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
5762       p += len;
5763       if (ASCII_BYTE_P (c))
5764         /* We can ignore ASCII characters here.  */
5765         continue;
5766       if (SINGLE_BYTE_CHAR_P (c))
5767         *single_byte_char_found = 1;
5768       if (NILP (safe_codings))
5769         continue;
5770       /* Check the safe coding systems for C.  */
5771       val = char_table_ref_and_index (work_table, c, &idx);
5772       if (EQ (val, Qt))
5773         /* This element was already checked.  Ignore it.  */
5774         continue;
5775       /* Remember that we checked this element.  */
5776       CHAR_TABLE_SET (work_table, make_number (idx), Qt);
5777
5778       /* If there are some safe coding systems for C and we have
5779          already found the other set of coding systems for the
5780          different characters, get the intersection of them.  */
5781       if (!EQ (safe_codings, Qt) && !NILP (val))
5782         val = intersection (safe_codings, val);
5783       safe_codings = val;
5784     }
5785   return safe_codings;
5786 }
5787
5788
5789 /* Return a list of coding systems that safely encode the text between
5790    START and END.  If the text contains only ASCII or is unibyte,
5791    return t.  */
5792
5793 DEFUN ("find-coding-systems-region-internal",
5794        Ffind_coding_systems_region_internal,
5795        Sfind_coding_systems_region_internal, 2, 2, 0,
5796   "Internal use only.")
5797   (start, end)
5798      Lisp_Object start, end;
5799 {
5800   Lisp_Object work_table, safe_codings;
5801   int non_ascii_p = 0;
5802   int single_byte_char_found = 0;
5803   unsigned char *p1, *p1end, *p2, *p2end, *p;
5804   Lisp_Object args[2];
5805
5806   if (STRINGP (start))
5807     {
5808       if (!STRING_MULTIBYTE (start))
5809         return Qt;
5810       p1 = XSTRING (start)->data, p1end = p1 + STRING_BYTES (XSTRING (start));
5811       p2 = p2end = p1end;
5812       if (XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
5813         non_ascii_p = 1;
5814     }
5815   else
5816     {
5817       int from, to, stop;
5818
5819       CHECK_NUMBER_COERCE_MARKER (start, 0);
5820       CHECK_NUMBER_COERCE_MARKER (end, 1);
5821       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
5822         args_out_of_range (start, end);
5823       if (NILP (current_buffer->enable_multibyte_characters))
5824         return Qt;
5825       from = CHAR_TO_BYTE (XINT (start));
5826       to = CHAR_TO_BYTE (XINT (end));
5827       stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to;
5828       p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from);
5829       if (stop == to)
5830         p2 = p2end = p1end;
5831       else
5832         p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop);
5833       if (XINT (end) - XINT (start) != to - from)
5834         non_ascii_p = 1;
5835     }
5836
5837   if (!non_ascii_p)
5838     {
5839       /* We are sure that the text contains no multibyte character.
5840          Check if it contains eight-bit-graphic.  */
5841       p = p1;
5842       for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++);
5843       if (p == p1end)
5844         {
5845           for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);
5846           if (p == p2end)
5847             return Qt;
5848         }
5849     }
5850
5851   /* The text contains non-ASCII characters.  */
5852   work_table = Fcopy_sequence (Vchar_coding_system_table);
5853   safe_codings = find_safe_codings (p1, p1end, Qt, work_table,
5854                                     &single_byte_char_found);
5855   if (p2 < p2end)
5856     safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
5857                                       &single_byte_char_found);
5858
5859   if (!single_byte_char_found)
5860     {
5861       /* Append generic coding systems.  */
5862       Lisp_Object args[2];
5863       args[0] = safe_codings;
5864       args[1] = Fchar_table_extra_slot (Vchar_coding_system_table,
5865                                         make_number (0));
5866       safe_codings = Fappend (2, args);
5867     }
5868   else
5869     safe_codings = Fcons (Qraw_text, Fcons (Qemacs_mule, safe_codings));
5870   return safe_codings;
5871 }
5872
5873
5874 Lisp_Object
5875 code_convert_region1 (start, end, coding_system, encodep)
5876      Lisp_Object start, end, coding_system;
5877      int encodep;
5878 {
5879   struct coding_system coding;
5880   int from, to, len;
5881
5882   CHECK_NUMBER_COERCE_MARKER (start, 0);
5883   CHECK_NUMBER_COERCE_MARKER (end, 1);
5884   CHECK_SYMBOL (coding_system, 2);
5885
5886   validate_region (&start, &end);
5887   from = XFASTINT (start);
5888   to = XFASTINT (end);
5889
5890   if (NILP (coding_system))
5891     return make_number (to - from);
5892
5893   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
5894     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
5895
5896   coding.mode |= CODING_MODE_LAST_BLOCK;
5897   coding.src_multibyte = coding.dst_multibyte
5898     = !NILP (current_buffer->enable_multibyte_characters);
5899   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
5900                        &coding, encodep, 1);
5901   Vlast_coding_system_used = coding.symbol;
5902   return make_number (coding.produced_char);
5903 }
5904
5905 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
5906        3, 3, "r\nzCoding system: ",
5907   "Decode the current region by specified coding system.\n\
5908 When called from a program, takes three arguments:\n\
5909 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
5910 This function sets `last-coding-system-used' to the precise coding system\n\
5911 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5912 not fully specified.)\n\
5913 It returns the length of the decoded text.")
5914   (start, end, coding_system)
5915      Lisp_Object start, end, coding_system;
5916 {
5917   return code_convert_region1 (start, end, coding_system, 0);
5918 }
5919
5920 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
5921        3, 3, "r\nzCoding system: ",
5922   "Encode the current region by specified coding system.\n\
5923 When called from a program, takes three arguments:\n\
5924 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
5925 This function sets `last-coding-system-used' to the precise coding system\n\
5926 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5927 not fully specified.)\n\
5928 It returns the length of the encoded text.")
5929   (start, end, coding_system)
5930      Lisp_Object start, end, coding_system;
5931 {
5932   return code_convert_region1 (start, end, coding_system, 1);
5933 }
5934
5935 Lisp_Object
5936 code_convert_string1 (string, coding_system, nocopy, encodep)
5937      Lisp_Object string, coding_system, nocopy;
5938      int encodep;
5939 {
5940   struct coding_system coding;
5941
5942   CHECK_STRING (string, 0);
5943   CHECK_SYMBOL (coding_system, 1);
5944
5945   if (NILP (coding_system))
5946     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
5947
5948   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
5949     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
5950
5951   coding.mode |= CODING_MODE_LAST_BLOCK;
5952   string = (encodep
5953             ? encode_coding_string (string, &coding, !NILP (nocopy))
5954             : decode_coding_string (string, &coding, !NILP (nocopy)));
5955   Vlast_coding_system_used = coding.symbol;
5956
5957   return string;
5958 }
5959
5960 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
5961        2, 3, 0,
5962   "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
5963 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
5964 if the decoding operation is trivial.\n\
5965 This function sets `last-coding-system-used' to the precise coding system\n\
5966 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5967 not fully specified.)")
5968   (string, coding_system, nocopy)
5969      Lisp_Object string, coding_system, nocopy;
5970 {
5971   return code_convert_string1 (string, coding_system, nocopy, 0);
5972 }
5973
5974 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
5975        2, 3, 0,
5976   "Encode STRING to CODING-SYSTEM, and return the result.\n\
5977 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
5978 if the encoding operation is trivial.\n\
5979 This function sets `last-coding-system-used' to the precise coding system\n\
5980 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5981 not fully specified.)")
5982   (string, coding_system, nocopy)
5983      Lisp_Object string, coding_system, nocopy;
5984 {
5985   return code_convert_string1 (string, coding_system, nocopy, 1);
5986 }
5987
5988 /* Encode or decode STRING according to CODING_SYSTEM.
5989    Do not set Vlast_coding_system_used.
5990
5991    This function is called only from macros DECODE_FILE and
5992    ENCODE_FILE, thus we ignore character composition.  */
5993
5994 Lisp_Object
5995 code_convert_string_norecord (string, coding_system, encodep)
5996      Lisp_Object string, coding_system;
5997      int encodep;
5998 {
5999   struct coding_system coding;
6000
6001   CHECK_STRING (string, 0);
6002   CHECK_SYMBOL (coding_system, 1);
6003
6004   if (NILP (coding_system))
6005     return string;
6006
6007   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6008     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
6009
6010   coding.composing = COMPOSITION_DISABLED;
6011   coding.mode |= CODING_MODE_LAST_BLOCK;
6012   return (encodep
6013           ? encode_coding_string (string, &coding, 1)
6014           : decode_coding_string (string, &coding, 1));
6015 }
6016 \f
6017 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
6018   "Decode a Japanese character which has CODE in shift_jis encoding.\n\
6019 Return the corresponding character.")
6020   (code)
6021      Lisp_Object code;
6022 {
6023   unsigned char c1, c2, s1, s2;
6024   Lisp_Object val;
6025
6026   CHECK_NUMBER (code, 0);
6027   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
6028   if (s1 == 0)
6029     {
6030       if (s2 < 0x80)
6031         XSETFASTINT (val, s2);
6032       else if (s2 >= 0xA0 || s2 <= 0xDF)
6033         XSETFASTINT (val, MAKE_CHAR (charset_katakana_jisx0201, s2, 0));
6034       else
6035         error ("Invalid Shift JIS code: %x", XFASTINT (code));
6036     }
6037   else
6038     {
6039       if ((s1 < 0x80 || s1 > 0x9F && s1 < 0xE0 || s1 > 0xEF)
6040           || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC))
6041         error ("Invalid Shift JIS code: %x", XFASTINT (code));
6042       DECODE_SJIS (s1, s2, c1, c2);
6043       XSETFASTINT (val, MAKE_CHAR (charset_jisx0208, c1, c2));
6044     }
6045   return val;
6046 }
6047
6048 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
6049   "Encode a Japanese character CHAR to shift_jis encoding.\n\
6050 Return the corresponding code in SJIS.")
6051   (ch)
6052      Lisp_Object ch;
6053 {
6054   int charset, c1, c2, s1, s2;
6055   Lisp_Object val;
6056
6057   CHECK_NUMBER (ch, 0);
6058   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
6059   if (charset == CHARSET_ASCII)
6060     {
6061       val = ch;
6062     }
6063   else if (charset == charset_jisx0208
6064            && c1 > 0x20 && c1 < 0x7F && c2 > 0x20 && c2 < 0x7F)
6065     {
6066       ENCODE_SJIS (c1, c2, s1, s2);
6067       XSETFASTINT (val, (s1 << 8) | s2);
6068     }
6069   else if (charset == charset_katakana_jisx0201
6070            && c1 > 0x20 && c2 < 0xE0)
6071     {
6072       XSETFASTINT (val, c1 | 0x80);
6073     }
6074   else
6075     error ("Can't encode to shift_jis: %d", XFASTINT (ch));
6076   return val;
6077 }
6078
6079 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
6080   "Decode a Big5 character which has CODE in BIG5 coding system.\n\
6081 Return the corresponding character.")
6082   (code)
6083      Lisp_Object code;
6084 {
6085   int charset;
6086   unsigned char b1, b2, c1, c2;
6087   Lisp_Object val;
6088
6089   CHECK_NUMBER (code, 0);
6090   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
6091   if (b1 == 0)
6092     {
6093       if (b2 >= 0x80)
6094         error ("Invalid BIG5 code: %x", XFASTINT (code));
6095       val = code;
6096     }
6097   else
6098     {
6099       if ((b1 < 0xA1 || b1 > 0xFE)
6100           || (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE))
6101         error ("Invalid BIG5 code: %x", XFASTINT (code));
6102       DECODE_BIG5 (b1, b2, charset, c1, c2);
6103       XSETFASTINT (val, MAKE_CHAR (charset, c1, c2));
6104     }
6105   return val;
6106 }
6107
6108 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
6109   "Encode the Big5 character CHAR to BIG5 coding system.\n\
6110 Return the corresponding character code in Big5.")
6111   (ch)
6112      Lisp_Object ch;
6113 {
6114   int charset, c1, c2, b1, b2;
6115   Lisp_Object val;
6116
6117   CHECK_NUMBER (ch, 0);
6118   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
6119   if (charset == CHARSET_ASCII)
6120     {
6121       val = ch;
6122     }
6123   else if ((charset == charset_big5_1
6124             && (XFASTINT (ch) >= 0x250a1 && XFASTINT (ch) <= 0x271ec))
6125            || (charset == charset_big5_2
6126                && XFASTINT (ch) >= 0x290a1 && XFASTINT (ch) <= 0x2bdb2))
6127     {
6128       ENCODE_BIG5 (charset, c1, c2, b1, b2);
6129       XSETFASTINT (val, (b1 << 8) | b2);
6130     }
6131   else
6132     error ("Can't encode to Big5: %d", XFASTINT (ch));
6133   return val;
6134 }
6135 \f
6136 DEFUN ("set-terminal-coding-system-internal",
6137        Fset_terminal_coding_system_internal,
6138        Sset_terminal_coding_system_internal, 1, 1, 0, "")
6139   (coding_system)
6140      Lisp_Object coding_system;
6141 {
6142   CHECK_SYMBOL (coding_system, 0);
6143   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
6144   /* We had better not send unsafe characters to terminal.  */
6145   terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
6146   /* Characer composition should be disabled.  */
6147   terminal_coding.composing = COMPOSITION_DISABLED;
6148   terminal_coding.src_multibyte = 1;
6149   terminal_coding.dst_multibyte = 0;
6150   return Qnil;
6151 }
6152
6153 DEFUN ("set-safe-terminal-coding-system-internal",
6154        Fset_safe_terminal_coding_system_internal,
6155        Sset_safe_terminal_coding_system_internal, 1, 1, 0, "")
6156   (coding_system)
6157      Lisp_Object coding_system;
6158 {
6159   CHECK_SYMBOL (coding_system, 0);
6160   setup_coding_system (Fcheck_coding_system (coding_system),
6161                        &safe_terminal_coding);
6162   /* Characer composition should be disabled.  */
6163   safe_terminal_coding.composing = COMPOSITION_DISABLED;
6164   safe_terminal_coding.src_multibyte = 1;
6165   safe_terminal_coding.dst_multibyte = 0;
6166   return Qnil;
6167 }
6168
6169 DEFUN ("terminal-coding-system",
6170        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
6171   "Return coding system specified for terminal output.")
6172   ()
6173 {
6174   return terminal_coding.symbol;
6175 }
6176
6177 DEFUN ("set-keyboard-coding-system-internal",
6178        Fset_keyboard_coding_system_internal,
6179        Sset_keyboard_coding_system_internal, 1, 1, 0, "")
6180   (coding_system)
6181      Lisp_Object coding_system;
6182 {
6183   CHECK_SYMBOL (coding_system, 0);
6184   setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
6185   /* Characer composition should be disabled.  */
6186   keyboard_coding.composing = COMPOSITION_DISABLED;
6187   return Qnil;
6188 }
6189
6190 DEFUN ("keyboard-coding-system",
6191        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
6192   "Return coding system specified for decoding keyboard input.")
6193   ()
6194 {
6195   return keyboard_coding.symbol;
6196 }
6197
6198 \f
6199 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
6200        Sfind_operation_coding_system,  1, MANY, 0,
6201   "Choose a coding system for an operation based on the target name.\n\
6202 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).\n\
6203 DECODING-SYSTEM is the coding system to use for decoding\n\
6204 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
6205 for encoding (in case OPERATION does encoding).\n\
6206 \n\
6207 The first argument OPERATION specifies an I/O primitive:\n\
6208   For file I/O, `insert-file-contents' or `write-region'.\n\
6209   For process I/O, `call-process', `call-process-region', or `start-process'.\n\
6210   For network I/O, `open-network-stream'.\n\
6211 \n\
6212 The remaining arguments should be the same arguments that were passed\n\
6213 to the primitive.  Depending on which primitive, one of those arguments\n\
6214 is selected as the TARGET.  For example, if OPERATION does file I/O,\n\
6215 whichever argument specifies the file name is TARGET.\n\
6216 \n\
6217 TARGET has a meaning which depends on OPERATION:\n\
6218   For file I/O, TARGET is a file name.\n\
6219   For process I/O, TARGET is a process name.\n\
6220   For network I/O, TARGET is a service name or a port number\n\
6221 \n\
6222 This function looks up what specified for TARGET in,\n\
6223 `file-coding-system-alist', `process-coding-system-alist',\n\
6224 or `network-coding-system-alist' depending on OPERATION.\n\
6225 They may specify a coding system, a cons of coding systems,\n\
6226 or a function symbol to call.\n\
6227 In the last case, we call the function with one argument,\n\
6228 which is a list of all the arguments given to this function.")
6229   (nargs, args)
6230      int nargs;
6231      Lisp_Object *args;
6232 {
6233   Lisp_Object operation, target_idx, target, val;
6234   register Lisp_Object chain;
6235
6236   if (nargs < 2)
6237     error ("Too few arguments");
6238   operation = args[0];
6239   if (!SYMBOLP (operation)
6240       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
6241     error ("Invalid first arguement");
6242   if (nargs < 1 + XINT (target_idx))
6243     error ("Too few arguments for operation: %s",
6244            XSYMBOL (operation)->name->data);
6245   target = args[XINT (target_idx) + 1];
6246   if (!(STRINGP (target)
6247         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
6248     error ("Invalid %dth argument", XINT (target_idx) + 1);
6249
6250   chain = ((EQ (operation, Qinsert_file_contents)
6251             || EQ (operation, Qwrite_region))
6252            ? Vfile_coding_system_alist
6253            : (EQ (operation, Qopen_network_stream)
6254               ? Vnetwork_coding_system_alist
6255               : Vprocess_coding_system_alist));
6256   if (NILP (chain))
6257     return Qnil;
6258
6259   for (; CONSP (chain); chain = XCDR (chain))
6260     {
6261       Lisp_Object elt;
6262       elt = XCAR (chain);
6263
6264       if (CONSP (elt)
6265           && ((STRINGP (target)
6266                && STRINGP (XCAR (elt))
6267                && fast_string_match (XCAR (elt), target) >= 0)
6268               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
6269         {
6270           val = XCDR (elt);
6271           /* Here, if VAL is both a valid coding system and a valid
6272              function symbol, we return VAL as a coding system.  */
6273           if (CONSP (val))
6274             return val;
6275           if (! SYMBOLP (val))
6276             return Qnil;
6277           if (! NILP (Fcoding_system_p (val)))
6278             return Fcons (val, val);
6279           if (! NILP (Ffboundp (val)))
6280             {
6281               val = call1 (val, Flist (nargs, args));
6282               if (CONSP (val))
6283                 return val;
6284               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
6285                 return Fcons (val, val);
6286             }
6287           return Qnil;
6288         }
6289     }
6290   return Qnil;
6291 }
6292
6293 DEFUN ("update-coding-systems-internal",  Fupdate_coding_systems_internal,
6294        Supdate_coding_systems_internal, 0, 0, 0,
6295   "Update internal database for ISO2022 and CCL based coding systems.\n\
6296 When values of any coding categories are changed, you must\n\
6297 call this function")
6298   ()
6299 {
6300   int i;
6301
6302   for (i = CODING_CATEGORY_IDX_EMACS_MULE; i < CODING_CATEGORY_IDX_MAX; i++)
6303     {
6304       Lisp_Object val;
6305
6306       val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value;
6307       if (!NILP (val))
6308         {
6309           if (! coding_system_table[i])
6310             coding_system_table[i] = ((struct coding_system *)
6311                                       xmalloc (sizeof (struct coding_system)));
6312           setup_coding_system (val, coding_system_table[i]);
6313         }
6314       else if (coding_system_table[i])
6315         {
6316           xfree (coding_system_table[i]);
6317           coding_system_table[i] = NULL;
6318         }
6319     }
6320
6321   return Qnil;
6322 }
6323
6324 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
6325        Sset_coding_priority_internal, 0, 0, 0,
6326   "Update internal database for the current value of `coding-category-list'.\n\
6327 This function is internal use only.")
6328   ()
6329 {
6330   int i = 0, idx;
6331   Lisp_Object val;
6332
6333   val = Vcoding_category_list;
6334
6335   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
6336     {
6337       if (! SYMBOLP (XCAR (val)))
6338         break;
6339       idx = XFASTINT (Fget (XCAR (val), Qcoding_category_index));
6340       if (idx >= CODING_CATEGORY_IDX_MAX)
6341         break;
6342       coding_priorities[i++] = (1 << idx);
6343       val = XCDR (val);
6344     }
6345   /* If coding-category-list is valid and contains all coding
6346      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
6347      the following code saves Emacs from crashing.  */
6348   while (i < CODING_CATEGORY_IDX_MAX)
6349     coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
6350
6351   return Qnil;
6352 }
6353
6354 #endif /* emacs */
6355
6356 \f
6357 /*** 9. Post-amble ***/
6358
6359 void
6360 init_coding_once ()
6361 {
6362   int i;
6363
6364   /* Emacs' internal format specific initialize routine.  */
6365   for (i = 0; i <= 0x20; i++)
6366     emacs_code_class[i] = EMACS_control_code;
6367   emacs_code_class[0x0A] = EMACS_linefeed_code;
6368   emacs_code_class[0x0D] = EMACS_carriage_return_code;
6369   for (i = 0x21 ; i < 0x7F; i++)
6370     emacs_code_class[i] = EMACS_ascii_code;
6371   emacs_code_class[0x7F] = EMACS_control_code;
6372   for (i = 0x80; i < 0xFF; i++)
6373     emacs_code_class[i] = EMACS_invalid_code;
6374   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
6375   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
6376   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
6377   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
6378
6379   /* ISO2022 specific initialize routine.  */
6380   for (i = 0; i < 0x20; i++)
6381     iso_code_class[i] = ISO_control_0;
6382   for (i = 0x21; i < 0x7F; i++)
6383     iso_code_class[i] = ISO_graphic_plane_0;
6384   for (i = 0x80; i < 0xA0; i++)
6385     iso_code_class[i] = ISO_control_1;
6386   for (i = 0xA1; i < 0xFF; i++)
6387     iso_code_class[i] = ISO_graphic_plane_1;
6388   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
6389   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
6390   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
6391   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
6392   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
6393   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
6394   iso_code_class[ISO_CODE_ESC] = ISO_escape;
6395   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
6396   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
6397   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
6398
6399   setup_coding_system (Qnil, &keyboard_coding);
6400   setup_coding_system (Qnil, &terminal_coding);
6401   setup_coding_system (Qnil, &safe_terminal_coding);
6402   setup_coding_system (Qnil, &default_buffer_file_coding);
6403
6404   bzero (coding_system_table, sizeof coding_system_table);
6405
6406   bzero (ascii_skip_code, sizeof ascii_skip_code);
6407   for (i = 0; i < 128; i++)
6408     ascii_skip_code[i] = 1;
6409
6410 #if defined (MSDOS) || defined (WINDOWSNT)
6411   system_eol_type = CODING_EOL_CRLF;
6412 #else
6413   system_eol_type = CODING_EOL_LF;
6414 #endif
6415
6416   inhibit_pre_post_conversion = 0;
6417 }
6418
6419 #ifdef emacs
6420
6421 void
6422 syms_of_coding ()
6423 {
6424   Qtarget_idx = intern ("target-idx");
6425   staticpro (&Qtarget_idx);
6426
6427   Qcoding_system_history = intern ("coding-system-history");
6428   staticpro (&Qcoding_system_history);
6429   Fset (Qcoding_system_history, Qnil);
6430
6431   /* Target FILENAME is the first argument.  */
6432   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
6433   /* Target FILENAME is the third argument.  */
6434   Fput (Qwrite_region, Qtarget_idx, make_number (2));
6435
6436   Qcall_process = intern ("call-process");
6437   staticpro (&Qcall_process);
6438   /* Target PROGRAM is the first argument.  */
6439   Fput (Qcall_process, Qtarget_idx, make_number (0));
6440
6441   Qcall_process_region = intern ("call-process-region");
6442   staticpro (&Qcall_process_region);
6443   /* Target PROGRAM is the third argument.  */
6444   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
6445
6446   Qstart_process = intern ("start-process");
6447   staticpro (&Qstart_process);
6448   /* Target PROGRAM is the third argument.  */
6449   Fput (Qstart_process, Qtarget_idx, make_number (2));
6450
6451   Qopen_network_stream = intern ("open-network-stream");
6452   staticpro (&Qopen_network_stream);
6453   /* Target SERVICE is the fourth argument.  */
6454   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
6455
6456   Qcoding_system = intern ("coding-system");
6457   staticpro (&Qcoding_system);
6458
6459   Qeol_type = intern ("eol-type");
6460   staticpro (&Qeol_type);
6461
6462   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
6463   staticpro (&Qbuffer_file_coding_system);
6464
6465   Qpost_read_conversion = intern ("post-read-conversion");
6466   staticpro (&Qpost_read_conversion);
6467
6468   Qpre_write_conversion = intern ("pre-write-conversion");
6469   staticpro (&Qpre_write_conversion);
6470
6471   Qno_conversion = intern ("no-conversion");
6472   staticpro (&Qno_conversion);
6473
6474   Qundecided = intern ("undecided");
6475   staticpro (&Qundecided);
6476
6477   Qcoding_system_p = intern ("coding-system-p");
6478   staticpro (&Qcoding_system_p);
6479
6480   Qcoding_system_error = intern ("coding-system-error");
6481   staticpro (&Qcoding_system_error);
6482
6483   Fput (Qcoding_system_error, Qerror_conditions,
6484         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
6485   Fput (Qcoding_system_error, Qerror_message,
6486         build_string ("Invalid coding system"));
6487
6488   Qcoding_category = intern ("coding-category");
6489   staticpro (&Qcoding_category);
6490   Qcoding_category_index = intern ("coding-category-index");
6491   staticpro (&Qcoding_category_index);
6492
6493   Vcoding_category_table
6494     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
6495   staticpro (&Vcoding_category_table);
6496   {
6497     int i;
6498     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
6499       {
6500         XVECTOR (Vcoding_category_table)->contents[i]
6501           = intern (coding_category_name[i]);
6502         Fput (XVECTOR (Vcoding_category_table)->contents[i],
6503               Qcoding_category_index, make_number (i));
6504       }
6505   }
6506
6507   Qtranslation_table = intern ("translation-table");
6508   staticpro (&Qtranslation_table);
6509   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
6510
6511   Qtranslation_table_id = intern ("translation-table-id");
6512   staticpro (&Qtranslation_table_id);
6513
6514   Qtranslation_table_for_decode = intern ("translation-table-for-decode");
6515   staticpro (&Qtranslation_table_for_decode);
6516
6517   Qtranslation_table_for_encode = intern ("translation-table-for-encode");
6518   staticpro (&Qtranslation_table_for_encode);
6519
6520   Qsafe_chars = intern ("safe-chars");
6521   staticpro (&Qsafe_chars);
6522
6523   Qchar_coding_system = intern ("char-coding-system");
6524   staticpro (&Qchar_coding_system);
6525
6526   /* Intern this now in case it isn't already done.
6527      Setting this variable twice is harmless.
6528      But don't staticpro it here--that is done in alloc.c.  */
6529   Qchar_table_extra_slots = intern ("char-table-extra-slots");
6530   Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
6531   Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (1));
6532
6533   Qvalid_codes = intern ("valid-codes");
6534   staticpro (&Qvalid_codes);
6535
6536   Qemacs_mule = intern ("emacs-mule");
6537   staticpro (&Qemacs_mule);
6538
6539   Qraw_text = intern ("raw-text");
6540   staticpro (&Qraw_text);
6541
6542   defsubr (&Scoding_system_p);
6543   defsubr (&Sread_coding_system);
6544   defsubr (&Sread_non_nil_coding_system);
6545   defsubr (&Scheck_coding_system);
6546   defsubr (&Sdetect_coding_region);
6547   defsubr (&Sdetect_coding_string);
6548   defsubr (&Sfind_coding_systems_region_internal);
6549   defsubr (&Sdecode_coding_region);
6550   defsubr (&Sencode_coding_region);
6551   defsubr (&Sdecode_coding_string);
6552   defsubr (&Sencode_coding_string);
6553   defsubr (&Sdecode_sjis_char);
6554   defsubr (&Sencode_sjis_char);
6555   defsubr (&Sdecode_big5_char);
6556   defsubr (&Sencode_big5_char);
6557   defsubr (&Sset_terminal_coding_system_internal);
6558   defsubr (&Sset_safe_terminal_coding_system_internal);
6559   defsubr (&Sterminal_coding_system);
6560   defsubr (&Sset_keyboard_coding_system_internal);
6561   defsubr (&Skeyboard_coding_system);
6562   defsubr (&Sfind_operation_coding_system);
6563   defsubr (&Supdate_coding_systems_internal);
6564   defsubr (&Sset_coding_priority_internal);
6565
6566   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
6567     "List of coding systems.\n\
6568 \n\
6569 Do not alter the value of this variable manually.  This variable should be\n\
6570 updated by the functions `make-coding-system' and\n\
6571 `define-coding-system-alias'.");
6572   Vcoding_system_list = Qnil;
6573
6574   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
6575     "Alist of coding system names.\n\
6576 Each element is one element list of coding system name.\n\
6577 This variable is given to `completing-read' as TABLE argument.\n\
6578 \n\
6579 Do not alter the value of this variable manually.  This variable should be\n\
6580 updated by the functions `make-coding-system' and\n\
6581 `define-coding-system-alias'.");
6582   Vcoding_system_alist = Qnil;
6583
6584   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
6585     "List of coding-categories (symbols) ordered by priority.");
6586   {
6587     int i;
6588
6589     Vcoding_category_list = Qnil;
6590     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
6591       Vcoding_category_list
6592         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
6593                  Vcoding_category_list);
6594   }
6595
6596   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
6597     "Specify the coding system for read operations.\n\
6598 It is useful to bind this variable with `let', but do not set it globally.\n\
6599 If the value is a coding system, it is used for decoding on read operation.\n\
6600 If not, an appropriate element is used from one of the coding system alists:\n\
6601 There are three such tables, `file-coding-system-alist',\n\
6602 `process-coding-system-alist', and `network-coding-system-alist'.");
6603   Vcoding_system_for_read = Qnil;
6604
6605   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
6606     "Specify the coding system for write operations.\n\
6607 Programs bind this variable with `let', but you should not set it globally.\n\
6608 If the value is a coding system, it is used for encoding of output,\n\
6609 when writing it to a file and when sending it to a file or subprocess.\n\
6610 \n\
6611 If this does not specify a coding system, an appropriate element\n\
6612 is used from one of the coding system alists:\n\
6613 There are three such tables, `file-coding-system-alist',\n\
6614 `process-coding-system-alist', and `network-coding-system-alist'.\n\
6615 For output to files, if the above procedure does not specify a coding system,\n\
6616 the value of `buffer-file-coding-system' is used.");
6617   Vcoding_system_for_write = Qnil;
6618
6619   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
6620     "Coding system used in the latest file or process I/O.");
6621   Vlast_coding_system_used = Qnil;
6622
6623   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
6624     "*Non-nil means always inhibit code conversion of end-of-line format.\n\
6625 See info node `Coding Systems' and info node `Text and Binary' concerning\n\
6626 such conversion.");
6627   inhibit_eol_conversion = 0;
6628
6629   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
6630     "Non-nil means process buffer inherits coding system of process output.\n\
6631 Bind it to t if the process output is to be treated as if it were a file\n\
6632 read from some filesystem.");
6633   inherit_process_coding_system = 0;
6634
6635   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
6636     "Alist to decide a coding system to use for a file I/O operation.\n\
6637 The format is ((PATTERN . VAL) ...),\n\
6638 where PATTERN is a regular expression matching a file name,\n\
6639 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
6640 If VAL is a coding system, it is used for both decoding and encoding\n\
6641 the file contents.\n\
6642 If VAL is a cons of coding systems, the car part is used for decoding,\n\
6643 and the cdr part is used for encoding.\n\
6644 If VAL is a function symbol, the function must return a coding system\n\
6645 or a cons of coding systems which are used as above.\n\
6646 \n\
6647 See also the function `find-operation-coding-system'\n\
6648 and the variable `auto-coding-alist'.");
6649   Vfile_coding_system_alist = Qnil;
6650
6651   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
6652     "Alist to decide a coding system to use for a process I/O operation.\n\
6653 The format is ((PATTERN . VAL) ...),\n\
6654 where PATTERN is a regular expression matching a program name,\n\
6655 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
6656 If VAL is a coding system, it is used for both decoding what received\n\
6657 from the program and encoding what sent to the program.\n\
6658 If VAL is a cons of coding systems, the car part is used for decoding,\n\
6659 and the cdr part is used for encoding.\n\
6660 If VAL is a function symbol, the function must return a coding system\n\
6661 or a cons of coding systems which are used as above.\n\
6662 \n\
6663 See also the function `find-operation-coding-system'.");
6664   Vprocess_coding_system_alist = Qnil;
6665
6666   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
6667     "Alist to decide a coding system to use for a network I/O operation.\n\
6668 The format is ((PATTERN . VAL) ...),\n\
6669 where PATTERN is a regular expression matching a network service name\n\
6670 or is a port number to connect to,\n\
6671 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
6672 If VAL is a coding system, it is used for both decoding what received\n\
6673 from the network stream and encoding what sent to the network stream.\n\
6674 If VAL is a cons of coding systems, the car part is used for decoding,\n\
6675 and the cdr part is used for encoding.\n\
6676 If VAL is a function symbol, the function must return a coding system\n\
6677 or a cons of coding systems which are used as above.\n\
6678 \n\
6679 See also the function `find-operation-coding-system'.");
6680   Vnetwork_coding_system_alist = Qnil;
6681
6682   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
6683     "Coding system to use with system messages.");
6684   Vlocale_coding_system = Qnil;
6685
6686   /* The eol mnemonics are reset in startup.el system-dependently.  */
6687   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
6688     "*String displayed in mode line for UNIX-like (LF) end-of-line format.");
6689   eol_mnemonic_unix = build_string (":");
6690
6691   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
6692     "*String displayed in mode line for DOS-like (CRLF) end-of-line format.");
6693   eol_mnemonic_dos = build_string ("\\");
6694
6695   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
6696     "*String displayed in mode line for MAC-like (CR) end-of-line format.");
6697   eol_mnemonic_mac = build_string ("/");
6698
6699   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
6700     "*String displayed in mode line when end-of-line format is not yet determined.");
6701   eol_mnemonic_undecided = build_string (":");
6702
6703   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
6704     "*Non-nil enables character translation while encoding and decoding.");
6705   Venable_character_translation = Qt;
6706
6707   DEFVAR_LISP ("standard-translation-table-for-decode",
6708     &Vstandard_translation_table_for_decode,
6709     "Table for translating characters while decoding.");
6710   Vstandard_translation_table_for_decode = Qnil;
6711
6712   DEFVAR_LISP ("standard-translation-table-for-encode",
6713     &Vstandard_translation_table_for_encode,
6714     "Table for translationg characters while encoding.");
6715   Vstandard_translation_table_for_encode = Qnil;
6716
6717   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
6718     "Alist of charsets vs revision numbers.\n\
6719 While encoding, if a charset (car part of an element) is found,\n\
6720 designate it with the escape sequence identifing revision (cdr part of the element).");
6721   Vcharset_revision_alist = Qnil;
6722
6723   DEFVAR_LISP ("default-process-coding-system",
6724                &Vdefault_process_coding_system,
6725     "Cons of coding systems used for process I/O by default.\n\
6726 The car part is used for decoding a process output,\n\
6727 the cdr part is used for encoding a text to be sent to a process.");
6728   Vdefault_process_coding_system = Qnil;
6729
6730   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
6731     "Table of extra Latin codes in the range 128..159 (inclusive).\n\
6732 This is a vector of length 256.\n\
6733 If Nth element is non-nil, the existence of code N in a file\n\
6734 \(or output of subprocess) doesn't prevent it to be detected as\n\
6735 a coding system of ISO 2022 variant which has a flag\n\
6736 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file\n\
6737 or reading output of a subprocess.\n\
6738 Only 128th through 159th elements has a meaning.");
6739   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
6740
6741   DEFVAR_LISP ("select-safe-coding-system-function",
6742                &Vselect_safe_coding_system_function,
6743     "Function to call to select safe coding system for encoding a text.\n\
6744 \n\
6745 If set, this function is called to force a user to select a proper\n\
6746 coding system which can encode the text in the case that a default\n\
6747 coding system used in each operation can't encode the text.\n\
6748 \n\
6749 The default value is `select-safe-coding-system' (which see).");
6750   Vselect_safe_coding_system_function = Qnil;
6751
6752   DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table,
6753     "Char-table containing safe coding systems of each characters.\n\
6754 Each element doesn't include such generic coding systems that can\n\
6755 encode any characters.   They are in the first extra slot.");
6756   Vchar_coding_system_table = Fmake_char_table (Qchar_coding_system, Qnil);
6757
6758   DEFVAR_BOOL ("inhibit-iso-escape-detection",
6759                &inhibit_iso_escape_detection,
6760     "If non-nil, Emacs ignores ISO2022's escape sequence on code detection.\n\
6761 \n\
6762 By default, on reading a file, Emacs tries to detect how the text is\n\
6763 encoded.  This code detection is sensitive to escape sequences.  If\n\
6764 the sequence is valid as ISO2022, the code is determined as one of\n\
6765 the ISO2022 encodings, and the file is decoded by the corresponding\n\
6766 coding system (e.g. `iso-2022-7bit').\n\
6767 \n\
6768 However, there may be a case that you want to read escape sequences in\n\
6769 a file as is.  In such a case, you can set this variable to non-nil.\n\
6770 Then, as the code detection ignores any escape sequences, no file is\n\
6771 detected as encoded in some ISO2022 encoding.  The result is that all\n\
6772 escape sequences become visible in a buffer.\n\
6773 \n\
6774 The default value is nil, and it is strongly recommended not to change\n\
6775 it.  That is because many Emacs Lisp source files that contain\n\
6776 non-ASCII characters are encoded by the coding system `iso-2022-7bit'\n\
6777 in Emacs's distribution, and they won't be decoded correctly on\n\
6778 reading if you suppress escape sequence detection.\n\
6779 \n\
6780 The other way to read escape sequences in a file without decoding is\n\
6781 to explicitly specify some coding system that doesn't use ISO2022's\n\
6782 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].");
6783   inhibit_iso_escape_detection = 0;
6784 }
6785
6786 char *
6787 emacs_strerror (error_number)
6788      int error_number;
6789 {
6790   char *str;
6791
6792   synchronize_system_messages_locale ();
6793   str = strerror (error_number);
6794
6795   if (! NILP (Vlocale_coding_system))
6796     {
6797       Lisp_Object dec = code_convert_string_norecord (build_string (str),
6798                                                       Vlocale_coding_system,
6799                                                       0);
6800       str = (char *) XSTRING (dec)->data;
6801     }
6802
6803   return str;
6804 }
6805
6806 #endif /* emacs */
6807