src/coding.c

   1 /* Coding system handler (conversion, detection, and etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /*** TABLE OF CONTENTS ***
  23
  24   0. General comments
  25   1. Preamble
  26   2. Emacs' internal format (emacs-mule) handlers
  27   3. ISO2022 handlers
  28   4. Shift-JIS and BIG5 handlers
  29   5. CCL handlers
  30   6. End-of-line handlers
  31   7. C library functions
  32   8. Emacs Lisp library functions
  33   9. Post-amble
  34
  35 */
  36
  37 /*** 0. General comments ***/
  38
  39
  40 /*** GENERAL NOTE on CODING SYSTEM ***
  41
  42   Coding system is an encoding mechanism of one or more character
  43   sets.  Here's a list of coding systems which Emacs can handle.  When
  44   we say "decode", it means converting some other coding system to
  45   Emacs' internal format (emacs-mule), and when we say "encode",
  46   it means converting the coding system emacs-mule to some other
  47   coding system.
  48
  49   0. Emacs' internal format (emacs-mule)
  50
  51   Emacs itself holds a multi-lingual character in a buffer and a string
  52   in a special format.  Details are described in section 2.
  53
  54   1. ISO2022
  55
  56   The most famous coding system for multiple character sets.  X's
  57   Compound Text, various EUCs (Extended Unix Code), and coding
  58   systems used in Internet communication such as ISO-2022-JP are
  59   all variants of ISO2022.  Details are described in section 3.
  60
  61   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  62
  63   A coding system to encode character sets: ASCII, JISX0201, and
  64   JISX0208.  Widely used for PC's in Japan.  Details are described in
  65   section 4.
  66
  67   3. BIG5
  68
  69   A coding system to encode character sets: ASCII and Big5.  Widely
  70   used by Chinese (mainly in Taiwan and Hong Kong).  Details are
  71   described in section 4.  In this file, when we write "BIG5"
  72   (all uppercase), we mean the coding system, and when we write
  73   "Big5" (capitalized), we mean the character set.
  74
  75   4. Raw text
  76
  77   A coding system for a text containing random 8-bit code.  Emacs does
  78   no code conversion on such a text except for end-of-line format.
  79
  80   5. Other
  81
  82   If a user wants to read/write a text encoded in a coding system not
  83   listed above, he can supply a decoder and an encoder for it in CCL
  84   (Code Conversion Language) programs.  Emacs executes the CCL program
  85   while reading/writing.
  86
  87   Emacs represents a coding system by a Lisp symbol that has a property
  88   `coding-system'.  But, before actually using the coding system, the
  89   information about it is set in a structure of type `struct
  90   coding_system' for rapid processing.  See section 6 for more details.
  91
  92 */
  93
  94 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  95
  96   How end-of-line of a text is encoded depends on a system.  For
  97   instance, Unix's format is just one byte of `line-feed' code,
  98   whereas DOS's format is two-byte sequence of `carriage-return' and
  99   `line-feed' codes.  MacOS's format is usually one byte of
 100   `carriage-return'.
 101
 102   Since text characters encoding and end-of-line encoding are
 103   independent, any coding system described above can take
 104   any format of end-of-line.  So, Emacs has information of format of
 105   end-of-line in each coding-system.  See section 6 for more details.
 106
 107 */
 108
 109 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 110
 111   These functions check if a text between SRC and SRC_END is encoded
 112   in the coding system category XXX.  Each returns an integer value in
 113   which appropriate flag bits for the category XXX is set.  The flag
 114   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 115   template of these functions.  */
 116 #if 0
 117 int
 118 detect_coding_emacs_mule (src, src_end)
 119      unsigned char *src, *src_end;
 120 {
 121   ...
 122 }
 123 #endif
 124
 125 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 126
 127   These functions decode SRC_BYTES length of unibyte text at SOURCE
 128   encoded in CODING to Emacs' internal format.  The resulting
 129   multibyte text goes to a place pointed to by DESTINATION, the length
 130   of which should not exceed DST_BYTES.
 131
 132   These functions set the information of original and decoded texts in
 133   the members produced, produced_char, consumed, and consumed_char of
 134   the structure *CODING.  They also set the member result to one of
 135   CODING_FINISH_XXX indicating how the decoding finished.
 136
 137   DST_BYTES zero means that source area and destination area are
 138   overlapped, which means that we can produce decoded text until it
 139   reaches the head of the not-yet-decoded source text.
 140
 141   Below is a template of these functions.  */
 142 #if 0
 143 static void
 144 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 145      struct coding_system *coding;
 146      unsigned char *source, *destination;
 147      int src_bytes, dst_bytes;
 148 {
 149   ...
 150 }
 151 #endif
 152
 153 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 154
 155   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 156   internal multibyte format to CODING.  The resulting unibyte text
 157   goes to a place pointed to by DESTINATION, the length of which
 158   should not exceed DST_BYTES.
 159
 160   These functions set the information of original and encoded texts in
 161   the members produced, produced_char, consumed, and consumed_char of
 162   the structure *CODING.  They also set the member result to one of
 163   CODING_FINISH_XXX indicating how the encoding finished.
 164
 165   DST_BYTES zero means that source area and destination area are
 166   overlapped, which means that we can produce encoded text until it
 167   reaches the head of the not-yet-encoded source text.
 168
 169   Below is a template of these functions.  */
 170 #if 0
 171 static void
 172 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 173      struct coding_system *coding;
 174      unsigned char *source, *destination;
 175      int src_bytes, dst_bytes;
 176 {
 177   ...
 178 }
 179 #endif
 180
 181 /*** COMMONLY USED MACROS ***/
 182
 183 /* The following two macros ONE_MORE_BYTE and TWO_MORE_BYTES safely
 184    get one and two bytes from the source text respectively.
 185    If there are not enough bytes in the source, they jump to
 186    `label_end_of_loop'.  The caller should set variables `coding',
 187    `src' and `src_end' to appropriate pointer in advance.  These
 188    macros are called from decoding routines `decode_coding_XXX', thus
 189    it is assumed that the source text is unibyte.  */
 190
 191 #define ONE_MORE_BYTE(c1)                                       \
 192   do {                                                          \
 193     if (src >= src_end)     /* No byte left in the source.  */  \
 194       {                                                         \
 195         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 196         goto label_end_of_loop;                                 \
 197       }                                                         \
 198     c1 = *src++;            /* Fetch the byte, advance `src'. */ \
 199   } while (0)
 200
 201 #define TWO_MORE_BYTES(c1, c2)                                  \
 202   do {                                                          \
 203     if (src + 1 >= src_end) /* Fewer than two bytes are left; */ \
 204       {                     /* `src' is left where it was.  */  \
 205         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 206         goto label_end_of_loop;                                 \
 207       }                                                         \
 208     c1 = *src++;                                                \
 209     c2 = *src++;                                                \
 210   } while (0)
 211
 212
 213 /* Set C to the next character at the source text pointed by `src'.
 214    If there are not enough characters in the source, jump to
 215    `label_end_of_loop'.  The caller should set variables `coding'
 216    `src', `src_end', and `translation_table' to appropriate pointers
 217    in advance.  This macro is used in encoding routines
 218    `encode_coding_XXX', thus it assumes that the source text is in
 219    multibyte form except for 8-bit characters.  8-bit characters are
 220    in multibyte form if coding->src_multibyte is nonzero, else they
 221    are represented by a single byte.  */
 222
 223 #define ONE_MORE_CHAR(c)                                        \
 224   do {                                                          \
 225     int len = src_end - src;  /* Bytes left in the source.  */  \
 226     int bytes;                /* Bytes consumed for C.  */      \
 227     if (len <= 0)                                               \
 228       {                                                         \
 229         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 230         goto label_end_of_loop;                                 \
 231       }                                                         \
 232     if (coding->src_multibyte                                   \
 233         || UNIBYTE_STR_AS_MULTIBYTE_P (src, len, bytes))        \
 234       c = STRING_CHAR_AND_LENGTH (src, len, bytes);             \
 235     else                      /* Take the single byte as is.  */ \
 236       c = *src, bytes = 1;                                      \
 237     if (!NILP (translation_table)) /* Translate only if given.  */ \
 238       c = translate_char (translation_table, c, -1, 0, 0);      \
 239     src += bytes;                                               \
 240   } while (0)
 241
 242
 243 /* Produce a multibyte form of character C to `dst'.  Jump to
 244    `label_end_of_loop' if there's not enough space at `dst'.
 245
 246    If we are now in the middle of composition sequence, the decoded
 247    character may be ALTCHAR (for the current composition).  In that
 248    case, the character goes to coding->cmp_data->data instead of
 249    `dst'.
 250
 251    This macro is used in decoding routines.  */
 252
 253 #define EMIT_CHAR(c)                                                    \
 254   do {                                                                  \
 255     if (! COMPOSING_P (coding)                                          \
 256         || coding->composing == COMPOSITION_RELATIVE                    \
 257         || coding->composing == COMPOSITION_WITH_RULE)                  \
 258       { /* C is stored at `dst'; the limit is `src' if dst_bytes is 0.  */ \
 259         int bytes = CHAR_BYTES (c);                                     \
 260         if ((dst + bytes) > (dst_bytes ? dst_end : src))                \
 261           {                                                             \
 262             coding->result = CODING_FINISH_INSUFFICIENT_DST;            \
 263             goto label_end_of_loop;                                     \
 264           }                                                             \
 265         dst += CHAR_STRING (c, dst);                                    \
 266         coding->produced_char++;                                        \
 267       }                                                                 \
 268     /* While composing (except COMPOSITION_RELATIVE), also record C.  */ \
 269     if (COMPOSING_P (coding)                                            \
 270         && coding->composing != COMPOSITION_RELATIVE)                   \
 271       {                                                                 \
 272         CODING_ADD_COMPOSITION_COMPONENT (coding, c);                   \
 273         coding->composition_rule_follows                                \
 274           = coding->composing != COMPOSITION_WITH_ALTCHARS;             \
 275       }                                                                 \
 276   } while (0)
 277
 278
 279 #define EMIT_ONE_BYTE(c)  /* Store the byte C at `dst'.  */     \
 280   do {                                                          \
 281     if (dst >= (dst_bytes ? dst_end : src)) /* No room left.  */ \
 282       {                                                         \
 283         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 284         goto label_end_of_loop;                                 \
 285       }                                                         \
 286     *dst++ = c;                                                 \
 287   } while (0)
 288
 289 #define EMIT_TWO_BYTES(c1, c2)  /* Store C1 then C2 at `dst'.  */ \
 290   do {                                                          \
 291     if (dst + 2 > (dst_bytes ? dst_end : src))                  \
 292       { /* Not enough room for both; nothing is stored.  */     \
 293         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 294         goto label_end_of_loop;                                 \
 295       }                                                         \
 296     *dst++ = c1, *dst++ = c2;                                   \
 297   } while (0)
 298
 299 #define EMIT_BYTES(from, to)  /* Copy bytes FROM up to TO to `dst'.  */ \
 300   do {                                                          \
 301     if (dst + (to - from) > (dst_bytes ? dst_end : src))        \
 302       { /* The whole run must fit; otherwise nothing is copied.  */ \
 303         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 304         goto label_end_of_loop;                                 \
 305       }                                                         \
 306     while (from < to)       /* FROM itself is advanced to TO.  */ \
 307       *dst++ = *from++;                                         \
 308   } while (0)
 309
 310 \f
 311 /*** 1. Preamble ***/
 312
 313 #ifdef emacs
 314 #include <config.h>
 315 #endif
 316
 317 #include <stdio.h>
 318
 319 #ifdef emacs
 320
 321 #include "lisp.h"
 322 #include "buffer.h"
 323 #include "charset.h"
 324 #include "composite.h"
 325 #include "ccl.h"
 326 #include "coding.h"
 327 #include "window.h"
 328
 329 #else  /* not emacs */
 330
 331 #include "mulelib.h"
 332
 333 #endif /* not emacs */
 334
 335 Lisp_Object Qcoding_system, Qeol_type;  /* Qcoding_system: property holding a coding system's spec (see coding_safe_chars).  */
 336 Lisp_Object Qbuffer_file_coding_system;
 337 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 338 Lisp_Object Qno_conversion, Qundecided;
 339 Lisp_Object Qcoding_system_history;
 340 Lisp_Object Qsafe_chars;  /* Plist key read by coding_safe_chars.  */
 341 Lisp_Object Qvalid_codes;
 342
 343 extern Lisp_Object Qinsert_file_contents, Qwrite_region;  /* Only declared here.  */
 344 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 345 Lisp_Object Qstart_process, Qopen_network_stream;
 346 Lisp_Object Qtarget_idx;
 347
 348 Lisp_Object Vselect_safe_coding_system_function;
 349
 350 /* Mnemonic string for each format of end-of-line.  */
 351 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 352 /* Mnemonic string to indicate format of end-of-line is not yet
 353    decided.  */
 354 Lisp_Object eol_mnemonic_undecided;
 355
 356 /* Format of end-of-line decided by system.  This is CODING_EOL_LF on
 357    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
 358 int system_eol_type;
 359
 360 #ifdef emacs
 361
 362 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 363
 364 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 365
 366 /* Coding system emacs-mule and raw-text are for converting only
 367    end-of-line format.  */
 368 Lisp_Object Qemacs_mule, Qraw_text;
 369
 370 /* Coding-systems are handed between Emacs Lisp programs and C internal
 371    routines by the following three variables.  */
 372 /* Coding-system for reading files and receiving data from process.  */
 373 Lisp_Object Vcoding_system_for_read;
 374 /* Coding-system for writing files and sending data to process.  */
 375 Lisp_Object Vcoding_system_for_write;
 376 /* Coding-system actually used in the latest I/O.  */
 377 Lisp_Object Vlast_coding_system_used;
 378
 379 /* A vector of length 256 which contains information about special
 380    Latin codes (especially for dealing with Microsoft codes).  */
 381 Lisp_Object Vlatin_extra_code_table;
 382
 383 /* Flag to inhibit code conversion of end-of-line format.  */
 384 int inhibit_eol_conversion;
 385
 386 /* Flag to inhibit ISO2022 escape sequence detection.  */
 387 int inhibit_iso_escape_detection;
 388
 389 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 390 int inherit_process_coding_system;
 391
 392 /* Coding system to be used to encode text for terminal display.  */
 393 struct coding_system terminal_coding;
 394
 395 /* Coding system to be used to encode text for terminal display when
 396    terminal coding system is nil.  */
 397 struct coding_system safe_terminal_coding;
 398
 399 /* Coding system of what is sent from terminal keyboard.  */
 400 struct coding_system keyboard_coding;
 401
 402 /* Default coding system to be used to write a file.  */
 403 struct coding_system default_buffer_file_coding;
 404
 405 Lisp_Object Vfile_coding_system_alist;
 406 Lisp_Object Vprocess_coding_system_alist;
 407 Lisp_Object Vnetwork_coding_system_alist;
 408
 409 Lisp_Object Vlocale_coding_system;
 410
 411 #endif /* emacs */
 412
 413 Lisp_Object Qcoding_category, Qcoding_category_index;
 414
 415 /* List of symbols `coding-category-xxx' ordered by priority.  */
 416 Lisp_Object Vcoding_category_list;
 417
 418 /* Table of coding categories (Lisp symbols).  */
 419 Lisp_Object Vcoding_category_table;
 420
 421 /* Symbol name of each coding category (presumably indexed by CODING_CATEGORY_IDX_XXX; confirm in coding.h).  */
 422 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
 423   "coding-category-emacs-mule",
 424   "coding-category-sjis",
 425   "coding-category-iso-7",
 426   "coding-category-iso-7-tight",
 427   "coding-category-iso-8-1",
 428   "coding-category-iso-8-2",
 429   "coding-category-iso-7-else",
 430   "coding-category-iso-8-else",
 431   "coding-category-ccl",
 432   "coding-category-big5",
 433   "coding-category-utf-8",
 434   "coding-category-utf-16-be",
 435   "coding-category-utf-16-le",
 436   "coding-category-raw-text",
 437   "coding-category-binary"
 438 };
 439
 440 /* Table of pointers to coding systems corresponding to each coding
 441    categories.  */
 442 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
 443
 444 /* Table of coding category masks.  The Nth element is the mask of the
 445    coding category whose priority is Nth.  */
 446 static
 447 int coding_priorities[CODING_CATEGORY_IDX_MAX];
 448
 449 /* Flag to tell if we look up translation table on character code
 450    conversion.  */
 451 Lisp_Object Venable_character_translation;
 452 /* Standard translation table to look up on decoding (reading).  */
 453 Lisp_Object Vstandard_translation_table_for_decode;
 454 /* Standard translation table to look up on encoding (writing).  */
 455 Lisp_Object Vstandard_translation_table_for_encode;
 456
 457 Lisp_Object Qtranslation_table;
 458 Lisp_Object Qtranslation_table_id;
 459 Lisp_Object Qtranslation_table_for_decode;
 460 Lisp_Object Qtranslation_table_for_encode;
 461
 462 /* Alist of charsets vs revision number.  */
 463 Lisp_Object Vcharset_revision_alist;
 464
 465 /* Default coding systems used for process I/O.  */
 466 Lisp_Object Vdefault_process_coding_system;
 467
 468 /* Global flag to tell that we can't call post-read-conversion and
 469    pre-write-conversion functions.  Usually the value is zero, but it
 470    is set to 1 temporarily while such functions are running.  This is
 471    to avoid infinite recursive call.  */
 472 static int inhibit_pre_post_conversion;
 473
 474 /* Char-table containing safe coding systems of each character.  */
 475 Lisp_Object Vchar_coding_system_table;
 476 Lisp_Object Qchar_coding_system;
 477
 478 /* Return the `safe-chars' property of coding system CODING if it is a
 479    char-table, otherwise return t.  Don't check validity of CODING.  */
 480
 481 Lisp_Object
 482 coding_safe_chars (coding)
 483      struct coding_system *coding;
 484 {
 485   Lisp_Object coding_spec, plist, safe_chars;
 486   /* Element 3 of the coding spec vector is used as a property list.  */
 487   coding_spec = Fget (coding->symbol, Qcoding_system);
 488   plist = XVECTOR (coding_spec)->contents[3];
 489   safe_chars = Fplist_get (plist, Qsafe_chars);
 490   return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt);
 491 }
 492
 493 #define CODING_SAFE_CHAR_P(safe_chars, c) /* True if SAFE_CHARS is t or has a non-nil entry for C.  */ \
 494   (EQ (safe_chars, Qt) || !NILP (CHAR_TABLE_REF (safe_chars, c)))
 495
 496 \f
 497 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 498
 499 /* Emacs' internal format for encoding multiple character sets is a
 500    kind of multi-byte encoding, i.e. characters are encoded by
 501    variable-length sequences of one-byte codes.
 502
 503    ASCII characters and control characters (e.g. `tab', `newline') are
 504    represented by one-byte sequences which are their ASCII codes, in
 505    the range 0x00 through 0x7F.
 506
 507    8-bit characters of the range 0x80..0x9F are represented by
 508    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
 509    code + 0x20).
 510
 511    8-bit characters of the range 0xA0..0xFF are represented by
 512    one-byte sequences which are their 8-bit code.
 513
 514    The other characters are represented by a sequence of `base
 515    leading-code', optional `extended leading-code', and one or two
 516    `position-code's.  The length of the sequence is determined by the
 517    base leading-code.  Leading-code takes the range 0x80 through 0x9F,
 518    whereas extended leading-code and position-code take the range 0xA0
 519    through 0xFF.  See `charset.h' for more details about leading-code
 520    and position-code.
 521
 522    --- CODE RANGE of Emacs' internal format ---
 523    character set        range
 524    -------------        -----
 525    ascii                0x00..0x7F
 526    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
 527    eight-bit-graphic    0xA0..0xFF
 528    ELSE                 0x81..0x9F + [0xA0..0xFF]+
 529    ---------------------------------------------
 530
 531   */
 532
 533 enum emacs_code_class_type emacs_code_class[256];  /* One entry per byte value; presumably the class of each byte, filled in elsewhere (TODO confirm).  */
 534
 535 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 536    Check if a text is encoded in Emacs' internal format.  If it is,
 537    return CODING_CATEGORY_MASK_EMACS_MULE, else return 0.  */
 538
 539 int
 540 detect_coding_emacs_mule (src, src_end)
 541       unsigned char *src, *src_end;
 542 {
 543   unsigned char c;
 544   int composing = 0;
 545   /* ONE_MORE_BYTE stores into `coding->result'; give it a scratch target.  */
 546   struct coding_system dummy_coding;
 547   struct coding_system *coding = &dummy_coding;
 548   /* Leave via `return 0' or via ONE_MORE_BYTE's jump at end of source.  */
 549   while (1)
 550     {
 551       ONE_MORE_BYTE (c);
 552       /* In composing state, C is rewritten (or the state ended) first.  */
 553       if (composing)
 554         {
 555           if (c < 0xA0)
 556             composing = 0;
 557           else if (c == 0xA0)
 558             {
 559               ONE_MORE_BYTE (c);
 560               c &= 0x7F;
 561             }
 562           else
 563             c -= 0x20;
 564         }
 565       /* ESC, SI and SO make the text fail the emacs-mule check.  */
 566       if (c < 0x20)
 567         {
 568           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
 569             return 0;
 570         }
 571       else if (c >= 0x80 && c < 0xA0)
 572         {
 573           if (c == 0x80)
 574             /* Old leading code for a composite character.  */
 575             composing = 1;
 576           else
 577             {
 578               unsigned char *src_base = src - 1;
 579               int bytes;
 580               /* Reject unless a multibyte form starts at this byte; else skip it.  */
 581               if (!UNIBYTE_STR_AS_MULTIBYTE_P (src_base, src_end - src_base,
 582                                                bytes))
 583                 return 0;
 584               src = src_base + bytes;
 585             }
 586         }
 587     }
 588  label_end_of_loop:
 589   return CODING_CATEGORY_MASK_EMACS_MULE;
 590 }
 591
 592
 593 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
 594
 595 static void
 596 decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
 597      struct coding_system *coding;
 598      unsigned char *source, *destination;
 599      int src_bytes, dst_bytes;
 600 {
 601   unsigned char *src = source;
 602   unsigned char *src_end = source + src_bytes;
 603   unsigned char *dst = destination;
 604   unsigned char *dst_end = destination + dst_bytes;
 605   /* SRC_BASE remembers the start position in source in each loop.
 606      The loop will be exited when there's not enough source code, or
 607      when there's not enough destination area to produce a
 608      character.  */
 609   unsigned char *src_base;
 610
 611   coding->produced_char = 0;
 612   while ((src_base = src) < src_end)
 613     {
 614       unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p;
 615       int bytes;
 616       /* CR and LF get end-of-line handling; other bytes are copied below.  */
 617       if (*src == '\r')
 618         {
 619           /* Start from the CR itself so that C is defined (and the CR is
 620              passed through) when eol_type is neither CR nor CRLF.  */
 621           int c = *src++;
 622           if (coding->eol_type == CODING_EOL_CR)
 623             c = '\n';
 624           else if (coding->eol_type == CODING_EOL_CRLF)
 625             {
 626               ONE_MORE_BYTE (c);
 627               if (c != '\n')
 628                 {
 629                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
 630                     {
 631                       coding->result = CODING_FINISH_INCONSISTENT_EOL;
 632                       goto label_end_of_loop;
 633                     }
 634                   src--;        /* Reprocess the byte after the lone CR.  */
 635                   c = '\r';
 636                 }
 637             }
 638           *dst++ = c;   /* NOTE(review): stored without a destination-space check.  */
 639           coding->produced_char++;
 640           continue;
 641         }
 642       else if (*src == '\n')
 643         {
 644           if ((coding->eol_type == CODING_EOL_CR
 645                || coding->eol_type == CODING_EOL_CRLF)
 646               && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
 647             {
 648               coding->result = CODING_FINISH_INCONSISTENT_EOL;
 649               goto label_end_of_loop;
 650             }
 651           *dst++ = *src++;      /* NOTE(review): also unchecked.  */
 652           coding->produced_char++;
 653           continue;
 654         }
 655       else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
 656         {
 657           p = src;              /* Copy the BYTES-long form unchanged.  */
 658           src += bytes;
 659         }
 660       else
 661         {
 662           bytes = CHAR_STRING (*src, tmp);      /* Build the form of this byte in TMP.  */
 663           p = tmp;
 664           src++;
 665         }
 666       if (dst + bytes >= (dst_bytes ? dst_end : src))   /* Limit is `src' when DST_BYTES is 0.  */
 667         {
 668           coding->result = CODING_FINISH_INSUFFICIENT_DST;
 669           break;
 670         }
 671       while (bytes--) *dst++ = *p++;
 672       coding->produced_char++;
 673     }
 674  label_end_of_loop:
 675   coding->consumed = coding->consumed_char = src_base - source;  /* Up to the last fully processed character.  */
 676   coding->produced = dst - destination;
 677 }
 678
 679 #define encode_coding_emacs_mule(coding, source, destination, src_bytes, dst_bytes) \
 680   encode_eol (coding, source, destination, src_bytes, dst_bytes)  /* Encoding to emacs-mule is delegated entirely to encode_eol.  */
 681
 682
 683 \f
 684 /*** 3. ISO2022 handlers ***/
 685
 686 /* The following note describes the coding system ISO2022 briefly.
 687    Since the intention of this note is to help understand the
 688    functions in this file, some parts are NOT ACCURATE or OVERLY
 689    SIMPLIFIED.  For thorough understanding, please refer to the
 690    original document of ISO2022.
 691
 692    ISO2022 provides many mechanisms to encode several character sets
 693    in 7-bit and 8-bit environments.  For 7-bit environments, all text
 694    is encoded using bytes less than 128.  This may make the encoded
 695    text a little bit longer, but the text passes more easily through
 696    several gateways, some of which strip off MSB (Most Significant Bit).
 697
 698    There are two kinds of character sets: control character set and
 699    graphic character set.  The former contains control characters such
 700    as `newline' and `escape' to provide control functions (control
 701    functions are also provided by escape sequences).  The latter
 702    contains graphic characters such as 'A' and '-'.  Emacs recognizes
 703    two control character sets and many graphic character sets.
 704
 705    Graphic character sets are classified into one of the following
 706    four classes, according to the number of bytes (DIMENSION) and
 707    number of characters in one dimension (CHARS) of the set:
 708    - DIMENSION1_CHARS94
 709    - DIMENSION1_CHARS96
 710    - DIMENSION2_CHARS94
 711    - DIMENSION2_CHARS96
 712
 713    In addition, each character set is assigned an identification tag,
 714    unique for each set, called "final character" (denoted as <F>
 715    hereafter).  The <F> of each character set is decided by ECMA(*)
 716    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
 717    (0x30..0x3F are for private use only).
 718
 719    Note (*): ECMA = European Computer Manufacturers Association
 720
 721    Here are examples of graphic character set [NAME(<F>)]:
 722         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
 723         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
 724         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
 725         o DIMENSION2_CHARS96 -- none for the moment
 726
 727    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
 728         C0 [0x00..0x1F] -- control character plane 0
 729         GL [0x20..0x7F] -- graphic character plane 0
 730         C1 [0x80..0x9F] -- control character plane 1
 731         GR [0xA0..0xFF] -- graphic character plane 1
 732
 733    A control character set is directly designated and invoked to C0 or
 734    C1 by an escape sequence.  The most common case is that:
 735    - ISO646's  control character set is designated/invoked to C0, and
 736    - ISO6429's control character set is designated/invoked to C1,
 737    and usually these designations/invocations are omitted in encoded
 738    text.  In a 7-bit environment, only C0 can be used, and a control
 739    character for C1 is encoded by an appropriate escape sequence to
 740    fit into the environment.  All control characters for C1 are
 741    defined to have corresponding escape sequences.
 742
 743    A graphic character set is at first designated to one of four
 744    graphic registers (G0 through G3), then these graphic registers are
 745    invoked to GL or GR.  These designations and invocations can be
 746    done independently.  The most common case is that G0 is invoked to
 747    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
 748    these invocations and designations are omitted in encoded text.
 749    In a 7-bit environment, only GL can be used.
 750
 751    When a graphic character set of CHARS94 is invoked to GL, codes
 752    0x20 and 0x7F of the GL area work as control characters SPACE and
 753    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
 754    be used.
 755
 756    There are two ways of invocation: locking-shift and single-shift.
 757    With locking-shift, the invocation lasts until the next different
 758    invocation, whereas with single-shift, the invocation affects the
 759    following character only and doesn't affect the locking-shift
 760    state.  Invocations are done by the following control characters or
 761    escape sequences:
 762
 763    ----------------------------------------------------------------------
 764    abbrev  function                  cntrl escape seq   description
 765    ----------------------------------------------------------------------
 766    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
 767    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
 768    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
 769    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
 770    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
 771    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
 772    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
 773    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
 774    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
 775    ----------------------------------------------------------------------
 776    (*) These are not used by any known coding system.
 777
 778    Control characters for these functions are defined by macros
 779    ISO_CODE_XXX in `coding.h'.
 780
 781    Designations are done by the following escape sequences:
 782    ----------------------------------------------------------------------
 783    escape sequence      description
 784    ----------------------------------------------------------------------
 785    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
 786    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
 787    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
 788    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
 789    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
 790    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
 791    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
 792    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
 793    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
 794    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
 795    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
 796    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
 797    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
 798    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
 799    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
 800    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
 801    ----------------------------------------------------------------------
 802
 803    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
 804    of dimension 1, chars 94, and final character <F>, etc...
 805
 806    Note (*): Although these designations are not allowed in ISO2022,
 807    Emacs accepts them on decoding, and produces them on encoding
 808    CHARS96 character sets in a coding system which is characterized as
 809    7-bit environment, non-locking-shift, and non-single-shift.
 810
 811    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
 812    '(' can be omitted.  We refer to this as "short-form" hereafter.
 813
 814    Now you may notice that there are a lot of ways for encoding the
 815    same multilingual text in ISO2022.  Actually, there exist many
 816    coding systems such as Compound Text (used in X11's inter client
 817    communication), ISO-2022-JP (used in Japanese internet), ISO-2022-KR
 818    (used in Korean internet), EUC (Extended UNIX Code, used in Asian
 819    localized platforms), and all of these are variants of ISO2022.
 820
 821    In addition to the above, Emacs handles two more kinds of escape
 822    sequences: ISO6429's direction specification and Emacs' private
 823    sequence for specifying character composition.
 824
 825    ISO6429's direction specification takes the following form:
 826         o CSI ']'      -- end of the current direction
 827         o CSI '0' ']'  -- end of the current direction
 828         o CSI '1' ']'  -- start of left-to-right text
 829         o CSI '2' ']'  -- start of right-to-left text
 830    The control character CSI (0x9B: control sequence introducer) is
 831    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
 832
 833    Character composition specification takes the following form:
 834         o ESC '0' -- start relative composition
 835         o ESC '1' -- end composition
 836         o ESC '2' -- start rule-base composition (*)
 837         o ESC '3' -- start relative composition with alternate chars  (**)
 838         o ESC '4' -- start rule-base composition with alternate chars  (**)
 839   Since these are not standard escape sequences of any ISO standard,
 840   their use with these meanings is restricted to Emacs only.
 841
 842   (*) This form is used only in Emacs 20.5 and the older versions,
 843   but the newer versions can safely decode it.
 844   (**) This form is used only in Emacs 21.1 and the newer versions,
 845   and the older versions can't decode it.
 846
 847   Here's a list of example usages of these composition escape
 848   sequences (categorized by `enum composition_method').
 849
 850   COMPOSITION_RELATIVE:
 851         ESC 0 CHAR [ CHAR ] ESC 1
 852   COMPOSITION_WITH_RULE:
 853         ESC 2 CHAR [ RULE CHAR ] ESC 1
 854   COMPOSITION_WITH_ALTCHARS:
 855         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
 856   COMPOSITION_WITH_RULE_ALTCHARS:
 857         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
 858
 859 enum iso_code_class_type iso_code_class[256];
 860
 861 #define CHARSET_OK(idx, charset, c)                                     \
 862   (coding_system_table[idx]                                             \
 863    && (charset == CHARSET_ASCII                                         \
 864        || (safe_chars = coding_safe_chars (coding_system_table[idx]),   \
 865            CODING_SAFE_CHAR_P (safe_chars, c)))                         \
 866    && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \
 867                                               charset)                  \
 868        != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
 869
 870 #define SHIFT_OUT_OK(idx) \
 871   (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
 872
 873 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 874    Check if a text is encoded in ISO2022.  If it is, returns an
 875    integer in which appropriate flag bits any of:
 876         CODING_CATEGORY_MASK_ISO_7
 877         CODING_CATEGORY_MASK_ISO_7_TIGHT
 878         CODING_CATEGORY_MASK_ISO_8_1
 879         CODING_CATEGORY_MASK_ISO_8_2
 880         CODING_CATEGORY_MASK_ISO_7_ELSE
 881         CODING_CATEGORY_MASK_ISO_8_ELSE
 882    are set.  If a code which should never appear in ISO2022 is found,
 883    returns 0.  */
 884
 885 int
 886 detect_coding_iso2022 (src, src_end)
 887      unsigned char *src, *src_end;
 888 {
 889   int mask = CODING_CATEGORY_MASK_ISO;
 890   int mask_found = 0;
 891   int reg[4], shift_out = 0, single_shifting = 0;
 892   int c, c1, i, charset;
 893   /* Dummy for ONE_MORE_BYTE.  */
 894   struct coding_system dummy_coding;
 895   struct coding_system *coding = &dummy_coding;
 896   Lisp_Object safe_chars;
 897
 898   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
 899   while (mask && src < src_end)
 900     {
 901       ONE_MORE_BYTE (c);
 902       switch (c)
 903         {
 904         case ISO_CODE_ESC:
 905           if (inhibit_iso_escape_detection)
 906             break;
 907           single_shifting = 0;
 908           ONE_MORE_BYTE (c);
 909           if (c >= '(' && c <= '/')
 910             {
 911               /* Designation sequence for a charset of dimension 1.  */
 912               ONE_MORE_BYTE (c1);
 913               if (c1 < ' ' || c1 >= 0x80
 914                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
 915                 /* Invalid designation sequence.  Just ignore.  */
 916                 break;
 917               reg[(c - '(') % 4] = charset;
 918             }
 919           else if (c == '$')
 920             {
 921               /* Designation sequence for a charset of dimension 2.  */
 922               ONE_MORE_BYTE (c);
 923               if (c >= '@' && c <= 'B')
 924                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
 925                 reg[0] = charset = iso_charset_table[1][0][c];
 926               else if (c >= '(' && c <= '/')
 927                 {
 928                   ONE_MORE_BYTE (c1);
 929                   if (c1 < ' ' || c1 >= 0x80
 930                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
 931                     /* Invalid designation sequence.  Just ignore.  */
 932                     break;
 933                   reg[(c - '(') % 4] = charset;
 934                 }
 935               else
 936                 /* Invalid designation sequence.  Just ignore.  */
 937                 break;
 938             }
 939           else if (c == 'N' || c == 'O')
 940             {
 941               /* ESC <Fe> for SS2 or SS3.  */
 942               mask &= CODING_CATEGORY_MASK_ISO_7_ELSE;
 943               break;
 944             }
 945           else if (c >= '0' && c <= '4')
 946             {
 947               /* ESC <Fp> for start/end composition.  */
 948               mask_found |= CODING_CATEGORY_MASK_ISO;
 949               break;
 950             }
 951           else
 952             /* Invalid escape sequence.  Just ignore.  */
 953             break;
 954
 955           /* We found a valid designation sequence for CHARSET.  */
 956           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
 957           c = MAKE_CHAR (charset, 0, 0);
 958           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset, c))
 959             mask_found |= CODING_CATEGORY_MASK_ISO_7;
 960           else
 961             mask &= ~CODING_CATEGORY_MASK_ISO_7;
 962           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset, c))
 963             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
 964           else
 965             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
 966           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset, c))
 967             mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
 968           else
 969             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
 970           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset, c))
 971             mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
 972           else
 973             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
 974           break;
 975
 976         case ISO_CODE_SO:
 977           if (inhibit_iso_escape_detection)
 978             break;
 979           single_shifting = 0;
 980           if (shift_out == 0
 981               && (reg[1] >= 0
 982                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
 983                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
 984             {
 985               /* Locking shift out.  */
 986               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 987               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 988             }
 989           break;
 990
 991         case ISO_CODE_SI:
 992           if (inhibit_iso_escape_detection)
 993             break;
 994           single_shifting = 0;
 995           if (shift_out == 1)
 996             {
 997               /* Locking shift in.  */
 998               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 999               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
1000             }
1001           break;
1002
1003         case ISO_CODE_CSI:
1004           single_shifting = 0;
1005         case ISO_CODE_SS2:
1006         case ISO_CODE_SS3:
1007           {
1008             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
1009
1010             if (inhibit_iso_escape_detection)
1011               break;
1012             if (c != ISO_CODE_CSI)
1013               {
1014                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1015                     & CODING_FLAG_ISO_SINGLE_SHIFT)
1016                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1017                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1018                     & CODING_FLAG_ISO_SINGLE_SHIFT)
1019                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1020                 single_shifting = 1;
1021               }
1022             if (VECTORP (Vlatin_extra_code_table)
1023                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1024               {
1025                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1026                     & CODING_FLAG_ISO_LATIN_EXTRA)
1027                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1028                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1029                     & CODING_FLAG_ISO_LATIN_EXTRA)
1030                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1031               }
1032             mask &= newmask;
1033             mask_found |= newmask;
1034           }
1035           break;
1036
1037         default:
1038           if (c < 0x80)
1039             {
1040               single_shifting = 0;
1041               break;
1042             }
1043           else if (c < 0xA0)
1044             {
1045               single_shifting = 0;
1046               if (VECTORP (Vlatin_extra_code_table)
1047                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1048                 {
1049                   int newmask = 0;
1050
1051                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1052                       & CODING_FLAG_ISO_LATIN_EXTRA)
1053                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1054                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1055                       & CODING_FLAG_ISO_LATIN_EXTRA)
1056                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1057                   mask &= newmask;
1058                   mask_found |= newmask;
1059                 }
1060               else
1061                 return 0;
1062             }
1063           else
1064             {
1065               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
1066                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
1067               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
1068               /* Check the length of succeeding codes of the range
1069                  0xA0..0FF.  If the byte length is odd, we exclude
1070                  CODING_CATEGORY_MASK_ISO_8_2.  We can check this only
1071                  when we are not single shifting.  */
1072               if (!single_shifting
1073                   && mask & CODING_CATEGORY_MASK_ISO_8_2)
1074                 {
1075                   int i = 1;
1076                   while (src < src_end)
1077                     {
1078                       ONE_MORE_BYTE (c);
1079                       if (c < 0xA0)
1080                         break;
1081                       i++;
1082                     }
1083
1084                   if (i & 1 && src < src_end)
1085                     mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
1086                   else
1087                     mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
1088                 }
1089             }
1090           break;
1091         }
1092     }
1093  label_end_of_loop:
1094   return (mask & mask_found);
1095 }
1096
/* Return the character of CHARSET whose 1st position code is C1 and
   whose 2nd position code is C2.  The variable `translation_table' is
   taken from the scope where the macro is expanded: if it is nil the
   character is built directly by MAKE_CHAR, otherwise the value of
   translate_char for that table is returned.  */

#define DECODE_ISO_CHARACTER(charset, c1, c2)                   \
  (!NILP (translation_table)                                    \
   ? translate_char (translation_table, -1, charset, c1, c2)    \
   : MAKE_CHAR (charset, c1, c2))
1106
/* Set designation state into CODING: designate the charset identified
   by DIMENSION, CHARS, and FINAL_CHAR to graphic register REG.

   The charset is looked up with ISO_CHARSET_TABLE.  The designation
   is accepted when the charset exists (>= 0) and either the
   designation requested for it in CODING is REG, or its
   representative character satisfies CODING_SAFE_CHAR_P for
   `safe_chars'.  On acceptance the charset (or its reverse charset,
   when CODING_MODE_DIRECTION is set and a reverse charset exists) is
   stored as the designation of REG.

   Control jumps to `label_invalid_code' in these cases:
     - FINAL_CHAR is below '0' or not below 128;
     - the designation is not accepted; REG is then remembered in
       last_invalid_designation_register;
     - ASCII is designated to register 0 while
       last_invalid_designation_register is 0; the register is reset
       to -1 first.

   The expansion site must provide the variables `coding' and
   `safe_chars' and the label `label_invalid_code'.  REG and
   FINAL_CHAR are evaluated more than once.  */
#define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
  do {                                                                     \
    int charset, c;                                                        \
                                                                           \
    if ((final_char) < '0' || (final_char) >= 128)                         \
      goto label_invalid_code;                                             \
    charset = ISO_CHARSET_TABLE (make_number (dimension),                  \
                                 make_number (chars),                      \
                                 make_number (final_char));                \
    c = MAKE_CHAR (charset, 0, 0);                                         \
    if (charset >= 0                                                       \
        && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)        \
            == (reg)                                                       \
            || CODING_SAFE_CHAR_P (safe_chars, c)))                        \
      {                                                                    \
        if (coding->spec.iso2022.last_invalid_designation_register == 0    \
            && (reg) == 0                                                  \
            && charset == CHARSET_ASCII)                                   \
          {                                                                \
            /* We should insert this designation sequence as is so         \
               that it is surely written back to a file.  */               \
            coding->spec.iso2022.last_invalid_designation_register = -1;   \
            goto label_invalid_code;                                       \
          }                                                                \
        coding->spec.iso2022.last_invalid_designation_register = -1;       \
        if ((coding->mode & CODING_MODE_DIRECTION)                         \
            && CHARSET_REVERSE_CHARSET (charset) >= 0)                     \
          charset = CHARSET_REVERSE_CHARSET (charset);                     \
        CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;               \
      }                                                                    \
    else                                                                   \
      {                                                                    \
        coding->spec.iso2022.last_invalid_designation_register = (reg);    \
        goto label_invalid_code;                                           \
      }                                                                    \
  } while (0)
1143
1144 /* Allocate a memory block for storing information about compositions.
1145    The block is chained to the already allocated blocks.  */
1146
1147 void
1148 coding_allocate_composition_data (coding, char_offset)
1149      struct coding_system *coding;
1150      int char_offset;
1151 {
1152   struct composition_data *cmp_data
1153     = (struct composition_data *) xmalloc (sizeof *cmp_data);
1154
1155   cmp_data->char_offset = char_offset;
1156   cmp_data->used = 0;
1157   cmp_data->prev = coding->cmp_data;
1158   cmp_data->next = NULL;
1159   if (coding->cmp_data)
1160     coding->cmp_data->next = cmp_data;
1161   coding->cmp_data = cmp_data;
1162   coding->cmp_data_start = 0;
1163 }
1164
/* Record the starting position START and METHOD of one composition.

   Four slots are reserved at the end of the used part of the current
   block CODING->cmp_data, and their index is remembered in
   CODING->cmp_data_start:
     slot 0: -1 for now (CODING_ADD_COMPOSITION_END stores a length
             there),
     slot 1: START plus the block's char_offset,
     slot 2: left unset here (filled in by CODING_ADD_COMPOSITION_END),
     slot 3: METHOD converted to int.
   No check is made here that the block has room for four more slots.
   CODING is evaluated more than once.  */

#define CODING_ADD_COMPOSITION_START(coding, start, method)     \
  do {                                                          \
    struct composition_data *cmp_data = (coding)->cmp_data;     \
    int *data = cmp_data->data + cmp_data->used;                \
    (coding)->cmp_data_start = cmp_data->used;                  \
    data[0] = -1;                                               \
    data[1] = cmp_data->char_offset + (start);                  \
    data[3] = (int) (method);                                   \
    cmp_data->used += 4;                                        \
  } while (0)
1177
/* Record the ending position END of the current composition.

   The composition is the one whose record starts at index
   CODING->cmp_data_start of the current block CODING->cmp_data.
   Slot 0 of that record receives the number of slots used since the
   record began, and slot 2 receives END plus the block's char_offset.
   CODING is evaluated more than once.  */

#define CODING_ADD_COMPOSITION_END(coding, end)                 \
  do {                                                          \
    struct composition_data *cmp_data = (coding)->cmp_data;     \
    int *data = cmp_data->data + (coding)->cmp_data_start;      \
    data[0] = cmp_data->used - (coding)->cmp_data_start;        \
    data[2] = cmp_data->char_offset + (end);                    \
  } while (0)
1187
/* Record one COMPONENT (alternate character or composition rule).
   COMPONENT is appended to the used part of the current block
   CODING->cmp_data, whose `used' count grows by one.  No check is made
   here that the block has room.  CODING is evaluated more than once.  */

#define CODING_ADD_COMPOSITION_COMPONENT(coding, component)             \
  ((coding)->cmp_data->data[(coding)->cmp_data->used++] = (component))
1192
/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
   C1 is the byte that followed ESC.

   - If composition is disabled in `coding', the two bytes ESC and
     C1 (with the 8th bit cleared) are written to `dst' as is.
   - If no composition is in progress, a new one is started with the
     method selected by C1, unless coding->cmp_data lacks room, in
     which case coding->result is set to
     CODING_FINISH_INSUFFICIENT_CMP and control jumps to
     `label_end_of_loop'.
   - If a composition with alternate characters is in progress, the
     method is switched to COMPOSITION_RELATIVE; for other methods
     nothing is done.

   The expansion site must provide the variables `coding' and `dst'
   and the label `label_end_of_loop'.  C1 is evaluated more than
   once.  */

#define DECODE_COMPOSITION_START(c1)                                       \
  do {                                                                     \
    if (coding->composing == COMPOSITION_DISABLED)                         \
      {                                                                    \
        *dst++ = ISO_CODE_ESC;                                             \
        *dst++ = (c1) & 0x7f;                                              \
        coding->produced_char += 2;                                        \
      }                                                                    \
    else if (!COMPOSING_P (coding))                                        \
      {                                                                    \
        /* This is surely the start of a composition.  We must be sure     \
           that coding->cmp_data has enough space to store the             \
           information about the composition.  If not, terminate the       \
           current decoding loop, allocate one more memory block for       \
           coding->cmp_data in the caller, then start the decoding         \
           loop again.  We can't allocate memory here directly because     \
           it may cause buffer/string relocation.  */                      \
        if (!coding->cmp_data                                              \
            || (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \
                >= COMPOSITION_DATA_SIZE))                                 \
          {                                                                \
            coding->result = CODING_FINISH_INSUFFICIENT_CMP;               \
            goto label_end_of_loop;                                        \
          }                                                                \
        coding->composing = ((c1) == '0' ? COMPOSITION_RELATIVE            \
                             : (c1) == '2' ? COMPOSITION_WITH_RULE         \
                             : (c1) == '3' ? COMPOSITION_WITH_ALTCHARS     \
                             : COMPOSITION_WITH_RULE_ALTCHARS);            \
        CODING_ADD_COMPOSITION_START (coding, coding->produced_char,       \
                                      coding->composing);                  \
        coding->composition_rule_follows = 0;                              \
      }                                                                    \
    else                                                                   \
      {                                                                    \
        /* We are already handling a composition.  If the method is        \
           the following two, the codes following the current escape       \
           sequence are actual characters stored in a buffer.  */          \
        if (coding->composing == COMPOSITION_WITH_ALTCHARS                 \
            || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)        \
          {                                                                \
            coding->composing = COMPOSITION_RELATIVE;                      \
            coding->composition_rule_follows = 0;                          \
          }                                                                \
      }                                                                    \
  } while (0)
1240
/* Handle composition end sequence ESC 1.  C1 is the byte that
   followed ESC.

   If a composition is in progress, its ending position is recorded
   and coding->composing is reset to COMPOSITION_NO.  Otherwise, that
   is, when composition is disabled or when the sequence arrives
   without a preceding start sequence, the two bytes ESC and C1 are
   written to `dst' as is.  Recording an end in the latter case would
   touch coding->cmp_data, which may be null or may hold no open
   record at that point.

   The expansion site must provide the variables `coding' and
   `dst'.  */

#define DECODE_COMPOSITION_END(c1)                                      \
  do {                                                                  \
    if (coding->composing == COMPOSITION_DISABLED                       \
        || ! COMPOSING_P (coding))                                      \
      {                                                                 \
        *dst++ = ISO_CODE_ESC;                                          \
        *dst++ = (c1);                                                  \
        coding->produced_char += 2;                                     \
      }                                                                 \
    else                                                                \
      {                                                                 \
        CODING_ADD_COMPOSITION_END (coding, coding->produced_char);     \
        coding->composing = COMPOSITION_NO;                             \
      }                                                                 \
  } while (0)
1257
/* Decode a composition rule that starts with the byte C1 and append
   the encoded rule to coding->cmp_data as one component.  C1 must be
   an lvalue; it is decremented by 32 in place.

   After that adjustment, a value below 81 is a complete rule in the
   old format, and a value from 81 to 92 is the first byte of a rule in
   the new format, whose second byte is fetched from the source with
   ONE_MORE_BYTE into `c2'.  For any other value a rule of 0 is
   recorded.  In all cases coding->composition_rule_follows is
   cleared.

   The expansion site must provide the variables `coding' and `c2' and
   whatever ONE_MORE_BYTE requires.  */

#define DECODE_COMPOSITION_RULE(c1)                                     \
  do {                                                                  \
    int encoded_rule = 0;                                               \
                                                                        \
    (c1) -= 32;                                                         \
    if ((c1) >= 81)                                                     \
      {                                                                 \
        if ((c1) < 93)                                                  \
          {                                                             \
            /* New format (after ver.21): two bytes.  */                \
            ONE_MORE_BYTE (c2);                                         \
            encoded_rule = COMPOSITION_ENCODE_RULE ((c1) - 81, c2 - 32); \
          }                                                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        /* Old format (before ver.21): one byte holding two             \
           reference points in base 9, where 4 stands for 10.  */       \
        int global_ref = (c1) / 9;                                      \
        int new_ref = (c1) % 9;                                         \
                                                                        \
        if (global_ref == 4)                                            \
          global_ref = 10;                                              \
        if (new_ref == 4)                                               \
          new_ref = 10;                                                 \
        encoded_rule = COMPOSITION_ENCODE_RULE (global_ref, new_ref);   \
      }                                                                 \
    CODING_ADD_COMPOSITION_COMPONENT (coding, encoded_rule);            \
    coding->composition_rule_follows = 0;                               \
  } while (0)
1282
1283
1284 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1285
1286 static void
1287 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1288      struct coding_system *coding;
1289      unsigned char *source, *destination;
1290      int src_bytes, dst_bytes;
1291 {
1292   unsigned char *src = source;
1293   unsigned char *src_end = source + src_bytes;
1294   unsigned char *dst = destination;
1295   unsigned char *dst_end = destination + dst_bytes;
1296   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1297   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1298   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1299   /* SRC_BASE remembers the start position in source in each loop.
1300      The loop will be exited when there's not enough source code
1301      (within macro ONE_MORE_BYTE), or when there's not enough
1302      destination area to produce a character (within macro
1303      EMIT_CHAR).  */
1304   unsigned char *src_base;
1305   int c, charset;
1306   Lisp_Object translation_table;
1307   Lisp_Object safe_chars;
1308
1309   safe_chars = coding_safe_chars (coding);
1310
1311   if (NILP (Venable_character_translation))
1312     translation_table = Qnil;
1313   else
1314     {
1315       translation_table = coding->translation_table_for_decode;
1316       if (NILP (translation_table))
1317         translation_table = Vstandard_translation_table_for_decode;
1318     }
1319
1320   coding->result = CODING_FINISH_NORMAL;
1321
1322   while (1)
1323     {
1324       int c1, c2;
1325
1326       src_base = src;
1327       ONE_MORE_BYTE (c1);
1328
1329       /* We produce no character or one character.  */
1330       switch (iso_code_class [c1])
1331         {
1332         case ISO_0x20_or_0x7F:
1333           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1334             {
1335               DECODE_COMPOSITION_RULE (c1);
1336               continue;
1337             }
1338           if (charset0 < 0 || CHARSET_CHARS (charset0) == 94)
1339             {
1340               /* This is SPACE or DEL.  */
1341               charset = CHARSET_ASCII;
1342               break;
1343             }
1344           /* This is a graphic character, we fall down ...  */
1345
1346         case ISO_graphic_plane_0:
1347           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1348             {
1349               DECODE_COMPOSITION_RULE (c1);
1350               continue;
1351             }
1352           charset = charset0;
1353           break;
1354
1355         case ISO_0xA0_or_0xFF:
1356           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1357               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1358             goto label_invalid_code;
1359           /* This is a graphic character, we fall down ... */
1360
1361         case ISO_graphic_plane_1:
1362           if (charset1 < 0)
1363             goto label_invalid_code;
1364           charset = charset1;
1365           break;
1366
1367         case ISO_control_0:
1368           if (COMPOSING_P (coding))
1369             DECODE_COMPOSITION_END ('1');
1370
1371           /* All ISO2022 control characters in this class have the
1372              same representation in Emacs internal format.  */
1373           if (c1 == '\n'
1374               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1375               && (coding->eol_type == CODING_EOL_CR
1376                   || coding->eol_type == CODING_EOL_CRLF))
1377             {
1378               coding->result = CODING_FINISH_INCONSISTENT_EOL;
1379               goto label_end_of_loop;
1380             }
1381           charset = CHARSET_ASCII;
1382           break;
1383
1384         case ISO_control_1:
1385           if (COMPOSING_P (coding))
1386             DECODE_COMPOSITION_END ('1');
1387           goto label_invalid_code;
1388
1389         case ISO_carriage_return:
1390           if (COMPOSING_P (coding))
1391             DECODE_COMPOSITION_END ('1');
1392
1393           if (coding->eol_type == CODING_EOL_CR)
1394             c1 = '\n';
1395           else if (coding->eol_type == CODING_EOL_CRLF)
1396             {
1397               ONE_MORE_BYTE (c1);
1398               if (c1 != ISO_CODE_LF)
1399                 {
1400                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1401                     {
1402                       coding->result = CODING_FINISH_INCONSISTENT_EOL;
1403                       goto label_end_of_loop;
1404                     }
1405                   src--;
1406                   c1 = '\r';
1407                 }
1408             }
1409           charset = CHARSET_ASCII;
1410           break;
1411
1412         case ISO_shift_out:
1413           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1414               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1415             goto label_invalid_code;
1416           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1417           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1418           continue;
1419
1420         case ISO_shift_in:
1421           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1422             goto label_invalid_code;
1423           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1424           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1425           continue;
1426
1427         case ISO_single_shift_2_7:
1428         case ISO_single_shift_2:
1429           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1430             goto label_invalid_code;
1431           /* SS2 is handled as an escape sequence of ESC 'N' */
1432           c1 = 'N';
1433           goto label_escape_sequence;
1434
1435         case ISO_single_shift_3:
1436           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1437             goto label_invalid_code;
1438           /* SS3 is handled as an escape sequence of ESC 'O' */
1439           c1 = 'O';
1440           goto label_escape_sequence;
1441
1442         case ISO_control_sequence_introducer:
1443           /* CSI is handled as an escape sequence of ESC '[' ...  */
1444           c1 = '[';
1445           goto label_escape_sequence;
1446
1447         case ISO_escape:
1448           ONE_MORE_BYTE (c1);
1449         label_escape_sequence:
1450           /* Escape sequences handled by Emacs are invocation,
1451              designation, direction specification, and character
1452              composition specification.  */
1453           switch (c1)
1454             {
1455             case '&':           /* revision of following character set */
1456               ONE_MORE_BYTE (c1);
1457               if (!(c1 >= '@' && c1 <= '~'))
1458                 goto label_invalid_code;
1459               ONE_MORE_BYTE (c1);
1460               if (c1 != ISO_CODE_ESC)
1461                 goto label_invalid_code;
1462               ONE_MORE_BYTE (c1);
1463               goto label_escape_sequence;
1464
1465             case '$':           /* designation of 2-byte character set */
1466               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1467                 goto label_invalid_code;
1468               ONE_MORE_BYTE (c1);
1469               if (c1 >= '@' && c1 <= 'B')
1470                 {       /* designation of JISX0208.1978, GB2312.1980,
1471                            or JISX0208.1980 */
1472                   DECODE_DESIGNATION (0, 2, 94, c1);
1473                 }
1474               else if (c1 >= 0x28 && c1 <= 0x2B)
1475                 {       /* designation of DIMENSION2_CHARS94 character set */
1476                   ONE_MORE_BYTE (c2);
1477                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1478                 }
1479               else if (c1 >= 0x2C && c1 <= 0x2F)
1480                 {       /* designation of DIMENSION2_CHARS96 character set */
1481                   ONE_MORE_BYTE (c2);
1482                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1483                 }
1484               else
1485                 goto label_invalid_code;
1486               /* We must update these variables now.  */
1487               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1488               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1489               continue;
1490
1491             case 'n':           /* invocation of locking-shift-2 */
1492               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1493                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1494                 goto label_invalid_code;
1495               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1496               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1497               continue;
1498
1499             case 'o':           /* invocation of locking-shift-3 */
1500               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1501                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1502                 goto label_invalid_code;
1503               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
1504               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1505               continue;
1506
1507             case 'N':           /* invocation of single-shift-2 */
1508               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1509                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1510                 goto label_invalid_code;
1511               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
1512               ONE_MORE_BYTE (c1);
1513               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1514                 goto label_invalid_code;
1515               break;
1516
1517             case 'O':           /* invocation of single-shift-3 */
1518               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1519                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1520                 goto label_invalid_code;
1521               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
1522               ONE_MORE_BYTE (c1);
1523               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1524                 goto label_invalid_code;
1525               break;
1526
1527             case '0': case '2': case '3': case '4': /* start composition */
1528               DECODE_COMPOSITION_START (c1);
1529               continue;
1530
1531             case '1':           /* end composition */
1532               DECODE_COMPOSITION_END (c1);
1533               continue;
1534
1535             case '[':           /* specification of direction */
1536               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
1537                 goto label_invalid_code;
1538               /* For the moment, nested direction is not supported.
1539                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
1540                  left-to-right, and nonzero means right-to-left.  */
1541               ONE_MORE_BYTE (c1);
1542               switch (c1)
1543                 {
1544                 case ']':       /* end of the current direction */
1545                   coding->mode &= ~CODING_MODE_DIRECTION;
1546
1547                 case '0':       /* end of the current direction */
1548                 case '1':       /* start of left-to-right direction */
1549                   ONE_MORE_BYTE (c1);
1550                   if (c1 == ']')
1551                     coding->mode &= ~CODING_MODE_DIRECTION;
1552                   else
1553                     goto label_invalid_code;
1554                   break;
1555
1556                 case '2':       /* start of right-to-left direction */
1557                   ONE_MORE_BYTE (c1);
1558                   if (c1 == ']')
1559                     coding->mode |= CODING_MODE_DIRECTION;
1560                   else
1561                     goto label_invalid_code;
1562                   break;
1563
1564                 default:
1565                   goto label_invalid_code;
1566                 }
1567               continue;
1568
1569             default:
1570               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1571                 goto label_invalid_code;
1572               if (c1 >= 0x28 && c1 <= 0x2B)
1573                 {       /* designation of DIMENSION1_CHARS94 character set */
1574                   ONE_MORE_BYTE (c2);
1575                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
1576                 }
1577               else if (c1 >= 0x2C && c1 <= 0x2F)
1578                 {       /* designation of DIMENSION1_CHARS96 character set */
1579                   ONE_MORE_BYTE (c2);
1580                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
1581                 }
1582               else
1583                 goto label_invalid_code;
1584               /* We must update these variables now.  */
1585               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1586               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1587               continue;
1588             }
1589         }
1590
1591       /* Now we know CHARSET and 1st position code C1 of a character.
1592          Produce a multibyte sequence for that character while getting
1593          2nd position code C2 if necessary.  */
1594       if (CHARSET_DIMENSION (charset) == 2)
1595         {
1596           ONE_MORE_BYTE (c2);
1597           if (c1 < 0x80 ? c2 < 0x20 || c2 >= 0x80 : c2 < 0xA0)
1598             /* C2 is not in a valid range.  */
1599             goto label_invalid_code;
1600         }
1601       c = DECODE_ISO_CHARACTER (charset, c1, c2);
1602       EMIT_CHAR (c);
1603       continue;
1604
1605     label_invalid_code:
1606       coding->errors++;
1607       if (COMPOSING_P (coding))
1608         DECODE_COMPOSITION_END ('1');
1609       src = src_base;
1610       c = *src++;
1611       EMIT_CHAR (c);
1612     }
1613
1614  label_end_of_loop:
1615   coding->consumed = coding->consumed_char = src_base - source;
1616   coding->produced = dst - destination;
1617   return;
1618 }
1619
1620
1621 /* ISO2022 encoding stuff.  */
1622
1623 /*
1624    Saying just "ISO2022" is not enough when encoding; we have to
1625    specify more details.  In Emacs, each coding system that is an
1626    ISO2022 variant has the following specifications:
1627         1. Initial designation to G0 thru G3.
1628         2. Allows short-form designation?
1629         3. ASCII should be designated to G0 before control characters?
1630         4. ASCII should be designated to G0 at end of line?
1631         5. 7-bit environment or 8-bit environment?
1632         6. Use locking-shift?
1633         7. Use Single-shift?
1634    And the following two are only for Japanese:
1635         8. Use ASCII in place of JIS0201-1976-Roman?
1636         9. Use JISX0208-1983 in place of JISX0208-1978?
1637    These specifications are encoded in `coding->flags' as flag bits
1638    defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
1639    details.
1640 */
1641
1642 /* Produce codes (escape sequence) for designating CHARSET to graphic register REG at DST, increment DST, and record CHARSET in CODING as the designation of REG.
1643    If CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) is below 255, `ESC & <'@' + revision>' is produced first.
1644    The short form (no intermediate character) is used only when CHARSET is a 94-char set whose dimension is not 1, REG is 0, <final-char> of CHARSET is '@', 'A', or 'B', and CODING has CODING_FLAG_ISO_SHORT_FORM.
1645    DST is taken from the expansion site; CHARSET, REG, and CODING are expanded more than once, and REG indexes the 4-char strings "()*+" and ",-./".  */
1646
1647 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
1648   do {                                                                  \
1649     unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset);        \
1650     char *intermediate_char_94 = "()*+";                                \
1651     char *intermediate_char_96 = ",-./";                                \
1652     int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset);    \
1653                                                                         \
1654     if (revision < 255)                                                 \
1655       {                                                                 \
1656         *dst++ = ISO_CODE_ESC;                                          \
1657         *dst++ = '&';                                                   \
1658         *dst++ = '@' + revision;                                        \
1659       }                                                                 \
1660     *dst++ = ISO_CODE_ESC;                                              \
1661     if (CHARSET_DIMENSION (charset) == 1)                               \
1662       {                                                                 \
1663         if (CHARSET_CHARS (charset) == 94)                              \
1664           *dst++ = (unsigned char) (intermediate_char_94[reg]);         \
1665         else                                                            \
1666           *dst++ = (unsigned char) (intermediate_char_96[reg]);         \
1667       }                                                                 \
1668     else                                                                \
1669       {                                                                 \
1670         *dst++ = '$';                                                   \
1671         if (CHARSET_CHARS (charset) == 94)                              \
1672           {                                                             \
1673             if (! (coding->flags & CODING_FLAG_ISO_SHORT_FORM)          \
1674                 || reg != 0                                             \
1675                 || final_char < '@' || final_char > 'B')                \
1676               *dst++ = (unsigned char) (intermediate_char_94[reg]);     \
1677           }                                                             \
1678         else                                                            \
1679           *dst++ = (unsigned char) (intermediate_char_96[reg]);         \
1680       }                                                                 \
1681     *dst++ = final_char;                                                \
1682     CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;                \
1683   } while (0)
1684
1685 /* The following two macros produce codes (control character or escape
1686    sequence) for ISO2022 single-shift functions (single-shift-2 and single-shift-3): `ESC N' / `ESC O' when CODING has CODING_FLAG_ISO_SEVEN_BITS, otherwise the single byte ISO_CODE_SS2 / ISO_CODE_SS3.
1687    Both set CODING_SPEC_ISO_SINGLE_SHIFTING (coding) to 1.  DST and CODING are taken from the expansion site.  */
1688
1689 #define ENCODE_SINGLE_SHIFT_2                           \
1690   do {                                                  \
1691     if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
1692       *dst++ = ISO_CODE_ESC, *dst++ = 'N';              \
1693     else                                                \
1694       *dst++ = ISO_CODE_SS2;                            \
1695     CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
1696   } while (0)
1697
1698 #define ENCODE_SINGLE_SHIFT_3                           \
1699   do {                                                  \
1700     if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
1701       *dst++ = ISO_CODE_ESC, *dst++ = 'O';              \
1702     else                                                \
1703       *dst++ = ISO_CODE_SS3;                            \
1704     CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
1705   } while (0)
1706
1707 /* The following four macros produce codes (control character or
1708    escape sequence) for ISO2022 locking-shift functions: shift-in (ISO_CODE_SI), shift-out (ISO_CODE_SO), locking-shift-2 (`ESC n'), and locking-shift-3 (`ESC o').
1709    Each records the graphic register it invokes (0, 1, 2, or 3 respectively) in CODING_SPEC_ISO_INVOCATION (coding, 0).  DST and CODING are taken from the expansion site.  */
1710
1711 #define ENCODE_SHIFT_IN                         \
1712   do {                                          \
1713     *dst++ = ISO_CODE_SI;                       \
1714     CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; \
1715   } while (0)
1716
1717 #define ENCODE_SHIFT_OUT                        \
1718   do {                                          \
1719     *dst++ = ISO_CODE_SO;                       \
1720     CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; \
1721   } while (0)
1722
1723 #define ENCODE_LOCKING_SHIFT_2                  \
1724   do {                                          \
1725     *dst++ = ISO_CODE_ESC, *dst++ = 'n';        \
1726     CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; \
1727   } while (0)
1728
1729 #define ENCODE_LOCKING_SHIFT_3                  \
1730   do {                                          \
1731     *dst++ = ISO_CODE_ESC, *dst++ = 'o';        \
1732     CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; \
1733   } while (0)
1734
1735 /* Produce codes for a DIMENSION1 character whose character set is
1736    CHARSET and whose position-code is C1.  Designation and invocation sequences are also produced in advance if necessary: the body loops, calling encode_invocation_designation, until CHARSET is found on graphic plane 0 or 1 or a single-shift is pending.
1737    A pending single-shift is consumed (reset to 0) by the character; with it, the high bit of C1 follows CODING_FLAG_ISO_SEVEN_BITS.  DST and CODING are taken from the expansion site.  */
1738
1739 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
1740   do {                                                                  \
1741     if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
1742       {                                                                 \
1743         if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
1744           *dst++ = c1 & 0x7F;                                           \
1745         else                                                            \
1746           *dst++ = c1 | 0x80;                                           \
1747         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
1748         break;                                                          \
1749       }                                                                 \
1750     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))      \
1751       {                                                                 \
1752         *dst++ = c1 & 0x7F;                                             \
1753         break;                                                          \
1754       }                                                                 \
1755     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))      \
1756       {                                                                 \
1757         *dst++ = c1 | 0x80;                                             \
1758         break;                                                          \
1759       }                                                                 \
1760     else                                                                \
1761       /* Since CHARSET is not yet invoked to any graphic planes, we     \
1762          must invoke it, or, at first, designate it to some graphic     \
1763          register.  Then repeat the loop to actually produce the        \
1764          character.  */                                                 \
1765       dst = encode_invocation_designation (charset, coding, dst);       \
1766   } while (1)
1767
1768 /* Produce codes for a DIMENSION2 character whose character set is
1769    CHARSET and whose position-codes are C1 and C2.  Designation and invocation codes are also produced in advance if necessary: the body loops, calling encode_invocation_designation, until CHARSET is found on graphic plane 0 or 1 or a single-shift is pending.
1770    Both bytes get the same high bit: cleared for graphic plane 0, set for graphic plane 1, and chosen by CODING_FLAG_ISO_SEVEN_BITS when a pending single-shift is consumed.  DST and CODING are taken from the expansion site.  */
1771
1772 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
1773   do {                                                                  \
1774     if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
1775       {                                                                 \
1776         if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
1777           *dst++ = c1 & 0x7F, *dst++ = c2 & 0x7F;                       \
1778         else                                                            \
1779           *dst++ = c1 | 0x80, *dst++ = c2 | 0x80;                       \
1780         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
1781         break;                                                          \
1782       }                                                                 \
1783     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))      \
1784       {                                                                 \
1785         *dst++ = c1 & 0x7F, *dst++= c2 & 0x7F;                          \
1786         break;                                                          \
1787       }                                                                 \
1788     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))      \
1789       {                                                                 \
1790         *dst++ = c1 | 0x80, *dst++= c2 | 0x80;                          \
1791         break;                                                          \
1792       }                                                                 \
1793     else                                                                \
1794       /* Since CHARSET is not yet invoked to any graphic planes, we     \
1795          must invoke it, or, at first, designate it to some graphic     \
1796          register.  Then repeat the loop to actually produce the        \
1797          character.  */                                                 \
1798       dst = encode_invocation_designation (charset, coding, dst);       \
1799   } while (1)
1800
1801 #define ENCODE_ISO_CHARACTER(c)  /* Encode character C at DST.  */ \
1802   do {  /* DST and CODING come from the expansion site.  */     \
1803     int charset, c1, c2;  /* all three are set by SPLIT_CHAR */ \
1804                                                                 \
1805     SPLIT_CHAR (c, charset, c1, c2);                            \
1806     if (CHARSET_DEFINED_P (charset))                            \
1807       {                                                         \
1808         if (CHARSET_DIMENSION (charset) == 1)                   \
1809           {                                                     \
1810             if (charset == CHARSET_ASCII                        \
1811                 && coding->flags & CODING_FLAG_ISO_USE_ROMAN)   \
1812               charset = charset_latin_jisx0201; /* flag-selected substitute */ \
1813             ENCODE_ISO_CHARACTER_DIMENSION1 (charset, c1);      \
1814           }                                                     \
1815         else                                                    \
1816           {                                                     \
1817             if (charset == charset_jisx0208                     \
1818                 && coding->flags & CODING_FLAG_ISO_USE_OLDJIS)  \
1819               charset = charset_jisx0208_1978; /* flag-selected substitute */ \
1820             ENCODE_ISO_CHARACTER_DIMENSION2 (charset, c1, c2);  \
1821           }                                                     \
1822       }                                                         \
1823     else                                                        \
1824       {  /* Charset not defined: store the code(s) unchanged.  */ \
1825         *dst++ = c1;                                            \
1826         if (c2 >= 0)  /* presumably negative when C has no 2nd code */ \
1827           *dst++ = c2;                                          \
1828       }                                                         \
1829   } while (0)
1830
1831
1832 /* Instead of encoding character C, produce one or two `?'s: CODING_INHIBIT_CHARACTER_SUBSTITUTION (defined outside this section) is encoded once, and a second time when CHARSET_WIDTH of C's charset is greater than 1.  */
1833
1834 #define ENCODE_UNSAFE_CHARACTER(c)                                      \
1835   do {                                                                  \
1836     ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION);       \
1837     if (CHARSET_WIDTH (CHAR_CHARSET (c)) > 1)                           \
1838       ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION);     \
1839   } while (0)
1840
1841
1842 /* Make CHARSET usable for output: produce at DST whatever designation
1843    and invocation codes are still missing, and update the element
1844    `spec.iso2022' of *CODING to match.  Nothing is produced when
1845    CHARSET is already designated to a register that is invoked to
1846    graphic plane 0 or 1.  Return the advanced DST.  */
1847
1848 unsigned char *
1849 encode_invocation_designation (charset, coding, dst)
1850      int charset;
1851      struct coding_system *coding;
1852      unsigned char *dst;
1853 {
1854   int greg = 0;                 /* graphic register used for CHARSET */
1855
1856   /* Search the four graphic registers for one CHARSET is already
1857      designated to.  */
1858   while (greg < 4
1859          && charset != CODING_SPEC_ISO_DESIGNATION (coding, greg))
1860     greg++;
1861
1862   if (greg == 4)
1863     {
1864       /* CHARSET is not designated anywhere yet.  Designate it to
1865          the register it requests, or to graphic register 0 when it
1866          requests none.  */
1867       greg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1868       if (greg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
1869         greg = 0;
1870
1871       ENCODE_DESIGNATION (charset, greg, coding);
1872     }
1873
1874   /* A register that is already invoked to graphic plane 0 or 1
1875      needs no further codes.  */
1876   if (CODING_SPEC_ISO_INVOCATION (coding, 0) == greg
1877       || CODING_SPEC_ISO_INVOCATION (coding, 1) == greg)
1878     return dst;
1879
1880   /* Otherwise invoke it.  Registers 0 and 1 are reached by shift-in
1881      and shift-out; registers 2 and 3 by a single-shift when CODING
1882      allows one, else by a locking-shift.  Each shift macro stores
1883      its bytes through DST and records the new state in CODING.  */
1884   if (greg == 0)
1885     ENCODE_SHIFT_IN;
1886   else if (greg == 1)
1887     ENCODE_SHIFT_OUT;
1888   else if (greg == 2)
1889     {
1890       if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1891         ENCODE_SINGLE_SHIFT_2;
1892       else
1893         ENCODE_LOCKING_SHIFT_2;
1894     }
1895   else if (greg == 3)
1896     {
1897       if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1898         ENCODE_SINGLE_SHIFT_3;
1899       else
1900         ENCODE_LOCKING_SHIFT_3;
1901     }
1902
1903   return dst;
1904 }
1905
1906 /* Produce 2-byte codes for encoded composition rule RULE: COMPOSITION_DECODE_RULE splits RULE into GREF and NREF, and the bytes 32 + 81 + GREF and 32 + NREF are stored at the expansion site's DST.  */
1907
1908 #define ENCODE_COMPOSITION_RULE(rule)           \
1909   do {                                          \
1910     int gref, nref;                             \
1911     COMPOSITION_DECODE_RULE (rule, gref, nref); \
1912     *dst++ = 32 + 81 + gref;                    \
1913     *dst++ = 32 + nref;                         \
1914   } while (0)
1915
1916 /* Produce codes for indicating the start of a composition sequence
1917    (ESC 0, ESC 3, or ESC 4).  DATA points to an array of integers
1918    which specify information about the composition.  DATA[3] is stored in CODING->composing and selects the code: COMPOSITION_RELATIVE gives ESC 0, COMPOSITION_WITH_ALTCHARS gives ESC 3, and any other value gives ESC 4.
1919    For ESC 3 and ESC 4, CODING->cmp_data_index is set to CODING->cmp_data_start + 4 and CODING->composition_rule_follows is cleared.  See the comment in coding.h for the format of DATA.  */
1920
1921 #define ENCODE_COMPOSITION_START(coding, data)                          \
1922   do {                                                                  \
1923     coding->composing = data[3];                                        \
1924     *dst++ = ISO_CODE_ESC;                                              \
1925     if (coding->composing == COMPOSITION_RELATIVE)                      \
1926       *dst++ = '0';                                                     \
1927     else                                                                \
1928       {                                                                 \
1929         *dst++ = (coding->composing == COMPOSITION_WITH_ALTCHARS        \
1930                   ? '3' : '4');                                         \
1931         coding->cmp_data_index = coding->cmp_data_start + 4;            \
1932         coding->composition_rule_follows = 0;                           \
1933       }                                                                 \
1934   } while (0)
1935
1936 /* Produce codes (ESC 1) for indicating the end of the current composition.  CODING->cmp_data_start is advanced by DATA[0] and CODING->composing is reset to COMPOSITION_NO; when the start index has reached CODING->cmp_data->used and a next block exists, CODING->cmp_data moves to that block and cmp_data_start restarts at 0.  */
1937
1938 #define ENCODE_COMPOSITION_END(coding, data)                    \
1939   do {                                                          \
1940     *dst++ = ISO_CODE_ESC;                                      \
1941     *dst++ = '1';                                               \
1942     coding->cmp_data_start += data[0];                          \
1943     coding->composing = COMPOSITION_NO;                         \
1944     if (coding->cmp_data_start == coding->cmp_data->used        \
1945         && coding->cmp_data->next)                              \
1946       {                                                         \
1947         coding->cmp_data = coding->cmp_data->next;              \
1948         coding->cmp_data_start = 0;                             \
1949       }                                                         \
1950   } while (0)
1951
1952 /* Produce composition start sequence ESC 0.  Here, this sequence
1953    doesn't mean the start of a new composition but means that we have
1954    just produced components (alternate chars and composition rules) of
1955    the composition and the actual text follows in SRC.  CODING->composing is set to COMPOSITION_RELATIVE; DST is taken from the expansion site.  */
1956
1957 #define ENCODE_COMPOSITION_FAKE_START(coding)   \
1958   do {                                          \
1959     *dst++ = ISO_CODE_ESC;                      \
1960     *dst++ = '0';                               \
1961     coding->composing = COMPOSITION_RELATIVE;   \
1962   } while (0)
1963
1964 /* The following three macros produce, at the expansion site's DST, codes for indicating direction of text.  ENCODE_CONTROL_SEQUENCE_INTRODUCER stores `ESC [' when CODING has CODING_FLAG_ISO_SEVEN_BITS set (CODING->flags is a bit mask, so the flag is tested with `&'), and the single byte ISO_CODE_CSI otherwise.
1965    ENCODE_DIRECTION_R2L and ENCODE_DIRECTION_L2R store that introducer followed by `2 ]' and `0 ]' respectively.  Each of the three expands to one `do ... while (0)' statement.  */
1966 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
1967   do {                                                  \
1968     if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
1969       *dst++ = ISO_CODE_ESC, *dst++ = '[';              \
1970     else                                                \
1971       *dst++ = ISO_CODE_CSI;                            \
1972   } while (0)
1973
1974 #define ENCODE_DIRECTION_R2L    \
1975   do { ENCODE_CONTROL_SEQUENCE_INTRODUCER; *dst++ = '2', *dst++ = ']'; } while (0)
1976
1977 #define ENCODE_DIRECTION_L2R    \
1978   do { ENCODE_CONTROL_SEQUENCE_INTRODUCER; *dst++ = '0', *dst++ = ']'; } while (0)
1979
1980 /* Produce codes for designation and invocation to reset the graphic
1981    planes and registers to initial state: a shift-in when graphic plane 0 does not currently invoke register 0, then a designation for every register whose initial designation is non-negative and differs from its current one.  DST and CODING are taken from the expansion site.  */
1982 #define ENCODE_RESET_PLANE_AND_REGISTER                                     \
1983   do {                                                                      \
1984     int reg;                                                                \
1985     if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                        \
1986       ENCODE_SHIFT_IN;                                                      \
1987     for (reg = 0; reg < 4; reg++)                                           \
1988       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0            \
1989           && (CODING_SPEC_ISO_DESIGNATION (coding, reg)                     \
1990               != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg)))        \
1991         ENCODE_DESIGNATION                                                  \
1992           (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
1993   } while (0)
1994
1995 /* Scan the line starting at SRC and produce, at DST, designation
1996    sequences for those of its charsets that request a graphic
1997    register.  Return the updated DST.
1998
1999    If the block ends before an end-of-line, some designations may be missed.  */
2000
2001 static unsigned char *
2002 encode_designation_at_bol (coding, translation_table, src, src_end, dst)
2003      struct coding_system *coding;
2004      Lisp_Object translation_table;
2005      unsigned char *src, *src_end, *dst;
2006 {
2007   int charset, c, g, nfound = 0;
2008   /* WANT[G]: charset to designate to graphic register G, or -1.  */
2009   int want[4];
2010
2011   for (g = 3; g >= 0; g--)
2012     want[g] = -1;
2013
2014   do
2015     {
2016       ONE_MORE_CHAR (c);        /* presumably exits via label_end_of_loop */
2017       if (c == '\n')
2018         break;
2019
2020       charset = CHAR_CHARSET (c);
2021       g = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
2022       if (g == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION || want[g] >= 0)
2023         continue;               /* no request, or G already claimed */
2024       want[g] = charset;
2025       nfound++;
2026     }
2027   while (nfound < 4);
2028
2029  label_end_of_loop:
2030   if (nfound == 0)
2031     return dst;
2032
2033   /* Designate only where the register currently holds something else.  */
2034   for (g = 0; g < 4; g++)
2035     if (want[g] >= 0
2036         && CODING_SPEC_ISO_DESIGNATION (coding, g) != want[g])
2037       ENCODE_DESIGNATION (want[g], g, coding);
2038   return dst;
2039 }
2040
2041 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
2042
2043 static void
2044 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
2045      struct coding_system *coding;
2046      unsigned char *source, *destination;
2047      int src_bytes, dst_bytes;
2048 {
2049   unsigned char *src = source;
2050   unsigned char *src_end = source + src_bytes;
2051   unsigned char *dst = destination;
2052   unsigned char *dst_end = destination + dst_bytes;
2053   /* Since the maximum bytes produced by each loop is 20, we subtract 19
2054      from DST_END to assure overflow checking is necessary only at the
2055      head of loop.  */
2056   unsigned char *adjusted_dst_end = dst_end - 19;
2057   /* SRC_BASE remembers the start position in source in each loop.
2058      The loop will be exited when there's not enough source text to
2059      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2060      there's not enough destination area to produce encoded codes
2061      (within macro EMIT_BYTES).  */
2062   unsigned char *src_base;
2063   int c;
2064   Lisp_Object translation_table;
2065   Lisp_Object safe_chars;
2066
2067   safe_chars = coding_safe_chars (coding);
2068
2069   if (NILP (Venable_character_translation))
2070     translation_table = Qnil;
2071   else
2072     {
2073       translation_table = coding->translation_table_for_encode;
2074       if (NILP (translation_table))
2075         translation_table = Vstandard_translation_table_for_encode;
2076     }
2077
2078   coding->consumed_char = 0;
2079   coding->errors = 0;
2080   while (1)
2081     {
2082       src_base = src;
2083
2084       if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19)))
2085         {
2086           coding->result = CODING_FINISH_INSUFFICIENT_DST;
2087           break;
2088         }
2089
2090       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
2091           && CODING_SPEC_ISO_BOL (coding))
2092         {
2093           /* We have to produce designation sequences if any now.  */
2094           dst = encode_designation_at_bol (coding, translation_table,
2095                                            src, src_end, dst);
2096           CODING_SPEC_ISO_BOL (coding) = 0;
2097         }
2098
2099       /* Check composition start and end.  */
2100       if (coding->composing != COMPOSITION_DISABLED
2101           && coding->cmp_data_start < coding->cmp_data->used)
2102         {
2103           struct composition_data *cmp_data = coding->cmp_data;
2104           int *data = cmp_data->data + coding->cmp_data_start;
2105           int this_pos = cmp_data->char_offset + coding->consumed_char;
2106
2107           if (coding->composing == COMPOSITION_RELATIVE)
2108             {
2109               if (this_pos == data[2])
2110                 {
2111                   ENCODE_COMPOSITION_END (coding, data);
2112                   cmp_data = coding->cmp_data;
2113                   data = cmp_data->data + coding->cmp_data_start;
2114                 }
2115             }
2116           else if (COMPOSING_P (coding))
2117             {
2118               /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHAR  */
2119               if (coding->cmp_data_index == coding->cmp_data_start + data[0])
2120                 /* We have consumed components of the composition.
2121                    What follows in SRC is the compositions's base
2122                    text.  */
2123                 ENCODE_COMPOSITION_FAKE_START (coding);
2124               else
2125                 {
2126                   int c = cmp_data->data[coding->cmp_data_index++];
2127                   if (coding->composition_rule_follows)
2128                     {
2129                       ENCODE_COMPOSITION_RULE (c);
2130                       coding->composition_rule_follows = 0;
2131                     }
2132                   else
2133                     {
2134                       if (coding->flags & CODING_FLAG_ISO_SAFE
2135                           && ! CODING_SAFE_CHAR_P (safe_chars, c))
2136                         ENCODE_UNSAFE_CHARACTER (c);
2137                       else
2138                         ENCODE_ISO_CHARACTER (c);
2139                       if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
2140                         coding->composition_rule_follows = 1;
2141                     }
2142                   continue;
2143                 }
2144             }
2145           if (!COMPOSING_P (coding))
2146             {
2147               if (this_pos == data[1])
2148                 {
2149                   ENCODE_COMPOSITION_START (coding, data);
2150                   continue;
2151                 }
2152             }
2153         }
2154
2155       ONE_MORE_CHAR (c);
2156
2157       /* Now encode the character C.  */
2158       if (c < 0x20 || c == 0x7F)
2159         {
2160           if (c == '\r')
2161             {
2162               if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2163                 {
2164                   if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2165                     ENCODE_RESET_PLANE_AND_REGISTER;
2166                   *dst++ = c;
2167                   continue;
2168                 }
2169               /* fall down to treat '\r' as '\n' ...  */
2170               c = '\n';
2171             }
2172           if (c == '\n')
2173             {
2174               if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
2175                 ENCODE_RESET_PLANE_AND_REGISTER;
2176               if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
2177                 bcopy (coding->spec.iso2022.initial_designation,
2178                        coding->spec.iso2022.current_designation,
2179                        sizeof coding->spec.iso2022.initial_designation);
2180               if (coding->eol_type == CODING_EOL_LF
2181                   || coding->eol_type == CODING_EOL_UNDECIDED)
2182                 *dst++ = ISO_CODE_LF;
2183               else if (coding->eol_type == CODING_EOL_CRLF)
2184                 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
2185               else
2186                 *dst++ = ISO_CODE_CR;
2187               CODING_SPEC_ISO_BOL (coding) = 1;
2188             }
2189           else
2190             {
2191               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2192                 ENCODE_RESET_PLANE_AND_REGISTER;
2193               *dst++ = c;
2194             }
2195         }
2196       else if (ASCII_BYTE_P (c))
2197         ENCODE_ISO_CHARACTER (c);
2198       else if (SINGLE_BYTE_CHAR_P (c))
2199         {
2200           *dst++ = c;
2201           coding->errors++;
2202         }
2203       else if (coding->flags & CODING_FLAG_ISO_SAFE
2204                && ! CODING_SAFE_CHAR_P (safe_chars, c))
2205         ENCODE_UNSAFE_CHARACTER (c);
2206       else
2207         ENCODE_ISO_CHARACTER (c);
2208
2209       coding->consumed_char++;
2210     }
2211
2212  label_end_of_loop:
2213   coding->consumed = src_base - source;
2214   coding->produced = coding->produced_char = dst - destination;
2215 }
2216
2217 \f
2218 /*** 4. SJIS and BIG5 handlers ***/
2219
2220 /* Although SJIS and BIG5 are not ISO's coding system, they are used
2221    quite widely.  So, for the moment, Emacs supports them in the bare
2222    C code.  But, in the future, they may be supported only by CCL.  */
2223
/* SJIS is a coding system encoding three character sets: ASCII, right
   half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
   as is.  A character of charset katakana-jisx0201 is encoded by
   "position-code + 0x80".  A character of charset japanese-jisx0208
   is encoded in two bytes, but its two position-codes are divided and
   shifted so that they fit in the ranges below.

   --- CODE RANGE of SJIS ---
   (character set)      (range)
   ASCII                0x00 .. 0x7F
   KATAKANA-JISX0201    0xA0 .. 0xDF
   JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
            (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
   -------------------------------

*/
2240
/* BIG5 is a coding system encoding two character sets: ASCII and
   Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
   character set and each of its characters is encoded in two bytes.

   --- CODE RANGE of BIG5 ---
   (character set)      (range)
   ASCII                0x00 .. 0x7F
   Big5 (1st byte)      0xA1 .. 0xFE
        (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
   --------------------------

   Since the number of characters in Big5 is larger than the maximum
   number of characters in an Emacs charset (96x96), it can't be
   handled as one charset.  So, in Emacs, Big5 is divided into two:
   `charset-big5-1' and `charset-big5-2'.  Both are DIMENSION2 and
   CHARS94.  The former contains frequently used characters and the
   latter contains less frequently used characters.  */
2258
/* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
   are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
   C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
   format.  CHARSET is `charset_big5_1' or `charset_big5_2'.

   Every argument is parenthesized in the expansions, so expressions
   may be passed as inputs.  Input arguments are still evaluated more
   than once, so they must not have side effects.  All inputs are read
   before the position-code outputs are stored, so the same variables
   may be used for input and output, as in
   DECODE_BIG5 (c1, c2, charset, c1, c2).  */

/* Number of Big5 characters which have the same code in 1st byte:
   0x7F - 0x40 = 63 with a 2nd byte in 0x40..0x7E, and
   0xFF - 0xA1 = 94 with a 2nd byte in 0xA1..0xFE.  */
#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)

/* Turn the BIG5 byte pair B1 B2 into CHARSET and the position-codes
   C1 C2.  The pair is first flattened into a linear index; lead bytes
   from 0xC9 upward belong to `charset_big5_2', whose index restarts
   at zero.  */
#define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = (((b1) - 0xA1) * BIG5_SAME_ROW                                  \
         + (b2) - ((b2) < 0x7F ? 0x40 : 0x62));                         \
    if ((b1) < 0xC9)                                                    \
      (charset) = charset_big5_1;                                       \
    else                                                                \
      {                                                                 \
        (charset) = charset_big5_2;                                     \
        temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                          \
      }                                                                 \
    (c1) = temp / (0xFF - 0xA1) + 0x21;                                 \
    (c2) = temp % (0xFF - 0xA1) + 0x21;                                 \
  } while (0)

/* Inverse of DECODE_BIG5: turn CHARSET and the position-codes C1 C2
   into the BIG5 byte pair B1 B2.  */
#define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);                  \
    if ((charset) == charset_big5_2)                                    \
      temp += BIG5_SAME_ROW * (0xC9 - 0xA1);                            \
    (b1) = temp / BIG5_SAME_ROW + 0xA1;                                 \
    (b2) = temp % BIG5_SAME_ROW;                                        \
    /* The first 63 columns map to 0x40..0x7E, the rest to             \
       0xA1..0xFE.  */                                                  \
    (b2) += (b2) < 0x3F ? 0x40 : 0x62;                                  \
  } while (0)
2291
2292 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2293    Check if a text is encoded in SJIS.  If it is, return
2294    CODING_CATEGORY_MASK_SJIS, else return 0.  */
2295
2296 int
2297 detect_coding_sjis (src, src_end)
2298      unsigned char *src, *src_end;
2299 {
2300   int c;
2301   /* Dummy for ONE_MORE_BYTE.  */
2302   struct coding_system dummy_coding;
2303   struct coding_system *coding = &dummy_coding;
2304
2305   while (1)
2306     {
2307       ONE_MORE_BYTE (c);
2308       if (c >= 0x81)
2309         {
2310           if (c <= 0x9F || (c >= 0xE0 && c <= 0xEF))
2311             {
2312               ONE_MORE_BYTE (c);
2313               if (c < 0x40 || c == 0x7F || c > 0xFC)
2314                 return 0;
2315             }
2316           else if (c > 0xDF)
2317             return 0;
2318         }
2319     }
2320  label_end_of_loop:
2321   return CODING_CATEGORY_MASK_SJIS;
2322 }
2323
2324 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2325    Check if a text is encoded in BIG5.  If it is, return
2326    CODING_CATEGORY_MASK_BIG5, else return 0.  */
2327
2328 int
2329 detect_coding_big5 (src, src_end)
2330      unsigned char *src, *src_end;
2331 {
2332   int c;
2333   /* Dummy for ONE_MORE_BYTE.  */
2334   struct coding_system dummy_coding;
2335   struct coding_system *coding = &dummy_coding;
2336
2337   while (1)
2338     {
2339       ONE_MORE_BYTE (c);
2340       if (c >= 0xA1)
2341         {
2342           ONE_MORE_BYTE (c);
2343           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
2344             return 0;
2345         }
2346     }
2347  label_end_of_loop:
2348   return CODING_CATEGORY_MASK_BIG5;
2349 }
2350
2351 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2352    Check if a text is encoded in UTF-8.  If it is, return
2353    CODING_CATEGORY_MASK_UTF_8, else return 0.  */
2354
/* Nonzero if the bits of C selected by MASK are equal to BITS.  */
#define UTF_8_MASKED_EQ_P(c, mask, bits) (((c) & (mask)) == (bits))

/* Classification of one octet C by its high-order bits:
     0xxxxxxx  a complete one-octet sequence
     10xxxxxx  an extra (continuation) octet
     110xxxxx .. 1111110x  the leading octet of a 2..6 octet sequence  */
#define UTF_8_1_OCTET_P(c)         ((c) <= 0x7F)
#define UTF_8_EXTRA_OCTET_P(c)     UTF_8_MASKED_EQ_P (c, 0xC0, 0x80)
#define UTF_8_2_OCTET_LEADING_P(c) UTF_8_MASKED_EQ_P (c, 0xE0, 0xC0)
#define UTF_8_3_OCTET_LEADING_P(c) UTF_8_MASKED_EQ_P (c, 0xF0, 0xE0)
#define UTF_8_4_OCTET_LEADING_P(c) UTF_8_MASKED_EQ_P (c, 0xF8, 0xF0)
#define UTF_8_5_OCTET_LEADING_P(c) UTF_8_MASKED_EQ_P (c, 0xFC, 0xF8)
#define UTF_8_6_OCTET_LEADING_P(c) UTF_8_MASKED_EQ_P (c, 0xFE, 0xFC)
2362
2363 int
2364 detect_coding_utf_8 (src, src_end)
2365      unsigned char *src, *src_end;
2366 {
2367   unsigned char c;
2368   int seq_maybe_bytes;
2369   /* Dummy for ONE_MORE_BYTE.  */
2370   struct coding_system dummy_coding;
2371   struct coding_system *coding = &dummy_coding;
2372
2373   while (1)
2374     {
2375       ONE_MORE_BYTE (c);
2376       if (UTF_8_1_OCTET_P (c))
2377         continue;
2378       else if (UTF_8_2_OCTET_LEADING_P (c))
2379         seq_maybe_bytes = 1;
2380       else if (UTF_8_3_OCTET_LEADING_P (c))
2381         seq_maybe_bytes = 2;
2382       else if (UTF_8_4_OCTET_LEADING_P (c))
2383         seq_maybe_bytes = 3;
2384       else if (UTF_8_5_OCTET_LEADING_P (c))
2385         seq_maybe_bytes = 4;
2386       else if (UTF_8_6_OCTET_LEADING_P (c))
2387         seq_maybe_bytes = 5;
2388       else
2389         return 0;
2390
2391       do
2392         {
2393           ONE_MORE_BYTE (c);
2394           if (!UTF_8_EXTRA_OCTET_P (c))
2395             return 0;
2396           seq_maybe_bytes--;
2397         }
2398       while (seq_maybe_bytes > 0);
2399     }
2400
2401  label_end_of_loop:
2402   return CODING_CATEGORY_MASK_UTF_8;
2403 }
2404
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
   Check if a text starts with a UTF-16 byte order mark.  If the first
   two bytes are 0xFF 0xFE, return CODING_CATEGORY_MASK_UTF_16_LE; if
   they are 0xFE 0xFF, return CODING_CATEGORY_MASK_UTF_16_BE; else
   return 0.  */
2410
/* Nonzero if the 16-bit code unit VAL is one of the two values that
   are treated as invalid here (0xFFFE and 0xFFFF).  */
#define UTF_16_INVALID_P(val)   \
  (((val) == 0xFFFE)            \
   || ((val) == 0xFFFF))

/* Nonzero if VAL is a high (leading) surrogate, 0xD800..0xDBFF.  The
   top six bits identify the range, so the mask must be 0xFC00; a mask
   of 0xD800 would also accept low surrogates and values such as
   0xFFFF.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* Nonzero if VAL is a low (trailing) surrogate, 0xDC00..0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)
2420
2421 int
2422 detect_coding_utf_16 (src, src_end)
2423      unsigned char *src, *src_end;
2424 {
2425   unsigned char c1, c2;
2426   /* Dummy for TWO_MORE_BYTES.  */
2427   struct coding_system dummy_coding;
2428   struct coding_system *coding = &dummy_coding;
2429
2430   TWO_MORE_BYTES (c1, c2);
2431
2432   if ((c1 == 0xFF) && (c2 == 0xFE))
2433     return CODING_CATEGORY_MASK_UTF_16_LE;
2434   else if ((c1 == 0xFE) && (c2 == 0xFF))
2435     return CODING_CATEGORY_MASK_UTF_16_BE;
2436
2437  label_end_of_loop:
2438   return 0;
2439 }
2440
2441 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2442    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
2443
2444 static void
2445 decode_coding_sjis_big5 (coding, source, destination,
2446                          src_bytes, dst_bytes, sjis_p)
2447      struct coding_system *coding;
2448      unsigned char *source, *destination;
2449      int src_bytes, dst_bytes;
2450      int sjis_p;
2451 {
2452   unsigned char *src = source;
2453   unsigned char *src_end = source + src_bytes;
2454   unsigned char *dst = destination;
2455   unsigned char *dst_end = destination + dst_bytes;
2456   /* SRC_BASE remembers the start position in source in each loop.
2457      The loop will be exited when there's not enough source code
2458      (within macro ONE_MORE_BYTE), or when there's not enough
2459      destination area to produce a character (within macro
2460      EMIT_CHAR).  */
2461   unsigned char *src_base;
2462   Lisp_Object translation_table;
2463
2464   if (NILP (Venable_character_translation))
2465     translation_table = Qnil;
2466   else
2467     {
2468       translation_table = coding->translation_table_for_decode;
2469       if (NILP (translation_table))
2470         translation_table = Vstandard_translation_table_for_decode;
2471     }
2472
2473   coding->produced_char = 0;
2474   while (1)
2475     {
2476       int c, charset, c1, c2;
2477
2478       src_base = src;
2479       ONE_MORE_BYTE (c1);
2480
2481       if (c1 < 0x80)
2482         {
2483           charset = CHARSET_ASCII;
2484           if (c1 < 0x20)
2485             {
2486               if (c1 == '\r')
2487                 {
2488                   if (coding->eol_type == CODING_EOL_CRLF)
2489                     {
2490                       ONE_MORE_BYTE (c2);
2491                       if (c2 == '\n')
2492                         c1 = c2;
2493                       else if (coding->mode
2494                                & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2495                         {
2496                           coding->result = CODING_FINISH_INCONSISTENT_EOL;
2497                           goto label_end_of_loop;
2498                         }
2499                       else
2500                         /* To process C2 again, SRC is subtracted by 1.  */
2501                         src--;
2502                     }
2503                   else if (coding->eol_type == CODING_EOL_CR)
2504                     c1 = '\n';
2505                 }
2506               else if (c1 == '\n'
2507                        && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2508                        && (coding->eol_type == CODING_EOL_CR
2509                            || coding->eol_type == CODING_EOL_CRLF))
2510                 {
2511                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
2512                   goto label_end_of_loop;
2513                 }
2514             }
2515         }
2516       else
2517         {
2518           if (sjis_p)
2519             {
2520               if (c1 >= 0xF0)
2521                 goto label_invalid_code;
2522               if (c1 < 0xA0 || c1 >= 0xE0)
2523                 {
2524                   /* SJIS -> JISX0208 */
2525                   ONE_MORE_BYTE (c2);
2526                   if (c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
2527                     goto label_invalid_code;
2528                   DECODE_SJIS (c1, c2, c1, c2);
2529                   charset = charset_jisx0208;
2530                 }
2531               else
2532                 /* SJIS -> JISX0201-Kana */
2533                 charset = charset_katakana_jisx0201;
2534             }
2535           else
2536             {
2537               /* BIG5 -> Big5 */
2538               if (c1 < 0xA1 || c1 > 0xFE)
2539                 goto label_invalid_code;
2540               ONE_MORE_BYTE (c2);
2541               if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE)
2542                 goto label_invalid_code;
2543               DECODE_BIG5 (c1, c2, charset, c1, c2);
2544             }
2545         }
2546
2547       c = DECODE_ISO_CHARACTER (charset, c1, c2);
2548       EMIT_CHAR (c);
2549       continue;
2550
2551     label_invalid_code:
2552       coding->errors++;
2553       src = src_base;
2554       c = *src++;
2555       EMIT_CHAR (c);
2556     }
2557
2558  label_end_of_loop:
2559   coding->consumed = coding->consumed_char = src_base - source;
2560   coding->produced = dst - destination;
2561   return;
2562 }
2563
2564 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
2565    This function can encode charsets `ascii', `katakana-jisx0201',
2566    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
2567    are sure that all these charsets are registered as official charset
2568    (i.e. do not have extended leading-codes).  Characters of other
2569    charsets are produced without any encoding.  If SJIS_P is 1, encode
2570    SJIS text, else encode BIG5 text.  */
2571
2572 static void
2573 encode_coding_sjis_big5 (coding, source, destination,
2574                          src_bytes, dst_bytes, sjis_p)
2575      struct coding_system *coding;
2576      unsigned char *source, *destination;
2577      int src_bytes, dst_bytes;
2578      int sjis_p;
2579 {
2580   unsigned char *src = source;
2581   unsigned char *src_end = source + src_bytes;
2582   unsigned char *dst = destination;
2583   unsigned char *dst_end = destination + dst_bytes;
2584   /* SRC_BASE remembers the start position in source in each loop.
2585      The loop will be exited when there's not enough source text to
2586      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2587      there's not enough destination area to produce encoded codes
2588      (within macro EMIT_BYTES).  */
2589   unsigned char *src_base;
2590   Lisp_Object translation_table;
2591
2592   if (NILP (Venable_character_translation))
2593     translation_table = Qnil;
2594   else
2595     {
2596       translation_table = coding->translation_table_for_encode;
2597       if (NILP (translation_table))
2598         translation_table = Vstandard_translation_table_for_encode;
2599     }
2600
2601   while (1)
2602     {
2603       int c, charset, c1, c2;
2604
2605       src_base = src;
2606       ONE_MORE_CHAR (c);
2607
2608       /* Now encode the character C.  */
2609       if (SINGLE_BYTE_CHAR_P (c))
2610         {
2611           switch (c)
2612             {
2613             case '\r':
2614               if (!coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
2615                 {
2616                   EMIT_ONE_BYTE (c);
2617                   break;
2618                 }
2619               c = '\n';
2620             case '\n':
2621               if (coding->eol_type == CODING_EOL_CRLF)
2622                 {
2623                   EMIT_TWO_BYTES ('\r', c);
2624                   break;
2625                 }
2626               else if (coding->eol_type == CODING_EOL_CR)
2627                 c = '\r';
2628             default:
2629               EMIT_ONE_BYTE (c);
2630             }
2631         }
2632       else
2633         {
2634           SPLIT_CHAR (c, charset, c1, c2);
2635           if (sjis_p)
2636             {
2637               if (charset == charset_jisx0208
2638                   || charset == charset_jisx0208_1978)
2639                 {
2640                   ENCODE_SJIS (c1, c2, c1, c2);
2641                   EMIT_TWO_BYTES (c1, c2);
2642                 }
2643               else if (charset == charset_katakana_jisx0201)
2644                 EMIT_ONE_BYTE (c1 | 0x80);
2645               else if (charset == charset_latin_jisx0201)
2646                 EMIT_ONE_BYTE (c1);
2647               else
2648                 /* There's no way other than producing the internal
2649                    codes as is.  */
2650                 EMIT_BYTES (src_base, src);
2651             }
2652           else
2653             {
2654               if (charset == charset_big5_1 || charset == charset_big5_2)
2655                 {
2656                   ENCODE_BIG5 (charset, c1, c2, c1, c2);
2657                   EMIT_TWO_BYTES (c1, c2);
2658                 }
2659               else
2660                 /* There's no way other than producing the internal
2661                    codes as is.  */
2662                 EMIT_BYTES (src_base, src);
2663             }
2664         }
2665       coding->consumed_char++;
2666     }
2667
2668  label_end_of_loop:
2669   coding->consumed = src_base - source;
2670   coding->produced = coding->produced_char = dst - destination;
2671 }
2672
2673 \f
2674 /*** 5. CCL handlers ***/
2675
2676 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2677    Check if a text is encoded in a coding system of which
2678    encoder/decoder are written in CCL program.  If it is, return
2679    CODING_CATEGORY_MASK_CCL, else return 0.  */
2680
2681 int
2682 detect_coding_ccl (src, src_end)
2683      unsigned char *src, *src_end;
2684 {
2685   unsigned char *valid;
2686   int c;
2687   /* Dummy for ONE_MORE_BYTE.  */
2688   struct coding_system dummy_coding;
2689   struct coding_system *coding = &dummy_coding;
2690
2691   /* No coding system is assigned to coding-category-ccl.  */
2692   if (!coding_system_table[CODING_CATEGORY_IDX_CCL])
2693     return 0;
2694
2695   valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
2696   while (1)
2697     {
2698       ONE_MORE_BYTE (c);
2699       if (! valid[c])
2700         return 0;
2701     }
2702  label_end_of_loop:
2703   return CODING_CATEGORY_MASK_CCL;
2704 }
2705
2706 \f
2707 /*** 6. End-of-line handlers ***/
2708
2709 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2710
2711 static void
2712 decode_eol (coding, source, destination, src_bytes, dst_bytes)
2713      struct coding_system *coding;
2714      unsigned char *source, *destination;
2715      int src_bytes, dst_bytes;
2716 {
2717   unsigned char *src = source;
2718   unsigned char *dst = destination;
2719   unsigned char *src_end = src + src_bytes;
2720   unsigned char *dst_end = dst + dst_bytes;
2721   Lisp_Object translation_table;
2722   /* SRC_BASE remembers the start position in source in each loop.
2723      The loop will be exited when there's not enough source code
2724      (within macro ONE_MORE_BYTE), or when there's not enough
2725      destination area to produce a character (within macro
2726      EMIT_CHAR).  */
2727   unsigned char *src_base;
2728   int c;
2729
2730   translation_table = Qnil;
2731   switch (coding->eol_type)
2732     {
2733     case CODING_EOL_CRLF:
2734       while (1)
2735         {
2736           src_base = src;
2737           ONE_MORE_BYTE (c);
2738           if (c == '\r')
2739             {
2740               ONE_MORE_BYTE (c);
2741               if (c != '\n')
2742                 {
2743                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2744                     {
2745                       coding->result = CODING_FINISH_INCONSISTENT_EOL;
2746                       goto label_end_of_loop;
2747                     }
2748                   src--;
2749                   c = '\r';
2750                 }
2751             }
2752           else if (c == '\n'
2753                    && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
2754             {
2755               coding->result = CODING_FINISH_INCONSISTENT_EOL;
2756               goto label_end_of_loop;
2757             }
2758           EMIT_CHAR (c);
2759         }
2760       break;
2761
2762     case CODING_EOL_CR:
2763       while (1)
2764         {
2765           src_base = src;
2766           ONE_MORE_BYTE (c);
2767           if (c == '\n')
2768             {
2769               if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2770                 {
2771                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
2772                   goto label_end_of_loop;
2773                 }
2774             }
2775           else if (c == '\r')
2776             c = '\n';
2777           EMIT_CHAR (c);
2778         }
2779       break;
2780
2781     default:                    /* no need for EOL handling */
2782       while (1)
2783         {
2784           src_base = src;
2785           ONE_MORE_BYTE (c);
2786           EMIT_CHAR (c);
2787         }
2788     }
2789
2790  label_end_of_loop:
2791   coding->consumed = coding->consumed_char = src_base - source;
2792   coding->produced = dst - destination;
2793   return;
2794 }
2795
2796 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
2797    format of end-of-line according to `coding->eol_type'.  It also
2798    convert multibyte form 8-bit characers to unibyte if
2799    CODING->src_multibyte is nonzero.  If `coding->mode &
2800    CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code '\r' in source text
2801    also means end-of-line.  */
2802
2803 static void
2804 encode_eol (coding, source, destination, src_bytes, dst_bytes)
2805      struct coding_system *coding;
2806      unsigned char *source, *destination;
2807      int src_bytes, dst_bytes;
2808 {
2809   unsigned char *src = source;
2810   unsigned char *dst = destination;
2811   unsigned char *src_end = src + src_bytes;
2812   unsigned char *dst_end = dst + dst_bytes;
2813   Lisp_Object translation_table;
2814   /* SRC_BASE remembers the start position in source in each loop.
2815      The loop will be exited when there's not enough source text to
2816      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2817      there's not enough destination area to produce encoded codes
2818      (within macro EMIT_BYTES).  */
2819   unsigned char *src_base;
2820   int c;
2821   int selective_display = coding->mode & CODING_MODE_SELECTIVE_DISPLAY;
2822
2823   translation_table = Qnil;
2824   if (coding->src_multibyte
2825       && *(src_end - 1) == LEADING_CODE_8_BIT_CONTROL)
2826     {
2827       src_end--;
2828       src_bytes--;
2829       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
2830     }
2831
2832   if (coding->eol_type == CODING_EOL_CRLF)
2833     {
2834       while (src < src_end)
2835         {
2836           src_base = src;
2837           c = *src++;
2838           if (c >= 0x20)
2839             EMIT_ONE_BYTE (c);
2840           else if (c == '\n' || (c == '\r' && selective_display))
2841             EMIT_TWO_BYTES ('\r', '\n');
2842           else
2843             EMIT_ONE_BYTE (c);
2844         }
2845       src_base = src;
2846     label_end_of_loop:
2847       ;
2848     }
2849   else
2850     {
2851       if (!dst_bytes || src_bytes <= dst_bytes)
2852         {
2853           safe_bcopy (src, dst, src_bytes);
2854           src_base = src_end;
2855           dst += src_bytes;
2856         }
2857       else
2858         {
2859           if (coding->src_multibyte
2860               && *(src + dst_bytes - 1) == LEADING_CODE_8_BIT_CONTROL)
2861             dst_bytes--;
2862           safe_bcopy (src, dst, dst_bytes);
2863           src_base = src + dst_bytes;
2864           dst = destination + dst_bytes;
2865           coding->result = CODING_FINISH_INSUFFICIENT_DST;
2866         }
2867       if (coding->eol_type == CODING_EOL_CR)
2868         {
2869           for (src = destination; src < dst; src++)
2870             if (*src == '\n') *src = '\r';
2871         }
2872       else if (selective_display)
2873         {
2874           for (src = destination; src < dst; src++)
2875             if (*src == '\r') *src = '\n';
2876         }
2877     }
2878   if (coding->src_multibyte)
2879     dst = destination + str_as_unibyte (destination, dst - destination);
2880
2881   coding->consumed = src_base - source;
2882   coding->produced = dst - destination;
2883   coding->produced_char = coding->produced;
2884 }
2885
2886 \f
2887 /*** 7. C library functions ***/
2888
2889 /* In Emacs Lisp, coding system is represented by a Lisp symbol which
2890    has a property `coding-system'.  The value of this property is a
2891    vector of length 5 (called as coding-vector).  Among elements of
2892    this vector, the first (element[0]) and the fifth (element[4])
2893    carry important information for decoding/encoding.  Before
2894    decoding/encoding, this information should be set in fields of a
2895    structure of type `coding_system'.
2896
2897    A value of property `coding-system' can be a symbol of another
2898    subsidiary coding-system.  In that case, Emacs gets coding-vector
2899    from that symbol.
2900
2901    `element[0]' contains information to be set in `coding->type'.  The
2902    value and its meaning is as follows:
2903
2904    0 -- coding_type_emacs_mule
2905    1 -- coding_type_sjis
2906    2 -- coding_type_iso2022
2907    3 -- coding_type_big5
2908    4 -- coding_type_ccl encoder/decoder written in CCL
2909    nil -- coding_type_no_conversion
2910    t -- coding_type_undecided (automatic conversion on decoding,
2911                                no-conversion on encoding)
2912
2913    `element[4]' contains information to be set in `coding->flags' and
2914    `coding->spec'.  The meaning varies by `coding->type'.
2915
2916    If `coding->type' is `coding_type_iso2022', element[4] is a vector
2917    of length 32 (of which the first 17 sub-elements are used now).
2918    Meanings of these sub-elements are:
2919
2920    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
2921         If the value is an integer of valid charset, the charset is
2922         assumed to be designated to graphic register N initially.
2923
2924         If the value is minus, it is a minus value of charset which
2925         reserves graphic register N, which means that the charset is
2926         not designated initially but should be designated to graphic
2927         register N just before encoding a character in that charset.
2928
2929         If the value is nil, graphic register N is never used on
2930         encoding.
2931
2932    sub-element[N] where N is 4 through 16: to be set in `coding->flags'
2933         Each value takes t or nil.  See the section ISO2022 of
2934         `coding.h' for more information.
2935
2936    If `coding->type' is `coding_type_big5', element[4] is t to denote
2937    BIG5-ETen or nil to denote BIG5-HKU.
2938
2939    If `coding->type' takes the other value, element[4] is ignored.
2940
2941    Emacs Lisp's coding system also carries information about format of
2942    end-of-line in a value of property `eol-type'.  If the value is
2943    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
2944    means CODING_EOL_CR.  If it is not integer, it should be a vector
2945    of subsidiary coding systems of which property `eol-type' has one
2946    of above values.
2947
2948 */
2949
2950 /* Extract information for decoding/encoding from CODING_SYSTEM_SYMBOL
2951    and set it in CODING.  If CODING_SYSTEM_SYMBOL is invalid, CODING
2952    is setup so that no conversion is necessary and return -1, else
2953    return 0.  */
2954
2955 int
2956 setup_coding_system (coding_system, coding)
2957      Lisp_Object coding_system;
2958      struct coding_system *coding;
2959 {
2960   Lisp_Object coding_spec, coding_type, eol_type, plist;
2961   Lisp_Object val;
2962   int i;
2963
2964   /* Initialize some fields required for all kinds of coding systems.  */
2965   coding->symbol = coding_system;
2966   coding->common_flags = 0;
2967   coding->mode = 0;
2968   coding->heading_ascii = -1;
2969   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
2970   coding->composing = COMPOSITION_DISABLED;
2971   coding->cmp_data = NULL;
2972
2973   if (NILP (coding_system))
2974     goto label_invalid_coding_system;
2975
2976   coding_spec = Fget (coding_system, Qcoding_system);
2977
2978   if (!VECTORP (coding_spec)
2979       || XVECTOR (coding_spec)->size != 5
2980       || !CONSP (XVECTOR (coding_spec)->contents[3]))
2981     goto label_invalid_coding_system;
2982
2983   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
2984   if (VECTORP (eol_type))
2985     {
2986       coding->eol_type = CODING_EOL_UNDECIDED;
2987       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
2988     }
2989   else if (XFASTINT (eol_type) == 1)
2990     {
2991       coding->eol_type = CODING_EOL_CRLF;
2992       coding->common_flags
2993         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2994     }
2995   else if (XFASTINT (eol_type) == 2)
2996     {
2997       coding->eol_type = CODING_EOL_CR;
2998       coding->common_flags
2999         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3000     }
3001   else
3002     coding->eol_type = CODING_EOL_LF;
3003
3004   coding_type = XVECTOR (coding_spec)->contents[0];
3005   /* Try short cut.  */
3006   if (SYMBOLP (coding_type))
3007     {
3008       if (EQ (coding_type, Qt))
3009         {
3010           coding->type = coding_type_undecided;
3011           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
3012         }
3013       else
3014         coding->type = coding_type_no_conversion;
3015       return 0;
3016     }
3017
3018   /* Get values of coding system properties:
3019      `post-read-conversion', `pre-write-conversion',
3020      `translation-table-for-decode', `translation-table-for-encode'.  */
3021   plist = XVECTOR (coding_spec)->contents[3];
3022   /* Pre & post conversion functions should be disabled if
3023      inhibit_eol_conversion is nozero.  This is the case that a code
3024      conversion function is called while those functions are running.  */
3025   if (! inhibit_pre_post_conversion)
3026     {
3027       coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
3028       coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
3029     }
3030   val = Fplist_get (plist, Qtranslation_table_for_decode);
3031   if (SYMBOLP (val))
3032     val = Fget (val, Qtranslation_table_for_decode);
3033   coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
3034   val = Fplist_get (plist, Qtranslation_table_for_encode);
3035   if (SYMBOLP (val))
3036     val = Fget (val, Qtranslation_table_for_encode);
3037   coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
3038   val = Fplist_get (plist, Qcoding_category);
3039   if (!NILP (val))
3040     {
3041       val = Fget (val, Qcoding_category_index);
3042       if (INTEGERP (val))
3043         coding->category_idx = XINT (val);
3044       else
3045         goto label_invalid_coding_system;
3046     }
3047   else
3048     goto label_invalid_coding_system;
3049
3050   /* If the coding system has non-nil `composition' property, enable
3051      composition handling.  */
3052   val = Fplist_get (plist, Qcomposition);
3053   if (!NILP (val))
3054     coding->composing = COMPOSITION_NO;
3055
3056   switch (XFASTINT (coding_type))
3057     {
3058     case 0:
3059       coding->type = coding_type_emacs_mule;
3060       if (!NILP (coding->post_read_conversion))
3061         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
3062       if (!NILP (coding->pre_write_conversion))
3063         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
3064       break;
3065
3066     case 1:
3067       coding->type = coding_type_sjis;
3068       coding->common_flags
3069         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3070       break;
3071
3072     case 2:
3073       coding->type = coding_type_iso2022;
3074       coding->common_flags
3075         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3076       {
3077         Lisp_Object val, temp;
3078         Lisp_Object *flags;
3079         int i, charset, reg_bits = 0;
3080
3081         val = XVECTOR (coding_spec)->contents[4];
3082
3083         if (!VECTORP (val) || XVECTOR (val)->size != 32)
3084           goto label_invalid_coding_system;
3085
3086         flags = XVECTOR (val)->contents;
3087         coding->flags
3088           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
3089              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
3090              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
3091              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
3092              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
3093              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
3094              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
3095              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
3096              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
3097              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
3098              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
3099              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
3100              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
3101              );
3102
3103         /* Invoke graphic register 0 to plane 0.  */
3104         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
3105         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
3106         CODING_SPEC_ISO_INVOCATION (coding, 1)
3107           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
3108         /* Not single shifting at first.  */
3109         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
3110         /* Beginning of buffer should also be regarded as bol. */
3111         CODING_SPEC_ISO_BOL (coding) = 1;
3112
3113         for (charset = 0; charset <= MAX_CHARSET; charset++)
3114           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
3115         val = Vcharset_revision_alist;
3116         while (CONSP (val))
3117           {
3118             charset = get_charset_id (Fcar_safe (XCAR (val)));
3119             if (charset >= 0
3120                 && (temp = Fcdr_safe (XCAR (val)), INTEGERP (temp))
3121                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
3122               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
3123             val = XCDR (val);
3124           }
3125
3126         /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
3127            FLAGS[REG] can be one of below:
3128                 integer CHARSET: CHARSET occupies register I,
3129                 t: designate nothing to REG initially, but can be used
3130                   by any charsets,
3131                 list of integer, nil, or t: designate the first
3132                   element (if integer) to REG initially, the remaining
3133                   elements (if integer) is designated to REG on request,
3134                   if an element is t, REG can be used by any charsets,
3135                 nil: REG is never used.  */
3136         for (charset = 0; charset <= MAX_CHARSET; charset++)
3137           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3138             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
3139         for (i = 0; i < 4; i++)
3140           {
3141             if (INTEGERP (flags[i])
3142                 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
3143                 || (charset = get_charset_id (flags[i])) >= 0)
3144               {
3145                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3146                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
3147               }
3148             else if (EQ (flags[i], Qt))
3149               {
3150                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3151                 reg_bits |= 1 << i;
3152                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3153               }
3154             else if (CONSP (flags[i]))
3155               {
3156                 Lisp_Object tail;
3157                 tail = flags[i];
3158
3159                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3160                 if (INTEGERP (XCAR (tail))
3161                     && (charset = XINT (XCAR (tail)),
3162                         CHARSET_VALID_P (charset))
3163                     || (charset = get_charset_id (XCAR (tail))) >= 0)
3164                   {
3165                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3166                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
3167                   }
3168                 else
3169                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3170                 tail = XCDR (tail);
3171                 while (CONSP (tail))
3172                   {
3173                     if (INTEGERP (XCAR (tail))
3174                         && (charset = XINT (XCAR (tail)),
3175                             CHARSET_VALID_P (charset))
3176                         || (charset = get_charset_id (XCAR (tail))) >= 0)
3177                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3178                         = i;
3179                     else if (EQ (XCAR (tail), Qt))
3180                       reg_bits |= 1 << i;
3181                     tail = XCDR (tail);
3182                   }
3183               }
3184             else
3185               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3186
3187             CODING_SPEC_ISO_DESIGNATION (coding, i)
3188               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
3189           }
3190
3191         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
3192           {
3193             /* REG 1 can be used only by locking shift in 7-bit env.  */
3194             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
3195               reg_bits &= ~2;
3196             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3197               /* Without any shifting, only REG 0 and 1 can be used.  */
3198               reg_bits &= 3;
3199           }
3200
3201         if (reg_bits)
3202           for (charset = 0; charset <= MAX_CHARSET; charset++)
3203             {
3204               if (CHARSET_VALID_P (charset)
3205                   && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3206                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
3207                 {
3208                   /* There exist some default graphic registers to be
3209                      used by CHARSET.  */
3210
3211                   /* We had better avoid designating a charset of
3212                      CHARS96 to REG 0 as far as possible.  */
3213                   if (CHARSET_CHARS (charset) == 96)
3214                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3215                       = (reg_bits & 2
3216                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3217                   else
3218                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3219                       = (reg_bits & 1
3220                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3221                 }
3222             }
3223       }
3224       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3225       coding->spec.iso2022.last_invalid_designation_register = -1;
3226       break;
3227
3228     case 3:
3229       coding->type = coding_type_big5;
3230       coding->common_flags
3231         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3232       coding->flags
3233         = (NILP (XVECTOR (coding_spec)->contents[4])
3234            ? CODING_FLAG_BIG5_HKU
3235            : CODING_FLAG_BIG5_ETEN);
3236       break;
3237
3238     case 4:
3239       coding->type = coding_type_ccl;
3240       coding->common_flags
3241         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3242       {
3243         val = XVECTOR (coding_spec)->contents[4];
3244         if (! CONSP (val)
3245             || setup_ccl_program (&(coding->spec.ccl.decoder),
3246                                   XCAR (val)) < 0
3247             || setup_ccl_program (&(coding->spec.ccl.encoder),
3248                                   XCDR (val)) < 0)
3249           goto label_invalid_coding_system;
3250
3251         bzero (coding->spec.ccl.valid_codes, 256);
3252         val = Fplist_get (plist, Qvalid_codes);
3253         if (CONSP (val))
3254           {
3255             Lisp_Object this;
3256
3257             for (; CONSP (val); val = XCDR (val))
3258               {
3259                 this = XCAR (val);
3260                 if (INTEGERP (this)
3261                     && XINT (this) >= 0 && XINT (this) < 256)
3262                   coding->spec.ccl.valid_codes[XINT (this)] = 1;
3263                 else if (CONSP (this)
3264                          && INTEGERP (XCAR (this))
3265                          && INTEGERP (XCDR (this)))
3266                   {
3267                     int start = XINT (XCAR (this));
3268                     int end = XINT (XCDR (this));
3269
3270                     if (start >= 0 && start <= end && end < 256)
3271                       while (start <= end)
3272                         coding->spec.ccl.valid_codes[start++] = 1;
3273                   }
3274               }
3275           }
3276       }
3277       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3278       coding->spec.ccl.cr_carryover = 0;
3279       break;
3280
3281     case 5:
3282       coding->type = coding_type_raw_text;
3283       break;
3284
3285     default:
3286       goto label_invalid_coding_system;
3287     }
3288   return 0;
3289
3290  label_invalid_coding_system:
3291   coding->type = coding_type_no_conversion;
3292   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3293   coding->common_flags = 0;
3294   coding->eol_type = CODING_EOL_LF;
3295   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3296   return -1;
3297 }
3298
3299 /* Free memory blocks allocated for storing composition information.  */
3300
3301 void
3302 coding_free_composition_data (coding)
3303      struct coding_system *coding;
3304 {
3305   struct composition_data *cmp_data = coding->cmp_data, *next;
3306
3307   if (!cmp_data)
3308     return;
3309   /* Memory blocks are chained.  At first, rewind to the first, then,
3310      free blocks one by one.  */
3311   while (cmp_data->prev)
3312     cmp_data = cmp_data->prev;
3313   while (cmp_data)
3314     {
3315       next = cmp_data->next;
3316       xfree (cmp_data);
3317       cmp_data = next;
3318     }
3319   coding->cmp_data = NULL;
3320 }
3321
3322 /* Set `char_offset' member of all memory blocks pointed by
3323    coding->cmp_data to POS.  */
3324
3325 void
3326 coding_adjust_composition_offset (coding, pos)
3327      struct coding_system *coding;
3328      int pos;
3329 {
3330   struct composition_data *cmp_data;
3331
3332   for (cmp_data = coding->cmp_data; cmp_data; cmp_data = cmp_data->next)
3333     cmp_data->char_offset = pos;
3334 }
3335
3336 /* Setup raw-text or one of its subsidiaries in the structure
3337    coding_system CODING according to the already setup value eol_type
3338    in CODING.  CODING should be setup for some coding system in
3339    advance.  */
3340
3341 void
3342 setup_raw_text_coding_system (coding)
3343      struct coding_system *coding;
3344 {
3345   if (coding->type != coding_type_raw_text)
3346     {
3347       coding->symbol = Qraw_text;
3348       coding->type = coding_type_raw_text;
3349       if (coding->eol_type != CODING_EOL_UNDECIDED)
3350         {
3351           Lisp_Object subsidiaries;
3352           subsidiaries = Fget (Qraw_text, Qeol_type);
3353
3354           if (VECTORP (subsidiaries)
3355               && XVECTOR (subsidiaries)->size == 3)
3356             coding->symbol
3357               = XVECTOR (subsidiaries)->contents[coding->eol_type];
3358         }
3359       setup_coding_system (coding->symbol, coding);
3360     }
3361   return;
3362 }
3363
3364 /* Emacs has a mechanism to automatically detect a coding system if it
3365    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
3366    it's impossible to distinguish some coding systems accurately
3367    because they use the same range of codes.  So, at first, coding
3368    systems are categorized into the following categories:
3369
3370    o coding-category-emacs-mule
3371
3372         The category for a coding system which has the same code range
3373         as Emacs' internal format.  Assigned the coding-system (Lisp
3374         symbol) `emacs-mule' by default.
3375
3376    o coding-category-sjis
3377
3378         The category for a coding system which has the same code range
3379         as SJIS.  Assigned the coding-system (Lisp
3380         symbol) `japanese-shift-jis' by default.
3381
3382    o coding-category-iso-7
3383
3384         The category for a coding system which has the same code range
3385         as ISO2022 of 7-bit environment.  This doesn't use any locking
3386         shift and single shift functions.  This can encode/decode all
3387         charsets.  Assigned the coding-system (Lisp symbol)
3388         `iso-2022-7bit' by default.
3389
3390    o coding-category-iso-7-tight
3391
3392         Same as coding-category-iso-7 except that this can
3393         encode/decode only the specified charsets.
3394
3395    o coding-category-iso-8-1
3396
3397         The category for a coding system which has the same code range
3398         as ISO2022 of 8-bit environment and graphic plane 1 used only
3399         for DIMENSION1 charset.  This doesn't use any locking shift
3400         and single shift functions.  Assigned the coding-system (Lisp
3401         symbol) `iso-latin-1' by default.
3402
3403    o coding-category-iso-8-2
3404
3405         The category for a coding system which has the same code range
3406         as ISO2022 of 8-bit environment and graphic plane 1 used only
3407         for DIMENSION2 charset.  This doesn't use any locking shift
3408         and single shift functions.  Assigned the coding-system (Lisp
3409         symbol) `japanese-iso-8bit' by default.
3410
3411    o coding-category-iso-7-else
3412
3413         The category for a coding system which has the same code range
3414         as ISO2022 of 7-bit environment but uses locking shift or
3415         single shift functions.  Assigned the coding-system (Lisp
3416         symbol) `iso-2022-7bit-lock' by default.
3417
3418    o coding-category-iso-8-else
3419
3420         The category for a coding system which has the same code range
3421         as ISO2022 of 8-bit environment but uses locking shift or
3422         single shift functions.  Assigned the coding-system (Lisp
3423         symbol) `iso-2022-8bit-ss2' by default.
3424
3425    o coding-category-big5
3426
3427         The category for a coding system which has the same code range
3428         as BIG5.  Assigned the coding-system (Lisp symbol)
3429         `cn-big5' by default.
3430
3431    o coding-category-utf-8
3432
3433         The category for a coding system which has the same code range
3434         as UTF-8 (cf. RFC2279).  Assigned the coding-system (Lisp
3435         symbol) `utf-8' by default.
3436
3437    o coding-category-utf-16-be
3438
3439         The category for a coding system in which a text has an
3440         Unicode signature (cf. Unicode Standard) in the order of BIG
3441         endian at the head.  Assigned the coding-system (Lisp symbol)
3442         `utf-16-be' by default.
3443
3444    o coding-category-utf-16-le
3445
3446         The category for a coding system in which a text has an
3447         Unicode signature (cf. Unicode Standard) in the order of
3448         LITTLE endian at the head.  Assigned the coding-system (Lisp
3449         symbol) `utf-16-le' by default.
3450
3451    o coding-category-ccl
3452
3453         The category for a coding system of which encoder/decoder is
3454         written in CCL programs.  The default value is nil, i.e., no
3455         coding system is assigned.
3456
3457    o coding-category-binary
3458
3459         The category for a coding system not categorized in any of the
3460         above.  Assigned the coding-system (Lisp symbol)
3461         `no-conversion' by default.
3462
3463    Each of them is a Lisp symbol and the value is an actual
3464    `coding-system's (this is also a Lisp symbol) assigned by a user.
3465    What Emacs does actually is to detect a category of coding system.
3466    Then, it uses a `coding-system' assigned to it.  If Emacs can't
3467    decide only one possible category, it selects a category of the
3468    highest priority.  Priorities of categories are also specified by a
3469    user in a Lisp variable `coding-category-list'.
3470
3471 */
3472
/* Table indexed by a byte value and consulted by detect_coding_mask:
   a byte whose entry is nonzero is skipped while scanning over the
   head of a text.  detect_coding_mask clears the entries for
   ISO_CODE_SO, ISO_CODE_SI and ISO_CODE_ESC on entry and sets them
   again once such a byte turns out not to start a valid ISO2022
   sequence.  NOTE(review): the other entries are presumably filled
   in by initialization code elsewhere in this file -- confirm.  */
static int ascii_skip_code[256];
3475
3476 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
3477    If it detects possible coding systems, return an integer in which
3478    appropriate flag bits are set.  Flag bits are defined by macros
3479    CODING_CATEGORY_MASK_XXX in `coding.h'.  If PRIORITIES is non-NULL,
3480    it should point the table `coding_priorities'.  In that case, only
3481    the flag bit for a coding system of the highest priority is set in
3482    the returned value.
3483
3484    How many ASCII characters are at the head is returned as *SKIP.  */
3485
3486 static int
3487 detect_coding_mask (source, src_bytes, priorities, skip)
3488      unsigned char *source;
3489      int src_bytes, *priorities, *skip;
3490 {
3491   register unsigned char c;
3492   unsigned char *src = source, *src_end = source + src_bytes;
3493   unsigned int mask, utf16_examined_p, iso2022_examined_p;
3494   int i, idx;
3495
3496   /* At first, skip all ASCII characters and control characters except
3497      for three ISO2022 specific control characters.  */
3498   ascii_skip_code[ISO_CODE_SO] = 0;
3499   ascii_skip_code[ISO_CODE_SI] = 0;
3500   ascii_skip_code[ISO_CODE_ESC] = 0;
3501
3502  label_loop_detect_coding:
3503   while (src < src_end && ascii_skip_code[*src]) src++;
3504   *skip = src - source;
3505
3506   if (src >= src_end)
3507     /* We found nothing other than ASCII.  There's nothing to do.  */
3508     return 0;
3509
3510   c = *src;
3511   /* The text seems to be encoded in some multilingual coding system.
3512      Now, try to find in which coding system the text is encoded.  */
3513   if (c < 0x80)
3514     {
3515       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
3516       /* C is an ISO2022 specific control code of C0.  */
3517       mask = detect_coding_iso2022 (src, src_end);
3518       if (mask == 0)
3519         {
3520           /* No valid ISO2022 code follows C.  Try again.  */
3521           src++;
3522           if (c == ISO_CODE_ESC)
3523             ascii_skip_code[ISO_CODE_ESC] = 1;
3524           else
3525             ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
3526           goto label_loop_detect_coding;
3527         }
3528       if (priorities)
3529         {
3530           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3531             {
3532               if (mask & priorities[i])
3533                 return priorities[i];
3534             }
3535           return CODING_CATEGORY_MASK_RAW_TEXT;
3536         }
3537     }
3538   else
3539     {
3540       int try;
3541
3542       if (c < 0xA0)
3543         {
3544           /* C is the first byte of SJIS character code,
3545              or a leading-code of Emacs' internal format (emacs-mule),
3546              or the first byte of UTF-16.  */
3547           try = (CODING_CATEGORY_MASK_SJIS
3548                   | CODING_CATEGORY_MASK_EMACS_MULE
3549                   | CODING_CATEGORY_MASK_UTF_16_BE
3550                   | CODING_CATEGORY_MASK_UTF_16_LE);
3551
3552           /* Or, if C is a special latin extra code,
3553              or is an ISO2022 specific control code of C1 (SS2 or SS3),
3554              or is an ISO2022 control-sequence-introducer (CSI),
3555              we should also consider the possibility of ISO2022 codings.  */
3556           if ((VECTORP (Vlatin_extra_code_table)
3557                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3558               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
3559               || (c == ISO_CODE_CSI
3560                   && (src < src_end
3561                       && (*src == ']'
3562                           || ((*src == '0' || *src == '1' || *src == '2')
3563                               && src + 1 < src_end
3564                               && src[1] == ']')))))
3565             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
3566                      | CODING_CATEGORY_MASK_ISO_8BIT);
3567         }
3568       else
3569         /* C is a character of ISO2022 in graphic plane right,
3570            or a SJIS's 1-byte character code (i.e. JISX0201),
3571            or the first byte of BIG5's 2-byte code,
3572            or the first byte of UTF-8/16.  */
3573         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
3574                 | CODING_CATEGORY_MASK_ISO_8BIT
3575                 | CODING_CATEGORY_MASK_SJIS
3576                 | CODING_CATEGORY_MASK_BIG5
3577                 | CODING_CATEGORY_MASK_UTF_8
3578                 | CODING_CATEGORY_MASK_UTF_16_BE
3579                 | CODING_CATEGORY_MASK_UTF_16_LE);
3580
3581       /* Or, we may have to consider the possibility of CCL.  */
3582       if (coding_system_table[CODING_CATEGORY_IDX_CCL]
3583           && (coding_system_table[CODING_CATEGORY_IDX_CCL]
3584               ->spec.ccl.valid_codes)[c])
3585         try |= CODING_CATEGORY_MASK_CCL;
3586
3587       mask = 0;
3588       utf16_examined_p = iso2022_examined_p = 0;
3589       if (priorities)
3590         {
3591           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3592             {
3593               if (!iso2022_examined_p
3594                   && (priorities[i] & try & CODING_CATEGORY_MASK_ISO))
3595                 {
3596                   mask |= detect_coding_iso2022 (src, src_end);
3597                   iso2022_examined_p = 1;
3598                 }
3599               else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
3600                 mask |= detect_coding_sjis (src, src_end);
3601               else if (priorities[i] & try & CODING_CATEGORY_MASK_UTF_8)
3602                 mask |= detect_coding_utf_8 (src, src_end);
3603               else if (!utf16_examined_p
3604                        && (priorities[i] & try &
3605                            CODING_CATEGORY_MASK_UTF_16_BE_LE))
3606                 {
3607                   mask |= detect_coding_utf_16 (src, src_end);
3608                   utf16_examined_p = 1;
3609                 }
3610               else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
3611                 mask |= detect_coding_big5 (src, src_end);
3612               else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
3613                 mask |= detect_coding_emacs_mule (src, src_end);
3614               else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
3615                 mask |= detect_coding_ccl (src, src_end);
3616               else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
3617                 mask |= CODING_CATEGORY_MASK_RAW_TEXT;
3618               else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
3619                 mask |= CODING_CATEGORY_MASK_BINARY;
3620               if (mask & priorities[i])
3621                 return priorities[i];
3622             }
3623           return CODING_CATEGORY_MASK_RAW_TEXT;
3624         }
3625       if (try & CODING_CATEGORY_MASK_ISO)
3626         mask |= detect_coding_iso2022 (src, src_end);
3627       if (try & CODING_CATEGORY_MASK_SJIS)
3628         mask |= detect_coding_sjis (src, src_end);
3629       if (try & CODING_CATEGORY_MASK_BIG5)
3630         mask |= detect_coding_big5 (src, src_end);
3631       if (try & CODING_CATEGORY_MASK_UTF_8)
3632         mask |= detect_coding_utf_8 (src, src_end);
3633       if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE)
3634         mask |= detect_coding_utf_16 (src, src_end);
3635       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
3636         mask |= detect_coding_emacs_mule (src, src_end);
3637       if (try & CODING_CATEGORY_MASK_CCL)
3638         mask |= detect_coding_ccl (src, src_end);
3639     }
3640   return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
3641 }
3642
3643 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
3644    The information of the detected coding system is set in CODING.  */
3645
3646 void
3647 detect_coding (coding, src, src_bytes)
3648      struct coding_system *coding;
3649      unsigned char *src;
3650      int src_bytes;
3651 {
3652   unsigned int idx;
3653   int skip, mask, i;
3654   Lisp_Object val;
3655
3656   val = Vcoding_category_list;
3657   mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip);
3658   coding->heading_ascii = skip;
3659
3660   if (!mask) return;
3661
3662   /* We found a single coding system of the highest priority in MASK.  */
3663   idx = 0;
3664   while (mask && ! (mask & 1)) mask >>= 1, idx++;
3665   if (! mask)
3666     idx = CODING_CATEGORY_IDX_RAW_TEXT;
3667
3668   val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[idx])->value;
3669
3670   if (coding->eol_type != CODING_EOL_UNDECIDED)
3671     {
3672       Lisp_Object tmp;
3673
3674       tmp = Fget (val, Qeol_type);
3675       if (VECTORP (tmp))
3676         val = XVECTOR (tmp)->contents[coding->eol_type];
3677     }
3678
3679   /* Setup this new coding system while preserving some slots.  */
3680   {
3681     int src_multibyte = coding->src_multibyte;
3682     int dst_multibyte = coding->dst_multibyte;
3683
3684     setup_coding_system (val, coding);
3685     coding->src_multibyte = src_multibyte;
3686     coding->dst_multibyte = dst_multibyte;
3687     coding->heading_ascii = skip;
3688   }
3689 }
3690
3691 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
3692    SOURCE is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
3693    CODING_EOL_CR, and CODING_EOL_UNDECIDED.
3694
3695    How many non-eol characters are at the head is returned as *SKIP.  */
3696
3697 #define MAX_EOL_CHECK_COUNT 3
3698
3699 static int
3700 detect_eol_type (source, src_bytes, skip)
3701      unsigned char *source;
3702      int src_bytes, *skip;
3703 {
3704   unsigned char *src = source, *src_end = src + src_bytes;
3705   unsigned char c;
3706   int total = 0;                /* How many end-of-lines are found so far.  */
3707   int eol_type = CODING_EOL_UNDECIDED;
3708   int this_eol_type;
3709
3710   *skip = 0;
3711
3712   while (src < src_end && total < MAX_EOL_CHECK_COUNT)
3713     {
3714       c = *src++;
3715       if (c == '\n' || c == '\r')
3716         {
3717           if (*skip == 0)
3718             *skip = src - 1 - source;
3719           total++;
3720           if (c == '\n')
3721             this_eol_type = CODING_EOL_LF;
3722           else if (src >= src_end || *src != '\n')
3723             this_eol_type = CODING_EOL_CR;
3724           else
3725             this_eol_type = CODING_EOL_CRLF, src++;
3726
3727           if (eol_type == CODING_EOL_UNDECIDED)
3728             /* This is the first end-of-line.  */
3729             eol_type = this_eol_type;
3730           else if (eol_type != this_eol_type)
3731             {
3732               /* The found type is different from what found before.  */
3733               eol_type = CODING_EOL_INCONSISTENT;
3734               break;
3735             }
3736         }
3737     }
3738
3739   if (*skip == 0)
3740     *skip = src_end - source;
3741   return eol_type;
3742 }
3743
3744 /* Like detect_eol_type, but detect EOL type in 2-octet
3745    big-endian/little-endian format for coding systems utf-16-be and
3746    utf-16-le.  */
3747
3748 static int
3749 detect_eol_type_in_2_octet_form (source, src_bytes, skip, big_endian_p)
3750      unsigned char *source;
3751      int src_bytes, *skip;
3752 {
3753   unsigned char *src = source, *src_end = src + src_bytes;
3754   unsigned int c1, c2;
3755   int total = 0;                /* How many end-of-lines are found so far.  */
3756   int eol_type = CODING_EOL_UNDECIDED;
3757   int this_eol_type;
3758   int msb, lsb;
3759
3760   if (big_endian_p)
3761     msb = 0, lsb = 1;
3762   else
3763     msb = 1, lsb = 0;
3764
3765   *skip = 0;
3766
3767   while ((src + 1) < src_end && total < MAX_EOL_CHECK_COUNT)
3768     {
3769       c1 = (src[msb] << 8) | (src[lsb]);
3770       src += 2;
3771
3772       if (c1 == '\n' || c1 == '\r')
3773         {
3774           if (*skip == 0)
3775             *skip = src - 2 - source;
3776           total++;
3777           if (c1 == '\n')
3778             {
3779               this_eol_type = CODING_EOL_LF;
3780             }
3781           else
3782             {
3783               if ((src + 1) >= src_end)
3784                 {
3785                   this_eol_type = CODING_EOL_CR;
3786                 }
3787               else
3788                 {
3789                   c2 = (src[msb] << 8) | (src[lsb]);
3790                   if (c2 == '\n')
3791                     this_eol_type = CODING_EOL_CRLF, src += 2;
3792                   else
3793                     this_eol_type = CODING_EOL_CR;
3794                 }
3795             }
3796
3797           if (eol_type == CODING_EOL_UNDECIDED)
3798             /* This is the first end-of-line.  */
3799             eol_type = this_eol_type;
3800           else if (eol_type != this_eol_type)
3801             {
3802               /* The found type is different from what found before.  */
3803               eol_type = CODING_EOL_INCONSISTENT;
3804               break;
3805             }
3806         }
3807     }
3808
3809   if (*skip == 0)
3810     *skip = src_end - source;
3811   return eol_type;
3812 }
3813
3814 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
3815    is encoded.  If it detects an appropriate format of end-of-line, it
3816    sets the information in *CODING.  */
3817
3818 void
3819 detect_eol (coding, src, src_bytes)
3820      struct coding_system *coding;
3821      unsigned char *src;
3822      int src_bytes;
3823 {
3824   Lisp_Object val;
3825   int skip;
3826   int eol_type;
3827
3828   switch (coding->category_idx)
3829     {
3830     case CODING_CATEGORY_IDX_UTF_16_BE:
3831       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 1);
3832       break;
3833     case CODING_CATEGORY_IDX_UTF_16_LE:
3834       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 0);
3835       break;
3836     default:
3837       eol_type = detect_eol_type (src, src_bytes, &skip);
3838       break;
3839     }
3840
3841   if (coding->heading_ascii > skip)
3842     coding->heading_ascii = skip;
3843   else
3844     skip = coding->heading_ascii;
3845
3846   if (eol_type == CODING_EOL_UNDECIDED)
3847     return;
3848   if (eol_type == CODING_EOL_INCONSISTENT)
3849     {
3850 #if 0
3851       /* This code is suppressed until we find a better way to
3852          distinguish raw text file and binary file.  */
3853
3854       /* If we have already detected that the coding is raw-text, the
3855          coding should actually be no-conversion.  */
3856       if (coding->type == coding_type_raw_text)
3857         {
3858           setup_coding_system (Qno_conversion, coding);
3859           return;
3860         }
3861       /* Else, let's decode only text code anyway.  */
3862 #endif /* 0 */
3863       eol_type = CODING_EOL_LF;
3864     }
3865
3866   val = Fget (coding->symbol, Qeol_type);
3867   if (VECTORP (val) && XVECTOR (val)->size == 3)
3868     {
3869       int src_multibyte = coding->src_multibyte;
3870       int dst_multibyte = coding->dst_multibyte;
3871
3872       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
3873       coding->src_multibyte = src_multibyte;
3874       coding->dst_multibyte = dst_multibyte;
3875       coding->heading_ascii = skip;
3876     }
3877 }
3878
/* Number of spare bytes added to every conversion buffer size
   estimate.  */
#define CONVERSION_BUFFER_EXTRA_ROOM 256

/* Factor by which decoding may enlarge the text for CODING (a
   pointer to struct coding_system): 3 for ISO2022, the decoder's own
   buf_magnification for CCL, otherwise 2.  The argument is
   parenthesized so that any pointer expression (e.g. `&x') can be
   passed.  */
#define DECODING_BUFFER_MAG(coding)                     \
  ((coding)->type == coding_type_iso2022                \
   ? 3                                                  \
   : ((coding)->type == coding_type_ccl                 \
      ? (coding)->spec.ccl.decoder.buf_magnification    \
      : 2))
3887
3888 /* Return maximum size (bytes) of a buffer enough for decoding
3889    SRC_BYTES of text encoded in CODING.  */
3890
3891 int
3892 decoding_buffer_size (coding, src_bytes)
3893      struct coding_system *coding;
3894      int src_bytes;
3895 {
3896   return (src_bytes * DECODING_BUFFER_MAG (coding)
3897           + CONVERSION_BUFFER_EXTRA_ROOM);
3898 }
3899
3900 /* Return maximum size (bytes) of a buffer enough for encoding
3901    SRC_BYTES of text to CODING.  */
3902
3903 int
3904 encoding_buffer_size (coding, src_bytes)
3905      struct coding_system *coding;
3906      int src_bytes;
3907 {
3908   int magnification;
3909
3910   if (coding->type == coding_type_ccl)
3911     magnification = coding->spec.ccl.encoder.buf_magnification;
3912   else if (CODING_REQUIRE_ENCODING (coding))
3913     magnification = 3;
3914   else
3915     magnification = 1;
3916
3917   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
3918 }
3919
/* Scratch buffer used while converting text from one coding system
   to another.  */
struct conversion_buffer
{
  int size;                     /* Number of bytes available at DATA.  */
  int on_stack;                 /* 1 if DATA came from alloca, else 0.  */
  unsigned char *data;          /* The buffer itself.  */
};
3927
/* Don't use alloca for allocating memory space larger than this, lest
   we overflow their stack.  The value is parenthesized so that it
   behaves as a single operand in any expression.  */
#define MAX_ALLOCA (16*1024)

/* Allocate LEN bytes of memory for BUF (struct conversion_buffer).
   Requests smaller than MAX_ALLOCA are served by alloca (so the
   memory lives only as long as the calling function), larger ones by
   xmalloc.  BUF.on_stack records which one was used, BUF.size is set
   to LEN.  Note that LEN is evaluated more than once.  */
#define allocate_conversion_buffer(buf, len)            \
  do {                                                  \
    if ((len) < MAX_ALLOCA)                             \
      {                                                 \
        (buf).data = (unsigned char *) alloca (len);    \
        (buf).on_stack = 1;                             \
      }                                                 \
    else                                                \
      {                                                 \
        (buf).data = (unsigned char *) xmalloc (len);   \
        (buf).on_stack = 0;                             \
      }                                                 \
    (buf).size = (len);                                 \
  } while (0)
3947
3948 /* Double the allocated memory for *BUF.  */
3949 static void
3950 extend_conversion_buffer (buf)
3951      struct conversion_buffer *buf;
3952 {
3953   if (buf->on_stack)
3954     {
3955       unsigned char *save = buf->data;
3956       buf->data = (unsigned char *) xmalloc (buf->size * 2);
3957       bcopy (save, buf->data, buf->size);
3958       buf->on_stack = 0;
3959     }
3960   else
3961     {
3962       buf->data = (unsigned char *) xrealloc (buf->data, buf->size * 2);
3963     }
3964   buf->size *= 2;
3965 }
3966
3967 /* Free the allocated memory for BUF if it is not on stack.  */
3968 static void
3969 free_conversion_buffer (buf)
3970      struct conversion_buffer *buf;
3971 {
3972   if (!buf->on_stack)
3973     xfree (buf->data);
3974 }
3975
3976 int
3977 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
3978      struct coding_system *coding;
3979      unsigned char *source, *destination;
3980      int src_bytes, dst_bytes, encodep;
3981 {
3982   struct ccl_program *ccl
3983     = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
3984   int result;
3985
3986   ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
3987   if (encodep)
3988     ccl->eol_type = coding->eol_type;
3989   ccl->multibyte = coding->src_multibyte;
3990   coding->produced = ccl_driver (ccl, source, destination,
3991                                  src_bytes, dst_bytes, &(coding->consumed));
3992   if (encodep)
3993     coding->produced_char = coding->produced;
3994   else
3995     {
3996       int bytes
3997         = dst_bytes ? dst_bytes : source + coding->consumed - destination;
3998       coding->produced = str_as_multibyte (destination, bytes,
3999                                            coding->produced,
4000                                            &(coding->produced_char));
4001     }
4002
4003   switch (ccl->status)
4004     {
4005     case CCL_STAT_SUSPEND_BY_SRC:
4006       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
4007       break;
4008     case CCL_STAT_SUSPEND_BY_DST:
4009       coding->result = CODING_FINISH_INSUFFICIENT_DST;
4010       break;
4011     case CCL_STAT_QUIT:
4012     case CCL_STAT_INVALID_CMD:
4013       coding->result = CODING_FINISH_INTERRUPT;
4014       break;
4015     default:
4016       coding->result = CODING_FINISH_NORMAL;
4017       break;
4018     }
4019   return coding->result;
4020 }
4021
4022 /* Decode EOL format of the text at PTR of BYTES length destructively
4023    according to CODING->eol_type.  This is called after the CCL
4024    program produced a decoded text at PTR.  If we do CRLF->LF
4025    conversion, update CODING->produced and CODING->produced_char.  */
4026
4027 static void
4028 decode_eol_post_ccl (coding, ptr, bytes)
4029      struct coding_system *coding;
4030      unsigned char *ptr;
4031      int bytes;
4032 {
4033   Lisp_Object val, saved_coding_symbol;
4034   unsigned char *pend = ptr + bytes;
4035   int dummy;
4036
4037   /* Remember the current coding system symbol.  We set it back when
4038      an inconsistent EOL is found so that `last-coding-system-used' is
4039      set to the coding system that doesn't specify EOL conversion.  */
4040   saved_coding_symbol = coding->symbol;
4041
4042   coding->spec.ccl.cr_carryover = 0;
4043   if (coding->eol_type == CODING_EOL_UNDECIDED)
4044     {
4045       /* Here, to avoid the call of setup_coding_system, we directly
4046          call detect_eol_type.  */
4047       coding->eol_type = detect_eol_type (ptr, bytes, &dummy);
4048       if (coding->eol_type == CODING_EOL_INCONSISTENT)
4049         coding->eol_type = CODING_EOL_LF;
4050       if (coding->eol_type != CODING_EOL_UNDECIDED)
4051         {
4052           val = Fget (coding->symbol, Qeol_type);
4053           if (VECTORP (val) && XVECTOR (val)->size == 3)
4054             coding->symbol = XVECTOR (val)->contents[coding->eol_type];
4055         }
4056       coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4057     }
4058
4059   if (coding->eol_type == CODING_EOL_LF
4060       || coding->eol_type == CODING_EOL_UNDECIDED)
4061     {
4062       /* We have nothing to do.  */
4063       ptr = pend;
4064     }
4065   else if (coding->eol_type == CODING_EOL_CRLF)
4066     {
4067       unsigned char *pstart = ptr, *p = ptr;
4068
4069       if (! (coding->mode & CODING_MODE_LAST_BLOCK)
4070           && *(pend - 1) == '\r')
4071         {
4072           /* If the last character is CR, we can't handle it here
4073              because LF will be in the not-yet-decoded source text.
4074              Recorded that the CR is not yet processed.  */
4075           coding->spec.ccl.cr_carryover = 1;
4076           coding->produced--;
4077           coding->produced_char--;
4078           pend--;
4079         }
4080       while (ptr < pend)
4081         {
4082           if (*ptr == '\r')
4083             {
4084               if (ptr + 1 < pend && *(ptr + 1) == '\n')
4085                 {
4086                   *p++ = '\n';
4087                   ptr += 2;
4088                 }
4089               else
4090                 {
4091                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4092                     goto undo_eol_conversion;
4093                   *p++ = *ptr++;
4094                 }
4095             }
4096           else if (*ptr == '\n'
4097                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4098             goto undo_eol_conversion;
4099           else
4100             *p++ = *ptr++;
4101           continue;
4102
4103         undo_eol_conversion:
4104           /* We have faced with inconsistent EOL format at PTR.
4105              Convert all LFs before PTR back to CRLFs.  */
4106           for (p--, ptr--; p >= pstart; p--)
4107             {
4108               if (*p == '\n')
4109                 *ptr-- = '\n', *ptr-- = '\r';
4110               else
4111                 *ptr-- = *p;
4112             }
4113           /*  If carryover is recorded, cancel it because we don't
4114               convert CRLF anymore.  */
4115           if (coding->spec.ccl.cr_carryover)
4116             {
4117               coding->spec.ccl.cr_carryover = 0;
4118               coding->produced++;
4119               coding->produced_char++;
4120               pend++;
4121             }
4122           p = ptr = pend;
4123           coding->eol_type = CODING_EOL_LF;
4124           coding->symbol = saved_coding_symbol;
4125         }
4126       if (p < pend)
4127         {
4128           /* As each two-byte sequence CRLF was converted to LF, (PEND
4129              - P) is the number of deleted characters.  */
4130           coding->produced -= pend - p;
4131           coding->produced_char -= pend - p;
4132         }
4133     }
4134   else                  /* i.e. coding->eol_type == CODING_EOL_CR */
4135     {
4136       unsigned char *p = ptr;
4137
4138       for (; ptr < pend; ptr++)
4139         {
4140           if (*ptr == '\r')
4141             *ptr = '\n';
4142           else if (*ptr == '\n'
4143                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4144             {
4145               for (; p < ptr; p++)
4146                 {
4147                   if (*p == '\n')
4148                     *p = '\r';
4149                 }
4150               ptr = pend;
4151               coding->eol_type = CODING_EOL_LF;
4152               coding->symbol = saved_coding_symbol;
4153             }
4154         }
4155     }
4156 }
4157
4158 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
4159    decoding, it may detect coding system and format of end-of-line if
4160    those are not yet decided.  The source should be unibyte, the
4161    result is multibyte if CODING->dst_multibyte is nonzero, else
4162    unibyte.  */
4163
4164 int
4165 decode_coding (coding, source, destination, src_bytes, dst_bytes)
4166      struct coding_system *coding;
4167      unsigned char *source, *destination;
4168      int src_bytes, dst_bytes;
4169 {
4170   if (coding->type == coding_type_undecided)
4171     detect_coding (coding, source, src_bytes);
4172
4173   if (coding->eol_type == CODING_EOL_UNDECIDED
4174       && coding->type != coding_type_ccl)
4175     detect_eol (coding, source, src_bytes);
4176
4177   coding->produced = coding->produced_char = 0;
4178   coding->consumed = coding->consumed_char = 0;
4179   coding->errors = 0;
4180   coding->result = CODING_FINISH_NORMAL;
4181
4182   switch (coding->type)
4183     {
4184     case coding_type_sjis:
4185       decode_coding_sjis_big5 (coding, source, destination,
4186                                src_bytes, dst_bytes, 1);
4187       break;
4188
4189     case coding_type_iso2022:
4190       decode_coding_iso2022 (coding, source, destination,
4191                              src_bytes, dst_bytes);
4192       break;
4193
4194     case coding_type_big5:
4195       decode_coding_sjis_big5 (coding, source, destination,
4196                                src_bytes, dst_bytes, 0);
4197       break;
4198
4199     case coding_type_emacs_mule:
4200       decode_coding_emacs_mule (coding, source, destination,
4201                                 src_bytes, dst_bytes);
4202       break;
4203
4204     case coding_type_ccl:
4205       if (coding->spec.ccl.cr_carryover)
4206         {
4207           /* Set the CR which is not processed by the previous call of
4208              decode_eol_post_ccl in DESTINATION.  */
4209           *destination = '\r';
4210           coding->produced++;
4211           coding->produced_char++;
4212           dst_bytes--;
4213         }
4214       ccl_coding_driver (coding, source,
4215                          destination + coding->spec.ccl.cr_carryover,
4216                          src_bytes, dst_bytes, 0);
4217       if (coding->eol_type != CODING_EOL_LF)
4218         decode_eol_post_ccl (coding, destination, coding->produced);
4219       break;
4220
4221     default:
4222       decode_eol (coding, source, destination, src_bytes, dst_bytes);
4223     }
4224
4225   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
4226       && coding->consumed == src_bytes)
4227     coding->result = CODING_FINISH_NORMAL;
4228
4229   if (coding->mode & CODING_MODE_LAST_BLOCK
4230       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4231     {
4232       unsigned char *src = source + coding->consumed;
4233       unsigned char *dst = destination + coding->produced;
4234
4235       src_bytes -= coding->consumed;
4236       coding->errors++;
4237       if (COMPOSING_P (coding))
4238         DECODE_COMPOSITION_END ('1');
4239       while (src_bytes--)
4240         {
4241           int c = *src++;
4242           dst += CHAR_STRING (c, dst);
4243           coding->produced_char++;
4244         }
4245       coding->consumed = coding->consumed_char = src - source;
4246       coding->produced = dst - destination;
4247       coding->result = CODING_FINISH_NORMAL;
4248     }
4249
4250   if (!coding->dst_multibyte)
4251     {
4252       coding->produced = str_as_unibyte (destination, coding->produced);
4253       coding->produced_char = coding->produced;
4254     }
4255
4256   return coding->result;
4257 }
4258
4259 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  The
4260    multibyteness of the source is CODING->src_multibyte, the
4261    multibyteness of the result is always unibyte.  */
4262
4263 int
4264 encode_coding (coding, source, destination, src_bytes, dst_bytes)
4265      struct coding_system *coding;
4266      unsigned char *source, *destination;
4267      int src_bytes, dst_bytes;
4268 {
4269   coding->produced = coding->produced_char = 0;
4270   coding->consumed = coding->consumed_char = 0;
4271   coding->errors = 0;
4272   coding->result = CODING_FINISH_NORMAL;
4273
4274   switch (coding->type)
4275     {
4276     case coding_type_sjis:
4277       encode_coding_sjis_big5 (coding, source, destination,
4278                                src_bytes, dst_bytes, 1);
4279       break;
4280
4281     case coding_type_iso2022:
4282       encode_coding_iso2022 (coding, source, destination,
4283                              src_bytes, dst_bytes);
4284       break;
4285
4286     case coding_type_big5:
4287       encode_coding_sjis_big5 (coding, source, destination,
4288                                src_bytes, dst_bytes, 0);
4289       break;
4290
4291     case coding_type_emacs_mule:
4292       encode_coding_emacs_mule (coding, source, destination,
4293                                 src_bytes, dst_bytes);
4294       break;
4295
4296     case coding_type_ccl:
4297       ccl_coding_driver (coding, source, destination,
4298                          src_bytes, dst_bytes, 1);
4299       break;
4300
4301     default:
4302       encode_eol (coding, source, destination, src_bytes, dst_bytes);
4303     }
4304
4305   if (coding->mode & CODING_MODE_LAST_BLOCK
4306       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4307     {
4308       unsigned char *src = source + coding->consumed;
4309       unsigned char *src_end = src + src_bytes;
4310       unsigned char *dst = destination + coding->produced;
4311
4312       if (coding->type == coding_type_iso2022)
4313         ENCODE_RESET_PLANE_AND_REGISTER;
4314       if (COMPOSING_P (coding))
4315         *dst++ = ISO_CODE_ESC, *dst++ = '1';
4316       if (coding->consumed < src_bytes)
4317         {
4318           int len = src_bytes - coding->consumed;
4319
4320           BCOPY_SHORT (source + coding->consumed, dst, len);
4321           if (coding->src_multibyte)
4322             len = str_as_unibyte (dst, len);
4323           dst += len;
4324           coding->consumed = src_bytes;
4325         }
4326       coding->produced = coding->produced_char = dst - destination;
4327       coding->result = CODING_FINISH_NORMAL;
4328     }
4329
4330   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
4331       && coding->consumed == src_bytes)
4332     coding->result = CODING_FINISH_NORMAL;
4333
4334   return coding->result;
4335 }
4336
4337 /* Scan text in the region between *BEG and *END (byte positions),
4338    skip characters which we don't have to decode by coding system
4339    CODING at the head and tail, then set *BEG and *END to the region
4340    of the text we actually have to convert.  The caller should move
4341    the gap out of the region in advance if the region is from a
4342    buffer.
4343
4344    If STR is not NULL, *BEG and *END are indices into STR.  */
4345
4346 static void
4347 shrink_decoding_region (beg, end, coding, str)
4348      int *beg, *end;
4349      struct coding_system *coding;
4350      unsigned char *str;
4351 {
4352   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
4353   int eol_conversion;
4354   Lisp_Object translation_table;
4355
4356   if (coding->type == coding_type_ccl
4357       || coding->type == coding_type_undecided
4358       || coding->eol_type != CODING_EOL_LF
4359       || !NILP (coding->post_read_conversion)
4360       || coding->composing != COMPOSITION_DISABLED)
4361     {
4362       /* We can't skip any data.  */
4363       return;
4364     }
4365   if (coding->type == coding_type_no_conversion
4366       || coding->type == coding_type_raw_text
4367       || coding->type == coding_type_emacs_mule)
4368     {
4369       /* We need no conversion, but don't have to skip any data here.
4370          Decoding routine handles them effectively anyway.  */
4371       return;
4372     }
4373
4374   translation_table = coding->translation_table_for_decode;
4375   if (NILP (translation_table) && !NILP (Venable_character_translation))
4376     translation_table = Vstandard_translation_table_for_decode;
4377   if (CHAR_TABLE_P (translation_table))
4378     {
4379       int i;
4380       for (i = 0; i < 128; i++)
4381         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
4382           break;
4383       if (i < 128)
4384         /* Some ASCII character should be translated.  We give up
4385            shrinking.  */
4386         return;
4387     }
4388
4389   if (coding->heading_ascii >= 0)
4390     /* Detection routine has already found how much we can skip at the
4391        head.  */
4392     *beg += coding->heading_ascii;
4393
4394   if (str)
4395     {
4396       begp_orig = begp = str + *beg;
4397       endp_orig = endp = str + *end;
4398     }
4399   else
4400     {
4401       begp_orig = begp = BYTE_POS_ADDR (*beg);
4402       endp_orig = endp = begp + *end - *beg;
4403     }
4404
4405   eol_conversion = (coding->eol_type == CODING_EOL_CR
4406                     || coding->eol_type == CODING_EOL_CRLF);
4407
4408   switch (coding->type)
4409     {
4410     case coding_type_sjis:
4411     case coding_type_big5:
4412       /* We can skip all ASCII characters at the head.  */
4413       if (coding->heading_ascii < 0)
4414         {
4415           if (eol_conversion)
4416             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
4417           else
4418             while (begp < endp && *begp < 0x80) begp++;
4419         }
4420       /* We can skip all ASCII characters at the tail except for the
4421          second byte of SJIS or BIG5 code.  */
4422       if (eol_conversion)
4423         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
4424       else
4425         while (begp < endp && endp[-1] < 0x80) endp--;
4426       /* Do not consider LF as ascii if preceded by CR, since that
4427          confuses eol decoding. */
4428       if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4429         endp++;
4430       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
4431         endp++;
4432       break;
4433
4434     case coding_type_iso2022:
4435       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
4436         /* We can't skip any data.  */
4437         break;
4438       if (coding->heading_ascii < 0)
4439         {
4440           /* We can skip all ASCII characters at the head except for a
4441              few control codes.  */
4442           while (begp < endp && (c = *begp) < 0x80
4443                  && c != ISO_CODE_CR && c != ISO_CODE_SO
4444                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
4445                  && (!eol_conversion || c != ISO_CODE_LF))
4446             begp++;
4447         }
4448       switch (coding->category_idx)
4449         {
4450         case CODING_CATEGORY_IDX_ISO_8_1:
4451         case CODING_CATEGORY_IDX_ISO_8_2:
4452           /* We can skip all ASCII characters at the tail.  */
4453           if (eol_conversion)
4454             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
4455           else
4456             while (begp < endp && endp[-1] < 0x80) endp--;
4457           /* Do not consider LF as ascii if preceded by CR, since that
4458              confuses eol decoding. */
4459           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4460             endp++;
4461           break;
4462
4463         case CODING_CATEGORY_IDX_ISO_7:
4464         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
4465           {
4466             /* We can skip all charactes at the tail except for 8-bit
4467                codes and ESC and the following 2-byte at the tail.  */
4468             unsigned char *eight_bit = NULL;
4469
4470             if (eol_conversion)
4471               while (begp < endp
4472                      && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
4473                 {
4474                   if (!eight_bit && c & 0x80) eight_bit = endp;
4475                   endp--;
4476                 }
4477             else
4478               while (begp < endp
4479                      && (c = endp[-1]) != ISO_CODE_ESC)
4480                 {
4481                   if (!eight_bit && c & 0x80) eight_bit = endp;
4482                   endp--;
4483                 }
4484             /* Do not consider LF as ascii if preceded by CR, since that
4485                confuses eol decoding. */
4486             if (begp < endp && endp < endp_orig
4487                 && endp[-1] == '\r' && endp[0] == '\n')
4488               endp++;
4489             if (begp < endp && endp[-1] == ISO_CODE_ESC)
4490               {
4491                 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
4492                   /* This is an ASCII designation sequence.  We can
4493                      surely skip the tail.  But, if we have
4494                      encountered an 8-bit code, skip only the codes
4495                      after that.  */
4496                   endp = eight_bit ? eight_bit : endp + 2;
4497                 else
4498                   /* Hmmm, we can't skip the tail.  */
4499                   endp = endp_orig;
4500               }
4501             else if (eight_bit)
4502               endp = eight_bit;
4503           }
4504         }
4505       break;
4506
4507     default:
4508       abort ();
4509     }
4510   *beg += begp - begp_orig;
4511   *end += endp - endp_orig;
4512   return;
4513 }
4514
4515 /* Like shrink_decoding_region but for encoding.  */
4516
4517 static void
4518 shrink_encoding_region (beg, end, coding, str)
4519      int *beg, *end;
4520      struct coding_system *coding;
4521      unsigned char *str;
4522 {
4523   unsigned char *begp_orig, *begp, *endp_orig, *endp;
4524   int eol_conversion;
4525   Lisp_Object translation_table;
4526
4527   if (coding->type == coding_type_ccl
4528       || coding->eol_type == CODING_EOL_CRLF
4529       || coding->eol_type == CODING_EOL_CR
4530       || coding->cmp_data && coding->cmp_data->used > 0)
4531     {
4532       /* We can't skip any data.  */
4533       return;
4534     }
4535   if (coding->type == coding_type_no_conversion
4536       || coding->type == coding_type_raw_text
4537       || coding->type == coding_type_emacs_mule
4538       || coding->type == coding_type_undecided)
4539     {
4540       /* We need no conversion, but don't have to skip any data here.
4541          Encoding routine handles them effectively anyway.  */
4542       return;
4543     }
4544
4545   translation_table = coding->translation_table_for_encode;
4546   if (NILP (translation_table) && !NILP (Venable_character_translation))
4547     translation_table = Vstandard_translation_table_for_encode;
4548   if (CHAR_TABLE_P (translation_table))
4549     {
4550       int i;
4551       for (i = 0; i < 128; i++)
4552         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
4553           break;
4554       if (i < 128)
4555         /* Some ASCII character should be tranlsated.  We give up
4556            shrinking.  */
4557         return;
4558     }
4559
4560   if (str)
4561     {
4562       begp_orig = begp = str + *beg;
4563       endp_orig = endp = str + *end;
4564     }
4565   else
4566     {
4567       begp_orig = begp = BYTE_POS_ADDR (*beg);
4568       endp_orig = endp = begp + *end - *beg;
4569     }
4570
4571   eol_conversion = (coding->eol_type == CODING_EOL_CR
4572                     || coding->eol_type == CODING_EOL_CRLF);
4573
4574   /* Here, we don't have to check coding->pre_write_conversion because
4575      the caller is expected to have handled it already.  */
4576   switch (coding->type)
4577     {
4578     case coding_type_iso2022:
4579       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
4580         /* We can't skip any data.  */
4581         break;
4582       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
4583         {
4584           unsigned char *bol = begp;
4585           while (begp < endp && *begp < 0x80)
4586             {
4587               begp++;
4588               if (begp[-1] == '\n')
4589                 bol = begp;
4590             }
4591           begp = bol;
4592           goto label_skip_tail;
4593         }
4594       /* fall down ... */
4595
4596     case coding_type_sjis:
4597     case coding_type_big5:
4598       /* We can skip all ASCII characters at the head and tail.  */
4599       if (eol_conversion)
4600         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
4601       else
4602         while (begp < endp && *begp < 0x80) begp++;
4603     label_skip_tail:
4604       if (eol_conversion)
4605         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
4606       else
4607         while (begp < endp && *(endp - 1) < 0x80) endp--;
4608       break;
4609
4610     default:
4611       abort ();
4612     }
4613
4614   *beg += begp - begp_orig;
4615   *end += endp - endp_orig;
4616   return;
4617 }
4618
/* Shrinking the conversion region has a cost of its own, so it is
   not attempted unless the region is longer than this value.  */
static int shrink_conversion_region_threshhold = 1024;

/* Shrink the conversion region *BEG .. *END for CODING, provided it
   is longer than shrink_conversion_region_threshhold.  The region is
   shrunk for encoding if ENCODEP is nonzero, for decoding otherwise.
   STR is handed on unchanged to the shrinking function.  */
#define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep)        \
  do {                                                                  \
    if (shrink_conversion_region_threshhold < *(end) - *(beg))          \
      {                                                                 \
        if (!(encodep))                                                 \
          shrink_decoding_region (beg, end, coding, str);               \
        else                                                            \
          shrink_encoding_region (beg, end, coding, str);               \
      }                                                                 \
  } while (0)
4632
4633 static Lisp_Object
4634 code_convert_region_unwind (dummy)
4635      Lisp_Object dummy;
4636 {
4637   inhibit_pre_post_conversion = 0;
4638   return Qnil;
4639 }
4640
4641 /* Store information about all compositions in the range FROM and TO
4642    of OBJ in memory blocks pointed by CODING->cmp_data.  OBJ is a
4643    buffer or a string, defaults to the current buffer.  */
4644
4645 void
4646 coding_save_composition (coding, from, to, obj)
4647      struct coding_system *coding;
4648      int from, to;
4649      Lisp_Object obj;
4650 {
4651   Lisp_Object prop;
4652   int start, end;
4653
4654   if (coding->composing == COMPOSITION_DISABLED)
4655     return;
4656   if (!coding->cmp_data)
4657     coding_allocate_composition_data (coding, from);
4658   if (!find_composition (from, to, &start, &end, &prop, obj)
4659       || end > to)
4660     return;
4661   if (start < from
4662       && (!find_composition (end, to, &start, &end, &prop, obj)
4663           || end > to))
4664     return;
4665   coding->composing = COMPOSITION_NO;
4666   do
4667     {
4668       if (COMPOSITION_VALID_P (start, end, prop))
4669         {
4670           enum composition_method method = COMPOSITION_METHOD (prop);
4671           if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
4672               >= COMPOSITION_DATA_SIZE)
4673             coding_allocate_composition_data (coding, from);
4674           /* For relative composition, we remember start and end
4675              positions, for the other compositions, we also remember
4676              components.  */
4677           CODING_ADD_COMPOSITION_START (coding, start - from, method);
4678           if (method != COMPOSITION_RELATIVE)
4679             {
4680               /* We must store a*/
4681               Lisp_Object val, ch;
4682
4683               val = COMPOSITION_COMPONENTS (prop);
4684               if (CONSP (val))
4685                 while (CONSP (val))
4686                   {
4687                     ch = XCAR (val), val = XCDR (val);
4688                     CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
4689                   }
4690               else if (VECTORP (val) || STRINGP (val))
4691                 {
4692                   int len = (VECTORP (val)
4693                              ? XVECTOR (val)->size : XSTRING (val)->size);
4694                   int i;
4695                   for (i = 0; i < len; i++)
4696                     {
4697                       ch = (STRINGP (val)
4698                             ? Faref (val, make_number (i))
4699                             : XVECTOR (val)->contents[i]);
4700                       CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
4701                     }
4702                 }
4703               else              /* INTEGERP (val) */
4704                 CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (val));
4705             }
4706           CODING_ADD_COMPOSITION_END (coding, end - from);
4707         }
4708       start = end;
4709     }
4710   while (start < to
4711          && find_composition (start, to, &start, &end, &prop, obj)
4712          && end <= to);
4713
4714   /* Make coding->cmp_data point to the first memory block.  */
4715   while (coding->cmp_data->prev)
4716     coding->cmp_data = coding->cmp_data->prev;
4717   coding->cmp_data_start = 0;
4718 }
4719
4720 /* Reflect the saved information about compositions to OBJ.
4721    CODING->cmp_data points to a memory block for the informaiton.  OBJ
4722    is a buffer or a string, defaults to the current buffer.  */
4723
4724 void
4725 coding_restore_composition (coding, obj)
4726      struct coding_system *coding;
4727      Lisp_Object obj;
4728 {
4729   struct composition_data *cmp_data = coding->cmp_data;
4730
4731   if (!cmp_data)
4732     return;
4733
4734   while (cmp_data->prev)
4735     cmp_data = cmp_data->prev;
4736
4737   while (cmp_data)
4738     {
4739       int i;
4740
4741       for (i = 0; i < cmp_data->used && cmp_data->data[i] > 0;
4742            i += cmp_data->data[i])
4743         {
4744           int *data = cmp_data->data + i;
4745           enum composition_method method = (enum composition_method) data[3];
4746           Lisp_Object components;
4747
4748           if (method == COMPOSITION_RELATIVE)
4749             components = Qnil;
4750           else
4751             {
4752               int len = data[0] - 4, j;
4753               Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
4754
4755               for (j = 0; j < len; j++)
4756                 args[j] = make_number (data[4 + j]);
4757               components = (method == COMPOSITION_WITH_ALTCHARS
4758                             ? Fstring (len, args) : Fvector (len, args));
4759             }
4760           compose_text (data[1], data[2], components, Qnil, obj);
4761         }
4762       cmp_data = cmp_data->next;
4763     }
4764 }
4765
4766 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
4767    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
4768    coding system CODING, and return the status code of code conversion
4769    (currently always 0, which has no meaning).
4770
4771    How many characters (and bytes) are converted to how many
4772    characters (and bytes) are recorded in members of the structure
4773    CODING (consumed, consumed_char, produced and produced_char).
4774
4775    If REPLACE is nonzero, we do various things as if the original text
4776    is deleted and a new text is inserted.  See the comments in
4777    replace_range (insdel.c) to know what we are doing.
4778
4779    If REPLACE is zero, it is assumed that the source text is unibyte.
4780    Otherwise, it is assumed that the source text is multibyte if the
          current buffer is multibyte.

          The conversion itself is done in place: the source text is
          absorbed into the gap and the converted text is written at the
          start of the gap, which is enlarged whenever it runs short.  */
4781
4782 int
4783 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
4784      int from, from_byte, to, to_byte, encodep, replace;
4785      struct coding_system *coding;
4786 {
4787   int len = to - from, len_byte = to_byte - from_byte;
4788   int require, inserted, inserted_byte;
4789   int head_skip, tail_skip, total_skip = 0;  /* The two skips are set only where TOTAL_SKIP is computed.  */
4790   Lisp_Object saved_coding_symbol;
4791   int first = 1;
4792   unsigned char *src, *dst;
4793   Lisp_Object deletion;
4794   int orig_point = PT, orig_len = len;
4795   int prev_Z;
4796   int multibyte_p = !NILP (current_buffer->enable_multibyte_characters);
4797
4798   coding->src_multibyte = replace && multibyte_p;
4799   coding->dst_multibyte = multibyte_p;
4800
4801   deletion = Qnil;
4802   saved_coding_symbol = Qnil;
4803
       /* If point is strictly inside the region, move it to FROM.  */
4804   if (from < PT && PT < to)
4805     {
4806       TEMP_SET_PT_BOTH (from, from_byte);
4807       orig_point = from;
4808     }
4809
4810   if (replace)
4811     {
4812       int saved_from = from;
4813       int saved_inhibit_modification_hooks;
4814
4815       prepare_to_modify_buffer (from, to, &from);
           /* FROM may have been changed by the call above; if so,
              recompute the positions that depend on it.  */
4816       if (saved_from != from)
4817         {
4818           to = from + len;
4819           from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
4820           len_byte = to_byte - from_byte;
4821         }
4822
4823       /* The code conversion routine can not preserve text properties
4824          for now.  So, we must remove all text properties in the
4825          region.  Here, we must suppress all modification hooks.  */
4826       saved_inhibit_modification_hooks = inhibit_modification_hooks;
4827       inhibit_modification_hooks = 1;
4828       Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
4829       inhibit_modification_hooks = saved_inhibit_modification_hooks;
4830     }
4831
4832   if (! encodep && CODING_REQUIRE_DETECTION (coding))
4833     {
4834       /* We must detect encoding of text and eol format.  */
4835
4836       if (from < GPT && to > GPT)
4837         move_gap_both (from, from_byte);
4838       if (coding->type == coding_type_undecided)
4839         {
4840           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
4841           if (coding->type == coding_type_undecided)
4842             {
4843               /* It seems that the text contains only ASCII, but we
4844                  should not leave it undecided because the deeper
4845                  decoding routine (decode_coding) tries to detect the
4846                  encodings again in vain.  */
4847               coding->type = coding_type_emacs_mule;
4848               coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
4849             }
4850         }
4851       if (coding->eol_type == CODING_EOL_UNDECIDED
4852           && coding->type != coding_type_ccl)
4853         {
4854           saved_coding_symbol = coding->symbol;
4855           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
4856           if (coding->eol_type == CODING_EOL_UNDECIDED)
4857             coding->eol_type = CODING_EOL_LF;
4858           /* We had better recover the original eol format if we
4859              encounter an inconsistent eol format while decoding.  */
4860           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4861         }
4862     }
4863
4864   /* Now we convert the text.  */
4865
4866   /* For encoding, we must process pre-write-conversion in advance.  */
4867   if (! inhibit_pre_post_conversion
4868       && encodep
4869       && SYMBOLP (coding->pre_write_conversion)
4870       && ! NILP (Ffboundp (coding->pre_write_conversion)))
4871     {
4872       /* The function in pre-write-conversion may put a new text in a
4873          new buffer.  */
4874       struct buffer *prev = current_buffer;
4875       Lisp_Object new;
4876       int count = specpdl_ptr - specpdl;  /* NOTE(review): not used in this block.  */
4877
4878       record_unwind_protect (code_convert_region_unwind, Qnil);
4879       /* We should not call any more pre-write/post-read-conversion
4880          functions while this pre-write-conversion is running.  */
4881       inhibit_pre_post_conversion = 1;
4882       call2 (coding->pre_write_conversion,
4883              make_number (from), make_number (to));
4884       inhibit_pre_post_conversion = 0;
4885       /* Discard the unwind protect.  */
4886       specpdl_ptr--;
4887
4888       if (current_buffer != prev)
4889         {
               /* The function left us in another buffer.  Replace the
                  region in the original buffer with the accessible
                  contents of that buffer, kill it, and recompute all
                  positions for the new text.  */
4890           len = ZV - BEGV;
4891           new = Fcurrent_buffer ();
4892           set_buffer_internal_1 (prev);
4893           del_range_2 (from, from_byte, to, to_byte, 0);
4894           TEMP_SET_PT_BOTH (from, from_byte);
4895           insert_from_buffer (XBUFFER (new), 1, len, 0);
4896           Fkill_buffer (new);
4897           if (orig_point >= to)
4898             orig_point += len - orig_len;
4899           else if (orig_point > from)
4900             orig_point = from;
4901           orig_len = len;
4902           to = from + len;
4903           from_byte = CHAR_TO_BYTE (from);
4904           to_byte = CHAR_TO_BYTE (to);
4905           len_byte = to_byte - from_byte;
4906           TEMP_SET_PT_BOTH (from, from_byte);
4907         }
4908     }
4909
       /* Keep a copy of the original text; it is handed to
          adjust_after_replace once the conversion is done.  */
4910   if (replace)
4911     deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
4912
4913   if (coding->composing != COMPOSITION_DISABLED)
4914     {
4915       if (encodep)
4916         coding_save_composition (coding, from, to, Fcurrent_buffer ());
4917       else
4918         coding_allocate_composition_data (coding, from);
4919     }
4920
4921   /* Try to skip the heading and tailing ASCIIs.  */
4922   if (coding->type != coding_type_ccl)
4923     {
4924       int from_byte_orig = from_byte, to_byte_orig = to_byte;
4925
4926       if (from < GPT && GPT < to)
4927         move_gap_both (from, from_byte);
4928       SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
4929       if (from_byte == to_byte
4930           && (encodep || NILP (coding->post_read_conversion))
4931           && ! CODING_REQUIRE_FLUSHING (coding))
4932         {
               /* Everything was skipped, so the text is left as it is.  */
4933           coding->produced = len_byte;
4934           coding->produced_char = len;
4935           if (!replace)
4936             /* We must record and adjust for this new text now.  */
4937             adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
               /* NOTE(review): this path returns without calling
                  coding_free_composition_data, although composition
                  data may have been allocated just above -- confirm
                  whether that leaks.  */
4938           return 0;
4939         }
4940
           /* NOTE(review): the skip counts are in bytes but are also
              applied to the character positions FROM, TO and LEN,
              which presumes the skipped text is one byte per
              character -- confirm against the shrink functions.  */
4941       head_skip = from_byte - from_byte_orig;
4942       tail_skip = to_byte_orig - to_byte;
4943       total_skip = head_skip + tail_skip;
4944       from += head_skip;
4945       to -= tail_skip;
4946       len -= total_skip; len_byte -= total_skip;
4947     }
4948
4949   /* For conversion, we must put the gap before the text in addition to
4950      making the gap larger for efficient decoding.  The required gap
4951      size starts from 2000 which is the magic number used in make_gap.
4952      But, after one batch of conversion, it will be incremented if we
4953      find that it is not enough.  */
4954   require = 2000;
4955
4956   if (GAP_SIZE  < require)
4957     make_gap (require - GAP_SIZE);
4958   move_gap_both (from, from_byte);
4959
4960   inserted = inserted_byte = 0;
4961
       /* Absorb the source text into the gap: from now on it is
          addressed relative to GAP_END_ADDR and is no longer counted
          as buffer contents.  */
4962   GAP_SIZE += len_byte;
4963   ZV -= len;
4964   Z -= len;
4965   ZV_BYTE -= len_byte;
4966   Z_BYTE -= len_byte;
4967
4968   if (GPT - BEG < BEG_UNCHANGED)
4969     BEG_UNCHANGED = GPT - BEG;
4970   if (Z - GPT < END_UNCHANGED)
4971     END_UNCHANGED = Z - GPT;
4972
4973   if (!encodep && coding->src_multibyte)
4974     {
4975       /* Decoding routines expects that the source text is unibyte.
4976          We must convert 8-bit characters of multibyte form to
4977          unibyte.  */
4978       int len_byte_orig = len_byte;
4979       len_byte = str_as_unibyte (GAP_END_ADDR - len_byte, len_byte);
           /* If the text got shorter, slide it up so that it still ends
              at GAP_END_ADDR.  */
4980       if (len_byte < len_byte_orig)
4981         safe_bcopy (GAP_END_ADDR - len_byte_orig, GAP_END_ADDR - len_byte,
4982                     len_byte);
4983       coding->src_multibyte = 0;
4984     }
4985
4986   for (;;)
4987     {
4988       int result;
4989
4990       /* The buffer memory is now:
4991          +--------+converted-text+---------+-------original-text-------+---+
4992          |<-from->|<--inserted-->|---------|<--------len_byte--------->|---|
4993                   |<---------------------- GAP ----------------------->|  */
4994       src = GAP_END_ADDR - len_byte;
4995       dst = GPT_ADDR + inserted_byte;
4996
4997       if (encodep)
4998         result = encode_coding (coding, src, dst, len_byte, 0);
4999       else
5000         result = decode_coding (coding, src, dst, len_byte, 0);
5001
5002       /* The buffer memory is now:
5003          +--------+-------converted-text----+--+------original-text----+---+
5004          |<-from->|<-inserted->|<-produced->|--|<-(len_byte-consumed)->|---|
5005                   |<---------------------- GAP ----------------------->|  */
5006
5007       inserted += coding->produced_char;
5008       inserted_byte += coding->produced;
5009       len_byte -= coding->consumed;
5010
5011       if (result == CODING_FINISH_INSUFFICIENT_CMP)
5012         {
               /* Get more room for composition data, then go on
                  converting.  */
5013           coding_allocate_composition_data (coding, from + inserted);
5014           continue;
5015         }
5016
5017       src += coding->consumed;
5018       dst += coding->produced;
5019
5020       if (result == CODING_FINISH_NORMAL)
5021         {
5022           src += len_byte;
5023           break;
5024         }
5025       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
5026         {
5027           unsigned char *pend = dst, *p = pend - inserted_byte;
5028           Lisp_Object eol_type;
5029
5030           /* Encode LFs back to the original eol format (CR or CRLF).  */
5031           if (coding->eol_type == CODING_EOL_CR)
5032             {
5033               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
5034             }
5035           else
5036             {
5037               int count = 0;
5038
                   /* COUNT is the number of LFs, i.e. the number of CRs
                      that have to be inserted.  */
5039               while (p < pend) if (*p++ == '\n') count++;
5040               if (src - dst < count)
5041                 {
5042                   /* We don't have sufficient room for encoding LFs
5043                      back to CRLF.  We must record converted and
5044                      not-yet-converted text back to the buffer
5045                      content, enlarge the gap, then record them out of
5046                      the buffer contents again.  */
5047                   int add = len_byte + inserted_byte;
5048
5049                   GAP_SIZE -= add;
5050                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5051                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
5052                   make_gap (count - GAP_SIZE);
5053                   GAP_SIZE += add;
5054                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5055                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5056                   /* Don't forget to update SRC, DST, and PEND.  */
5057                   src = GAP_END_ADDR - len_byte;
5058                   dst = GPT_ADDR + inserted_byte;
5059                   pend = dst;
5060                 }
5061               inserted += count;
5062               inserted_byte += count;
5063               coding->produced += count;
                   /* Copy the converted text backward from its new end,
                      putting a CR in front of every LF.  */
5064               p = dst = pend + count;
5065               while (count)
5066                 {
5067                   *--p = *--pend;
5068                   if (*p == '\n') count--, *--p = '\r';
5069                 }
5070             }
5071
5072           /* Suppress eol-format conversion in the further conversion.  */
5073           coding->eol_type = CODING_EOL_LF;
5074
5075           /* Set the coding system symbol to that for Unix-like EOL.  */
5076           eol_type = Fget (saved_coding_symbol, Qeol_type);
5077           if (VECTORP (eol_type)
5078               && XVECTOR (eol_type)->size == 3
5079               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
5080             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
5081           else
5082             coding->symbol = saved_coding_symbol;
5083
5084           continue;
5085         }
5086       if (len_byte <= 0)
5087         {
               /* No source is left.  A CCL coding system gets one more
                  round with CODING_MODE_LAST_BLOCK set before we stop.  */
5088           if (coding->type != coding_type_ccl
5089               || coding->mode & CODING_MODE_LAST_BLOCK)
5090             break;
5091           coding->mode |= CODING_MODE_LAST_BLOCK;
5092           continue;
5093         }
5094       if (result == CODING_FINISH_INSUFFICIENT_SRC)
5095         {
5096           /* The source text ends in invalid codes.  Let's just
5097              make them valid buffer contents, and finish conversion.  */
5098           inserted += len_byte;
5099           inserted_byte += len_byte;
5100           while (len_byte--)
5101             *dst++ = *src++;
5102           break;
5103         }
5104       if (result == CODING_FINISH_INTERRUPT)
5105         {
5106           /* The conversion procedure was interrupted by a user.  */
5107           break;
5108         }
5109       /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST  */
5110       if (coding->consumed < 1)
5111         {
5112           /* It's quite strange to require more memory without
5113              consuming any bytes.  Perhaps CCL program bug.  */
5114           break;
5115         }
5116       if (first)
5117         {
5118           /* We have just done the first batch of conversion which was
5119              stopped because of insufficient gap.  Let's reconsider the
5120              required gap size (i.e. SRC - DST) now.
5121
5122              We have converted ORIG bytes (== coding->consumed) into
5123              NEW bytes (coding->produced).  To convert the remaining
5124              LEN bytes, we may need REQUIRE bytes of gap, where:
5125                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
5126                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
5127              Here, we are sure that NEW >= ORIG.  */
5128           float ratio = coding->produced - coding->consumed;
5129           ratio /= coding->consumed;
5130           require = len_byte * ratio;
5131           first = 0;
5132         }
5133       if ((src - dst) < (require + 2000))
5134         {
5135           /* See the comment above the previous call of make_gap.  */
5136           int add = len_byte + inserted_byte;
5137
5138           GAP_SIZE -= add;
5139           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5140           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5141           make_gap (require + 2000);
5142           GAP_SIZE += add;
5143           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5144           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5145         }
5146     }
5147   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
5148
5149   if (encodep && coding->dst_multibyte)
5150     {
5151       /* The output is unibyte.  We must convert 8-bit characters to
5152          multibyte form.  */
5153       if (inserted_byte * 2 > GAP_SIZE)
5154         {
               /* Count the converted text as buffer contents while the
                  gap is enlarged, then take it out again.  */
5155           GAP_SIZE -= inserted_byte;
5156           ZV += inserted_byte; Z += inserted_byte;
5157           ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte;
5158           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5159           make_gap (inserted_byte - GAP_SIZE);
5160           GAP_SIZE += inserted_byte;
5161           ZV -= inserted_byte; Z -= inserted_byte;
5162           ZV_BYTE -= inserted_byte; Z_BYTE -= inserted_byte;
5163           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5164         }
5165       inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte);
5166     }
5167
5168   /* If we have shrunk the conversion area, adjust it now.  */
5169   if (total_skip > 0)
5170     {
           /* Move the skipped tail so that it directly follows the
              converted text, then count the skipped head and tail as
              part of the inserted text.  */
5171       if (tail_skip > 0)
5172         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
5173       inserted += total_skip; inserted_byte += total_skip;
5174       GAP_SIZE += total_skip;
5175       GPT -= head_skip; GPT_BYTE -= head_skip;
5176       ZV -= total_skip; ZV_BYTE -= total_skip;
5177       Z -= total_skip; Z_BYTE -= total_skip;
5178       from -= head_skip; from_byte -= head_skip;
5179       to += tail_skip; to_byte += tail_skip;
5180     }
5181
       /* INSERTED is recomputed from the change in Z across
          adjust_after_replace.  */
5182   prev_Z = Z;
5183   adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
5184   inserted = Z - prev_Z;
5185
5186   if (!encodep && coding->cmp_data && coding->cmp_data->used)
5187     coding_restore_composition (coding, Fcurrent_buffer ());
5188   coding_free_composition_data (coding);
5189
5190   if (! inhibit_pre_post_conversion
5191       && ! encodep && ! NILP (coding->post_read_conversion))
5192     {
5193       Lisp_Object val;
5194       int count = specpdl_ptr - specpdl;  /* NOTE(review): not used in this block.  */
5195
5196       if (from != PT)
5197         TEMP_SET_PT_BOTH (from, from_byte);
5198       prev_Z = Z;
5199       record_unwind_protect (code_convert_region_unwind, Qnil);
5200       /* We should not call any more pre-write/post-read-conversion
5201          functions while this post-read-conversion is running.  */
5202       inhibit_pre_post_conversion = 1;
5203       val = call1 (coding->post_read_conversion, make_number (inserted));
5204       inhibit_pre_post_conversion = 0;
5205       /* Discard the unwind protect.  */
5206       specpdl_ptr--;
5207       CHECK_NUMBER (val, 0);
5208       inserted += Z - prev_Z;  /* The change in Z, not VAL, adjusts INSERTED.  */
5209     }
5210
       /* Restore point.  A point that was at or after the end of the
          original text is shifted by the change in length; one inside
          the text goes to FROM.  */
5211   if (orig_point >= from)
5212     {
5213       if (orig_point >= from + orig_len)
5214         orig_point += inserted - orig_len;
5215       else
5216         orig_point = from;
5217       TEMP_SET_PT (orig_point);
5218     }
5219
5220   if (replace)
5221     {
5222       signal_after_change (from, to - from, inserted);
5223       update_compositions (from, from + inserted, CHECK_BORDER);
5224     }
5225
       /* Report the overall amounts consumed and produced in CODING.  */
5226   {
5227     coding->consumed = to_byte - from_byte;
5228     coding->consumed_char = to - from;
5229     coding->produced = inserted_byte;
5230     coding->produced_char = inserted;
5231   }
5232
5233   return 0;
5234 }
5235
5236 Lisp_Object
5237 run_pre_post_conversion_on_str (str, coding, encodep)
5238      Lisp_Object str;
5239      struct coding_system *coding;
5240      int encodep;
5241 {
5242   int count = specpdl_ptr - specpdl;
5243   struct gcpro gcpro1;
5244   struct buffer *prev = current_buffer;
5245   int multibyte = STRING_MULTIBYTE (str);
5246
5247   record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
5248   record_unwind_protect (code_convert_region_unwind, Qnil);
5249   GCPRO1 (str);
5250   temp_output_buffer_setup (" *code-converting-work*");
5251   set_buffer_internal (XBUFFER (Vstandard_output));
5252   /* We must insert the contents of STR as is without
5253      unibyte<->multibyte conversion.  For that, we adjust the
5254      multibyteness of the working buffer to that of STR.  */
5255   Ferase_buffer ();
5256   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
5257   insert_from_string (str, 0, 0,
5258                       XSTRING (str)->size, STRING_BYTES (XSTRING (str)), 0);
5259   UNGCPRO;
5260   inhibit_pre_post_conversion = 1;
5261   if (encodep)
5262     call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
5263   else
5264     {
5265       TEMP_SET_PT_BOTH (BEG, BEG_BYTE);
5266       call1 (coding->post_read_conversion, make_number (Z - BEG));
5267     }
5268   inhibit_pre_post_conversion = 0;
5269   str = make_buffer_string (BEG, Z, 1);
5270   return unbind_to (count, str);
5271 }
5272
5273 Lisp_Object
5274 decode_coding_string (str, coding, nocopy)
5275      Lisp_Object str;
5276      struct coding_system *coding;
5277      int nocopy;
5278 {
5279   int len;
5280   struct conversion_buffer buf;
5281   int from, to, to_byte;
5282   struct gcpro gcpro1;
5283   Lisp_Object saved_coding_symbol;
5284   int result;
5285   int require_decoding;
5286   int shrinked_bytes = 0;
5287   Lisp_Object newstr;
5288   int consumed, consumed_char, produced, produced_char;
5289
5290   from = 0;
5291   to = XSTRING (str)->size;
5292   to_byte = STRING_BYTES (XSTRING (str));
5293
5294   saved_coding_symbol = Qnil;
5295   if (CODING_REQUIRE_DETECTION (coding))
5296     {
5297       /* See the comments in code_convert_region.  */
5298       if (coding->type == coding_type_undecided)
5299         {
5300           detect_coding (coding, XSTRING (str)->data, to_byte);
5301           if (coding->type == coding_type_undecided)
5302             coding->type = coding_type_emacs_mule;
5303         }
5304       if (coding->eol_type == CODING_EOL_UNDECIDED
5305           && coding->type != coding_type_ccl)
5306         {
5307           saved_coding_symbol = coding->symbol;
5308           detect_eol (coding, XSTRING (str)->data, to_byte);
5309           if (coding->eol_type == CODING_EOL_UNDECIDED)
5310             coding->eol_type = CODING_EOL_LF;
5311           /* We had better recover the original eol format if we
5312              encounter an inconsitent eol format while decoding.  */
5313           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
5314         }
5315     }
5316
5317   coding->src_multibyte = 0;
5318   coding->dst_multibyte = (coding->type != coding_type_no_conversion
5319                            && coding->type != coding_type_raw_text);
5320   require_decoding = CODING_REQUIRE_DECODING (coding);
5321
5322   if (STRING_MULTIBYTE (str))
5323     {
5324       /* Decoding routines expect the source text to be unibyte.  */
5325       str = Fstring_as_unibyte (str);
5326       to_byte = STRING_BYTES (XSTRING (str));
5327       nocopy = 1;
5328     }
5329
5330   /* Try to skip the heading and tailing ASCIIs.  */
5331   if (require_decoding && coding->type != coding_type_ccl)
5332     {
5333       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
5334                                 0);
5335       if (from == to_byte)
5336         require_decoding = 0;
5337       shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
5338     }
5339
5340   if (!require_decoding)
5341     {
5342       coding->consumed = STRING_BYTES (XSTRING (str));
5343       coding->consumed_char = XSTRING (str)->size;
5344       if (coding->dst_multibyte)
5345         {
5346           str = Fstring_as_multibyte (str);
5347           nocopy = 1;
5348         }
5349       coding->produced = STRING_BYTES (XSTRING (str));
5350       coding->produced_char = XSTRING (str)->size;
5351       return (nocopy ? str : Fcopy_sequence (str));
5352     }
5353
5354   if (coding->composing != COMPOSITION_DISABLED)
5355     coding_allocate_composition_data (coding, from);
5356   len = decoding_buffer_size (coding, to_byte - from);
5357   allocate_conversion_buffer (buf, len);
5358
5359   consumed = consumed_char = produced = produced_char = 0;
5360   while (1)
5361     {
5362       result = decode_coding (coding, XSTRING (str)->data + from + consumed,
5363                               buf.data + produced, to_byte - from - consumed,
5364                               buf.size - produced);
5365       consumed += coding->consumed;
5366       consumed_char += coding->consumed_char;
5367       produced += coding->produced;
5368       produced_char += coding->produced_char;
5369       if (result == CODING_FINISH_NORMAL
5370           || (result == CODING_FINISH_INSUFFICIENT_SRC
5371               && coding->consumed == 0))
5372         break;
5373       if (result == CODING_FINISH_INSUFFICIENT_CMP)
5374         coding_allocate_composition_data (coding, from + produced_char);
5375       else if (result == CODING_FINISH_INSUFFICIENT_DST)
5376         extend_conversion_buffer (&buf);
5377       else if (result == CODING_FINISH_INCONSISTENT_EOL)
5378         {
5379           /* Recover the original EOL format.  */
5380           if (coding->eol_type == CODING_EOL_CR)
5381             {
5382               unsigned char *p;
5383               for (p = buf.data; p < buf.data + produced; p++)
5384                 if (*p == '\n') *p = '\r';
5385             }
5386           else if (coding->eol_type == CODING_EOL_CRLF)
5387             {
5388               int num_eol = 0;
5389               unsigned char *p0, *p1;
5390               for (p0 = buf.data, p1 = p0 + produced; p0 < p1; p0++)
5391                 if (*p0 == '\n') num_eol++;
5392               if (produced + num_eol >= buf.size)
5393                 extend_conversion_buffer (&buf);
5394               for (p0 = buf.data + produced, p1 = p0 + num_eol; p0 > buf.data;)
5395                 {
5396                   *--p1 = *--p0;
5397                   if (*p0 == '\n') *--p1 = '\r';
5398                 }
5399               produced += num_eol;
5400               produced_char += num_eol;
5401             }
5402           coding->eol_type = CODING_EOL_LF;
5403           coding->symbol = saved_coding_symbol;
5404         }
5405     }
5406
5407   coding->consumed = consumed;
5408   coding->consumed_char = consumed_char;
5409   coding->produced = produced;
5410   coding->produced_char = produced_char;
5411
5412   if (coding->dst_multibyte)
5413     newstr = make_uninit_multibyte_string (produced_char + shrinked_bytes,
5414                                            produced + shrinked_bytes);
5415   else
5416     newstr = make_uninit_string (produced + shrinked_bytes);
5417   if (from > 0)
5418     bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
5419   bcopy (buf.data, XSTRING (newstr)->data + from, produced);
5420   if (shrinked_bytes > from)
5421     bcopy (XSTRING (str)->data + to_byte,
5422            XSTRING (newstr)->data + from + produced,
5423            shrinked_bytes - from);
5424   free_conversion_buffer (&buf);
5425
5426   if (coding->cmp_data && coding->cmp_data->used)
5427     coding_restore_composition (coding, newstr);
5428   coding_free_composition_data (coding);
5429
5430   if (SYMBOLP (coding->post_read_conversion)
5431       && !NILP (Ffboundp (coding->post_read_conversion)))
5432     newstr = run_pre_post_conversion_on_str (newstr, coding, 0);
5433
5434   return newstr;
5435 }
5436
5437 Lisp_Object
5438 encode_coding_string (str, coding, nocopy)
5439      Lisp_Object str;
5440      struct coding_system *coding;
5441      int nocopy;
5442 {
5443   int len;
5444   struct conversion_buffer buf;
5445   int from, to, to_byte;
5446   struct gcpro gcpro1;
5447   Lisp_Object saved_coding_symbol;
5448   int result;
5449   int shrinked_bytes = 0;
5450   Lisp_Object newstr;
5451   int consumed, consumed_char, produced, produced_char;
5452
5453   if (SYMBOLP (coding->pre_write_conversion)
5454       && !NILP (Ffboundp (coding->pre_write_conversion)))
5455     str = run_pre_post_conversion_on_str (str, coding, 1);
5456
5457   from = 0;
5458   to = XSTRING (str)->size;
5459   to_byte = STRING_BYTES (XSTRING (str));
5460
5461   saved_coding_symbol = Qnil;
5462
5463   /* Encoding routines determine the multibyteness of the source text
5464      by coding->src_multibyte.  */
5465   coding->src_multibyte = STRING_MULTIBYTE (str);
5466   coding->dst_multibyte = 0;
5467   if (! CODING_REQUIRE_ENCODING (coding))
5468     {
5469       coding->consumed = STRING_BYTES (XSTRING (str));
5470       coding->consumed_char = XSTRING (str)->size;
5471       if (STRING_MULTIBYTE (str))
5472         {
5473           str = Fstring_as_unibyte (str);
5474           nocopy = 1;
5475         }
5476       coding->produced = STRING_BYTES (XSTRING (str));
5477       coding->produced_char = XSTRING (str)->size;
5478       return (nocopy ? str : Fcopy_sequence (str));
5479     }
5480
5481   if (coding->composing != COMPOSITION_DISABLED)
5482     coding_save_composition (coding, from, to, str);
5483
5484   /* Try to skip the heading and tailing ASCIIs.  */
5485   if (coding->type != coding_type_ccl)
5486     {
5487       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
5488                                 1);
5489       if (from == to_byte)
5490         return (nocopy ? str : Fcopy_sequence (str));
5491       shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
5492     }
5493
5494   len = encoding_buffer_size (coding, to_byte - from);
5495   allocate_conversion_buffer (buf, len);
5496
5497   consumed = consumed_char = produced = produced_char = 0;
5498   while (1)
5499     {
5500       result = encode_coding (coding, XSTRING (str)->data + from + consumed,
5501                               buf.data + produced, to_byte - from - consumed,
5502                               buf.size - produced);
5503       consumed += coding->consumed;
5504       consumed_char += coding->consumed_char;
5505       produced += coding->produced;
5506       produced_char += coding->produced_char;
5507       if (result == CODING_FINISH_NORMAL
5508           || (result == CODING_FINISH_INSUFFICIENT_SRC
5509               && coding->consumed == 0))
5510         break;
5511       /* Now result should be CODING_FINISH_INSUFFICIENT_DST.  */
5512       extend_conversion_buffer (&buf);
5513     }
5514
5515   coding->consumed = consumed;
5516   coding->consumed_char = consumed_char;
5517   coding->produced = produced;
5518   coding->produced_char = produced_char;
5519
5520   newstr = make_uninit_string (produced + shrinked_bytes);
5521   if (from > 0)
5522     bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
5523   bcopy (buf.data, XSTRING (newstr)->data + from, produced);
5524   if (shrinked_bytes > from)
5525     bcopy (XSTRING (str)->data + to_byte,
5526            XSTRING (newstr)->data + from + produced,
5527            shrinked_bytes - from);
5528
5529   free_conversion_buffer (&buf);
5530   coding_free_composition_data (coding);
5531
5532   return newstr;
5533 }
5534
5535 \f
5536 #ifdef emacs
5537 /*** 8. Emacs Lisp library functions ***/
5538
5539 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
5540   "Return t if OBJECT is nil or a coding-system.\n\
5541 See the documentation of `make-coding-system' for information\n\
5542 about coding-system objects.")
5543   (obj)
5544      Lisp_Object obj;
5545 {
5546   if (NILP (obj))
5547     return Qt;
5548   if (!SYMBOLP (obj))
5549     return Qnil;
5550   /* Get coding-spec vector for OBJ.  */
5551   obj = Fget (obj, Qcoding_system);
5552   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
5553           ? Qt : Qnil);
5554 }
5555
5556 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
5557        Sread_non_nil_coding_system, 1, 1, 0,
5558   "Read a coding system from the minibuffer, prompting with string PROMPT.")
5559   (prompt)
5560      Lisp_Object prompt;
5561 {
5562   Lisp_Object val;
5563   do
5564     {
5565       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
5566                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
5567     }
5568   while (XSTRING (val)->size == 0);
5569   return (Fintern (val, Qnil));
5570 }
5571
5572 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
5573   "Read a coding system from the minibuffer, prompting with string PROMPT.\n\
5574 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.")
5575   (prompt, default_coding_system)
5576      Lisp_Object prompt, default_coding_system;
5577 {
5578   Lisp_Object val;
5579   if (SYMBOLP (default_coding_system))
5580     XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
5581   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
5582                           Qt, Qnil, Qcoding_system_history,
5583                           default_coding_system, Qnil);
5584   return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
5585 }
5586
5587 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
5588        1, 1, 0,
5589   "Check validity of CODING-SYSTEM.\n\
5590 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.\n\
5591 It is valid if it is a symbol with a non-nil `coding-system' property.\n\
5592 The value of property should be a vector of length 5.")
5593   (coding_system)
5594      Lisp_Object coding_system;
5595 {
5596   CHECK_SYMBOL (coding_system, 0);
5597   if (!NILP (Fcoding_system_p (coding_system)))
5598     return coding_system;
5599   while (1)
5600     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
5601 }
5602 \f
5603 Lisp_Object
5604 detect_coding_system (src, src_bytes, highest)
5605      unsigned char *src;
5606      int src_bytes, highest;
5607 {
5608   int coding_mask, eol_type;
5609   Lisp_Object val, tmp;
5610   int dummy;
5611
5612   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy);
5613   eol_type  = detect_eol_type (src, src_bytes, &dummy);
5614   if (eol_type == CODING_EOL_INCONSISTENT)
5615     eol_type = CODING_EOL_UNDECIDED;
5616
5617   if (!coding_mask)
5618     {
5619       val = Qundecided;
5620       if (eol_type != CODING_EOL_UNDECIDED)
5621         {
5622           Lisp_Object val2;
5623           val2 = Fget (Qundecided, Qeol_type);
5624           if (VECTORP (val2))
5625             val = XVECTOR (val2)->contents[eol_type];
5626         }
5627       return (highest ? val : Fcons (val, Qnil));
5628     }
5629
5630   /* At first, gather possible coding systems in VAL.  */
5631   val = Qnil;
5632   for (tmp = Vcoding_category_list; CONSP (tmp); tmp = XCDR (tmp))
5633     {
5634       Lisp_Object category_val, category_index;
5635
5636       category_index = Fget (XCAR (tmp), Qcoding_category_index);
5637       category_val = Fsymbol_value (XCAR (tmp));
5638       if (!NILP (category_val)
5639           && NATNUMP (category_index)
5640           && (coding_mask & (1 << XFASTINT (category_index))))
5641         {
5642           val = Fcons (category_val, val);
5643           if (highest)
5644             break;
5645         }
5646     }
5647   if (!highest)
5648     val = Fnreverse (val);
5649
5650   /* Then, replace the elements with subsidiary coding systems.  */
5651   for (tmp = val; CONSP (tmp); tmp = XCDR (tmp))
5652     {
5653       if (eol_type != CODING_EOL_UNDECIDED
5654           && eol_type != CODING_EOL_INCONSISTENT)
5655         {
5656           Lisp_Object eol;
5657           eol = Fget (XCAR (tmp), Qeol_type);
5658           if (VECTORP (eol))
5659             XCAR (tmp) = XVECTOR (eol)->contents[eol_type];
5660         }
5661     }
5662   return (highest ? XCAR (val) : val);
5663 }
5664
5665 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
5666        2, 3, 0,
5667   "Detect coding system of the text in the region between START and END.\n\
5668 Return a list of possible coding systems ordered by priority.\n\
5669 \n\
5670 If only ASCII characters are found, it returns a list of single element\n\
5671 `undecided' or its subsidiary coding system according to a detected\n\
5672 end-of-line format.\n\
5673 \n\
5674 If optional argument HIGHEST is non-nil, return the coding system of\n\
5675 highest priority.")
5676   (start, end, highest)
5677      Lisp_Object start, end, highest;
5678 {
5679   int from, to;
5680   int from_byte, to_byte;
5681
5682   CHECK_NUMBER_COERCE_MARKER (start, 0);
5683   CHECK_NUMBER_COERCE_MARKER (end, 1);
5684
5685   validate_region (&start, &end);
5686   from = XINT (start), to = XINT (end);
5687   from_byte = CHAR_TO_BYTE (from);
5688   to_byte = CHAR_TO_BYTE (to);
5689
5690   if (from < GPT && to >= GPT)
5691     move_gap_both (to, to_byte);
5692
5693   return detect_coding_system (BYTE_POS_ADDR (from_byte),
5694                                to_byte - from_byte,
5695                                !NILP (highest));
5696 }
5697
5698 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
5699        1, 2, 0,
5700   "Detect coding system of the text in STRING.\n\
5701 Return a list of possible coding systems ordered by priority.\n\
5702 \n\
5703 If only ASCII characters are found, it returns a list of single element\n\
5704 `undecided' or its subsidiary coding system according to a detected\n\
5705 end-of-line format.\n\
5706 \n\
5707 If optional argument HIGHEST is non-nil, return the coding system of\n\
5708 highest priority.")
5709   (string, highest)
5710      Lisp_Object string, highest;
5711 {
5712   CHECK_STRING (string, 0);
5713
5714   return detect_coding_system (XSTRING (string)->data,
5715                                STRING_BYTES (XSTRING (string)),
5716                                !NILP (highest));
5717 }
5718
5719 /* Return an intersection of lists L1 and L2.  */
5720
5721 static Lisp_Object
5722 intersection (l1, l2)
5723      Lisp_Object l1, l2;
5724 {
5725   Lisp_Object val;
5726
5727   for (val = Qnil; CONSP (l1); l1 = XCDR (l1))
5728     {
5729       if (!NILP (Fmemq (XCAR (l1), l2)))
5730         val = Fcons (XCAR (l1), val);
5731     }
5732   return val;
5733 }
5734
5735
5736 /*  Subroutine for Fsafe_coding_systems_region_internal.
5737
5738     Return a list of coding systems that safely encode the multibyte
5739     text between P and PEND.  SAFE_CODINGS, if non-nil, is a list of
5740     possible coding systems.  If it is nil, it means that we have not
5741     yet found any coding systems.
5742
5743     WORK_TABLE is a copy of the char-table Vchar_coding_system_table.  An
5744     element of WORK_TABLE is set to t once the element is looked up.
5745
5746     If a non-ASCII single byte char is found, set
5747     *single_byte_char_found to 1.  */
5748
5749 static Lisp_Object
5750 find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
5751      unsigned char *p, *pend;
5752      Lisp_Object safe_codings, work_table;
5753      int *single_byte_char_found;
5754 {
5755   int c, len, idx;
5756   Lisp_Object val;
5757
5758   while (p < pend)
5759     {
5760       c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
5761       p += len;
5762       if (ASCII_BYTE_P (c))
5763         /* We can ignore ASCII characters here.  */
5764         continue;
5765       if (SINGLE_BYTE_CHAR_P (c))
5766         *single_byte_char_found = 1;
5767       if (NILP (safe_codings))
5768         continue;
5769       /* Check the safe coding systems for C.  */
5770       val = char_table_ref_and_index (work_table, c, &idx);
5771       if (EQ (val, Qt))
5772         /* This element was already checked.  Ignore it.  */
5773         continue;
5774       /* Remember that we checked this element.  */
5775       CHAR_TABLE_SET (work_table, make_number (idx), Qt);
5776
5777       /* If there are some safe coding systems for C and we have
5778          already found the other set of coding systems for the
5779          different characters, get the intersection of them.  */
5780       if (!EQ (safe_codings, Qt) && !NILP (val))
5781         val = intersection (safe_codings, val);
5782       safe_codings = val;
5783     }
5784   return safe_codings;
5785 }
5786
5787
5788 /* Return a list of coding systems that safely encode the text between
5789    START and END.  If the text contains only ASCII or is unibyte,
5790    return t.  */
5791
5792 DEFUN ("find-coding-systems-region-internal",
5793        Ffind_coding_systems_region_internal,
5794        Sfind_coding_systems_region_internal, 2, 2, 0,
5795   "Internal use only.")
5796   (start, end)
5797      Lisp_Object start, end;
5798 {
5799   Lisp_Object work_table, safe_codings;
5800   int non_ascii_p = 0;
5801   int single_byte_char_found = 0;
5802   unsigned char *p1, *p1end, *p2, *p2end, *p;
5803   Lisp_Object args[2];
5804
5805   if (STRINGP (start))
5806     {
5807       if (!STRING_MULTIBYTE (start))
5808         return Qt;
5809       p1 = XSTRING (start)->data, p1end = p1 + STRING_BYTES (XSTRING (start));
5810       p2 = p2end = p1end;
5811       if (XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
5812         non_ascii_p = 1;
5813     }
5814   else
5815     {
5816       int from, to, stop;
5817
5818       CHECK_NUMBER_COERCE_MARKER (start, 0);
5819       CHECK_NUMBER_COERCE_MARKER (end, 1);
5820       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
5821         args_out_of_range (start, end);
5822       if (NILP (current_buffer->enable_multibyte_characters))
5823         return Qt;
5824       from = CHAR_TO_BYTE (XINT (start));
5825       to = CHAR_TO_BYTE (XINT (end));
5826       stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to;
5827       p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from);
5828       if (stop == to)
5829         p2 = p2end = p1end;
5830       else
5831         p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop);
5832       if (XINT (end) - XINT (start) != to - from)
5833         non_ascii_p = 1;
5834     }
5835
5836   if (!non_ascii_p)
5837     {
5838       /* We are sure that the text contains no multibyte character.
5839          Check if it contains eight-bit-graphic.  */
5840       p = p1;
5841       for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++);
5842       if (p == p1end)
5843         {
5844           for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);
5845           if (p == p2end)
5846             return Qt;
5847         }
5848     }
5849
5850   /* The text contains non-ASCII characters.  */
5851   work_table = Fcopy_sequence (Vchar_coding_system_table);
5852   safe_codings = find_safe_codings (p1, p1end, Qt, work_table,
5853                                     &single_byte_char_found);
5854   if (p2 < p2end)
5855     safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
5856                                       &single_byte_char_found);
5857
5858   if (!single_byte_char_found)
5859     {
5860       /* Append generic coding systems.  */
5861       Lisp_Object args[2];
5862       args[0] = safe_codings;
5863       args[1] = Fchar_table_extra_slot (Vchar_coding_system_table,
5864                                         make_number (0));
5865       safe_codings = Fappend (2, args);
5866     }
5867   else
5868     safe_codings = Fcons (Qraw_text, Fcons (Qemacs_mule, safe_codings));
5869   return safe_codings;
5870 }
5871
5872
5873 Lisp_Object
5874 code_convert_region1 (start, end, coding_system, encodep)
5875      Lisp_Object start, end, coding_system;
5876      int encodep;
5877 {
5878   struct coding_system coding;
5879   int from, to, len;
5880
5881   CHECK_NUMBER_COERCE_MARKER (start, 0);
5882   CHECK_NUMBER_COERCE_MARKER (end, 1);
5883   CHECK_SYMBOL (coding_system, 2);
5884
5885   validate_region (&start, &end);
5886   from = XFASTINT (start);
5887   to = XFASTINT (end);
5888
5889   if (NILP (coding_system))
5890     return make_number (to - from);
5891
5892   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
5893     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
5894
5895   coding.mode |= CODING_MODE_LAST_BLOCK;
5896   coding.src_multibyte = coding.dst_multibyte
5897     = !NILP (current_buffer->enable_multibyte_characters);
5898   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
5899                        &coding, encodep, 1);
5900   Vlast_coding_system_used = coding.symbol;
5901   return make_number (coding.produced_char);
5902 }
5903
5904 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
5905        3, 3, "r\nzCoding system: ",
5906   "Decode the current region by specified coding system.\n\
5907 When called from a program, takes three arguments:\n\
5908 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
5909 This function sets `last-coding-system-used' to the precise coding system\n\
5910 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5911 not fully specified.)\n\
5912 It returns the length of the decoded text.")
5913   (start, end, coding_system)
5914      Lisp_Object start, end, coding_system;
5915 {
5916   return code_convert_region1 (start, end, coding_system, 0);
5917 }
5918
5919 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
5920        3, 3, "r\nzCoding system: ",
5921   "Encode the current region by specified coding system.\n\
5922 When called from a program, takes three arguments:\n\
5923 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
5924 This function sets `last-coding-system-used' to the precise coding system\n\
5925 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5926 not fully specified.)\n\
5927 It returns the length of the encoded text.")
5928   (start, end, coding_system)
5929      Lisp_Object start, end, coding_system;
5930 {
5931   return code_convert_region1 (start, end, coding_system, 1);
5932 }
5933
5934 Lisp_Object
5935 code_convert_string1 (string, coding_system, nocopy, encodep)
5936      Lisp_Object string, coding_system, nocopy;
5937      int encodep;
5938 {
5939   struct coding_system coding;
5940
5941   CHECK_STRING (string, 0);
5942   CHECK_SYMBOL (coding_system, 1);
5943
5944   if (NILP (coding_system))
5945     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
5946
5947   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
5948     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
5949
5950   coding.mode |= CODING_MODE_LAST_BLOCK;
5951   string = (encodep
5952             ? encode_coding_string (string, &coding, !NILP (nocopy))
5953             : decode_coding_string (string, &coding, !NILP (nocopy)));
5954   Vlast_coding_system_used = coding.symbol;
5955
5956   return string;
5957 }
5958
5959 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
5960        2, 3, 0,
5961   "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
5962 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
5963 if the decoding operation is trivial.\n\
5964 This function sets `last-coding-system-used' to the precise coding system\n\
5965 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5966 not fully specified.)")
5967   (string, coding_system, nocopy)
5968      Lisp_Object string, coding_system, nocopy;
5969 {
5970   return code_convert_string1 (string, coding_system, nocopy, 0);
5971 }
5972
5973 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
5974        2, 3, 0,
5975   "Encode STRING to CODING-SYSTEM, and return the result.\n\
5976 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
5977 if the encoding operation is trivial.\n\
5978 This function sets `last-coding-system-used' to the precise coding system\n\
5979 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5980 not fully specified.)")
5981   (string, coding_system, nocopy)
5982      Lisp_Object string, coding_system, nocopy;
5983 {
5984   return code_convert_string1 (string, coding_system, nocopy, 1);
5985 }
5986
5987 /* Encode or decode STRING according to CODING_SYSTEM.
5988    Do not set Vlast_coding_system_used.
5989
5990    This function is called only from macros DECODE_FILE and
5991    ENCODE_FILE, thus we ignore character composition.  */
5992
5993 Lisp_Object
5994 code_convert_string_norecord (string, coding_system, encodep)
5995      Lisp_Object string, coding_system;
5996      int encodep;
5997 {
5998   struct coding_system coding;
5999
6000   CHECK_STRING (string, 0);
6001   CHECK_SYMBOL (coding_system, 1);
6002
6003   if (NILP (coding_system))
6004     return string;
6005
6006   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6007     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
6008
6009   coding.composing = COMPOSITION_DISABLED;
6010   coding.mode |= CODING_MODE_LAST_BLOCK;
6011   return (encodep
6012           ? encode_coding_string (string, &coding, 1)
6013           : decode_coding_string (string, &coding, 1));
6014 }
6015 \f
6016 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
6017   "Decode a Japanese character which has CODE in shift_jis encoding.\n\
6018 Return the corresponding character.")
6019   (code)
6020      Lisp_Object code;
6021 {
6022   unsigned char c1, c2, s1, s2;
6023   Lisp_Object val;
6024
6025   CHECK_NUMBER (code, 0);
6026   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
6027   if (s1 == 0)
6028     {
6029       if (s2 < 0x80)
6030         XSETFASTINT (val, s2);
6031       else if (s2 >= 0xA0 || s2 <= 0xDF)
6032         XSETFASTINT (val, MAKE_CHAR (charset_katakana_jisx0201, s2, 0));
6033       else
6034         error ("Invalid Shift JIS code: %x", XFASTINT (code));
6035     }
6036   else
6037     {
6038       if ((s1 < 0x80 || s1 > 0x9F && s1 < 0xE0 || s1 > 0xEF)
6039           || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC))
6040         error ("Invalid Shift JIS code: %x", XFASTINT (code));
6041       DECODE_SJIS (s1, s2, c1, c2);
6042       XSETFASTINT (val, MAKE_CHAR (charset_jisx0208, c1, c2));
6043     }
6044   return val;
6045 }
6046
6047 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
6048   "Encode a Japanese character CHAR to shift_jis encoding.\n\
6049 Return the corresponding code in SJIS.")
6050   (ch)
6051      Lisp_Object ch;
6052 {
6053   int charset, c1, c2, s1, s2;
6054   Lisp_Object val;
6055
6056   CHECK_NUMBER (ch, 0);
6057   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
6058   if (charset == CHARSET_ASCII)
6059     {
6060       val = ch;
6061     }
6062   else if (charset == charset_jisx0208
6063            && c1 > 0x20 && c1 < 0x7F && c2 > 0x20 && c2 < 0x7F)
6064     {
6065       ENCODE_SJIS (c1, c2, s1, s2);
6066       XSETFASTINT (val, (s1 << 8) | s2);
6067     }
6068   else if (charset == charset_katakana_jisx0201
6069            && c1 > 0x20 && c2 < 0xE0)
6070     {
6071       XSETFASTINT (val, c1 | 0x80);
6072     }
6073   else
6074     error ("Can't encode to shift_jis: %d", XFASTINT (ch));
6075   return val;
6076 }
6077
6078 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
6079   "Decode a Big5 character which has CODE in BIG5 coding system.\n\
6080 Return the corresponding character.")
6081   (code)
6082      Lisp_Object code;
6083 {
6084   int charset;
6085   unsigned char b1, b2, c1, c2;
6086   Lisp_Object val;
6087
6088   CHECK_NUMBER (code, 0);
6089   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
6090   if (b1 == 0)
6091     {
6092       if (b2 >= 0x80)
6093         error ("Invalid BIG5 code: %x", XFASTINT (code));
6094       val = code;
6095     }
6096   else
6097     {
6098       if ((b1 < 0xA1 || b1 > 0xFE)
6099           || (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE))
6100         error ("Invalid BIG5 code: %x", XFASTINT (code));
6101       DECODE_BIG5 (b1, b2, charset, c1, c2);
6102       XSETFASTINT (val, MAKE_CHAR (charset, c1, c2));
6103     }
6104   return val;
6105 }
6106
6107 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
6108   "Encode the Big5 character CHAR to BIG5 coding system.\n\
6109 Return the corresponding character code in Big5.")
6110   (ch)
6111      Lisp_Object ch;
6112 {
6113   int charset, c1, c2, b1, b2;
6114   Lisp_Object val;
6115
6116   CHECK_NUMBER (ch, 0);
6117   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
6118   if (charset == CHARSET_ASCII)
6119     {
6120       val = ch;
6121     }
6122   else if ((charset == charset_big5_1
6123             && (XFASTINT (ch) >= 0x250a1 && XFASTINT (ch) <= 0x271ec))
6124            || (charset == charset_big5_2
6125                && XFASTINT (ch) >= 0x290a1 && XFASTINT (ch) <= 0x2bdb2))
6126     {
6127       ENCODE_BIG5 (charset, c1, c2, b1, b2);
6128       XSETFASTINT (val, (b1 << 8) | b2);
6129     }
6130   else
6131     error ("Can't encode to Big5: %d", XFASTINT (ch));
6132   return val;
6133 }
6134 \f
6135 DEFUN ("set-terminal-coding-system-internal",
6136        Fset_terminal_coding_system_internal,
6137        Sset_terminal_coding_system_internal, 1, 1, 0, "")
6138   (coding_system)
6139      Lisp_Object coding_system;
6140 {
6141   CHECK_SYMBOL (coding_system, 0);
6142   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
6143   /* We had better not send unsafe characters to terminal.  */
6144   terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
6145   /* Characer composition should be disabled.  */
6146   terminal_coding.composing = COMPOSITION_DISABLED;
6147   terminal_coding.src_multibyte = 1;
6148   terminal_coding.dst_multibyte = 0;
6149   return Qnil;
6150 }
6151
6152 DEFUN ("set-safe-terminal-coding-system-internal",
6153        Fset_safe_terminal_coding_system_internal,
6154        Sset_safe_terminal_coding_system_internal, 1, 1, 0, "")
6155   (coding_system)
6156      Lisp_Object coding_system;
6157 {
6158   CHECK_SYMBOL (coding_system, 0);
6159   setup_coding_system (Fcheck_coding_system (coding_system),
6160                        &safe_terminal_coding);
6161   /* Characer composition should be disabled.  */
6162   safe_terminal_coding.composing = COMPOSITION_DISABLED;
6163   safe_terminal_coding.src_multibyte = 1;
6164   safe_terminal_coding.dst_multibyte = 0;
6165   return Qnil;
6166 }
6167
6168 DEFUN ("terminal-coding-system",
6169        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
6170   "Return coding system specified for terminal output.")
6171   ()
6172 {
6173   return terminal_coding.symbol;
6174 }
6175
6176 DEFUN ("set-keyboard-coding-system-internal",
6177        Fset_keyboard_coding_system_internal,
6178        Sset_keyboard_coding_system_internal, 1, 1, 0, "")
6179   (coding_system)
6180      Lisp_Object coding_system;
6181 {
6182   CHECK_SYMBOL (coding_system, 0);
6183   setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
6184   /* Characer composition should be disabled.  */
6185   keyboard_coding.composing = COMPOSITION_DISABLED;
6186   return Qnil;
6187 }
6188
6189 DEFUN ("keyboard-coding-system",
6190        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
6191   "Return coding system specified for decoding keyboard input.")
6192   ()
6193 {
6194   return keyboard_coding.symbol;
6195 }
6196
6197 \f
6198 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
6199        Sfind_operation_coding_system,  1, MANY, 0,
6200   "Choose a coding system for an operation based on the target name.\n\
6201 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).\n\
6202 DECODING-SYSTEM is the coding system to use for decoding\n\
6203 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
6204 for encoding (in case OPERATION does encoding).\n\
6205 \n\
6206 The first argument OPERATION specifies an I/O primitive:\n\
6207   For file I/O, `insert-file-contents' or `write-region'.\n\
6208   For process I/O, `call-process', `call-process-region', or `start-process'.\n\
6209   For network I/O, `open-network-stream'.\n\
6210 \n\
6211 The remaining arguments should be the same arguments that were passed\n\
6212 to the primitive.  Depending on which primitive, one of those arguments\n\
6213 is selected as the TARGET.  For example, if OPERATION does file I/O,\n\
6214 whichever argument specifies the file name is TARGET.\n\
6215 \n\
6216 TARGET has a meaning which depends on OPERATION:\n\
6217   For file I/O, TARGET is a file name.\n\
6218   For process I/O, TARGET is a process name.\n\
6219   For network I/O, TARGET is a service name or a port number\n\
6220 \n\
6221 This function looks up what specified for TARGET in,\n\
6222 `file-coding-system-alist', `process-coding-system-alist',\n\
6223 or `network-coding-system-alist' depending on OPERATION.\n\
6224 They may specify a coding system, a cons of coding systems,\n\
6225 or a function symbol to call.\n\
6226 In the last case, we call the function with one argument,\n\
6227 which is a list of all the arguments given to this function.")
6228   (nargs, args)
6229      int nargs;
6230      Lisp_Object *args;
6231 {
6232   Lisp_Object operation, target_idx, target, val;
6233   register Lisp_Object chain;
6234
6235   if (nargs < 2)
6236     error ("Too few arguments");
6237   operation = args[0];
6238   if (!SYMBOLP (operation)
6239       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
6240     error ("Invalid first arguement");
6241   if (nargs < 1 + XINT (target_idx))
6242     error ("Too few arguments for operation: %s",
6243            XSYMBOL (operation)->name->data);
6244   target = args[XINT (target_idx) + 1];
6245   if (!(STRINGP (target)
6246         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
6247     error ("Invalid %dth argument", XINT (target_idx) + 1);
6248
6249   chain = ((EQ (operation, Qinsert_file_contents)
6250             || EQ (operation, Qwrite_region))
6251            ? Vfile_coding_system_alist
6252            : (EQ (operation, Qopen_network_stream)
6253               ? Vnetwork_coding_system_alist
6254               : Vprocess_coding_system_alist));
6255   if (NILP (chain))
6256     return Qnil;
6257
6258   for (; CONSP (chain); chain = XCDR (chain))
6259     {
6260       Lisp_Object elt;
6261       elt = XCAR (chain);
6262
6263       if (CONSP (elt)
6264           && ((STRINGP (target)
6265                && STRINGP (XCAR (elt))
6266                && fast_string_match (XCAR (elt), target) >= 0)
6267               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
6268         {
6269           val = XCDR (elt);
6270           /* Here, if VAL is both a valid coding system and a valid
6271              function symbol, we return VAL as a coding system.  */
6272           if (CONSP (val))
6273             return val;
6274           if (! SYMBOLP (val))
6275             return Qnil;
6276           if (! NILP (Fcoding_system_p (val)))
6277             return Fcons (val, val);
6278           if (! NILP (Ffboundp (val)))
6279             {
6280               val = call1 (val, Flist (nargs, args));
6281               if (CONSP (val))
6282                 return val;
6283               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
6284                 return Fcons (val, val);
6285             }
6286           return Qnil;
6287         }
6288     }
6289   return Qnil;
6290 }
6291
6292 DEFUN ("update-coding-systems-internal",  Fupdate_coding_systems_internal,
6293        Supdate_coding_systems_internal, 0, 0, 0,
6294   "Update internal database for ISO2022 and CCL based coding systems.\n\
6295 When values of any coding categories are changed, you must\n\
6296 call this function")
6297   ()
6298 {
6299   int i;
6300
6301   for (i = CODING_CATEGORY_IDX_EMACS_MULE; i < CODING_CATEGORY_IDX_MAX; i++)
6302     {
6303       Lisp_Object val;
6304
6305       val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value;
6306       if (!NILP (val))
6307         {
6308           if (! coding_system_table[i])
6309             coding_system_table[i] = ((struct coding_system *)
6310                                       xmalloc (sizeof (struct coding_system)));
6311           setup_coding_system (val, coding_system_table[i]);
6312         }
6313       else if (coding_system_table[i])
6314         {
6315           xfree (coding_system_table[i]);
6316           coding_system_table[i] = NULL;
6317         }
6318     }
6319
6320   return Qnil;
6321 }
6322
6323 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
6324        Sset_coding_priority_internal, 0, 0, 0,
6325   "Update internal database for the current value of `coding-category-list'.\n\
6326 This function is internal use only.")
6327   ()
6328 {
6329   int i = 0, idx;
6330   Lisp_Object val;
6331
6332   val = Vcoding_category_list;
6333
6334   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
6335     {
6336       if (! SYMBOLP (XCAR (val)))
6337         break;
6338       idx = XFASTINT (Fget (XCAR (val), Qcoding_category_index));
6339       if (idx >= CODING_CATEGORY_IDX_MAX)
6340         break;
6341       coding_priorities[i++] = (1 << idx);
6342       val = XCDR (val);
6343     }
6344   /* If coding-category-list is valid and contains all coding
6345      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
6346      the following code saves Emacs from crashing.  */
6347   while (i < CODING_CATEGORY_IDX_MAX)
6348     coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
6349
6350   return Qnil;
6351 }
6352
6353 #endif /* emacs */
6354
6355 \f
6356 /*** 9. Post-amble ***/
6357
6358 void
6359 init_coding_once ()
6360 {
6361   int i;
6362
6363   /* Emacs' internal format specific initialize routine.  */
6364   for (i = 0; i <= 0x20; i++)
6365     emacs_code_class[i] = EMACS_control_code;
6366   emacs_code_class[0x0A] = EMACS_linefeed_code;
6367   emacs_code_class[0x0D] = EMACS_carriage_return_code;
6368   for (i = 0x21 ; i < 0x7F; i++)
6369     emacs_code_class[i] = EMACS_ascii_code;
6370   emacs_code_class[0x7F] = EMACS_control_code;
6371   for (i = 0x80; i < 0xFF; i++)
6372     emacs_code_class[i] = EMACS_invalid_code;
6373   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
6374   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
6375   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
6376   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
6377
6378   /* ISO2022 specific initialize routine.  */
6379   for (i = 0; i < 0x20; i++)
6380     iso_code_class[i] = ISO_control_0;
6381   for (i = 0x21; i < 0x7F; i++)
6382     iso_code_class[i] = ISO_graphic_plane_0;
6383   for (i = 0x80; i < 0xA0; i++)
6384     iso_code_class[i] = ISO_control_1;
6385   for (i = 0xA1; i < 0xFF; i++)
6386     iso_code_class[i] = ISO_graphic_plane_1;
6387   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
6388   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
6389   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
6390   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
6391   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
6392   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
6393   iso_code_class[ISO_CODE_ESC] = ISO_escape;
6394   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
6395   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
6396   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
6397
6398   setup_coding_system (Qnil, &keyboard_coding);
6399   setup_coding_system (Qnil, &terminal_coding);
6400   setup_coding_system (Qnil, &safe_terminal_coding);
6401   setup_coding_system (Qnil, &default_buffer_file_coding);
6402
6403   bzero (coding_system_table, sizeof coding_system_table);
6404
6405   bzero (ascii_skip_code, sizeof ascii_skip_code);
6406   for (i = 0; i < 128; i++)
6407     ascii_skip_code[i] = 1;
6408
6409 #if defined (MSDOS) || defined (WINDOWSNT)
6410   system_eol_type = CODING_EOL_CRLF;
6411 #else
6412   system_eol_type = CODING_EOL_LF;
6413 #endif
6414
6415   inhibit_pre_post_conversion = 0;
6416 }
6417
6418 #ifdef emacs
6419
/* Register everything this file exports to Lisp: intern and
   GC-protect the symbols used here, attach the symbol properties the
   code above relies on (`target-idx', `coding-category-index',
   `char-table-extra-slots', the `coding-system-error' condition),
   register the primitives, and define the Lisp variables together
   with their initial values.  */
void
syms_of_coding ()
{
  /* Property naming which argument of an I/O primitive is the TARGET
     looked up by `find-operation-coding-system'.  */
  Qtarget_idx = intern ("target-idx");
  staticpro (&Qtarget_idx);

  Qcoding_system_history = intern ("coding-system-history");
  staticpro (&Qcoding_system_history);
  Fset (Qcoding_system_history, Qnil);

  /* The `target-idx' values below are zero-based positions among the
     primitive's own arguments.  */

  /* Target FILENAME is the first argument.  */
  Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
  /* Target FILENAME is the third argument.  */
  Fput (Qwrite_region, Qtarget_idx, make_number (2));

  Qcall_process = intern ("call-process");
  staticpro (&Qcall_process);
  /* Target PROGRAM is the first argument.  */
  Fput (Qcall_process, Qtarget_idx, make_number (0));

  Qcall_process_region = intern ("call-process-region");
  staticpro (&Qcall_process_region);
  /* Target PROGRAM is the third argument.  */
  Fput (Qcall_process_region, Qtarget_idx, make_number (2));

  Qstart_process = intern ("start-process");
  staticpro (&Qstart_process);
  /* Target PROGRAM is the third argument.  */
  Fput (Qstart_process, Qtarget_idx, make_number (2));

  Qopen_network_stream = intern ("open-network-stream");
  staticpro (&Qopen_network_stream);
  /* Target SERVICE is the fourth argument.  */
  Fput (Qopen_network_stream, Qtarget_idx, make_number (3));

  /* Plain symbols: each is interned and protected from garbage
     collection.  */
  Qcoding_system = intern ("coding-system");
  staticpro (&Qcoding_system);

  Qeol_type = intern ("eol-type");
  staticpro (&Qeol_type);

  Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
  staticpro (&Qbuffer_file_coding_system);

  Qpost_read_conversion = intern ("post-read-conversion");
  staticpro (&Qpost_read_conversion);

  Qpre_write_conversion = intern ("pre-write-conversion");
  staticpro (&Qpre_write_conversion);

  Qno_conversion = intern ("no-conversion");
  staticpro (&Qno_conversion);

  Qundecided = intern ("undecided");
  staticpro (&Qundecided);

  Qcoding_system_p = intern ("coding-system-p");
  staticpro (&Qcoding_system_p);

  Qcoding_system_error = intern ("coding-system-error");
  staticpro (&Qcoding_system_error);

  /* Declare `coding-system-error' as an error condition whose parent
     is `error', and give it its message.  */
  Fput (Qcoding_system_error, Qerror_conditions,
        Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
  Fput (Qcoding_system_error, Qerror_message,
        build_string ("Invalid coding system"));

  Qcoding_category = intern ("coding-category");
  staticpro (&Qcoding_category);
  Qcoding_category_index = intern ("coding-category-index");
  staticpro (&Qcoding_category_index);

  /* Build the vector of coding category symbols, one per index, and
     record each symbol's index in its `coding-category-index'
     property.  */
  Vcoding_category_table
    = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
  staticpro (&Vcoding_category_table);
  {
    int i;
    for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
      {
        XVECTOR (Vcoding_category_table)->contents[i]
          = intern (coding_category_name[i]);
        Fput (XVECTOR (Vcoding_category_table)->contents[i],
              Qcoding_category_index, make_number (i));
      }
  }

  /* Char-tables of subtype `translation-table' get one extra slot.  */
  Qtranslation_table = intern ("translation-table");
  staticpro (&Qtranslation_table);
  Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));

  Qtranslation_table_id = intern ("translation-table-id");
  staticpro (&Qtranslation_table_id);

  Qtranslation_table_for_decode = intern ("translation-table-for-decode");
  staticpro (&Qtranslation_table_for_decode);

  Qtranslation_table_for_encode = intern ("translation-table-for-encode");
  staticpro (&Qtranslation_table_for_encode);

  Qsafe_chars = intern ("safe-chars");
  staticpro (&Qsafe_chars);

  Qchar_coding_system = intern ("char-coding-system");
  staticpro (&Qchar_coding_system);

  /* Intern this now in case it isn't already done.
     Setting this variable twice is harmless.
     But don't staticpro it here--that is done in alloc.c.  */
  Qchar_table_extra_slots = intern ("char-table-extra-slots");
  /* `safe-chars' tables have no extra slot, `char-coding-system'
     tables have one.  */
  Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
  Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (1));

  Qvalid_codes = intern ("valid-codes");
  staticpro (&Qvalid_codes);

  Qemacs_mule = intern ("emacs-mule");
  staticpro (&Qemacs_mule);

  Qraw_text = intern ("raw-text");
  staticpro (&Qraw_text);

  /* Register the Lisp primitives defined in this file.  */
  defsubr (&Scoding_system_p);
  defsubr (&Sread_coding_system);
  defsubr (&Sread_non_nil_coding_system);
  defsubr (&Scheck_coding_system);
  defsubr (&Sdetect_coding_region);
  defsubr (&Sdetect_coding_string);
  defsubr (&Sfind_coding_systems_region_internal);
  defsubr (&Sdecode_coding_region);
  defsubr (&Sencode_coding_region);
  defsubr (&Sdecode_coding_string);
  defsubr (&Sencode_coding_string);
  defsubr (&Sdecode_sjis_char);
  defsubr (&Sencode_sjis_char);
  defsubr (&Sdecode_big5_char);
  defsubr (&Sencode_big5_char);
  defsubr (&Sset_terminal_coding_system_internal);
  defsubr (&Sset_safe_terminal_coding_system_internal);
  defsubr (&Sterminal_coding_system);
  defsubr (&Sset_keyboard_coding_system_internal);
  defsubr (&Skeyboard_coding_system);
  defsubr (&Sfind_operation_coding_system);
  defsubr (&Supdate_coding_systems_internal);
  defsubr (&Sset_coding_priority_internal);

  /* Lisp variables.  Each definition is followed by the assignment
     of the variable's initial value.  */
  DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
    "List of coding systems.\n\
\n\
Do not alter the value of this variable manually.  This variable should be\n\
updated by the functions `make-coding-system' and\n\
`define-coding-system-alias'.");
  Vcoding_system_list = Qnil;

  DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
    "Alist of coding system names.\n\
Each element is one element list of coding system name.\n\
This variable is given to `completing-read' as TABLE argument.\n\
\n\
Do not alter the value of this variable manually.  This variable should be\n\
updated by the functions `make-coding-system' and\n\
`define-coding-system-alias'.");
  Vcoding_system_alist = Qnil;

  DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
    "List of coding-categories (symbols) ordered by priority.");
  {
    int i;

    /* Cons the category symbols from the last index down to the
       first, so the resulting list is in ascending index order.  */
    Vcoding_category_list = Qnil;
    for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
      Vcoding_category_list
        = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
                 Vcoding_category_list);
  }

  DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
    "Specify the coding system for read operations.\n\
It is useful to bind this variable with `let', but do not set it globally.\n\
If the value is a coding system, it is used for decoding on read operation.\n\
If not, an appropriate element is used from one of the coding system alists:\n\
There are three such tables, `file-coding-system-alist',\n\
`process-coding-system-alist', and `network-coding-system-alist'.");
  Vcoding_system_for_read = Qnil;

  DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
    "Specify the coding system for write operations.\n\
Programs bind this variable with `let', but you should not set it globally.\n\
If the value is a coding system, it is used for encoding of output,\n\
when writing it to a file and when sending it to a file or subprocess.\n\
\n\
If this does not specify a coding system, an appropriate element\n\
is used from one of the coding system alists:\n\
There are three such tables, `file-coding-system-alist',\n\
`process-coding-system-alist', and `network-coding-system-alist'.\n\
For output to files, if the above procedure does not specify a coding system,\n\
the value of `buffer-file-coding-system' is used.");
  Vcoding_system_for_write = Qnil;

  DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
    "Coding system used in the latest file or process I/O.");
  Vlast_coding_system_used = Qnil;

  DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
    "*Non-nil means always inhibit code conversion of end-of-line format.\n\
See info node `Coding Systems' and info node `Text and Binary' concerning\n\
such conversion.");
  inhibit_eol_conversion = 0;

  DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
    "Non-nil means process buffer inherits coding system of process output.\n\
Bind it to t if the process output is to be treated as if it were a file\n\
read from some filesystem.");
  inherit_process_coding_system = 0;

  /* The three alists consulted by `find-operation-coding-system'.  */
  DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
    "Alist to decide a coding system to use for a file I/O operation.\n\
The format is ((PATTERN . VAL) ...),\n\
where PATTERN is a regular expression matching a file name,\n\
VAL is a coding system, a cons of coding systems, or a function symbol.\n\
If VAL is a coding system, it is used for both decoding and encoding\n\
the file contents.\n\
If VAL is a cons of coding systems, the car part is used for decoding,\n\
and the cdr part is used for encoding.\n\
If VAL is a function symbol, the function must return a coding system\n\
or a cons of coding systems which are used as above.\n\
\n\
See also the function `find-operation-coding-system'\n\
and the variable `auto-coding-alist'.");
  Vfile_coding_system_alist = Qnil;

  DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
    "Alist to decide a coding system to use for a process I/O operation.\n\
The format is ((PATTERN . VAL) ...),\n\
where PATTERN is a regular expression matching a program name,\n\
VAL is a coding system, a cons of coding systems, or a function symbol.\n\
If VAL is a coding system, it is used for both decoding what received\n\
from the program and encoding what sent to the program.\n\
If VAL is a cons of coding systems, the car part is used for decoding,\n\
and the cdr part is used for encoding.\n\
If VAL is a function symbol, the function must return a coding system\n\
or a cons of coding systems which are used as above.\n\
\n\
See also the function `find-operation-coding-system'.");
  Vprocess_coding_system_alist = Qnil;

  DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
    "Alist to decide a coding system to use for a network I/O operation.\n\
The format is ((PATTERN . VAL) ...),\n\
where PATTERN is a regular expression matching a network service name\n\
or is a port number to connect to,\n\
VAL is a coding system, a cons of coding systems, or a function symbol.\n\
If VAL is a coding system, it is used for both decoding what received\n\
from the network stream and encoding what sent to the network stream.\n\
If VAL is a cons of coding systems, the car part is used for decoding,\n\
and the cdr part is used for encoding.\n\
If VAL is a function symbol, the function must return a coding system\n\
or a cons of coding systems which are used as above.\n\
\n\
See also the function `find-operation-coding-system'.");
  Vnetwork_coding_system_alist = Qnil;

  DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
    "Coding system to use with system messages.");
  Vlocale_coding_system = Qnil;

  /* The eol mnemonics are reset in startup.el system-dependently.  */
  DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
    "*String displayed in mode line for UNIX-like (LF) end-of-line format.");
  eol_mnemonic_unix = build_string (":");

  DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
    "*String displayed in mode line for DOS-like (CRLF) end-of-line format.");
  eol_mnemonic_dos = build_string ("\\");

  DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
    "*String displayed in mode line for MAC-like (CR) end-of-line format.");
  eol_mnemonic_mac = build_string ("/");

  DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
    "*String displayed in mode line when end-of-line format is not yet determined.");
  eol_mnemonic_undecided = build_string (":");

  DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
    "*Non-nil enables character translation while encoding and decoding.");
  Venable_character_translation = Qt;

  DEFVAR_LISP ("standard-translation-table-for-decode",
    &Vstandard_translation_table_for_decode,
    "Table for translating characters while decoding.");
  Vstandard_translation_table_for_decode = Qnil;

  DEFVAR_LISP ("standard-translation-table-for-encode",
    &Vstandard_translation_table_for_encode,
    "Table for translationg characters while encoding.");
  Vstandard_translation_table_for_encode = Qnil;

  /* Note that the Lisp name ends in `-table' while the C variable
     name ends in `_alist'.  */
  DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
    "Alist of charsets vs revision numbers.\n\
While encoding, if a charset (car part of an element) is found,\n\
designate it with the escape sequence identifing revision (cdr part of the element).");
  Vcharset_revision_alist = Qnil;

  DEFVAR_LISP ("default-process-coding-system",
               &Vdefault_process_coding_system,
    "Cons of coding systems used for process I/O by default.\n\
The car part is used for decoding a process output,\n\
the cdr part is used for encoding a text to be sent to a process.");
  Vdefault_process_coding_system = Qnil;

  DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
    "Table of extra Latin codes in the range 128..159 (inclusive).\n\
This is a vector of length 256.\n\
If Nth element is non-nil, the existence of code N in a file\n\
\(or output of subprocess) doesn't prevent it to be detected as\n\
a coding system of ISO 2022 variant which has a flag\n\
`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file\n\
or reading output of a subprocess.\n\
Only 128th through 159th elements has a meaning.");
  Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);

  /* The value is nil here although the doc string names a default
     function; presumably Lisp code installs it later -- TODO confirm.  */
  DEFVAR_LISP ("select-safe-coding-system-function",
               &Vselect_safe_coding_system_function,
    "Function to call to select safe coding system for encoding a text.\n\
\n\
If set, this function is called to force a user to select a proper\n\
coding system which can encode the text in the case that a default\n\
coding system used in each operation can't encode the text.\n\
\n\
The default value is `select-safe-coding-system' (which see).");
  Vselect_safe_coding_system_function = Qnil;

  DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table,
    "Char-table containing safe coding systems of each characters.\n\
Each element doesn't include such generic coding systems that can\n\
encode any characters.   They are in the first extra slot.");
  Vchar_coding_system_table = Fmake_char_table (Qchar_coding_system, Qnil);

  DEFVAR_BOOL ("inhibit-iso-escape-detection",
               &inhibit_iso_escape_detection,
    "If non-nil, Emacs ignores ISO2022's escape sequence on code detection.\n\
\n\
By default, on reading a file, Emacs tries to detect how the text is\n\
encoded.  This code detection is sensitive to escape sequences.  If\n\
the sequence is valid as ISO2022, the code is determined as one of\n\
the ISO2022 encodings, and the file is decoded by the corresponding\n\
coding system (e.g. `iso-2022-7bit').\n\
\n\
However, there may be a case that you want to read escape sequences in\n\
a file as is.  In such a case, you can set this variable to non-nil.\n\
Then, as the code detection ignores any escape sequences, no file is\n\
detected as encoded in some ISO2022 encoding.  The result is that all\n\
escape sequences become visible in a buffer.\n\
\n\
The default value is nil, and it is strongly recommended not to change\n\
it.  That is because many Emacs Lisp source files that contain\n\
non-ASCII characters are encoded by the coding system `iso-2022-7bit'\n\
in Emacs's distribution, and they won't be decoded correctly on\n\
reading if you suppress escape sequence detection.\n\
\n\
The other way to read escape sequences in a file without decoding is\n\
to explicitly specify some coding system that doesn't use ISO2022's\n\
escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].");
  inhibit_iso_escape_detection = 0;
}
6784
6785 char *
6786 emacs_strerror (error_number)
6787      int error_number;
6788 {
6789   char *str;
6790
6791   synchronize_system_messages_locale ();
6792   str = strerror (error_number);
6793
6794   if (! NILP (Vlocale_coding_system))
6795     {
6796       Lisp_Object dec = code_convert_string_norecord (build_string (str),
6797                                                       Vlocale_coding_system,
6798                                                       0);
6799       str = (char *) XSTRING (dec)->data;
6800     }
6801
6802   return str;
6803 }
6804
6805 #endif /* emacs */
6806