src/coding.c

   1 /* Coding system handler (conversion, detection, etc.).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4    Copyright (C) 2001 Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 2, or (at your option)
  11 any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs; see the file COPYING.  If not, write to
  20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  21 Boston, MA 02111-1307, USA.  */
  22
  23 /*** TABLE OF CONTENTS ***
  24
  25   0. General comments
  26   1. Preamble
  27   2. Emacs' internal format (emacs-mule) handlers
  28   3. ISO2022 handlers
  29   4. Shift-JIS and BIG5 handlers
  30   5. CCL handlers
  31   6. End-of-line handlers
  32   7. C library functions
  33   8. Emacs Lisp library functions
  34   9. Post-amble
  35
  36 */
  37
  38 /*** 0. General comments ***/
  39
  40
  41 /*** GENERAL NOTE on CODING SYSTEMS ***
  42
  43   A coding system is an encoding mechanism for one or more character
  44   sets.  Here's a list of coding systems which Emacs can handle.  When
  45   we say "decode", it means converting some other coding system to
  46   Emacs' internal format (emacs-mule), and when we say "encode",
  47   it means converting the coding system emacs-mule to some other
  48   coding system.
  49
  50   0. Emacs' internal format (emacs-mule)
  51
  52   Emacs itself holds a multi-lingual character in buffers and strings
  53   in a special format.  Details are described in section 2.
  54
  55   1. ISO2022
  56
  57   The most famous coding system for multiple character sets.  X's
  58   Compound Text, various EUCs (Extended Unix Code), and coding
  59   systems used in Internet communication such as ISO-2022-JP are
  60   all variants of ISO2022.  Details are described in section 3.
  61
  62   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  63
  64   A coding system to encode character sets: ASCII, JISX0201, and
  65   JISX0208.  Widely used for PC's in Japan.  Details are described in
  66   section 4.
  67
  68   3. BIG5
  69
  70   A coding system to encode the character sets ASCII and Big5.  Widely
  71   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  72   described in section 4.  In this file, when we write "BIG5"
  73   (all uppercase), we mean the coding system, and when we write
  74   "Big5" (capitalized), we mean the character set.
  75
  76   4. Raw text
  77
  78   A coding system for text containing random 8-bit code.  Emacs does
  79   no code conversion on such text except for end-of-line format.
  80
  81   5. Other
  82
  83   If a user wants to read/write text encoded in a coding system not
  84   listed above, he can supply a decoder and an encoder for it as CCL
  85   (Code Conversion Language) programs.  Emacs executes the CCL program
  86   while reading/writing.
  87
  88   Emacs represents a coding system by a Lisp symbol that has a property
  89   `coding-system'.  But, before actually using the coding system, the
  90   information about it is set in a structure of type `struct
  91   coding_system' for rapid processing.  See section 6 for more details.
  92
  93 */
  94
  95 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  96
  97   How end-of-line of text is encoded depends on the operating system.
  98   For instance, Unix's format is just one byte of `line-feed' code,
  99   whereas DOS's format is two-byte sequence of `carriage-return' and
 100   `line-feed' codes.  MacOS's format is usually one byte of
 101   `carriage-return'.
 102
 103   Since text character encoding and end-of-line encoding are
 104   independent, any coding system described above can have any
 105   end-of-line format.  So Emacs has information about end-of-line
 106   format in each coding-system.  See section 6 for more details.
 107
 108 */
 109
 110 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 111
 112   These functions check if a text between SRC and SRC_END is encoded
 113   in the coding system category XXX.  Each returns an integer value in
 114   which appropriate flag bits for the category XXX are set.  The flag
 115   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 116   template for these functions.  If MULTIBYTEP is nonzero, 8-bit codes
 117   of the range 0x80..0x9F are in multibyte form.  */
 118 #if 0
 119 int
 120 detect_coding_emacs_mule (src, src_end, multibytep)
 121      unsigned char *src, *src_end;
 122      int multibytep;
 123 {
 124   ...
 125 }
 126 #endif
 127
 128 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 129
 130   These functions decode SRC_BYTES length of unibyte text at SOURCE
 131   encoded in CODING to Emacs' internal format.  The resulting
 132   multibyte text goes to a place pointed to by DESTINATION, the length
 133   of which should not exceed DST_BYTES.
 134
 135   These functions set the information about original and decoded texts
 136   in the members `produced', `produced_char', `consumed', and
 137   `consumed_char' of the structure *CODING.  They also set the member
 138   `result' to one of CODING_FINISH_XXX indicating how the decoding
 139   finished.
 140
 141   DST_BYTES zero means that the source area and destination area are
 142   overlapped, which means that we can produce a decoded text until it
 143   reaches the head of the not-yet-decoded source text.
 144
 145   Below is a template for these functions.  */
 146 #if 0
 147 static void
 148 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 149      struct coding_system *coding;
 150      unsigned char *source, *destination;
 151      int src_bytes, dst_bytes;
 152 {
 153   ...
 154 }
 155 #endif
 156
 157 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 158
 159   These functions encode SRC_BYTES length text at SOURCE from Emacs'
 160   internal multibyte format to CODING.  The resulting unibyte text
 161   goes to a place pointed to by DESTINATION, the length of which
 162   should not exceed DST_BYTES.
 163
 164   These functions set the information about original and encoded texts
 165   in the members `produced', `produced_char', `consumed', and
 166   `consumed_char' of the structure *CODING.  They also set the member
 167   `result' to one of CODING_FINISH_XXX indicating how the encoding
 168   finished.
 169
 170   DST_BYTES zero means that the source area and destination area are
 171   overlapped, which means that we can produce encoded text until it
 172   reaches the head of the not-yet-encoded source text.
 173
 174   Below is a template for these functions.  */
 175 #if 0
 176 static void
 177 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 178      struct coding_system *coding;
 179      unsigned char *source, *destination;
 180      int src_bytes, dst_bytes;
 181 {
 182   ...
 183 }
 184 #endif
 185
 186 /*** COMMONLY USED MACROS ***/
 187
 188 /* The following two macros ONE_MORE_BYTE and TWO_MORE_BYTES safely
 189    get one and two bytes from the source text respectively.  If there
 190    are not enough bytes in the source, they set `coding->result' to
 191    CODING_FINISH_INSUFFICIENT_SRC and jump to `label_end_of_loop'.
 192    The caller should set variables `coding', `src' and `src_end' in
 193    advance.  These macros are called from decoding routines
 194    `decode_coding_XXX', thus the source text is assumed unibyte.  */
 195
 196 #define ONE_MORE_BYTE(c1)                                       \
 197   do {                                                          \
 198     if (src >= src_end)                                         \
 199       {                                                         \
 200         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 201         goto label_end_of_loop;                                 \
 202       }                                                         \
 203     c1 = *src++;                                                \
 204   } while (0)
 205 /* Fetch two bytes into C1 and C2, or bail out if fewer than two remain.  */
 206 #define TWO_MORE_BYTES(c1, c2)                                  \
 207   do {                                                          \
 208     if (src + 1 >= src_end)                                     \
 209       {                                                         \
 210         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 211         goto label_end_of_loop;                                 \
 212       }                                                         \
 213     c1 = *src++;                                                \
 214     c2 = *src++;                                                \
 215   } while (0)
 216
 217
 218 /* Like ONE_MORE_BYTE, but 8-bit bytes of data at SRC are in multibyte
 219    form if MULTIBYTEP is nonzero.  A truncated form ends the loop.  */
 220 #define ONE_MORE_BYTE_CHECK_MULTIBYTE(c1, multibytep)           \
 221   do {                                                          \
 222     if (src >= src_end || (multibytep && src + 1 >= src_end     \
 223                            && *src == LEADING_CODE_8_BIT_CONTROL)) \
 224       {                                                         \
 225         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 226         goto label_end_of_loop;                                 \
 227       }                                                         \
 228     c1 = *src++;                                                \
 229     if (multibytep && c1 == LEADING_CODE_8_BIT_CONTROL)         \
 230       c1 = *src++ - 0x20;                                       \
 231   } while (0)
 232
 233 /* Set C to the next character of the source text pointed to by
 234    `src' and advance `src' past it.  If the source is exhausted, set
 235    `coding->result' to CODING_FINISH_INSUFFICIENT_SRC and jump to
 236    `label_end_of_loop'.  C is passed through `translation_table'
 237    unless that table is nil.  The caller should set `coding', `src',
 238    `src_end', and `translation_table' in advance.  Used in encoding
 239    routines `encode_coding_XXX', so the source is assumed multibyte
 240    except for 8-bit characters, which are in multibyte form if
 241    coding->src_multibyte is nonzero, else are single bytes.  */
 242
 243 #define ONE_MORE_CHAR(c)                                        \
 244   do {                                                          \
 245     int len = src_end - src;                                    \
 246     int bytes;                                                  \
 247     if (len <= 0)                                               \
 248       {                                                         \
 249         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 250         goto label_end_of_loop;                                 \
 251       }                                                         \
 252     if (coding->src_multibyte                                   \
 253         || UNIBYTE_STR_AS_MULTIBYTE_P (src, len, bytes))        \
 254       c = STRING_CHAR_AND_LENGTH (src, len, bytes);             \
 255     else                                                        \
 256       c = *src, bytes = 1;                                      \
 257     if (!NILP (translation_table))                              \
 258       c = translate_char (translation_table, c, -1, 0, 0);      \
 259     src += bytes;                                               \
 260   } while (0)
 261
 262
 263 /* Produce a multibyte form of character C to `dst'.  If there's not
 264    enough space at `dst', set `coding->result' to
 265    CODING_FINISH_INSUFFICIENT_DST and jump to `label_end_of_loop'.
 266    When DST_BYTES is zero, the space ends at `src' instead of `dst_end'.
 267
 268    While composing, C is written to `dst' only for the methods
 269    COMPOSITION_RELATIVE and COMPOSITION_WITH_RULE, and is recorded in
 270    coding->cmp_data->data for every method but COMPOSITION_RELATIVE.
 271    This macro is used in decoding routines.  */
 272
 273 #define EMIT_CHAR(c)                                                    \
 274   do {                                                                  \
 275     if (! COMPOSING_P (coding)                                          \
 276         || coding->composing == COMPOSITION_RELATIVE                    \
 277         || coding->composing == COMPOSITION_WITH_RULE)                  \
 278       {                                                                 \
 279         int bytes = CHAR_BYTES (c);                                     \
 280         if ((dst + bytes) > (dst_bytes ? dst_end : src))                \
 281           {                                                             \
 282             coding->result = CODING_FINISH_INSUFFICIENT_DST;            \
 283             goto label_end_of_loop;                                     \
 284           }                                                             \
 285         dst += CHAR_STRING (c, dst);                                    \
 286         coding->produced_char++;                                        \
 287       }                                                                 \
 288                                                                         \
 289     if (COMPOSING_P (coding)                                            \
 290         && coding->composing != COMPOSITION_RELATIVE)                   \
 291       {                                                                 \
 292         CODING_ADD_COMPOSITION_COMPONENT (coding, c);                   \
 293         coding->composition_rule_follows                                \
 294           = coding->composing != COMPOSITION_WITH_ALTCHARS;             \
 295       }                                                                 \
 296   } while (0)
 297
 298 /* Store byte C at `dst'; jump to `label_end_of_loop' if there's no room.  */
 299 #define EMIT_ONE_BYTE(c)                                        \
 300   do {                                                          \
 301     if (dst >= (dst_bytes ? dst_end : src))                     \
 302       {                                                         \
 303         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 304         goto label_end_of_loop;                                 \
 305       }                                                         \
 306     *dst++ = c;                                                 \
 307   } while (0)
 308 /* Store bytes C1 and C2 at `dst', or bail out if both do not fit.  */
 309 #define EMIT_TWO_BYTES(c1, c2)                                  \
 310   do {                                                          \
 311     if (dst + 2 > (dst_bytes ? dst_end : src))                  \
 312       {                                                         \
 313         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 314         goto label_end_of_loop;                                 \
 315       }                                                         \
 316     *dst++ = c1, *dst++ = c2;                                   \
 317   } while (0)
 318 /* Copy bytes FROM up to (not including) TO into `dst', advancing FROM.  */
 319 #define EMIT_BYTES(from, to)                                    \
 320   do {                                                          \
 321     if (dst + (to - from) > (dst_bytes ? dst_end : src))        \
 322       {                                                         \
 323         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 324         goto label_end_of_loop;                                 \
 325       }                                                         \
 326     while (from < to)                                           \
 327       *dst++ = *from++;                                         \
 328   } while (0)
 329
 330 \f
 331 /*** 1. Preamble ***/
 332
 333 #ifdef emacs
 334 #include <config.h>
 335 #endif
 336
 337 #include <stdio.h>
 338
 339 #ifdef emacs
 340
 341 #include "lisp.h"
 342 #include "buffer.h"
 343 #include "charset.h"
 344 #include "composite.h"
 345 #include "ccl.h"
 346 #include "coding.h"
 347 #include "window.h"
 348
 349 #else  /* not emacs */
 350
 351 #include "mulelib.h"
 352
 353 #endif /* not emacs */
 354 /* Lisp_Object globals; presumably symbols (Q prefix) -- TODO confirm.  */
 355 Lisp_Object Qcoding_system, Qeol_type;
 356 Lisp_Object Qbuffer_file_coding_system;
 357 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 358 Lisp_Object Qno_conversion, Qundecided;
 359 Lisp_Object Qcoding_system_history;
 360 Lisp_Object Qsafe_chars;
 361 Lisp_Object Qvalid_codes;
 362 /* NOTE(review): the next two are only declared here, not defined.  */
 363 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 364 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 365 Lisp_Object Qstart_process, Qopen_network_stream;
 366 Lisp_Object Qtarget_idx;
 367
 368 Lisp_Object Vselect_safe_coding_system_function;
 369
 370 /* Mnemonic string for each format of end-of-line.  */
 371 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 372 /* Mnemonic string to indicate format of end-of-line is not yet
 373    decided.  */
 374 Lisp_Object eol_mnemonic_undecided;
 375
 376 /* Format of end-of-line decided by system.  This is CODING_EOL_LF on
 377    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
 378 int system_eol_type;
 379
 380 #ifdef emacs
 381
 382 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 383
 384 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 385
 386 /* Coding system emacs-mule and raw-text are for converting only
 387    end-of-line format.  */
 388 Lisp_Object Qemacs_mule, Qraw_text;
 389
 390 /* Coding-systems are handed between Emacs Lisp programs and C internal
 391    routines by the following three variables.  */
 392 /* Coding-system for reading files and receiving data from process.  */
 393 Lisp_Object Vcoding_system_for_read;
 394 /* Coding-system for writing files and sending data to process.  */
 395 Lisp_Object Vcoding_system_for_write;
 396 /* Coding-system actually used in the latest I/O.  */
 397 Lisp_Object Vlast_coding_system_used;
 398
 399 /* A vector of length 256 which contains information about special
 400    Latin codes (especially for dealing with Microsoft codes).  */
 401 Lisp_Object Vlatin_extra_code_table;
 402
 403 /* Flag to inhibit code conversion of end-of-line format.  */
 404 int inhibit_eol_conversion;
 405
 406 /* Flag to inhibit ISO2022 escape sequence detection.  */
 407 int inhibit_iso_escape_detection;
 408
 409 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 410 int inherit_process_coding_system;
 411
 412 /* Coding system to be used to encode text for terminal display.  */
 413 struct coding_system terminal_coding;
 414
 415 /* Coding system to be used to encode text for terminal display when
 416    terminal coding system is nil.  */
 417 struct coding_system safe_terminal_coding;
 418
 419 /* Coding system of what is sent from terminal keyboard.  */
 420 struct coding_system keyboard_coding;
 421
 422 /* Default coding system to be used to write a file.  */
 423 struct coding_system default_buffer_file_coding;
 424
 425 Lisp_Object Vfile_coding_system_alist;
 426 Lisp_Object Vprocess_coding_system_alist;
 427 Lisp_Object Vnetwork_coding_system_alist;
 428
 429 Lisp_Object Vlocale_coding_system;
 430
 431 #endif /* emacs */
 432
 433 Lisp_Object Qcoding_category, Qcoding_category_index;
 434
 435 /* List of symbols `coding-category-xxx' ordered by priority.  */
 436 Lisp_Object Vcoding_category_list;
 437
 438 /* Table of coding categories (Lisp symbols).  */
 439 Lisp_Object Vcoding_category_table;
 440
 441 /* Table of symbol names for each coding-category.  */
 442 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
 443   "coding-category-emacs-mule",
 444   "coding-category-sjis",
 445   "coding-category-iso-7",
 446   "coding-category-iso-7-tight",
 447   "coding-category-iso-8-1",
 448   "coding-category-iso-8-2",
 449   "coding-category-iso-7-else",
 450   "coding-category-iso-8-else",
 451   "coding-category-ccl",
 452   "coding-category-big5",
 453   "coding-category-utf-8",
 454   "coding-category-utf-16-be",
 455   "coding-category-utf-16-le",
 456   "coding-category-raw-text",
 457   "coding-category-binary"
 458 };
 459
 460 /* Table of pointers to coding systems corresponding to each coding
 461    category.  */
 462 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
 463
 464 /* Table of coding category masks.  Nth element is a mask for a coding
 465    category of which priority is Nth.  */
 466 static
 467 int coding_priorities[CODING_CATEGORY_IDX_MAX];
 468
 469 /* Flag to tell if we look up translation table on character code
 470    conversion.  */
 471 Lisp_Object Venable_character_translation;
 472 /* Standard translation table to look up on decoding (reading).  */
 473 Lisp_Object Vstandard_translation_table_for_decode;
 474 /* Standard translation table to look up on encoding (writing).  */
 475 Lisp_Object Vstandard_translation_table_for_encode;
 476
 477 Lisp_Object Qtranslation_table;
 478 Lisp_Object Qtranslation_table_id;
 479 Lisp_Object Qtranslation_table_for_decode;
 480 Lisp_Object Qtranslation_table_for_encode;
 481
 482 /* Alist of charsets vs revision number.  */
 483 Lisp_Object Vcharset_revision_alist;
 484
 485 /* Default coding systems used for process I/O.  */
 486 Lisp_Object Vdefault_process_coding_system;
 487
 488 /* Global flag to tell that we can't call post-read-conversion and
 489    pre-write-conversion functions.  Usually the value is zero, but it
 490    is set to 1 temporarily while such functions are running.  This is
 491    to avoid infinite recursive call.  */
 492 static int inhibit_pre_post_conversion;
 493
 494 /* Char-table containing safe coding systems of each character.  */
 495 Lisp_Object Vchar_coding_system_table;
 496 Lisp_Object Qchar_coding_system;
 497
 498 /* Return the `safe-chars' char-table of coding system CODING, or t
 499    if that property is not a char-table.  Don't check validity of CODING.  */
 500
 501 Lisp_Object
 502 coding_safe_chars (coding)
 503      struct coding_system *coding;
 504 {
 505   Lisp_Object coding_spec, plist, safe_chars;
 506
 507   coding_spec = Fget (coding->symbol, Qcoding_system);
 508   plist = XVECTOR (coding_spec)->contents[3];
 509   safe_chars = Fplist_get (plist, Qsafe_chars);
 510   return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt);
 511 }
 512 /* Nonzero if SAFE_CHARS is t or its entry for character C is non-nil.  */
 513 #define CODING_SAFE_CHAR_P(safe_chars, c) \
 514   (EQ (safe_chars, Qt) || !NILP (CHAR_TABLE_REF (safe_chars, c)))
 515
 516 \f
 517 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 518
 519 /* Emacs' internal format for representation of multiple character
 520    sets is a kind of multi-byte encoding, i.e. characters are
 521    represented by variable-length sequences of one-byte codes.
 522
 523    ASCII characters and control characters (e.g. `tab', `newline') are
 524    represented by one-byte sequences which are their ASCII codes, in
 525    the range 0x00 through 0x7F.
 526
 527    8-bit characters of the range 0x80..0x9F are represented by
 528    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
 529    code + 0x20).
 530
 531    8-bit characters of the range 0xA0..0xFF are represented by
 532    one-byte sequences which are their 8-bit code.
 533
 534    The other characters are represented by a sequence of `base
 535    leading-code', optional `extended leading-code', and one or two
 536    `position-code's.  The length of the sequence is determined by the
 537    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
 538    whereas extended leading-code and position-code take the range 0xA0
 539    through 0xFF.  See `charset.h' for more details about leading-code
 540    and position-code.
 541
 542    --- CODE RANGE of Emacs' internal format ---
 543    character set        range
 544    -------------        -----
 545    ascii                0x00..0x7F
 546    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
 547    eight-bit-graphic    0xA0..0xFF
 548    ELSE                 0x81..0x9D + [0xA0..0xFF]+
 549    ---------------------------------------------
 550
 551    As this is the internal character representation, the format is
 552    usually not used externally (i.e. in a file or in a data sent to a
 553    process).  But, it is possible to have a text externally in this
 554    format (i.e. by encoding by the coding system `emacs-mule').
 555
 556    In that case, a sequence of one-byte codes has a slightly different
 557    form.
 558
 559    Firstly, all characters in eight-bit-control are represented by
 560    one-byte sequences which are their 8-bit code.
 561
 562    Next, character composition data are represented by the byte
 563    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
 564    where,
 565         METHOD is 0xF0 plus one of composition method (enum
 566         composition_method),
 567
 568         BYTES is 0xA0 plus the byte length of these composition data,
 569
 570         CHARS is 0xA0 plus the number of characters composed by these
 571         data,
 572
 573         COMPONENTs are characters of multibyte form or composition
 574         rules encoded by two-byte of ASCII codes.
 575
 576    In addition, for backward compatibility, the following formats are
 577    also recognized as composition data on decoding.
 578
 579    0x80 MSEQ ...
 580    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
 581
 582    Here,
 583         MSEQ is a multibyte form but in one of these special formats:
 584           ASCII: 0xA0 ASCII_CODE+0x80,
 585           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
 586         RULE is a one byte code of the range 0xA0..0xF0 that
 587         represents a composition rule.
 588   */
 589
 590 enum emacs_code_class_type emacs_code_class[256];
 591
 592 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 593    Check if a text is encoded in Emacs' internal format.  If it is,
 594    return CODING_CATEGORY_MASK_EMACS_MULE, else return 0.  */
 595
 596 static int
 597 detect_coding_emacs_mule (src, src_end, multibytep)
 598       unsigned char *src, *src_end;
 599       int multibytep;
 600 {
 601   unsigned char c;
 602   int composing = 0;
 603   /* ONE_MORE_BYTE_CHECK_MULTIBYTE needs a `coding' to store its result.  */
 604   struct coding_system dummy_coding;
 605   struct coding_system *coding = &dummy_coding;
 606   /* Scan until a disqualifying byte is found or the source runs out.  */
 607   while (1)
 608     {
 609       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
 610       /* Inside an old-style composition, undo the encoding of C.  */
 611       if (composing)
 612         {
 613           if (c < 0xA0)
 614             composing = 0;
 615           else if (c == 0xA0)
 616             {
 617               ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
 618               c &= 0x7F;
 619             }
 620           else
 621             c -= 0x20;
 622         }
 623       /* ESC, SI and SO disqualify the text; other bytes below 0x20 pass.  */
 624       if (c < 0x20)
 625         {
 626           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
 627             return 0;
 628         }
 629       else if (c >= 0x80 && c < 0xA0)
 630         {
 631           if (c == 0x80)
 632             /* Old leading code for a composite character.  */
 633             composing = 1;
 634           else
 635             {
 636               unsigned char *src_base = src - 1;
 637               int bytes;
 638               /* Skip the sequence if the macro accepts it, else reject.  */
 639               if (!UNIBYTE_STR_AS_MULTIBYTE_P (src_base, src_end - src_base,
 640                                                bytes))
 641                 return 0;
 642               src = src_base + bytes;
 643             }
 644         }
 645     }
 646  label_end_of_loop:
 647   return CODING_CATEGORY_MASK_EMACS_MULE;
 648 }
 649
 650
 651 /* Record the starting position START and METHOD of one composition.  */
 652 /* Slot 0 is -1 and slot 2 unset until CODING_ADD_COMPOSITION_END.  */
 653 #define CODING_ADD_COMPOSITION_START(coding, start, method)     \
 654   do {                                                          \
 655     struct composition_data *cmp_data = coding->cmp_data;       \
 656     int *data = cmp_data->data + cmp_data->used;                \
 657     coding->cmp_data_start = cmp_data->used;                    \
 658     data[0] = -1;                                               \
 659     data[1] = cmp_data->char_offset + start;                    \
 660     data[3] = (int) method;                                     \
 661     cmp_data->used += 4;                                        \
 662   } while (0)
 663
 664 /* Record the ending position END of the current composition.  */
 665 /* Slot 0 becomes the record length, slot 2 the end position.  */
 666 #define CODING_ADD_COMPOSITION_END(coding, end)                 \
 667   do {                                                          \
 668     struct composition_data *cmp_data = coding->cmp_data;       \
 669     int *data = cmp_data->data + coding->cmp_data_start;        \
 670     data[0] = cmp_data->used - coding->cmp_data_start;          \
 671     data[2] = cmp_data->char_offset + end;                      \
 672   } while (0)
 673
 674 /* Record one COMPONENT (alternate character or composition rule).  */
 675 /* Evaluates to COMPONENT; the `used' count grows by one.  */
 676 #define CODING_ADD_COMPOSITION_COMPONENT(coding, component)     \
 677   (coding->cmp_data->data[coding->cmp_data->used++] = component)
 678
 679
 680 /* Get one byte from the data pointed to by SRC and increment SRC.  If
 681    SRC is not less than SRC_END, return -1 without incrementing SRC.  */
 682
 683 #define SAFE_ONE_MORE_BYTE() (src >= src_end ? -1 : *src++)
 684
 685
 686 /* Decode a character represented as a component of composition
 687    sequence of Emacs 20 style at `src' (advanced past what is read).
 688    Set C to that character, store its multibyte form sequence at P,
 689    and set P to its end.  If no valid character is found, set C to -1.  */
 690
 691 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(c, p)                \
 692   do {                                                          \
 693     int bytes;                                                  \
 694                                                                 \
 695     c = SAFE_ONE_MORE_BYTE ();                                  \
 696     if (c < 0)                                                  \
 697       break;                                                    \
 698     if (CHAR_HEAD_P (c))                                        \
 699       c = -1;                                                   \
 700     else if (c == 0xA0)                                         \
 701       {                                                         \
 702         c = SAFE_ONE_MORE_BYTE ();                              \
 703         if (c < 0xA0)                                           \
 704           c = -1;                                               \
 705         else                                                    \
 706           {                                                     \
 707             c -= 0x80;  /* ASCII is sent as code + 0x80.  */    \
 708             *p++ = c;                                           \
 709           }                                                     \
 710       }                                                         \
 711     else if (BASE_LEADING_CODE_P (c - 0x20))                    \
 712       {                                                         \
 713         unsigned char *p0 = p;                                  \
 714                                                                 \
 715         c -= 0x20;                                              \
 716         *p++ = c;                                               \
 717         bytes = BYTES_BY_CHAR_HEAD (c);                         \
 718         while (--bytes)                                         \
 719           {                                                     \
 720             c = SAFE_ONE_MORE_BYTE ();                          \
 721             if (c < 0)                                          \
 722               break;                                            \
 723             *p++ = c;                                           \
 724           }                                                     \
 725         if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes))     \
 726           c = STRING_CHAR (p0, bytes);                          \
 727         else                                                    \
 728           c = -1;                                               \
 729       }                                                         \
 730     else                                                        \
 731       c = -1;                                                   \
 732   } while (0)
 733
 734
 735 /* Decode a composition rule represented as a component of composition
 736    sequence of Emacs 20 style at SRC.  Set C to the rule.  If no
 737    valid rule is found, set C to -1.

        The byte obtained from SAFE_ONE_MORE_BYTE is biased by 0xA0; once
        the bias is removed the value must lie in 0..80 and is split as
        GREF * 9 + NREF before being passed to COMPOSITION_ENCODE_RULE.
        GREF and NREF are not macro arguments: they are assigned here and
        must be variables visible at the place the macro is expanded.  */
 738
 739 #define DECODE_EMACS_MULE_COMPOSITION_RULE(c)           \
 740   do {                                                  \
 741     c = SAFE_ONE_MORE_BYTE ();                          \
 742     c -= 0xA0;      /* remove the 0xA0 bias */          \
 743     if (c < 0 || c >= 81)  /* 81 = 9 * 9 rules */       \
 744       c = -1;                                           \
 745     else                                                \
 746       {                                                 \
 747         gref = c / 9, nref = c % 9;                     \
 748         c = COMPOSITION_ENCODE_RULE (gref, nref);       \
 749       }                                                 \
 750   } while (0)
 751
 752
 753 /* Decode composition sequence encoded by `emacs-mule' at the source
 754    pointed by SRC.  SRC_END is the end of source.  Store information
 755    of the composition in CODING->cmp_data.
 756
 757    For backward compatibility, decode also a composition sequence of
 758    Emacs 20 style.  In that case, the composition sequence contains
 759    characters that should be extracted into a buffer or string.  Store
 760    those characters at *DESTINATION in multibyte form.
 761
 762    If we encounter an invalid byte sequence, return 0.
 763    If we encounter an insufficient source or destination, or
 764    insufficient space in CODING->cmp_data, return -1.
 765    Otherwise, return consumed bytes in the source.
 766
        SRC points at the byte that introduces the sequence; that byte is
        skipped without being examined.  DST_END is the end of the
        destination; when DST_BYTES is zero the current source position
        is used as the destination limit instead.  *DESTINATION is
        advanced only when characters of an Emacs 20 style sequence are
        actually stored.

 767 */
 768 static INLINE int
 769 decode_composition_emacs_mule (coding, src, src_end,
 770                                destination, dst_end, dst_bytes)
 771      struct coding_system *coding;
 772      unsigned char *src, *src_end, **destination, *dst_end;
 773      int dst_bytes;
 774 {
 775   unsigned char *dst = *destination;
 776   int method, data_len, nchars;
 777   unsigned char *src_base = src++;
 778   /* Store components of composition.  */
 779   int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH];
 780   int ncomponent;
 781   /* Store multibyte form of characters to be composed.  This is for
 782      Emacs 20 style composition sequence.  */
 783   unsigned char buf[MAX_COMPOSITION_COMPONENTS * MAX_MULTIBYTE_LENGTH];
 784   unsigned char *bufp = buf;
 785   int c, i, gref, nref;
 786
       /* Make sure CODING->cmp_data can take one more full bunch before
          anything is consumed.  */
 787   if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
 788       >= COMPOSITION_DATA_SIZE)
 789     {
 790       coding->result = CODING_FINISH_INSUFFICIENT_CMP;
 791       return -1;
 792     }
 793
       /* ONE_MORE_BYTE is defined elsewhere in this file; presumably it
          jumps to label_end_of_loop when the source is exhausted.  */
 794   ONE_MORE_BYTE (c);
 795   if (c - 0xF0 >= COMPOSITION_RELATIVE
 796            && c - 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS)
 797     {
           /* Current format: 0x80 (0xF0 + METHOD) (0xA0 + LENGTH)
              (0xA0 + NCHARS) COMPONENT ..., where LENGTH counts the whole
              sequence starting at SRC_BASE.  */
 798       int with_rule;
 799
 800       method = c - 0xF0;
 801       with_rule = (method == COMPOSITION_WITH_RULE
 802                    || method == COMPOSITION_WITH_RULE_ALTCHARS);
 803       ONE_MORE_BYTE (c);
 804       data_len = c - 0xA0;
 805       if (data_len < 4
 806           || src_base + data_len > src_end)
 807         return 0;
 808       ONE_MORE_BYTE (c);
 809       nchars = c - 0xA0;
           /* Test the decoded count, not the raw byte: a byte of 0xA0 or
              less would otherwise yield a zero or negative NCHARS.  */
 810       if (nchars < 1)
 811         return 0;
 812       for (ncomponent = 0; src < src_base + data_len; ncomponent++)
 813         {
               /* COMPONENT holds COMPOSITION_DATA_MAX_BUNCH_LENGTH entries
                  only; a sequence with more components can't be valid and
                  must not be allowed to overrun the array.  */
               if (ncomponent >= COMPOSITION_DATA_MAX_BUNCH_LENGTH)
                 return 0;
 814           if (ncomponent % 2 && with_rule)
 815             {
                   /* Odd slots of a rule-based method hold a rule, stored
                      as two bytes each biased by 32.  */
 816               ONE_MORE_BYTE (gref);
 817               gref -= 32;
 818               ONE_MORE_BYTE (nref);
 819               nref -= 32;
 820               c = COMPOSITION_ENCODE_RULE (gref, nref);
 821             }
 822           else
 823             {
 824               int bytes;
 825               if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
 826                 c = STRING_CHAR (src, bytes);
 827               else
 828                 c = *src, bytes = 1;
 829               src += bytes;
 830             }
 831           component[ncomponent] = c;
 832         }
 833     }
 834   else
 835     {
 836       /* This may be an old Emacs 20 style format.  See the comment at
 837          the section 2 of this file.  */
 838       while (src < src_end && !CHAR_HEAD_P (*src)) src++;
           /* No terminating character head seen yet and more input may
              follow: report insufficient source.  */
 839       if (src == src_end
 840           && !(coding->mode & CODING_MODE_LAST_BLOCK))
 841         goto label_end_of_loop;
 842
           /* Restrict decoding to the bytes before the next character
              head and restart just after the introducing byte.  */
 843       src_end = src;
 844       src = src_base + 1;
 845       if (c < 0xC0)
 846         {
 847           method = COMPOSITION_RELATIVE;
 848           for (ncomponent = 0; ncomponent < MAX_COMPOSITION_COMPONENTS;)
 849             {
 850               DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 851               if (c < 0)
 852                 break;
 853               component[ncomponent++] = c;
 854             }
 855           if (ncomponent < 2)
 856             return 0;
 857           nchars = ncomponent;
 858         }
 859       else if (c == 0xFF)
 860         {
 861           method = COMPOSITION_WITH_RULE;
 862           src++;
 863           DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 864           if (c < 0)
 865             return 0;
 866           component[0] = c;
 867           for (ncomponent = 1;
 868                ncomponent < MAX_COMPOSITION_COMPONENTS * 2 - 1;)
 869             {
 870               DECODE_EMACS_MULE_COMPOSITION_RULE (c);
 871               if (c < 0)
 872                 break;
 873               component[ncomponent++] = c;
 874               DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 875               if (c < 0)
 876                 break;
 877               component[ncomponent++] = c;
 878             }
 879           if (ncomponent < 3)
 880             return 0;
 881           nchars = (ncomponent + 1) / 2;
 882         }
 883       else
 884         return 0;
 885     }
 886
       /* Record the composition only if the extracted characters (if any)
          fit in the destination; otherwise fall through and return -1.  */
 887   if (buf == bufp || dst + (bufp - buf) <= (dst_bytes ? dst_end : src))
 888     {
 889       CODING_ADD_COMPOSITION_START (coding, coding->produced_char, method);
 890       for (i = 0; i < ncomponent; i++)
 891         CODING_ADD_COMPOSITION_COMPONENT (coding, component[i]);
 892       CODING_ADD_COMPOSITION_END (coding, coding->produced_char + nchars);
 893       if (buf < bufp)
 894         {
 895           unsigned char *p = buf;
 896           EMIT_BYTES (p, bufp);
 897           *destination += bufp - buf;
 898           coding->produced_char += nchars;
 899         }
 900       return (src - src_base);
 901     }
 902  label_end_of_loop:
 903   return -1;
 904 }
 905
 906 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".

        Decode SRC_BYTES bytes at SOURCE into DESTINATION.  '\r' and '\n'
        are handled according to CODING->eol_type, a 0x80 byte is handed
        to decode_composition_emacs_mule, and every other byte sequence
        is copied as is when UNIBYTE_STR_AS_MULTIBYTE_P accepts it or
        converted byte by byte with CHAR_STRING otherwise.  When
        DST_BYTES is zero the current source position is used as the
        destination limit.  On return CODING->consumed, consumed_char,
        produced and produced_char describe what was processed; both
        consumed fields receive the same byte count.  */
 907
 908 static void
 909 decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
 910      struct coding_system *coding;
 911      unsigned char *source, *destination;
 912      int src_bytes, dst_bytes;
 913 {
 914   unsigned char *src = source;
 915   unsigned char *src_end = source + src_bytes;
 916   unsigned char *dst = destination;
 917   unsigned char *dst_end = destination + dst_bytes;
 918   /* SRC_BASE remembers the start position in source in each loop.
 919      The loop will be exited when there's not enough source code, or
 920      when there's not enough destination area to produce a
 921      character.  */
 922   unsigned char *src_base;
 923
 924   coding->produced_char = 0;
 925   while ((src_base = src) < src_end)
 926     {
 927       unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p;
 928       int bytes;
 929
 930       if (*src == '\r')
 931         {
 932           int c = *src++;
 933
 934           if (coding->eol_type == CODING_EOL_CR)
 935             c = '\n';
 936           else if (coding->eol_type == CODING_EOL_CRLF)
 937             {
                   /* ONE_MORE_BYTE is defined elsewhere; presumably it
                      jumps to label_end_of_loop if no byte is left.  */
 938               ONE_MORE_BYTE (c);
 939               if (c != '\n')
 940                 {
 941                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
 942                     {
 943                       coding->result = CODING_FINISH_INCONSISTENT_EOL;
 944                       goto label_end_of_loop;
 945                     }
                       /* A lone CR: put the following byte back and emit
                          the CR itself.  */
 946                   src--;
 947                   c = '\r';
 948                 }
 949             }
               /* NOTE(review): this store and the one in the '\n' branch
                  are not preceded by a destination-space check like the
                  one at the bottom of the loop -- confirm that callers
                  always provide enough room.  */
 950           *dst++ = c;
 951           coding->produced_char++;
 952           continue;
 953         }
 954       else if (*src == '\n')
 955         {
               /* A bare LF contradicts a CR or CRLF end-of-line type.  */
 956           if ((coding->eol_type == CODING_EOL_CR
 957                || coding->eol_type == CODING_EOL_CRLF)
 958               && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
 959             {
 960               coding->result = CODING_FINISH_INCONSISTENT_EOL;
 961               goto label_end_of_loop;
 962             }
 963           *dst++ = *src++;
 964           coding->produced_char++;
 965           continue;
 966         }
 967       else if (*src == 0x80)
 968         {
 969           /* Start of composition data.  */
 970           int consumed  = decode_composition_emacs_mule (coding, src, src_end,
 971                                                          &dst, dst_end,
 972                                                          dst_bytes);
               /* Negative: stop here.  Positive: that many source bytes
                  were handled.  Zero: not a valid composition, so the
                  0x80 byte is converted like any other stray byte.  */
 973           if (consumed < 0)
 974             goto label_end_of_loop;
 975           else if (consumed > 0)
 976             {
 977               src += consumed;
 978               continue;
 979             }
 980           bytes = CHAR_STRING (*src, tmp);
 981           p = tmp;
 982           src++;
 983         }
 984       else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
 985         {
               /* Accepted sequence of BYTES bytes: copy it unchanged.  */
 986           p = src;
 987           src += bytes;
 988         }
 989       else
 990         {
               /* Otherwise convert the single byte through TMP.  */
 991           bytes = CHAR_STRING (*src, tmp);
 992           p = tmp;
 993           src++;
 994         }
           /* SRC has already been advanced here; SRC_BASE still marks the
              start of this character and is what gets reported as
              consumed if we stop.  */
 995       if (dst + bytes >= (dst_bytes ? dst_end : src))
 996         {
 997           coding->result = CODING_FINISH_INSUFFICIENT_DST;
 998           break;
 999         }
1000       while (bytes--) *dst++ = *p++;
1001       coding->produced_char++;
1002     }
1003  label_end_of_loop:
1004   coding->consumed = coding->consumed_char = src_base - source;
1005   coding->produced = dst - destination;
1006 }
1007
1008
1009 /* Encode composition data stored at DATA into a special byte sequence
1010    starting with 0x80.  Update CODING->cmp_data_start and maybe
1011    CODING->cmp_data for the next call.

        The sequence produced is
          0x80 (0xF0 + DATA[3]) (0xA0 + LENGTH) (0xA0 + DATA[2] - DATA[1])
        followed by the components DATA[4] .. DATA[DATA[0] - 1], where
        LENGTH is the byte length of the whole sequence including these
        four header bytes.  For the two rule-based methods every second
        component is a rule and is written as the two bytes 0x20 + GREF
        and 0x20 + NREF.

        Besides its arguments the macro uses DST, DST_END, DST_BYTES and
        SRC of the enclosing function and jumps to its label
        `label_end_of_loop' when the destination is too small.  */
1012
1013 #define ENCODE_COMPOSITION_EMACS_MULE(coding, data)                     \
1014   do {                                                                  \
1015     unsigned char buf[1024], *p0 = buf, *p;                             \
1016     int len = data[0];                                                  \
1017     int i;                                                              \
1018                                                                         \
1019     buf[0] = 0x80;                                                      \
1020     buf[1] = 0xF0 + data[3];    /* METHOD */                            \
1021     buf[3] = 0xA0 + (data[2] - data[1]); /* COMPOSED-CHARS */           \
1022     p = buf + 4;                                                        \
1023     if (data[3] == COMPOSITION_WITH_RULE                                \
1024         || data[3] == COMPOSITION_WITH_RULE_ALTCHARS)                   \
1025       {                                                                 \
1026         p += CHAR_STRING (data[4], p);                                  \
1027         for (i = 5; i < len; i += 2)  /* (rule, char) pairs */          \
1028           {                                                             \
1029             int gref, nref;                                             \
1030              COMPOSITION_DECODE_RULE (data[i], gref, nref);             \
1031             *p++ = 0x20 + gref;                                         \
1032             *p++ = 0x20 + nref;                                         \
1033             p += CHAR_STRING (data[i + 1], p);                          \
1034           }                                                             \
1035       }                                                                 \
1036     else                                                                \
1037       {                                                                 \
1038         for (i = 4; i < len; i++)                                       \
1039           p += CHAR_STRING (data[i], p);                                \
1040       }                                                                 \
1041     buf[2] = 0xA0 + (p - buf);  /* COMPONENTS-BYTES */                  \
1042                                                                         \
         /* P - BUF already counts the four header bytes, so the extra     \
            + 4 only makes this check stricter than necessary.  */         \
1043     if (dst + (p - buf) + 4 > (dst_bytes ? dst_end : src))              \
1044       {                                                                 \
1045         coding->result = CODING_FINISH_INSUFFICIENT_DST;                \
1046         goto label_end_of_loop;                                         \
1047       }                                                                 \
1048     while (p0 < p)                                                      \
1049       *dst++ = *p0++;                                                   \
1050     coding->cmp_data_start += data[0];  /* skip this bunch */           \
1051     if (coding->cmp_data_start == coding->cmp_data->used                \
1052         && coding->cmp_data->next)                                      \
1053       {                                                                 \
1054         coding->cmp_data = coding->cmp_data->next;                      \
1055         coding->cmp_data_start = 0;                                     \
1056       }                                                                 \
1057   } while (0)
1058
1059
1060 static void encode_eol P_ ((struct coding_system *, unsigned char *,
1061                             unsigned char *, int, int));
1062
     /* Encode SRC_BYTES bytes at SOURCE into DESTINATION for `emacs-mule'.
        Without composition data the whole job is delegated to encode_eol.
        Otherwise the text is copied character by character, '\n' is
        replaced by CR or CR LF according to CODING->eol_type, and each
        composition recorded in CODING->cmp_data is written out with
        ENCODE_COMPOSITION_EMACS_MULE just before the character at which
        it starts.  On return CODING->consumed, consumed_char, produced
        and produced_char are set; produced and produced_char receive the
        same byte count.  */
1063 static void
1064 encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
1065      struct coding_system *coding;
1066      unsigned char *source, *destination;
1067      int src_bytes, dst_bytes;
1068 {
1069   unsigned char *src = source;
1070   unsigned char *src_end = source + src_bytes;
1071   unsigned char *dst = destination;
1072   unsigned char *dst_end = destination + dst_bytes;
1073   unsigned char *src_base;
1074   int c;
1075   int char_offset;
1076   int *data;
1077
       /* Not referenced directly below; presumably read by ONE_MORE_CHAR,
          which is defined elsewhere in this file -- TODO confirm.  */
1078   Lisp_Object translation_table;
1079
1080   translation_table = Qnil;
1081
1082   /* Optimization for the case that there's no composition.  */
1083   if (!coding->cmp_data || coding->cmp_data->used == 0)
1084     {
1085       encode_eol (coding, source, destination, src_bytes, dst_bytes);
1086       return;
1087     }
1088
1089   char_offset = coding->cmp_data->char_offset;
1090   data = coding->cmp_data->data + coding->cmp_data_start;
       /* The loop has no exit of its own.  It is left through
          `goto label_end_of_loop', which ENCODE_COMPOSITION_EMACS_MULE
          performs when the destination is full and which ONE_MORE_CHAR
          and the EMIT_* macros (defined elsewhere) presumably perform
          too.  */
1091   while (1)
1092     {
1093       src_base = src;
1094
1095       /* If SRC starts a composition, encode the information about the
1096          composition in advance.  */
1097       if (coding->cmp_data_start < coding->cmp_data->used
1098           && char_offset + coding->consumed_char == data[1])
1099         {
1100           ENCODE_COMPOSITION_EMACS_MULE (coding, data);
               /* The macro may have moved on to the next cmp_data block;
                  reload the values derived from it.  */
1101           char_offset = coding->cmp_data->char_offset;
1102           data = coding->cmp_data->data + coding->cmp_data_start;
1103         }
1104
1105       ONE_MORE_CHAR (c);
1106       if (c == '\n' && (coding->eol_type == CODING_EOL_CRLF
1107                         || coding->eol_type == CODING_EOL_CR))
1108         {
1109           if (coding->eol_type == CODING_EOL_CRLF)
1110             EMIT_TWO_BYTES ('\r', c);
1111           else
1112             EMIT_ONE_BYTE ('\r');
1113         }
1114       else if (SINGLE_BYTE_CHAR_P (c))
1115         EMIT_ONE_BYTE (c);
1116       else
             /* Multibyte character: copy its source bytes unchanged.  */
1117         EMIT_BYTES (src_base, src);
1118       coding->consumed_char++;
1119     }
1120  label_end_of_loop:
1121   coding->consumed = src_base - source;
1122   coding->produced = coding->produced_char = dst - destination;
1123   return;
1124 }
1125
1126 \f
1127 /*** 3. ISO2022 handlers ***/
1128
1129 /* The following note describes the coding system ISO2022 briefly.
1130    Since the intention of this note is to help understand the
1131    functions in this file, some parts are NOT ACCURATE or are OVERLY
1132    SIMPLIFIED.  For thorough understanding, please refer to the
1133    original document of ISO2022.  This is equivalent to the standard
1134    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
1135
1136    ISO2022 provides many mechanisms to encode several character sets
1137    in 7-bit and 8-bit environments.  For 7-bit environments, all text
1138    is encoded using bytes less than 128.  This may make the encoded
1139    text a little bit longer, but the text passes more easily through
1140    several types of gateway, some of which strip off the MSB (Most
1141    Significant Bit).
1142
1143    There are two kinds of character sets: control character sets and
1144    graphic character sets.  The former contain control characters such
1145    as `newline' and `escape' to provide control functions (control
1146    functions are also provided by escape sequences).  The latter
1147    contain graphic characters such as 'A' and '-'.  Emacs recognizes
1148    two control character sets and many graphic character sets.
1149
1150    Graphic character sets are classified into one of the following
1151    four classes, according to the number of bytes (DIMENSION) and
1152    number of characters in one dimension (CHARS) of the set:
1153    - DIMENSION1_CHARS94
1154    - DIMENSION1_CHARS96
1155    - DIMENSION2_CHARS94
1156    - DIMENSION2_CHARS96
1157
1158    In addition, each character set is assigned an identification tag,
1159    unique for each set, called the "final character" (denoted as <F>
1160    hereafter).  The <F> of each character set is decided by ECMA(*)
1161    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
1162    (0x30..0x3F are for private use only).
1163
1164    Note (*): ECMA = European Computer Manufacturers Association
1165
1166    Here are examples of graphic character sets [NAME(<F>)]:
1167         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
1168         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
1169         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
1170         o DIMENSION2_CHARS96 -- none for the moment
1171
1172    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
1173         C0 [0x00..0x1F] -- control character plane 0
1174         GL [0x20..0x7F] -- graphic character plane 0
1175         C1 [0x80..0x9F] -- control character plane 1
1176         GR [0xA0..0xFF] -- graphic character plane 1
1177
1178    A control character set is directly designated and invoked to C0 or
1179    C1 by an escape sequence.  The most common case is that:
1180    - ISO646's  control character set is designated/invoked to C0, and
1181    - ISO6429's control character set is designated/invoked to C1,
1182    and usually these designations/invocations are omitted in encoded
1183    text.  In a 7-bit environment, only C0 can be used, and a control
1184    character for C1 is encoded by an appropriate escape sequence to
1185    fit into the environment.  All control characters for C1 are
1186    defined to have corresponding escape sequences.
1187
1188    A graphic character set is at first designated to one of four
1189    graphic registers (G0 through G3), then these graphic registers are
1190    invoked to GL or GR.  These designations and invocations can be
1191    done independently.  The most common case is that G0 is invoked to
1192    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
1193    these invocations and designations are omitted in encoded text.
1194    In a 7-bit environment, only GL can be used.
1195
1196    When a graphic character set of CHARS94 is invoked to GL, codes
1197    0x20 and 0x7F of the GL area work as control characters SPACE and
1198    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
1199    be used.
1200
1201    There are two ways of invocation: locking-shift and single-shift.
1202    With locking-shift, the invocation lasts until the next different
1203    invocation, whereas with single-shift, the invocation affects the
1204    following character only and doesn't affect the locking-shift
1205    state.  Invocations are done by the following control characters or
1206    escape sequences:
1207
1208    ----------------------------------------------------------------------
1209    abbrev  function                  cntrl escape seq   description
1210    ----------------------------------------------------------------------
1211    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
1212    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
1213    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
1214    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
1215    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
1216    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
1217    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
1218    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
1219    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
1220    ----------------------------------------------------------------------
1221    (*) These are not used by any known coding system.
1222
1223    Control characters for these functions are defined by macros
1224    ISO_CODE_XXX in `coding.h'.
1225
1226    Designations are done by the following escape sequences:
1227    ----------------------------------------------------------------------
1228    escape sequence      description
1229    ----------------------------------------------------------------------
1230    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
1231    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
1232    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
1233    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
1234    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
1235    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
1236    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
1237    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
1238    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
1239    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
1240    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
1241    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
1242    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
1243    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
1244    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
1245    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
1246    ----------------------------------------------------------------------
1247
1248    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
1249    of dimension 1, chars 94, and final character <F>, etc...
1250
1251    Note (*): Although these designations are not allowed in ISO2022,
1252    Emacs accepts them on decoding, and produces them on encoding
1253    CHARS96 character sets in a coding system which is characterized as
1254    7-bit environment, non-locking-shift, and non-single-shift.
1255
1256    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
1257    '(' can be omitted.  We refer to this as "short-form" hereafter.
1258
1259    Now you may notice that there are a lot of ways of encoding the
1260    same multilingual text in ISO2022.  Actually, there exist many
1261    coding systems such as Compound Text (used in X11's inter client
1262    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
1263    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
1264    localized platforms), and all of these are variants of ISO2022.
1265
1266    In addition to the above, Emacs handles two more kinds of escape
1267    sequences: ISO6429's direction specification and Emacs' private
1268    sequence for specifying character composition.
1269
1270    ISO6429's direction specification takes the following form:
1271         o CSI ']'      -- end of the current direction
1272         o CSI '0' ']'  -- end of the current direction
1273         o CSI '1' ']'  -- start of left-to-right text
1274         o CSI '2' ']'  -- start of right-to-left text
1275    The control character CSI (0x9B: control sequence introducer) is
1276    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
1277
1278    Character composition specification takes the following form:
1279         o ESC '0' -- start relative composition
1280         o ESC '1' -- end composition
1281         o ESC '2' -- start rule-base composition (*)
1282         o ESC '3' -- start relative composition with alternate chars  (**)
1283         o ESC '4' -- start rule-base composition with alternate chars  (**)
1284   Since these are not standard escape sequences of any ISO standard,
1285   the use of them with these meanings is restricted to Emacs only.
1286
1287   (*) This form is used only in Emacs 20.5 and older versions,
1288   but the newer versions can safely decode it.
1289   (**) This form is used only in Emacs 21.1 and newer versions,
1290   and the older versions can't decode it.
1291
1292   Here's a list of example usages of these composition escape
1293   sequences (categorized by `enum composition_method').
1294
1295   COMPOSITION_RELATIVE:
1296         ESC 0 CHAR [ CHAR ] ESC 1
1297   COMPOSITION_WITH_RULE:
1298         ESC 2 CHAR [ RULE CHAR ] ESC 1
1299   COMPOSITION_WITH_ALTCHARS:
1300         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
1301   COMPOSITION_WITH_RULE_ALTCHARS:
1302         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
1303
1304 enum iso_code_class_type iso_code_class[256];  /* One class per entry; presumably indexed by byte value -- TODO confirm.  */
1305
1306 #define CHARSET_OK(idx, charset, c)                                     \
       /* Nonzero if coding_system_table[IDX] is non-null, CHARSET is      \
          either ASCII or C passes CODING_SAFE_CHAR_P for that coding      \
          system, and the coding system has a requested designation for    \
          CHARSET.  Assigns to `safe_chars', which is not an argument and  \
          must be a variable visible where the macro is expanded.  */      \
1307   (coding_system_table[idx]                                             \
1308    && (charset == CHARSET_ASCII                                         \
1309        || (safe_chars = coding_safe_chars (coding_system_table[idx]),   \
1310            CODING_SAFE_CHAR_P (safe_chars, c)))                         \
1311    && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \
1312                                               charset)                  \
1313        != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
1314
1315 #define SHIFT_OUT_OK(idx) \
       /* Nonzero if CODING_SPEC_ISO_INITIAL_DESIGNATION of               \
          coding_system_table[IDX] for index 1 (presumably graphic        \
          register G1 -- TODO confirm) is non-negative.  The table entry  \
          itself is not tested for null here.  */                         \
1316   (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
1317
1318 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1319    Check if a text is encoded in ISO2022.  If it is, return an
1320    integer in which the appropriate flag bits, any of:
1321         CODING_CATEGORY_MASK_ISO_7
1322         CODING_CATEGORY_MASK_ISO_7_TIGHT
1323         CODING_CATEGORY_MASK_ISO_8_1
1324         CODING_CATEGORY_MASK_ISO_8_2
1325         CODING_CATEGORY_MASK_ISO_7_ELSE
1326         CODING_CATEGORY_MASK_ISO_8_ELSE
1327    are set.  If a code which should never appear in ISO2022 is found,
1328    return 0.  */
1329
1330 static int
1331 detect_coding_iso2022 (src, src_end, multibytep)
1332      unsigned char *src, *src_end;
1333      int multibytep;
1334 {
1335   int mask = CODING_CATEGORY_MASK_ISO;
1336   int mask_found = 0;
1337   int reg[4], shift_out = 0, single_shifting = 0;
1338   int c, c1, charset;
1339   /* Dummy for ONE_MORE_BYTE.  */
1340   struct coding_system dummy_coding;
1341   struct coding_system *coding = &dummy_coding;
1342   Lisp_Object safe_chars;
1343
1344   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
1345   while (mask && src < src_end)
1346     {
1347       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1348       switch (c)
1349         {
1350         case ISO_CODE_ESC:
1351           if (inhibit_iso_escape_detection)
1352             break;
1353           single_shifting = 0;
1354           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1355           if (c >= '(' && c <= '/')
1356             {
1357               /* Designation sequence for a charset of dimension 1.  */
1358               ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
1359               if (c1 < ' ' || c1 >= 0x80
1360                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
1361                 /* Invalid designation sequence.  Just ignore.  */
1362                 break;
1363               reg[(c - '(') % 4] = charset;
1364             }
1365           else if (c == '$')
1366             {
1367               /* Designation sequence for a charset of dimension 2.  */
1368               ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1369               if (c >= '@' && c <= 'B')
1370                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
1371                 reg[0] = charset = iso_charset_table[1][0][c];
1372               else if (c >= '(' && c <= '/')
1373                 {
1374                   ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
1375                   if (c1 < ' ' || c1 >= 0x80
1376                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
1377                     /* Invalid designation sequence.  Just ignore.  */
1378                     break;
1379                   reg[(c - '(') % 4] = charset;
1380                 }
1381               else
1382                 /* Invalid designation sequence.  Just ignore.  */
1383                 break;
1384             }
1385           else if (c == 'N' || c == 'O')
1386             {
1387               /* ESC <Fe> for SS2 or SS3.  */
1388               mask &= CODING_CATEGORY_MASK_ISO_7_ELSE;
1389               break;
1390             }
1391           else if (c >= '0' && c <= '4')
1392             {
1393               /* ESC <Fp> for start/end composition.  */
1394               mask_found |= CODING_CATEGORY_MASK_ISO;
1395               break;
1396             }
1397           else
1398             /* Invalid escape sequence.  Just ignore.  */
1399             break;
1400
1401           /* We found a valid designation sequence for CHARSET.  */
1402           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
1403           c = MAKE_CHAR (charset, 0, 0);
1404           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset, c))
1405             mask_found |= CODING_CATEGORY_MASK_ISO_7;
1406           else
1407             mask &= ~CODING_CATEGORY_MASK_ISO_7;
1408           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset, c))
1409             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
1410           else
1411             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
1412           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset, c))
1413             mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
1414           else
1415             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
1416           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset, c))
1417             mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
1418           else
1419             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
1420           break;
1421
1422         case ISO_CODE_SO:
1423           if (inhibit_iso_escape_detection)
1424             break;
1425           single_shifting = 0;
1426           if (shift_out == 0
1427               && (reg[1] >= 0
1428                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
1429                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
1430             {
1431               /* Locking shift out.  */
1432               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
1433               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
1434             }
1435           break;
1436
1437         case ISO_CODE_SI:
1438           if (inhibit_iso_escape_detection)
1439             break;
1440           single_shifting = 0;
1441           if (shift_out == 1)
1442             {
1443               /* Locking shift in.  */
1444               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
1445               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
1446             }
1447           break;
1448
1449         case ISO_CODE_CSI:
1450           single_shifting = 0;
1451         case ISO_CODE_SS2:
1452         case ISO_CODE_SS3:
1453           {
1454             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
1455
1456             if (inhibit_iso_escape_detection)
1457               break;
1458             if (c != ISO_CODE_CSI)
1459               {
1460                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1461                     & CODING_FLAG_ISO_SINGLE_SHIFT)
1462                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1463                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1464                     & CODING_FLAG_ISO_SINGLE_SHIFT)
1465                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1466                 single_shifting = 1;
1467               }
1468             if (VECTORP (Vlatin_extra_code_table)
1469                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1470               {
1471                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1472                     & CODING_FLAG_ISO_LATIN_EXTRA)
1473                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1474                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1475                     & CODING_FLAG_ISO_LATIN_EXTRA)
1476                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1477               }
1478             mask &= newmask;
1479             mask_found |= newmask;
1480           }
1481           break;
1482
1483         default:
1484           if (c < 0x80)
1485             {
1486               single_shifting = 0;
1487               break;
1488             }
1489           else if (c < 0xA0)
1490             {
1491               single_shifting = 0;
1492               if (VECTORP (Vlatin_extra_code_table)
1493                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1494                 {
1495                   int newmask = 0;
1496
1497                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1498                       & CODING_FLAG_ISO_LATIN_EXTRA)
1499                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1500                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1501                       & CODING_FLAG_ISO_LATIN_EXTRA)
1502                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1503                   mask &= newmask;
1504                   mask_found |= newmask;
1505                 }
1506               else
1507                 return 0;
1508             }
1509           else
1510             {
1511               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
1512                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
1513               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
1514               /* Check the length of succeeding codes of the range
1515                  0xA0..0xFF.  If the byte length is odd, we exclude
1516                  CODING_CATEGORY_MASK_ISO_8_2.  We can check this only
1517                  when we are not single shifting.  */
1518               if (!single_shifting
1519                   && mask & CODING_CATEGORY_MASK_ISO_8_2)
1520                 {
1521                   int i = 1;
1522                   while (src < src_end)
1523                     {
1524                       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1525                       if (c < 0xA0)
1526                         break;
1527                       i++;
1528                     }
1529
1530                   if (i & 1 && src < src_end)
1531                     mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
1532                   else
1533                     mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
1534                 }
1535             }
1536           break;
1537         }
1538     }
1539  label_end_of_loop:
1540   return (mask & mask_found);
1541 }
1542
1543 /* Build the character whose charset is CHARSET and whose position
1544    codes are C1 and C2.  When the variable `translation_table' is
1545    non-nil the result comes from translate_char on that table;
1546    otherwise it is simply MAKE_CHAR (CHARSET, C1, C2).  */
1547
1548 #define DECODE_ISO_CHARACTER(charset, c1, c2)                  \
1549   (!NILP (translation_table)                                   \
1550    ? translate_char (translation_table, -1, charset, c1, c2)   \
1551    : MAKE_CHAR (charset, c1, c2))
1552
1553 /* Set designation state into CODING: look up the charset selected by
        DIMENSION, CHARS and FINAL_CHAR through ISO_CHARSET_TABLE and
        store it as the designation of graphic register REG.

        The macro uses the variables `coding' and `safe_chars' and the
        label `label_invalid_code' of the place where it is expanded.
        Control jumps to label_invalid_code, without changing the
        designation, when:
          - FINAL_CHAR is below '0' or not below 128;
          - the looked-up charset is negative, or REG is not the
            requested designation of the charset and CODING_SAFE_CHAR_P
            rejects MAKE_CHAR (charset, 0, 0); REG is then remembered in
            last_invalid_designation_register;
          - ASCII is designated to register 0 while
            last_invalid_designation_register is 0; the register is
            then reset to -1.

        On success last_invalid_designation_register is reset to -1, and
        when the CODING_MODE_DIRECTION bit of coding->mode is set and
        the charset has a reverse charset, the reverse charset is the
        one that gets designated.  */
1554 #define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
1555   do {                                                                     \
1556     int charset, c;                                                        \
1557                                                                            \
1558     if (final_char < '0' || final_char >= 128)                             \
1559       goto label_invalid_code;                                             \
1560     charset = ISO_CHARSET_TABLE (make_number (dimension),                  \
1561                                  make_number (chars),                      \
1562                                  make_number (final_char));                \
1563     c = MAKE_CHAR (charset, 0, 0);                                         \
1564     if (charset >= 0                                                       \
1565         && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
1566             || CODING_SAFE_CHAR_P (safe_chars, c)))                        \
1567       {                                                                    \
1568         if (coding->spec.iso2022.last_invalid_designation_register == 0    \
1569             && reg == 0                                                    \
1570             && charset == CHARSET_ASCII)                                   \
1571           {                                                                \
1572             /* We should insert this designation sequence as is so         \
1573                that it is surely written back to a file.  */               \
1574             coding->spec.iso2022.last_invalid_designation_register = -1;   \
1575             goto label_invalid_code;                                       \
1576           }                                                                \
1577         coding->spec.iso2022.last_invalid_designation_register = -1;       \
1578         if ((coding->mode & CODING_MODE_DIRECTION)                         \
1579             && CHARSET_REVERSE_CHARSET (charset) >= 0)                     \
1580           charset = CHARSET_REVERSE_CHARSET (charset);                     \
1581         CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;               \
1582       }                                                                    \
1583     else                                                                   \
1584       {                                                                    \
1585         coding->spec.iso2022.last_invalid_designation_register = reg;      \
1586         goto label_invalid_code;                                           \
1587       }                                                                    \
1588   } while (0)
1589
1590 /* Get a new block for recording composition information, remember
1591    CHAR_OFFSET in it, and make it the current block of CODING, linked
1592    after the block that was current so far.  */
1593 void
1594 coding_allocate_composition_data (coding, char_offset)
1595      struct coding_system *coding;
1596      int char_offset;
1597 {
1598   struct composition_data *block, *tail = coding->cmp_data;
1599
1600   block = (struct composition_data *) xmalloc (sizeof *block);
1601   block->prev = tail;
1602   block->next = NULL;
1603   block->used = 0;
1604   block->char_offset = char_offset;
1605   if (tail != NULL)
1606     tail->next = block;
1607   coding->cmp_data_start = 0;
1608   coding->cmp_data = block;
1609 }
1610
1611 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
        C1 is the byte that followed ESC.
1612    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
1613    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
1614    ESC 3 : altchar composition :  ESC 3 ALT ... ESC 0 CHAR ... ESC 1
1615    ESC 4 : alt&rule composition : ESC 4 ALT RULE .. ALT ESC 0 CHAR ... ESC 1

        The macro uses the variables `coding' and `dst' and the label
        `label_end_of_loop' of the place where it is expanded:
          - when coding->composing is COMPOSITION_DISABLED, ESC and C1
            (masked with 0x7f) are stored at DST and
            coding->produced_char grows by 2;
          - when no composition is in progress, the method is chosen
            from C1 and the start is recorded, unless coding->cmp_data
            is missing or too full, in which case coding->result is set
            to CODING_FINISH_INSUFFICIENT_CMP and control jumps to
            label_end_of_loop;
          - when a composition is already in progress with one of the
            two ALTCHARS methods, the method becomes
            COMPOSITION_RELATIVE; with any other method nothing changes.
        NOTE(review): the COMPOSITION_DISABLED branch stores two bytes
        without comparing DST against the end of the destination;
        presumably the caller guarantees the room -- confirm.
1616   */
1617
1618 #define DECODE_COMPOSITION_START(c1)                                       \
1619   do {                                                                     \
1620     if (coding->composing == COMPOSITION_DISABLED)                         \
1621       {                                                                    \
1622         *dst++ = ISO_CODE_ESC;                                             \
1623         *dst++ = c1 & 0x7f;                                                \
1624         coding->produced_char += 2;                                        \
1625       }                                                                    \
1626     else if (!COMPOSING_P (coding))                                        \
1627       {                                                                    \
1628         /* This is surely the start of a composition.  We must be sure     \
1629            that coding->cmp_data has enough space to store the             \
1630            information about the composition.  If not, terminate the       \
1631            current decoding loop, allocate one more memory block for       \
1632            coding->cmp_data in the caller, then start the decoding         \
1633            loop again.  We can't allocate memory here directly because     \
1634            it may cause buffer/string relocation.  */                      \
1635         if (!coding->cmp_data                                              \
1636             || (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \
1637                 >= COMPOSITION_DATA_SIZE))                                 \
1638           {                                                                \
1639             coding->result = CODING_FINISH_INSUFFICIENT_CMP;               \
1640             goto label_end_of_loop;                                        \
1641           }                                                                \
1642         coding->composing = (c1 == '0' ? COMPOSITION_RELATIVE              \
1643                              : c1 == '2' ? COMPOSITION_WITH_RULE           \
1644                              : c1 == '3' ? COMPOSITION_WITH_ALTCHARS       \
1645                              : COMPOSITION_WITH_RULE_ALTCHARS);            \
1646         CODING_ADD_COMPOSITION_START (coding, coding->produced_char,       \
1647                                       coding->composing);                  \
1648         coding->composition_rule_follows = 0;                              \
1649       }                                                                    \
1650     else                                                                   \
1651       {                                                                    \
1652         /* We are already handling a composition.  If the method is        \
1653            the following two, the codes following the current escape       \
1654            sequence are actual characters stored in a buffer.  */          \
1655         if (coding->composing == COMPOSITION_WITH_ALTCHARS                 \
1656             || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)        \
1657           {                                                                \
1658             coding->composing = COMPOSITION_RELATIVE;                      \
1659             coding->composition_rule_follows = 0;                          \
1660           }                                                                \
1661       }                                                                    \
1662   } while (0)
1663
1664 /* Handle composition end sequence ESC 1.  Unless composition
        handling is disabled, CODING_ADD_COMPOSITION_END is invoked and
        coding->composing is reset to COMPOSITION_NO; otherwise ESC and
        C1 are stored at DST and coding->produced_char grows by 2.  */
1665
1666 #define DECODE_COMPOSITION_END(c1)                                      \
1667   do {                                                                  \
1668     if (coding->composing != COMPOSITION_DISABLED)                      \
1669       {                                                                 \
1670         CODING_ADD_COMPOSITION_END (coding, coding->produced_char);     \
1671         coding->composing = COMPOSITION_NO;                             \
1672       }                                                                 \
1673     else                                                                \
1674       {                                                                 \
1675         *dst++ = ISO_CODE_ESC;                                          \
1676         *dst++ = c1;                                                    \
1677         coding->produced_char += 2;                                     \
1678       }                                                                 \
1679   } while (0)
1680
1681 /* Decode a composition rule from the byte C1 (and maybe one more byte
1682    from SRC) and store one encoded composition rule in
1683    coding->cmp_data.
        C1 must be a modifiable variable: 32 is subtracted from it in
        place.  If the result is below 81 it is split into a pair
        (C1 / 9, C1 % 9), where a member equal to 4 is replaced by 10.
        If the result is in 81..92, one more byte is read into the
        variable `c2' of the expansion site with ONE_MORE_BYTE, and the
        rule is built from C1 - 81 and c2 - 32.  In every case the rule
        is added as a composition component and
        coding->composition_rule_follows is cleared.
        NOTE(review): a result of 93 or more is not rejected; the rule
        recorded is then 0 -- confirm that this is intended.  */
1684
1685 #define DECODE_COMPOSITION_RULE(c1)                                     \
1686   do {                                                                  \
1687     int rule = 0;                                                       \
1688     (c1) -= 32;                                                         \
1689     if (c1 < 81)                /* old format (before ver.21) */        \
1690       {                                                                 \
1691         int gref = (c1) / 9;                                            \
1692         int nref = (c1) % 9;                                            \
1693         if (gref == 4) gref = 10;                                       \
1694         if (nref == 4) nref = 10;                                       \
1695         rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
1696       }                                                                 \
1697     else if (c1 < 93)           /* new format (after ver.21) */         \
1698       {                                                                 \
1699         ONE_MORE_BYTE (c2);                                             \
1700         rule = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);              \
1701       }                                                                 \
1702     CODING_ADD_COMPOSITION_COMPONENT (coding, rule);                    \
1703     coding->composition_rule_follows = 0;                               \
1704   } while (0)
1705
1706
1707 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
        Decode the SRC_BYTES bytes of ISO2022 text at SOURCE into
        DESTINATION, which has room for DST_BYTES bytes, following the
        designation, invocation, direction and composition state kept
        in CODING.  On return coding->result, coding->consumed,
        coding->consumed_char and coding->produced have been set.  A
        byte sequence that cannot be interpreted is counted in
        coding->errors, and its first byte alone is handed to EMIT_CHAR
        before decoding resumes at the following byte.  */
1708
1709 static void
1710 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1711      struct coding_system *coding;
1712      unsigned char *source, *destination;
1713      int src_bytes, dst_bytes;
1714 {
1715   unsigned char *src = source;
1716   unsigned char *src_end = source + src_bytes;
1717   unsigned char *dst = destination;
1718   unsigned char *dst_end = destination + dst_bytes;
1719   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1720   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1721   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1722   /* SRC_BASE remembers the start position in source in each loop.
1723      The loop will be exited when there's not enough source code
1724      (within macro ONE_MORE_BYTE), or when there's not enough
1725      destination area to produce a character (within macro
1726      EMIT_CHAR).  */
1727   unsigned char *src_base;
1728   int c, charset;
1729   Lisp_Object translation_table;
1730   Lisp_Object safe_chars;
1731
1732   safe_chars = coding_safe_chars (coding);
1733
1734   if (NILP (Venable_character_translation))
1735     translation_table = Qnil;
1736   else
1737     {
1738       translation_table = coding->translation_table_for_decode;
1739       if (NILP (translation_table))
1740         translation_table = Vstandard_translation_table_for_decode;
1741     }
1742
1743   coding->result = CODING_FINISH_NORMAL;
1744
1745   while (1)
1746     {
1747       int c1, c2 = 0;   /* C2 must be defined even for 1-byte charsets.  */
1748
1749       src_base = src;
1750       ONE_MORE_BYTE (c1);
1751
1752       /* We produce no character or one character.  */
1753       switch (iso_code_class [c1])
1754         {
1755         case ISO_0x20_or_0x7F:
1756           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1757             {
1758               DECODE_COMPOSITION_RULE (c1);
1759               continue;
1760             }
1761           if (charset0 < 0 || CHARSET_CHARS (charset0) == 94)
1762             {
1763               /* This is SPACE or DEL.  */
1764               charset = CHARSET_ASCII;
1765               break;
1766             }
1767           /* This is a graphic character, we fall down ...  */
1768
1769         case ISO_graphic_plane_0:
1770           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1771             {
1772               DECODE_COMPOSITION_RULE (c1);
1773               continue;
1774             }
1775           charset = charset0;
1776           break;
1777
1778         case ISO_0xA0_or_0xFF:
1779           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1780               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1781             goto label_invalid_code;
1782           /* This is a graphic character, we fall down ... */
1783
1784         case ISO_graphic_plane_1:
1785           if (charset1 < 0)
1786             goto label_invalid_code;
1787           charset = charset1;
1788           break;
1789
1790         case ISO_control_0:
1791           if (COMPOSING_P (coding))
1792             DECODE_COMPOSITION_END ('1');
1793
1794           /* All ISO2022 control characters in this class have the
1795              same representation in Emacs internal format.  */
1796           if (c1 == '\n'
1797               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1798               && (coding->eol_type == CODING_EOL_CR
1799                   || coding->eol_type == CODING_EOL_CRLF))
1800             {
1801               coding->result = CODING_FINISH_INCONSISTENT_EOL;
1802               goto label_end_of_loop;
1803             }
1804           charset = CHARSET_ASCII;
1805           break;
1806
1807         case ISO_control_1:
1808           if (COMPOSING_P (coding))
1809             DECODE_COMPOSITION_END ('1');
1810           goto label_invalid_code;
1811
1812         case ISO_carriage_return:
1813           if (COMPOSING_P (coding))
1814             DECODE_COMPOSITION_END ('1');
1815
1816           if (coding->eol_type == CODING_EOL_CR)
1817             c1 = '\n';
1818           else if (coding->eol_type == CODING_EOL_CRLF)
1819             {
1820               ONE_MORE_BYTE (c1);
1821               if (c1 != ISO_CODE_LF)
1822                 {
1823                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1824                     {
1825                       coding->result = CODING_FINISH_INCONSISTENT_EOL;
1826                       goto label_end_of_loop;
1827                     }
1828                   src--;
1829                   c1 = '\r';
1830                 }
1831             }
1832           charset = CHARSET_ASCII;
1833           break;
1834
1835         case ISO_shift_out:
1836           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1837               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1838             goto label_invalid_code;
1839           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1840           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1841           continue;
1842
1843         case ISO_shift_in:
1844           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1845             goto label_invalid_code;
1846           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1847           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1848           continue;
1849
1850         case ISO_single_shift_2_7:
1851         case ISO_single_shift_2:
1852           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1853             goto label_invalid_code;
1854           /* SS2 is handled as an escape sequence of ESC 'N' */
1855           c1 = 'N';
1856           goto label_escape_sequence;
1857
1858         case ISO_single_shift_3:
1859           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1860             goto label_invalid_code;
1861           /* SS3 is handled as an escape sequence of ESC 'O' */
1862           c1 = 'O';
1863           goto label_escape_sequence;
1864
1865         case ISO_control_sequence_introducer:
1866           /* CSI is handled as an escape sequence of ESC '[' ...  */
1867           c1 = '[';
1868           goto label_escape_sequence;
1869
1870         case ISO_escape:
1871           ONE_MORE_BYTE (c1);
1872         label_escape_sequence:
1873           /* Escape sequences handled by Emacs are invocation,
1874              designation, direction specification, and character
1875              composition specification.  */
1876           switch (c1)
1877             {
1878             case '&':           /* revision of following character set */
1879               ONE_MORE_BYTE (c1);
1880               if (!(c1 >= '@' && c1 <= '~'))
1881                 goto label_invalid_code;
1882               ONE_MORE_BYTE (c1);
1883               if (c1 != ISO_CODE_ESC)
1884                 goto label_invalid_code;
1885               ONE_MORE_BYTE (c1);
1886               goto label_escape_sequence;
1887
1888             case '$':           /* designation of 2-byte character set */
1889               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1890                 goto label_invalid_code;
1891               ONE_MORE_BYTE (c1);
1892               if (c1 >= '@' && c1 <= 'B')
1893                 {       /* designation of JISX0208.1978, GB2312.1980,
1894                            or JISX0208.1980 */
1895                   DECODE_DESIGNATION (0, 2, 94, c1);
1896                 }
1897               else if (c1 >= 0x28 && c1 <= 0x2B)
1898                 {       /* designation of DIMENSION2_CHARS94 character set */
1899                   ONE_MORE_BYTE (c2);
1900                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1901                 }
1902               else if (c1 >= 0x2C && c1 <= 0x2F)
1903                 {       /* designation of DIMENSION2_CHARS96 character set */
1904                   ONE_MORE_BYTE (c2);
1905                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1906                 }
1907               else
1908                 goto label_invalid_code;
1909               /* We must update these variables now.  */
1910               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1911               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1912               continue;
1913
1914             case 'n':           /* invocation of locking-shift-2 */
1915               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1916                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1917                 goto label_invalid_code;
1918               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1919               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1920               continue;
1921
1922             case 'o':           /* invocation of locking-shift-3 */
1923               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1924                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1925                 goto label_invalid_code;
1926               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
1927               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1928               continue;
1929
1930             case 'N':           /* invocation of single-shift-2 */
1931               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1932                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1933                 goto label_invalid_code;
1934               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
1935               ONE_MORE_BYTE (c1);
1936               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1937                 goto label_invalid_code;
1938               break;
1939
1940             case 'O':           /* invocation of single-shift-3 */
1941               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1942                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1943                 goto label_invalid_code;
1944               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
1945               ONE_MORE_BYTE (c1);
1946               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1947                 goto label_invalid_code;
1948               break;
1949
1950             case '0': case '2': case '3': case '4': /* start composition */
1951               DECODE_COMPOSITION_START (c1);
1952               continue;
1953
1954             case '1':           /* end composition */
1955               DECODE_COMPOSITION_END (c1);
1956               continue;
1957
1958             case '[':           /* specification of direction */
1959               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
1960                 goto label_invalid_code;
1961               /* For the moment, nested direction is not supported.
1962                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
1963                  left-to-right, and nonzero means right-to-left.  */
1964               ONE_MORE_BYTE (c1);
1965               switch (c1)
1966                 {
1967                 case ']':       /* end of the current direction */
1968                   coding->mode &= ~CODING_MODE_DIRECTION;
1969                   break;        /* The sequence is already complete.  */
1970                 case '0':       /* end of the current direction */
1971                 case '1':       /* start of left-to-right direction */
1972                   ONE_MORE_BYTE (c1);
1973                   if (c1 == ']')
1974                     coding->mode &= ~CODING_MODE_DIRECTION;
1975                   else
1976                     goto label_invalid_code;
1977                   break;
1978
1979                 case '2':       /* start of right-to-left direction */
1980                   ONE_MORE_BYTE (c1);
1981                   if (c1 == ']')
1982                     coding->mode |= CODING_MODE_DIRECTION;
1983                   else
1984                     goto label_invalid_code;
1985                   break;
1986
1987                 default:
1988                   goto label_invalid_code;
1989                 }
1990               continue;
1991
1992             default:
1993               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1994                 goto label_invalid_code;
1995               if (c1 >= 0x28 && c1 <= 0x2B)
1996                 {       /* designation of DIMENSION1_CHARS94 character set */
1997                   ONE_MORE_BYTE (c2);
1998                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
1999                 }
2000               else if (c1 >= 0x2C && c1 <= 0x2F)
2001                 {       /* designation of DIMENSION1_CHARS96 character set */
2002                   ONE_MORE_BYTE (c2);
2003                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
2004                 }
2005               else
2006                 goto label_invalid_code;
2007               /* We must update these variables now.  */
2008               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
2009               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
2010               continue;
2011             }
2012         }
2013
2014       /* Now we know CHARSET and 1st position code C1 of a character.
2015          Produce a multibyte sequence for that character while getting
2016          2nd position code C2 if necessary.  */
2017       if (CHARSET_DIMENSION (charset) == 2)
2018         {
2019           ONE_MORE_BYTE (c2);
2020           if (c1 < 0x80 ? c2 < 0x20 || c2 >= 0x80 : c2 < 0xA0)
2021             /* C2 is not in a valid range.  */
2022             goto label_invalid_code;
2023         }
2024       c = DECODE_ISO_CHARACTER (charset, c1, c2);
2025       EMIT_CHAR (c);
2026       continue;
2027
2028     label_invalid_code:
2029       coding->errors++;
2030       if (COMPOSING_P (coding))
2031         DECODE_COMPOSITION_END ('1');
2032       src = src_base;
2033       c = *src++;
2034       EMIT_CHAR (c);
2035     }
2036
2037  label_end_of_loop:
2038   coding->consumed = coding->consumed_char = src_base - source;
2039   coding->produced = dst - destination;
2040   return;
2041 }
2042
2043
2044 /* ISO2022 encoding stuff.  */
2045
2046 /*
2047    It is not enough to say just "ISO2022" on encoding, we have to
2048    specify more details.  In Emacs, each ISO2022 coding system
2049    variant has the following specifications:
2050         1. Initial designation to G0 through G3.
2051         2. Allows short-form designation?
2052         3. ASCII should be designated to G0 before control characters?
2053         4. ASCII should be designated to G0 at end of line?
2054         5. 7-bit environment or 8-bit environment?
2055         6. Use locking-shift?
2056         7. Use Single-shift?
2057    And the following two are only for Japanese:
2058         8. Use ASCII in place of JISX0201-1976-Roman?
2059         9. Use JISX0208-1983 in place of JISX0208-1978?
2060    These specifications are encoded in `coding->flags' as flag bits
2061    defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
2062    details.
2063 */
2064
/* Write at DST (a variable of the enclosing scope) the escape
   sequence which designates CHARSET to graphic register REG, advance
   DST past it, and record the new designation in CODING.

   When the revision number recorded for CHARSET is below 255, the
   sequence is preceded by `ESC & <'@' + revision>'.  For a 94-char
   multi-dimensional charset the intermediate character is omitted
   (short form) only if CODING has CODING_FLAG_ISO_SHORT_FORM, REG is
   0, and the final character of CHARSET is '@', 'A', or 'B'.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    unsigned char iso_final = CHARSET_ISO_FINAL_CHAR (charset);         \
    /* Intermediate characters indexed by graphic register, for       \
       94-char and 96-char sets respectively.  */                      \
    char *inter94 = "()*+";                                             \
    char *inter96 = ",-./";                                             \
    int rev = CODING_SPEC_ISO_REVISION_NUMBER (coding, charset);        \
                                                                        \
    if (rev < 255)                                                      \
      {                                                                 \
        *dst++ = ISO_CODE_ESC;                                          \
        *dst++ = '&';                                                   \
        *dst++ = '@' + rev;                                             \
      }                                                                 \
    *dst++ = ISO_CODE_ESC;                                              \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      *dst++ = '$';                                                     \
    if (CHARSET_CHARS (charset) != 94)                                  \
      *dst++ = (unsigned char) (inter96[reg]);                          \
    else if (CHARSET_DIMENSION (charset) == 1                           \
             || ! (coding->flags & CODING_FLAG_ISO_SHORT_FORM)          \
             || reg != 0                                                \
             || iso_final < '@' || iso_final > 'B')                     \
      *dst++ = (unsigned char) (inter94[reg]);                          \
    *dst++ = iso_final;                                                 \
    CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;                \
  } while (0)
2107
2108 /* The following two macros produce codes (control character or escape
2109    sequence) for ISO2022 single-shift functions (single-shift-2 and
2110    single-shift-3).  */
2111
#define ENCODE_SINGLE_SHIFT_2                                   \
  do {                                                          \
    /* `ESC N' when CODING is flagged 7-bit, else the single   \
       control code ISO_CODE_SS2.  */                          \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))         \
      *dst++ = ISO_CODE_SS2;                                    \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'N';                                           \
      }                                                         \
    /* Remember that the next character is single-shifted.  */ \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)
2120
#define ENCODE_SINGLE_SHIFT_3                                   \
  do {                                                          \
    /* `ESC O' when CODING is flagged 7-bit, else the single   \
       control code ISO_CODE_SS3.  */                          \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))         \
      *dst++ = ISO_CODE_SS3;                                    \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'O';                                           \
      }                                                         \
    /* Remember that the next character is single-shifted.  */ \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)
2129
2130 /* The following four macros produce codes (control character or
2131    escape sequence) for ISO2022 locking-shift functions (shift-in,
2132    shift-out, locking-shift-2, and locking-shift-3).  */
2133
#define ENCODE_SHIFT_IN                                         \
  do {                                                          \
    /* Record graphic register 0 as invoked to plane 0, then   \
       emit the shift-in control code.  */                     \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;                 \
    *dst++ = ISO_CODE_SI;                                       \
  } while (0)
2139
#define ENCODE_SHIFT_OUT                                        \
  do {                                                          \
    /* Record graphic register 1 as invoked to plane 0, then   \
       emit the shift-out control code.  */                    \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;                 \
    *dst++ = ISO_CODE_SO;                                       \
  } while (0)
2145
#define ENCODE_LOCKING_SHIFT_2                                  \
  do {                                                          \
    /* Emit `ESC n' and record graphic register 2 as invoked   \
       to plane 0.  */                                         \
    *dst++ = ISO_CODE_ESC;                                      \
    *dst++ = 'n';                                               \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;                 \
  } while (0)
2151
#define ENCODE_LOCKING_SHIFT_3                                  \
  do {                                                          \
    /* Emit `ESC o' and record graphic register 3 as invoked   \
       to plane 0.  */                                         \
    *dst++ = ISO_CODE_ESC;                                      \
    *dst++ = 'o';                                               \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;                 \
  } while (0)
2157
/* Emit at DST the single byte for a DIMENSION1 character of CHARSET
   whose position-code is C1.  If CHARSET is not currently reachable
   (no single-shift is pending and it is invoked to neither graphic
   plane), designation/invocation codes are produced first by
   encode_invocation_designation and the check is repeated.  Uses
   `coding' and `dst' of the enclosing scope.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    if (! CODING_SPEC_ISO_SINGLE_SHIFTING (coding)                      \
        && charset != CODING_SPEC_ISO_PLANE_CHARSET (coding, 0)         \
        && charset != CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))        \
      {                                                                 \
        /* CHARSET is not invoked to any graphic plane yet.  Invoke    \
           it (designating it to some register first if needed),      \
           then go round the loop again to emit the character.  */    \
        dst = encode_invocation_designation (charset, coding, dst);     \
        continue;                                                       \
      }                                                                 \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        /* A single-shift code was just produced; it covers this      \
           one character only.  */                                     \
        if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))             \
          *dst++ = c1 | 0x80;                                           \
        else                                                            \
          *dst++ = c1 & 0x7F;                                           \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
      }                                                                 \
    else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))      \
      *dst++ = c1 & 0x7F;                                               \
    else                                                                \
      *dst++ = c1 | 0x80;                                               \
    break;                                                              \
  } while (1)
2190
/* Emit at DST the two bytes for a DIMENSION2 character of CHARSET
   whose position-codes are C1 and C2.  If CHARSET is not currently
   reachable (no single-shift is pending and it is invoked to neither
   graphic plane), designation/invocation codes are produced first by
   encode_invocation_designation and the check is repeated.  Uses
   `coding' and `dst' of the enclosing scope.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    if (! CODING_SPEC_ISO_SINGLE_SHIFTING (coding)                      \
        && charset != CODING_SPEC_ISO_PLANE_CHARSET (coding, 0)         \
        && charset != CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))        \
      {                                                                 \
        /* CHARSET is not invoked to any graphic plane yet.  Invoke    \
           it (designating it to some register first if needed),      \
           then go round the loop again to emit the character.  */    \
        dst = encode_invocation_designation (charset, coding, dst);     \
        continue;                                                       \
      }                                                                 \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        /* A single-shift code was just produced; it covers this      \
           one character only.  */                                     \
        if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))             \
          {                                                             \
            *dst++ = c1 | 0x80;                                         \
            *dst++ = c2 | 0x80;                                         \
          }                                                             \
        else                                                            \
          {                                                             \
            *dst++ = c1 & 0x7F;                                         \
            *dst++ = c2 & 0x7F;                                         \
          }                                                             \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
      }                                                                 \
    else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))      \
      {                                                                 \
        *dst++ = c1 & 0x7F;                                             \
        *dst++ = c2 & 0x7F;                                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        *dst++ = c1 | 0x80;                                             \
        *dst++ = c2 | 0x80;                                             \
      }                                                                 \
    break;                                                              \
  } while (1)
2223
/* Encode character C at DST.  C is split into a charset and one or
   two position-codes.  If the charset is not defined, the
   position-codes are written out unchanged (the second one only when
   it is non-negative).  Otherwise the character goes through the
   DIMENSION1/DIMENSION2 macro, after substituting latin-jisx0201 for
   ASCII under CODING_FLAG_ISO_USE_ROMAN, or jisx0208-1978 for
   jisx0208 under CODING_FLAG_ISO_USE_OLDJIS.  */

#define ENCODE_ISO_CHARACTER(c)                                         \
  do {                                                                  \
    int this_charset, code1, code2;                                     \
                                                                        \
    SPLIT_CHAR (c, this_charset, code1, code2);                         \
    if (! CHARSET_DEFINED_P (this_charset))                             \
      {                                                                 \
        *dst++ = code1;                                                 \
        if (code2 >= 0)                                                 \
          *dst++ = code2;                                               \
      }                                                                 \
    else if (CHARSET_DIMENSION (this_charset) == 1)                     \
      {                                                                 \
        if ((this_charset == CHARSET_ASCII)                             \
            && (coding->flags & CODING_FLAG_ISO_USE_ROMAN))             \
          this_charset = charset_latin_jisx0201;                        \
        ENCODE_ISO_CHARACTER_DIMENSION1 (this_charset, code1);          \
      }                                                                 \
    else                                                                \
      {                                                                 \
        if ((this_charset == charset_jisx0208)                          \
            && (coding->flags & CODING_FLAG_ISO_USE_OLDJIS))            \
          this_charset = charset_jisx0208_1978;                         \
        ENCODE_ISO_CHARACTER_DIMENSION2 (this_charset, code1, code2);   \
      }                                                                 \
  } while (0)
2253
2254
/* Instead of encoding character C itself, encode the substitution
   character CODING_INHIBIT_CHARACTER_SUBSTITUTION: once normally,
   twice when the charset of C has a width greater than 1.  */

#define ENCODE_UNSAFE_CHARACTER(c)                                      \
  do {                                                                  \
    int n_marks = CHARSET_WIDTH (CHAR_CHARSET (c)) > 1 ? 2 : 1;         \
                                                                        \
    while (n_marks-- > 0)                                               \
      ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION);     \
  } while (0)
2263
2264
2265 /* Produce designation and invocation codes at a place pointed by DST
2266    to use CHARSET.  The element `spec.iso2022' of *CODING is updated.
2267    Return new DST.  */
2268
2269 unsigned char *
2270 encode_invocation_designation (charset, coding, dst)
2271      int charset;
2272      struct coding_system *coding;
2273      unsigned char *dst;
2274 {
2275   int reg;                      /* graphic register number */
2276
2277   /* At first, check designations.  */
2278   for (reg = 0; reg < 4; reg++)
2279     if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
2280       break;
2281
2282   if (reg >= 4)
2283     {
2284       /* CHARSET is not yet designated to any graphic registers.  */
2285       /* At first check the requested designation.  */
2286       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
2287       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
2288         /* Since CHARSET requests no special designation, designate it
2289            to graphic register 0.  */
2290         reg = 0;
2291
2292       ENCODE_DESIGNATION (charset, reg, coding);
2293     }
2294
2295   if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
2296       && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
2297     {
2298       /* Since the graphic register REG is not invoked to any graphic
2299          planes, invoke it to graphic plane 0.  */
2300       switch (reg)
2301         {
2302         case 0:                 /* graphic register 0 */
2303           ENCODE_SHIFT_IN;
2304           break;
2305
2306         case 1:                 /* graphic register 1 */
2307           ENCODE_SHIFT_OUT;
2308           break;
2309
2310         case 2:                 /* graphic register 2 */
2311           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2312             ENCODE_SINGLE_SHIFT_2;
2313           else
2314             ENCODE_LOCKING_SHIFT_2;
2315           break;
2316
2317         case 3:                 /* graphic register 3 */
2318           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2319             ENCODE_SINGLE_SHIFT_3;
2320           else
2321             ENCODE_LOCKING_SHIFT_3;
2322           break;
2323         }
2324     }
2325
2326   return dst;
2327 }
2328
/* Emit at DST the two bytes which represent the encoded composition
   rule RULE: RULE is decoded into two reference values, written as
   `32 + 81 + <first>' and `32 + <second>'.  */

#define ENCODE_COMPOSITION_RULE(rule)                           \
  do {                                                          \
    int first_ref, second_ref;                                  \
                                                                \
    COMPOSITION_DECODE_RULE (rule, first_ref, second_ref);      \
    *dst++ = 32 + 81 + first_ref;                               \
    *dst++ = 32 + second_ref;                                   \
  } while (0)
2338
/* Emit at DST the sequence that opens a composition (ESC 0, ESC 3,
   or ESC 4).  DATA points to the integers describing the composition
   (see coding.h for the layout); DATA[3] is stored as the composition
   method in CODING.  For methods other than COMPOSITION_RELATIVE,
   CODING is also prepared for emitting the components, which start
   at offset 4 of the composition's data.  */

#define ENCODE_COMPOSITION_START(coding, data)                          \
  do {                                                                  \
    coding->composing = data[3];                                        \
    *dst++ = ISO_CODE_ESC;                                              \
    if (coding->composing != COMPOSITION_RELATIVE)                      \
      {                                                                 \
        if (coding->composing == COMPOSITION_WITH_ALTCHARS)             \
          *dst++ = '3';                                                 \
        else                                                            \
          *dst++ = '4';                                                 \
        coding->composition_rule_follows = 0;                           \
        coding->cmp_data_index = coding->cmp_data_start + 4;            \
      }                                                                 \
    else                                                                \
      *dst++ = '0';                                                     \
  } while (0)
2358
/* Emit at DST the sequence `ESC 1' that closes the current
   composition, mark CODING as no longer composing, and advance
   CODING's composition-data cursor by DATA[0].  When the cursor
   reaches the end of the current data block and another block
   follows, move on to the beginning of that block.  */

#define ENCODE_COMPOSITION_END(coding, data)                    \
  do {                                                          \
    coding->composing = COMPOSITION_NO;                         \
    coding->cmp_data_start += data[0];                          \
    *dst++ = ISO_CODE_ESC;                                      \
    *dst++ = '1';                                               \
    if (coding->cmp_data_start == coding->cmp_data->used)       \
      {                                                         \
        if (coding->cmp_data->next)                             \
          {                                                     \
            coding->cmp_data = coding->cmp_data->next;          \
            coding->cmp_data_start = 0;                         \
          }                                                     \
      }                                                         \
  } while (0)
2374
/* Emit the composition start sequence `ESC 0' and switch CODING to
   COMPOSITION_RELATIVE.  This does not open a new composition: it
   marks the point where the components (alternate chars and
   composition rules) of the current composition have all been
   produced and the actual text follows in SRC.  */

#define ENCODE_COMPOSITION_FAKE_START(coding)   \
  do {                                          \
    coding->composing = COMPOSITION_RELATIVE;   \
    *dst++ = ISO_CODE_ESC;                      \
    *dst++ = '0';                               \
  } while (0)
2386
/* The following three macros produce codes for indicating direction
   of text.  All of them write at DST and use `coding' of the
   enclosing scope; each expands to a single statement.  */

/* Emit a control sequence introducer: `ESC [' when CODING has the
   CODING_FLAG_ISO_SEVEN_BITS flag, else the 8-bit code ISO_CODE_CSI.
   The flag must be tested with `&': `coding->flags' carries other
   flag bits as well, so comparing the whole word for equality would
   almost never select the 7-bit form.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
  do {                                                  \
    if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
      {                                                 \
        *dst++ = ISO_CODE_ESC;                          \
        *dst++ = '[';                                   \
      }                                                 \
    else                                                \
      *dst++ = ISO_CODE_CSI;                            \
  } while (0)

/* Emit `CSI 2 ]', switching to right-to-left text.  */
#define ENCODE_DIRECTION_R2L                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '2';                               \
    *dst++ = ']';                               \
  } while (0)

/* Emit `CSI 0 ]', switching to left-to-right text.  */
#define ENCODE_DIRECTION_L2R                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '0';                               \
    *dst++ = ']';                               \
  } while (0)
2402
/* Bring the graphic planes and registers back to their initial
   state: emit a shift-in unless register 0 is the one invoked to
   plane 0, then re-designate every register whose current charset
   differs from its (non-negative) initial designation.  */
#define ENCODE_RESET_PLANE_AND_REGISTER                                 \
  do {                                                                  \
    int gr;                                                             \
                                                                        \
    if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                    \
      ENCODE_SHIFT_IN;                                                  \
    for (gr = 0; gr < 4; gr++)                                          \
      {                                                                 \
        int init_charset                                                \
          = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, gr);           \
                                                                        \
        if (init_charset >= 0                                           \
            && init_charset != CODING_SPEC_ISO_DESIGNATION (coding, gr)) \
          ENCODE_DESIGNATION (init_charset, gr, coding);                \
      }                                                                 \
  } while (0)
2417
2418 /* Produce designation sequences of charsets in the line started from
2419    SRC to a place pointed by DST, and return updated DST.
2420
2421    If the current block ends before any end-of-line, we may fail to
2422    find all the necessary designations.  */
2423
2424 static unsigned char *
2425 encode_designation_at_bol (coding, translation_table, src, src_end, dst)
2426      struct coding_system *coding;
2427      Lisp_Object translation_table;
2428      unsigned char *src, *src_end, *dst;
2429 {
2430   int charset, c, found = 0, reg;
2431   /* Table of charsets to be designated to each graphic register.  */
2432   int r[4];
2433
2434   for (reg = 0; reg < 4; reg++)
2435     r[reg] = -1;
2436
2437   while (found < 4)
2438     {
2439       ONE_MORE_CHAR (c);
2440       if (c == '\n')
2441         break;
2442
2443       charset = CHAR_CHARSET (c);
2444       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
2445       if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
2446         {
2447           found++;
2448           r[reg] = charset;
2449         }
2450     }
2451
2452  label_end_of_loop:
2453   if (found)
2454     {
2455       for (reg = 0; reg < 4; reg++)
2456         if (r[reg] >= 0
2457             && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
2458           ENCODE_DESIGNATION (r[reg], reg, coding);
2459     }
2460
2461   return dst;
2462 }
2463
2464 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
2465
2466 static void
2467 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
2468      struct coding_system *coding;
2469      unsigned char *source, *destination;
2470      int src_bytes, dst_bytes;
2471 {
2472   unsigned char *src = source;
2473   unsigned char *src_end = source + src_bytes;
2474   unsigned char *dst = destination;
2475   unsigned char *dst_end = destination + dst_bytes;
2476   /* Since the maximum bytes produced by each loop is 20, we subtract 19
2477      from DST_END to assure overflow checking is necessary only at the
2478      head of loop.  */
2479   unsigned char *adjusted_dst_end = dst_end - 19;
2480   /* SRC_BASE remembers the start position in source in each loop.
2481      The loop will be exited when there's not enough source text to
2482      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2483      there's not enough destination area to produce encoded codes
2484      (within macro EMIT_BYTES).  */
2485   unsigned char *src_base;
2486   int c;
2487   Lisp_Object translation_table;
2488   Lisp_Object safe_chars;
2489
2490   safe_chars = coding_safe_chars (coding);
2491
2492   if (NILP (Venable_character_translation))
2493     translation_table = Qnil;
2494   else
2495     {
2496       translation_table = coding->translation_table_for_encode;
2497       if (NILP (translation_table))
2498         translation_table = Vstandard_translation_table_for_encode;
2499     }
2500
2501   coding->consumed_char = 0;
2502   coding->errors = 0;
2503   while (1)
2504     {
2505       src_base = src;
2506
2507       if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19)))
2508         {
2509           coding->result = CODING_FINISH_INSUFFICIENT_DST;
2510           break;
2511         }
2512
2513       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
2514           && CODING_SPEC_ISO_BOL (coding))
2515         {
2516           /* We have to produce designation sequences if any now.  */
2517           dst = encode_designation_at_bol (coding, translation_table,
2518                                            src, src_end, dst);
2519           CODING_SPEC_ISO_BOL (coding) = 0;
2520         }
2521
2522       /* Check composition start and end.  */
2523       if (coding->composing != COMPOSITION_DISABLED
2524           && coding->cmp_data_start < coding->cmp_data->used)
2525         {
2526           struct composition_data *cmp_data = coding->cmp_data;
2527           int *data = cmp_data->data + coding->cmp_data_start;
2528           int this_pos = cmp_data->char_offset + coding->consumed_char;
2529
2530           if (coding->composing == COMPOSITION_RELATIVE)
2531             {
2532               if (this_pos == data[2])
2533                 {
2534                   ENCODE_COMPOSITION_END (coding, data);
2535                   cmp_data = coding->cmp_data;
2536                   data = cmp_data->data + coding->cmp_data_start;
2537                 }
2538             }
2539           else if (COMPOSING_P (coding))
2540             {
2541               /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHAR  */
2542               if (coding->cmp_data_index == coding->cmp_data_start + data[0])
2543                 /* We have consumed components of the composition.
2544                    What follows in SRC is the composition's base
2545                    text.  */
2546                 ENCODE_COMPOSITION_FAKE_START (coding);
2547               else
2548                 {
2549                   int c = cmp_data->data[coding->cmp_data_index++];
2550                   if (coding->composition_rule_follows)
2551                     {
2552                       ENCODE_COMPOSITION_RULE (c);
2553                       coding->composition_rule_follows = 0;
2554                     }
2555                   else
2556                     {
2557                       if (coding->flags & CODING_FLAG_ISO_SAFE
2558                           && ! CODING_SAFE_CHAR_P (safe_chars, c))
2559                         ENCODE_UNSAFE_CHARACTER (c);
2560                       else
2561                         ENCODE_ISO_CHARACTER (c);
2562                       if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
2563                         coding->composition_rule_follows = 1;
2564                     }
2565                   continue;
2566                 }
2567             }
2568           if (!COMPOSING_P (coding))
2569             {
2570               if (this_pos == data[1])
2571                 {
2572                   ENCODE_COMPOSITION_START (coding, data);
2573                   continue;
2574                 }
2575             }
2576         }
2577
2578       ONE_MORE_CHAR (c);
2579
2580       /* Now encode the character C.  */
2581       if (c < 0x20 || c == 0x7F)
2582         {
2583           if (c == '\r')
2584             {
2585               if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2586                 {
2587                   if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2588                     ENCODE_RESET_PLANE_AND_REGISTER;
2589                   *dst++ = c;
2590                   continue;
2591                 }
2592               /* fall down to treat '\r' as '\n' ...  */
2593               c = '\n';
2594             }
2595           if (c == '\n')
2596             {
2597               if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
2598                 ENCODE_RESET_PLANE_AND_REGISTER;
2599               if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
2600                 bcopy (coding->spec.iso2022.initial_designation,
2601                        coding->spec.iso2022.current_designation,
2602                        sizeof coding->spec.iso2022.initial_designation);
2603               if (coding->eol_type == CODING_EOL_LF
2604                   || coding->eol_type == CODING_EOL_UNDECIDED)
2605                 *dst++ = ISO_CODE_LF;
2606               else if (coding->eol_type == CODING_EOL_CRLF)
2607                 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
2608               else
2609                 *dst++ = ISO_CODE_CR;
2610               CODING_SPEC_ISO_BOL (coding) = 1;
2611             }
2612           else
2613             {
2614               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2615                 ENCODE_RESET_PLANE_AND_REGISTER;
2616               *dst++ = c;
2617             }
2618         }
2619       else if (ASCII_BYTE_P (c))
2620         ENCODE_ISO_CHARACTER (c);
2621       else if (SINGLE_BYTE_CHAR_P (c))
2622         {
2623           *dst++ = c;
2624           coding->errors++;
2625         }
2626       else if (coding->flags & CODING_FLAG_ISO_SAFE
2627                && ! CODING_SAFE_CHAR_P (safe_chars, c))
2628         ENCODE_UNSAFE_CHARACTER (c);
2629       else
2630         ENCODE_ISO_CHARACTER (c);
2631
2632       coding->consumed_char++;
2633     }
2634
2635  label_end_of_loop:
2636   coding->consumed = src_base - source;
2637   coding->produced = coding->produced_char = dst - destination;
2638 }
2639
2640 \f
2641 /*** 4. SJIS and BIG5 handlers ***/
2642
2643 /* Although SJIS and BIG5 are not ISO coding systems, they are used
2644    quite widely.  So, for the moment, Emacs supports them in the bare
2645    C code.  But, in the future, they may be supported only by CCL.  */
2646
2647 /* SJIS is a coding system encoding three character sets: ASCII, right
2648    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
2649    as is.  A character of charset katakana-jisx0201 is encoded by
2650    "position-code + 0x80".  A character of charset japanese-jisx0208
2651    is encoded in 2-byte but two position-codes are divided and shifted
2652    so that it fits in the range below.
2653
2654    --- CODE RANGE of SJIS ---
2655    (character set)      (range)
2656    ASCII                0x00 .. 0x7F
2657    KATAKANA-JISX0201    0xA1 .. 0xDF
2658    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
2659             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
2660    -------------------------------
2661
2662 */
2663
2664 /* BIG5 is a coding system encoding two character sets: ASCII and
2665    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
2666    character set and is encoded in two bytes.
2667
2668    --- CODE RANGE of BIG5 ---
2669    (character set)      (range)
2670    ASCII                0x00 .. 0x7F
2671    Big5 (1st byte)      0xA1 .. 0xFE
2672         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
2673    --------------------------
2674
2675    Since the number of characters in Big5 is larger than maximum
2676    characters in Emacs' charset (96x96), it can't be handled as one
2677    charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
2678    and `charset-big5-2'.  Both are DIMENSION2 and CHARS94.  The former
2679    contains frequently used characters and the latter contains less
2680    frequently used characters.  */
2681
/* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
   are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
   C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
   format.  CHARSET is `charset_big5_1' or `charset_big5_2'.

   Every input argument is parenthesized in the expansions, so an
   arbitrary expression (e.g. `x & 0xFF') may be passed.  The inputs
   are all read before any output is stored, so the same variables
   may serve as both inputs and outputs.  */

/* Number of Big5 characters which have the same code in 1st byte.  */
#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)

/* Convert the BIG5 byte pair B1, B2 into CHARSET and the
   position-codes C1, C2.  Lead bytes below 0xC9 belong to
   `charset_big5_1', the others to `charset_big5_2'.  */
#define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = (((b1) - 0xA1) * BIG5_SAME_ROW                                  \
         + (b2) - ((b2) < 0x7F ? 0x40 : 0x62));                         \
    if ((b1) < 0xC9)                                                    \
      (charset) = charset_big5_1;                                       \
    else                                                                \
      {                                                                 \
        (charset) = charset_big5_2;                                     \
        temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                          \
      }                                                                 \
    (c1) = temp / (0xFF - 0xA1) + 0x21;                                 \
    (c2) = temp % (0xFF - 0xA1) + 0x21;                                 \
  } while (0)

/* Convert CHARSET and the position-codes C1, C2 into the BIG5 byte
   pair B1, B2.  This is the inverse of DECODE_BIG5.  */
#define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);                  \
    if ((charset) == charset_big5_2)                                    \
      temp += BIG5_SAME_ROW * (0xC9 - 0xA1);                            \
    (b1) = temp / BIG5_SAME_ROW + 0xA1;                                 \
    (b2) = temp % BIG5_SAME_ROW;                                        \
    (b2) += (b2) < 0x3F ? 0x40 : 0x62;                                  \
  } while (0)
2714
2715 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2716    Check if a text is encoded in SJIS.  If it is, return
2717    CODING_CATEGORY_MASK_SJIS, else return 0.  */
2718
2719 static int
2720 detect_coding_sjis (src, src_end, multibytep)
2721      unsigned char *src, *src_end;
2722      int multibytep;
2723 {
2724   int c;
2725   /* Dummy for ONE_MORE_BYTE.  */
2726   struct coding_system dummy_coding;
2727   struct coding_system *coding = &dummy_coding;
2728
2729   while (1)
2730     {
2731       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2732       if (c < 0x80)
2733         continue;
2734       if (c == 0x80 || c == 0xA0 || c > 0xEF)
2735         return 0;
2736       if (c <= 0x9F || c >= 0xE0)
2737         {
2738           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2739           if (c < 0x40 || c == 0x7F || c > 0xFC)
2740             return 0;
2741         }
2742     }
2743  label_end_of_loop:
2744   return CODING_CATEGORY_MASK_SJIS;
2745 }
2746
2747 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2748    Check if a text is encoded in BIG5.  If it is, return
2749    CODING_CATEGORY_MASK_BIG5, else return 0.  */
2750
2751 static int
2752 detect_coding_big5 (src, src_end, multibytep)
2753      unsigned char *src, *src_end;
2754      int multibytep;
2755 {
2756   int c;
2757   /* Dummy for ONE_MORE_BYTE.  */
2758   struct coding_system dummy_coding;
2759   struct coding_system *coding = &dummy_coding;
2760
2761   while (1)
2762     {
2763       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2764       if (c < 0x80)
2765         continue;
2766       if (c < 0xA1 || c > 0xFE)
2767         return 0;
2768       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2769       if (c < 0x40 || (c > 0x7F && c < 0xA1) || c > 0xFE)
2770         return 0;
2771     }
2772  label_end_of_loop:
2773   return CODING_CATEGORY_MASK_BIG5;
2774 }
2775
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
   Check if a text is encoded in UTF-8.  If it is, return
   CODING_CATEGORY_MASK_UTF_8, else return 0.  */

/* Nonzero if the bits of octet C selected by MASK equal PREFIX.  */
#define UTF_8_OCTET_HAS_PREFIX_P(c, mask, prefix) \
  (((c) & (mask)) == (prefix))

/* Classification of a single octet C by its high-order bits: an
   octet that is a whole sequence by itself, a trailing octet, or the
   leading octet of a 2- to 6-octet sequence.  */
#define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
#define UTF_8_EXTRA_OCTET_P(c)     UTF_8_OCTET_HAS_PREFIX_P (c, 0xC0, 0x80)
#define UTF_8_2_OCTET_LEADING_P(c) UTF_8_OCTET_HAS_PREFIX_P (c, 0xE0, 0xC0)
#define UTF_8_3_OCTET_LEADING_P(c) UTF_8_OCTET_HAS_PREFIX_P (c, 0xF0, 0xE0)
#define UTF_8_4_OCTET_LEADING_P(c) UTF_8_OCTET_HAS_PREFIX_P (c, 0xF8, 0xF0)
#define UTF_8_5_OCTET_LEADING_P(c) UTF_8_OCTET_HAS_PREFIX_P (c, 0xFC, 0xF8)
#define UTF_8_6_OCTET_LEADING_P(c) UTF_8_OCTET_HAS_PREFIX_P (c, 0xFE, 0xFC)
2787
2788 static int
2789 detect_coding_utf_8 (src, src_end, multibytep)
2790      unsigned char *src, *src_end;
2791      int multibytep;
2792 {
2793   unsigned char c;
2794   int seq_maybe_bytes;
2795   /* Dummy for ONE_MORE_BYTE.  */
2796   struct coding_system dummy_coding;
2797   struct coding_system *coding = &dummy_coding;
2798
2799   while (1)
2800     {
2801       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2802       if (UTF_8_1_OCTET_P (c))
2803         continue;
2804       else if (UTF_8_2_OCTET_LEADING_P (c))
2805         seq_maybe_bytes = 1;
2806       else if (UTF_8_3_OCTET_LEADING_P (c))
2807         seq_maybe_bytes = 2;
2808       else if (UTF_8_4_OCTET_LEADING_P (c))
2809         seq_maybe_bytes = 3;
2810       else if (UTF_8_5_OCTET_LEADING_P (c))
2811         seq_maybe_bytes = 4;
2812       else if (UTF_8_6_OCTET_LEADING_P (c))
2813         seq_maybe_bytes = 5;
2814       else
2815         return 0;
2816
2817       do
2818         {
2819           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2820           if (!UTF_8_EXTRA_OCTET_P (c))
2821             return 0;
2822           seq_maybe_bytes--;
2823         }
2824       while (seq_maybe_bytes > 0);
2825     }
2826
2827  label_end_of_loop:
2828   return CODING_CATEGORY_MASK_UTF_8;
2829 }
2830
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
   Check if a text starts with a UTF-16 byte order mark.  If the first
   two bytes are 0xFE 0xFF, return CODING_CATEGORY_MASK_UTF_16_BE; if
   they are 0xFF 0xFE, return CODING_CATEGORY_MASK_UTF_16_LE; else
   return 0.  */

/* Nonzero if the 16-bit value VAL is 0xFFFE or 0xFFFF.  */
#define UTF_16_INVALID_P(val)   \
  (((val) == 0xFFFE)            \
   || ((val) == 0xFFFF))

/* Nonzero if VAL is a high (leading) surrogate, i.e. lies in
   0xD800..0xDBFF.  The upper six bits identify the range, so they
   must all take part in the comparison; masking with 0xD800 alone
   would also match 0xDC00..0xDFFF and values such as 0xF800.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* Nonzero if VAL is a low (trailing) surrogate, i.e. lies in
   0xDC00..0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)
2846
2847 static int
2848 detect_coding_utf_16 (src, src_end, multibytep)
2849      unsigned char *src, *src_end;
2850      int multibytep;
2851 {
2852   unsigned char c1, c2;
2853   /* Dummy for TWO_MORE_BYTES.  */
2854   struct coding_system dummy_coding;
2855   struct coding_system *coding = &dummy_coding;
2856
2857   ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
2858   ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep);
2859
2860   if ((c1 == 0xFF) && (c2 == 0xFE))
2861     return CODING_CATEGORY_MASK_UTF_16_LE;
2862   else if ((c1 == 0xFE) && (c2 == 0xFF))
2863     return CODING_CATEGORY_MASK_UTF_16_BE;
2864
2865  label_end_of_loop:
2866   return 0;
2867 }
2868
2869 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2870    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
2871
2872 static void
2873 decode_coding_sjis_big5 (coding, source, destination,
2874                          src_bytes, dst_bytes, sjis_p)
2875      struct coding_system *coding;
2876      unsigned char *source, *destination;
2877      int src_bytes, dst_bytes;
2878      int sjis_p;
2879 {
2880   unsigned char *src = source;
2881   unsigned char *src_end = source + src_bytes;
2882   unsigned char *dst = destination;
2883   unsigned char *dst_end = destination + dst_bytes;
2884   /* SRC_BASE remembers the start position in source in each loop.
2885      The loop will be exited when there's not enough source code
2886      (within macro ONE_MORE_BYTE), or when there's not enough
2887      destination area to produce a character (within macro
2888      EMIT_CHAR).  */
2889   unsigned char *src_base;
2890   Lisp_Object translation_table;
2891
2892   if (NILP (Venable_character_translation))
2893     translation_table = Qnil;
2894   else
2895     {
2896       translation_table = coding->translation_table_for_decode;
2897       if (NILP (translation_table))
2898         translation_table = Vstandard_translation_table_for_decode;
2899     }
2900
2901   coding->produced_char = 0;
2902   while (1)
2903     {
2904       int c, charset, c1, c2;
2905
2906       src_base = src;
2907       ONE_MORE_BYTE (c1);
2908
2909       if (c1 < 0x80)
2910         {
2911           charset = CHARSET_ASCII;
2912           if (c1 < 0x20)
2913             {
2914               if (c1 == '\r')
2915                 {
2916                   if (coding->eol_type == CODING_EOL_CRLF)
2917                     {
2918                       ONE_MORE_BYTE (c2);
2919                       if (c2 == '\n')
2920                         c1 = c2;
2921                       else if (coding->mode
2922                                & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2923                         {
2924                           coding->result = CODING_FINISH_INCONSISTENT_EOL;
2925                           goto label_end_of_loop;
2926                         }
2927                       else
2928                         /* To process C2 again, SRC is subtracted by 1.  */
2929                         src--;
2930                     }
2931                   else if (coding->eol_type == CODING_EOL_CR)
2932                     c1 = '\n';
2933                 }
2934               else if (c1 == '\n'
2935                        && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2936                        && (coding->eol_type == CODING_EOL_CR
2937                            || coding->eol_type == CODING_EOL_CRLF))
2938                 {
2939                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
2940                   goto label_end_of_loop;
2941                 }
2942             }
2943         }
2944       else
2945         {
2946           if (sjis_p)
2947             {
2948               if (c1 == 0x80 || c1 == 0xA0 || c1 > 0xEF)
2949                 goto label_invalid_code;
2950               if (c1 <= 0x9F || c1 >= 0xE0)
2951                 {
2952                   /* SJIS -> JISX0208 */
2953                   ONE_MORE_BYTE (c2);
2954                   if (c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
2955                     goto label_invalid_code;
2956                   DECODE_SJIS (c1, c2, c1, c2);
2957                   charset = charset_jisx0208;
2958                 }
2959               else
2960                 /* SJIS -> JISX0201-Kana */
2961                 charset = charset_katakana_jisx0201;
2962             }
2963           else
2964             {
2965               /* BIG5 -> Big5 */
2966               if (c1 < 0xA0 || c1 > 0xFE)
2967                 goto label_invalid_code;
2968               ONE_MORE_BYTE (c2);
2969               if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE)
2970                 goto label_invalid_code;
2971               DECODE_BIG5 (c1, c2, charset, c1, c2);
2972             }
2973         }
2974
2975       c = DECODE_ISO_CHARACTER (charset, c1, c2);
2976       EMIT_CHAR (c);
2977       continue;
2978
2979     label_invalid_code:
2980       coding->errors++;
2981       src = src_base;
2982       c = *src++;
2983       EMIT_CHAR (c);
2984     }
2985
2986  label_end_of_loop:
2987   coding->consumed = coding->consumed_char = src_base - source;
2988   coding->produced = dst - destination;
2989   return;
2990 }
2991
2992 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
2993    This function can encode charsets `ascii', `katakana-jisx0201',
2994    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
2995    are sure that all these charsets are registered as official charset
2996    (i.e. do not have extended leading-codes).  Characters of other
2997    charsets are produced without any encoding.  If SJIS_P is 1, encode
2998    SJIS text, else encode BIG5 text.  */
2999
3000 static void
3001 encode_coding_sjis_big5 (coding, source, destination,
3002                          src_bytes, dst_bytes, sjis_p)
3003      struct coding_system *coding;
3004      unsigned char *source, *destination;
3005      int src_bytes, dst_bytes;
3006      int sjis_p;
3007 {
3008   unsigned char *src = source;
3009   unsigned char *src_end = source + src_bytes;
3010   unsigned char *dst = destination;
3011   unsigned char *dst_end = destination + dst_bytes;
3012   /* SRC_BASE remembers the start position in source in each loop.
3013      The loop will be exited when there's not enough source text to
3014      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
3015      there's not enough destination area to produce encoded codes
3016      (within macro EMIT_BYTES).  */
3017   unsigned char *src_base;
3018   Lisp_Object translation_table;
3019
3020   if (NILP (Venable_character_translation))
3021     translation_table = Qnil;
3022   else
3023     {
3024       translation_table = coding->translation_table_for_encode;
3025       if (NILP (translation_table))
3026         translation_table = Vstandard_translation_table_for_encode;
3027     }
3028
3029   while (1)
3030     {
3031       int c, charset, c1, c2;
3032
3033       src_base = src;
3034       ONE_MORE_CHAR (c);
3035
3036       /* Now encode the character C.  */
3037       if (SINGLE_BYTE_CHAR_P (c))
3038         {
3039           switch (c)
3040             {
3041             case '\r':
3042               if (!coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
3043                 {
3044                   EMIT_ONE_BYTE (c);
3045                   break;
3046                 }
3047               c = '\n';
3048             case '\n':
3049               if (coding->eol_type == CODING_EOL_CRLF)
3050                 {
3051                   EMIT_TWO_BYTES ('\r', c);
3052                   break;
3053                 }
3054               else if (coding->eol_type == CODING_EOL_CR)
3055                 c = '\r';
3056             default:
3057               EMIT_ONE_BYTE (c);
3058             }
3059         }
3060       else
3061         {
3062           SPLIT_CHAR (c, charset, c1, c2);
3063           if (sjis_p)
3064             {
3065               if (charset == charset_jisx0208
3066                   || charset == charset_jisx0208_1978)
3067                 {
3068                   ENCODE_SJIS (c1, c2, c1, c2);
3069                   EMIT_TWO_BYTES (c1, c2);
3070                 }
3071               else if (charset == charset_katakana_jisx0201)
3072                 EMIT_ONE_BYTE (c1 | 0x80);
3073               else if (charset == charset_latin_jisx0201)
3074                 EMIT_ONE_BYTE (c1);
3075               else
3076                 /* There's no way other than producing the internal
3077                    codes as is.  */
3078                 EMIT_BYTES (src_base, src);
3079             }
3080           else
3081             {
3082               if (charset == charset_big5_1 || charset == charset_big5_2)
3083                 {
3084                   ENCODE_BIG5 (charset, c1, c2, c1, c2);
3085                   EMIT_TWO_BYTES (c1, c2);
3086                 }
3087               else
3088                 /* There's no way other than producing the internal
3089                    codes as is.  */
3090                 EMIT_BYTES (src_base, src);
3091             }
3092         }
3093       coding->consumed_char++;
3094     }
3095
3096  label_end_of_loop:
3097   coding->consumed = src_base - source;
3098   coding->produced = coding->produced_char = dst - destination;
3099 }
3100
3101 \f
3102 /*** 5. CCL handlers ***/
3103
3104 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3105    Check if a text is encoded in a coding system of which
3106    encoder/decoder are written in CCL program.  If it is, return
3107    CODING_CATEGORY_MASK_CCL, else return 0.  */
3108
3109 static int
3110 detect_coding_ccl (src, src_end, multibytep)
3111      unsigned char *src, *src_end;
3112      int multibytep;
3113 {
3114   unsigned char *valid;
3115   int c;
3116   /* Dummy for ONE_MORE_BYTE.  */
3117   struct coding_system dummy_coding;
3118   struct coding_system *coding = &dummy_coding;
3119
3120   /* No coding system is assigned to coding-category-ccl.  */
3121   if (!coding_system_table[CODING_CATEGORY_IDX_CCL])
3122     return 0;
3123
3124   valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
3125   while (1)
3126     {
3127       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
3128       if (! valid[c])
3129         return 0;
3130     }
3131  label_end_of_loop:
3132   return CODING_CATEGORY_MASK_CCL;
3133 }
3134
3135 \f
3136 /*** 6. End-of-line handlers ***/
3137
3138 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3139
3140 static void
3141 decode_eol (coding, source, destination, src_bytes, dst_bytes)
3142      struct coding_system *coding;
3143      unsigned char *source, *destination;
3144      int src_bytes, dst_bytes;
3145 {
3146   unsigned char *src = source;
3147   unsigned char *dst = destination;
3148   unsigned char *src_end = src + src_bytes;
3149   unsigned char *dst_end = dst + dst_bytes;
3150   Lisp_Object translation_table;
3151   /* SRC_BASE remembers the start position in source in each loop.
3152      The loop will be exited when there's not enough source code
3153      (within macro ONE_MORE_BYTE), or when there's not enough
3154      destination area to produce a character (within macro
3155      EMIT_CHAR).  */
3156   unsigned char *src_base;
3157   int c;
3158
3159   translation_table = Qnil;
3160   switch (coding->eol_type)
3161     {
3162     case CODING_EOL_CRLF:
3163       while (1)
3164         {
3165           src_base = src;
3166           ONE_MORE_BYTE (c);
3167           if (c == '\r')
3168             {
3169               ONE_MORE_BYTE (c);
3170               if (c != '\n')
3171                 {
3172                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
3173                     {
3174                       coding->result = CODING_FINISH_INCONSISTENT_EOL;
3175                       goto label_end_of_loop;
3176                     }
3177                   src--;
3178                   c = '\r';
3179                 }
3180             }
3181           else if (c == '\n'
3182                    && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
3183             {
3184               coding->result = CODING_FINISH_INCONSISTENT_EOL;
3185               goto label_end_of_loop;
3186             }
3187           EMIT_CHAR (c);
3188         }
3189       break;
3190
3191     case CODING_EOL_CR:
3192       while (1)
3193         {
3194           src_base = src;
3195           ONE_MORE_BYTE (c);
3196           if (c == '\n')
3197             {
3198               if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
3199                 {
3200                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
3201                   goto label_end_of_loop;
3202                 }
3203             }
3204           else if (c == '\r')
3205             c = '\n';
3206           EMIT_CHAR (c);
3207         }
3208       break;
3209
3210     default:                    /* no need for EOL handling */
3211       while (1)
3212         {
3213           src_base = src;
3214           ONE_MORE_BYTE (c);
3215           EMIT_CHAR (c);
3216         }
3217     }
3218
3219  label_end_of_loop:
3220   coding->consumed = coding->consumed_char = src_base - source;
3221   coding->produced = dst - destination;
3222   return;
3223 }
3224
3225 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
3226    format of end-of-line according to `coding->eol_type'.  It also
3227    convert multibyte form 8-bit characters to unibyte if
3228    CODING->src_multibyte is nonzero.  If `coding->mode &
3229    CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code '\r' in source text
3230    also means end-of-line.  */
3231
3232 static void
3233 encode_eol (coding, source, destination, src_bytes, dst_bytes)
3234      struct coding_system *coding;
3235      unsigned char *source, *destination;
3236      int src_bytes, dst_bytes;
3237 {
3238   unsigned char *src = source;
3239   unsigned char *dst = destination;
3240   unsigned char *src_end = src + src_bytes;
3241   unsigned char *dst_end = dst + dst_bytes;
3242   Lisp_Object translation_table;
3243   /* SRC_BASE remembers the start position in source in each loop.
3244      The loop will be exited when there's not enough source text to
3245      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
3246      there's not enough destination area to produce encoded codes
3247      (within macro EMIT_BYTES).  */
3248   unsigned char *src_base;
3249   int c;
3250   int selective_display = coding->mode & CODING_MODE_SELECTIVE_DISPLAY;
3251
3252   translation_table = Qnil;
3253   if (coding->src_multibyte
3254       && *(src_end - 1) == LEADING_CODE_8_BIT_CONTROL)
3255     {
3256       src_end--;
3257       src_bytes--;
3258       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
3259     }
3260
3261   if (coding->eol_type == CODING_EOL_CRLF)
3262     {
3263       while (src < src_end)
3264         {
3265           src_base = src;
3266           c = *src++;
3267           if (c >= 0x20)
3268             EMIT_ONE_BYTE (c);
3269           else if (c == '\n' || (c == '\r' && selective_display))
3270             EMIT_TWO_BYTES ('\r', '\n');
3271           else
3272             EMIT_ONE_BYTE (c);
3273         }
3274       src_base = src;
3275     label_end_of_loop:
3276       ;
3277     }
3278   else
3279     {
3280       if (!dst_bytes || src_bytes <= dst_bytes)
3281         {
3282           safe_bcopy (src, dst, src_bytes);
3283           src_base = src_end;
3284           dst += src_bytes;
3285         }
3286       else
3287         {
3288           if (coding->src_multibyte
3289               && *(src + dst_bytes - 1) == LEADING_CODE_8_BIT_CONTROL)
3290             dst_bytes--;
3291           safe_bcopy (src, dst, dst_bytes);
3292           src_base = src + dst_bytes;
3293           dst = destination + dst_bytes;
3294           coding->result = CODING_FINISH_INSUFFICIENT_DST;
3295         }
3296       if (coding->eol_type == CODING_EOL_CR)
3297         {
3298           for (src = destination; src < dst; src++)
3299             if (*src == '\n') *src = '\r';
3300         }
3301       else if (selective_display)
3302         {
3303           for (src = destination; src < dst; src++)
3304             if (*src == '\r') *src = '\n';
3305         }
3306     }
3307   if (coding->src_multibyte)
3308     dst = destination + str_as_unibyte (destination, dst - destination);
3309
3310   coding->consumed = src_base - source;
3311   coding->produced = dst - destination;
3312   coding->produced_char = coding->produced;
3313 }
3314
3315 \f
3316 /*** 7. C library functions ***/
3317
3318 /* In Emacs Lisp, a coding system is represented by a Lisp symbol which
3319    has a property `coding-system'.  The value of this property is a
3320    vector of length 5 (called the coding-vector).  Among elements of
3321    this vector, the first (element[0]) and the fifth (element[4])
3322    carry important information for decoding/encoding.  Before
3323    decoding/encoding, this information should be set in fields of a
3324    structure of type `coding_system'.
3325
3326    The value of the property `coding-system' can be a symbol of another
3327    subsidiary coding-system.  In that case, Emacs gets coding-vector
3328    from that symbol.
3329
3330    `element[0]' contains information to be set in `coding->type'.  The
3331    value and its meaning is as follows:
3332
3333    0 -- coding_type_emacs_mule
3334    1 -- coding_type_sjis
3335    2 -- coding_type_iso2022
3336    3 -- coding_type_big5
3337    4 -- coding_type_ccl encoder/decoder written in CCL
3338    nil -- coding_type_no_conversion
3339    t -- coding_type_undecided (automatic conversion on decoding,
3340                                no-conversion on encoding)
3341
3342    `element[4]' contains information to be set in `coding->flags' and
3343    `coding->spec'.  The meaning varies by `coding->type'.
3344
3345    If `coding->type' is `coding_type_iso2022', element[4] is a vector
   of length 32 (of which the first 17 sub-elements are used now).
3347    Meanings of these sub-elements are:
3348
3349    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
3350         If the value is an integer of valid charset, the charset is
3351         assumed to be designated to graphic register N initially.
3352
3353         If the value is minus, it is a minus value of charset which
3354         reserves graphic register N, which means that the charset is
3355         not designated initially but should be designated to graphic
3356         register N just before encoding a character in that charset.
3357
3358         If the value is nil, graphic register N is never used on
3359         encoding.
3360
   sub-element[N] where N is 4 through 16: to be set in `coding->flags'
3362         Each value takes t or nil.  See the section ISO2022 of
3363         `coding.h' for more information.
3364
3365    If `coding->type' is `coding_type_big5', element[4] is t to denote
3366    BIG5-ETen or nil to denote BIG5-HKU.
3367
3368    If `coding->type' takes the other value, element[4] is ignored.
3369
3370    Emacs Lisp's coding systems also carry information about format of
3371    end-of-line in a value of property `eol-type'.  If the value is
3372    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
3373    means CODING_EOL_CR.  If it is not integer, it should be a vector
3374    of subsidiary coding systems of which property `eol-type' has one
3375    of the above values.
3376
3377 */
3378
3379 /* Extract information for decoding/encoding from CODING_SYSTEM_SYMBOL
3380    and set it in CODING.  If CODING_SYSTEM_SYMBOL is invalid, CODING
3381    is setup so that no conversion is necessary and return -1, else
3382    return 0.  */
3383
3384 int
3385 setup_coding_system (coding_system, coding)
3386      Lisp_Object coding_system;
3387      struct coding_system *coding;
3388 {
3389   Lisp_Object coding_spec, coding_type, eol_type, plist;
3390   Lisp_Object val;
3391
3392   /* At first, zero clear all members.  */
3393   bzero (coding, sizeof (struct coding_system));
3394
3395   /* Initialize some fields required for all kinds of coding systems.  */
3396   coding->symbol = coding_system;
3397   coding->heading_ascii = -1;
3398   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
3399   coding->composing = COMPOSITION_DISABLED;
3400   coding->cmp_data = NULL;
3401
3402   if (NILP (coding_system))
3403     goto label_invalid_coding_system;
3404
3405   coding_spec = Fget (coding_system, Qcoding_system);
3406
3407   if (!VECTORP (coding_spec)
3408       || XVECTOR (coding_spec)->size != 5
3409       || !CONSP (XVECTOR (coding_spec)->contents[3]))
3410     goto label_invalid_coding_system;
3411
3412   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
3413   if (VECTORP (eol_type))
3414     {
3415       coding->eol_type = CODING_EOL_UNDECIDED;
3416       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
3417     }
3418   else if (XFASTINT (eol_type) == 1)
3419     {
3420       coding->eol_type = CODING_EOL_CRLF;
3421       coding->common_flags
3422         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3423     }
3424   else if (XFASTINT (eol_type) == 2)
3425     {
3426       coding->eol_type = CODING_EOL_CR;
3427       coding->common_flags
3428         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3429     }
3430   else
3431     coding->eol_type = CODING_EOL_LF;
3432
3433   coding_type = XVECTOR (coding_spec)->contents[0];
3434   /* Try short cut.  */
3435   if (SYMBOLP (coding_type))
3436     {
3437       if (EQ (coding_type, Qt))
3438         {
3439           coding->type = coding_type_undecided;
3440           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
3441         }
3442       else
3443         coding->type = coding_type_no_conversion;
3444       /* Initialize this member.  Any thing other than
3445          CODING_CATEGORY_IDX_UTF_16_BE and
3446          CODING_CATEGORY_IDX_UTF_16_LE are ok because they have
3447          special treatment in detect_eol.  */
3448       coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
3449
3450       return 0;
3451     }
3452
3453   /* Get values of coding system properties:
3454      `post-read-conversion', `pre-write-conversion',
3455      `translation-table-for-decode', `translation-table-for-encode'.  */
3456   plist = XVECTOR (coding_spec)->contents[3];
3457   /* Pre & post conversion functions should be disabled if
3458      inhibit_eol_conversion is nonzero.  This is the case that a code
3459      conversion function is called while those functions are running.  */
3460   if (! inhibit_pre_post_conversion)
3461     {
3462       coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
3463       coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
3464     }
3465   val = Fplist_get (plist, Qtranslation_table_for_decode);
3466   if (SYMBOLP (val))
3467     val = Fget (val, Qtranslation_table_for_decode);
3468   coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
3469   val = Fplist_get (plist, Qtranslation_table_for_encode);
3470   if (SYMBOLP (val))
3471     val = Fget (val, Qtranslation_table_for_encode);
3472   coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
3473   val = Fplist_get (plist, Qcoding_category);
3474   if (!NILP (val))
3475     {
3476       val = Fget (val, Qcoding_category_index);
3477       if (INTEGERP (val))
3478         coding->category_idx = XINT (val);
3479       else
3480         goto label_invalid_coding_system;
3481     }
3482   else
3483     goto label_invalid_coding_system;
3484
3485   /* If the coding system has non-nil `composition' property, enable
3486      composition handling.  */
3487   val = Fplist_get (plist, Qcomposition);
3488   if (!NILP (val))
3489     coding->composing = COMPOSITION_NO;
3490
3491   switch (XFASTINT (coding_type))
3492     {
3493     case 0:
3494       coding->type = coding_type_emacs_mule;
3495       coding->common_flags
3496         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3497       coding->composing = COMPOSITION_NO;
3498       if (!NILP (coding->post_read_conversion))
3499         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
3500       if (!NILP (coding->pre_write_conversion))
3501         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
3502       break;
3503
3504     case 1:
3505       coding->type = coding_type_sjis;
3506       coding->common_flags
3507         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3508       break;
3509
3510     case 2:
3511       coding->type = coding_type_iso2022;
3512       coding->common_flags
3513         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3514       {
3515         Lisp_Object val, temp;
3516         Lisp_Object *flags;
3517         int i, charset, reg_bits = 0;
3518
3519         val = XVECTOR (coding_spec)->contents[4];
3520
3521         if (!VECTORP (val) || XVECTOR (val)->size != 32)
3522           goto label_invalid_coding_system;
3523
3524         flags = XVECTOR (val)->contents;
3525         coding->flags
3526           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
3527              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
3528              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
3529              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
3530              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
3531              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
3532              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
3533              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
3534              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
3535              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
3536              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
3537              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
3538              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
3539              );
3540
3541         /* Invoke graphic register 0 to plane 0.  */
3542         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
3543         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
3544         CODING_SPEC_ISO_INVOCATION (coding, 1)
3545           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
3546         /* Not single shifting at first.  */
3547         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
3548         /* Beginning of buffer should also be regarded as bol. */
3549         CODING_SPEC_ISO_BOL (coding) = 1;
3550
3551         for (charset = 0; charset <= MAX_CHARSET; charset++)
3552           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
3553         val = Vcharset_revision_alist;
3554         while (CONSP (val))
3555           {
3556             charset = get_charset_id (Fcar_safe (XCAR (val)));
3557             if (charset >= 0
3558                 && (temp = Fcdr_safe (XCAR (val)), INTEGERP (temp))
3559                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
3560               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
3561             val = XCDR (val);
3562           }
3563
3564         /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
3565            FLAGS[REG] can be one of below:
3566                 integer CHARSET: CHARSET occupies register I,
3567                 t: designate nothing to REG initially, but can be used
3568                   by any charsets,
3569                 list of integer, nil, or t: designate the first
3570                   element (if integer) to REG initially, the remaining
3571                   elements (if integer) is designated to REG on request,
3572                   if an element is t, REG can be used by any charsets,
3573                 nil: REG is never used.  */
3574         for (charset = 0; charset <= MAX_CHARSET; charset++)
3575           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3576             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
3577         for (i = 0; i < 4; i++)
3578           {
3579             if (INTEGERP (flags[i])
3580                 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
3581                 || (charset = get_charset_id (flags[i])) >= 0)
3582               {
3583                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3584                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
3585               }
3586             else if (EQ (flags[i], Qt))
3587               {
3588                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3589                 reg_bits |= 1 << i;
3590                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3591               }
3592             else if (CONSP (flags[i]))
3593               {
3594                 Lisp_Object tail;
3595                 tail = flags[i];
3596
3597                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3598                 if (INTEGERP (XCAR (tail))
3599                     && (charset = XINT (XCAR (tail)),
3600                         CHARSET_VALID_P (charset))
3601                     || (charset = get_charset_id (XCAR (tail))) >= 0)
3602                   {
3603                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3604                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
3605                   }
3606                 else
3607                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3608                 tail = XCDR (tail);
3609                 while (CONSP (tail))
3610                   {
3611                     if (INTEGERP (XCAR (tail))
3612                         && (charset = XINT (XCAR (tail)),
3613                             CHARSET_VALID_P (charset))
3614                         || (charset = get_charset_id (XCAR (tail))) >= 0)
3615                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3616                         = i;
3617                     else if (EQ (XCAR (tail), Qt))
3618                       reg_bits |= 1 << i;
3619                     tail = XCDR (tail);
3620                   }
3621               }
3622             else
3623               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3624
3625             CODING_SPEC_ISO_DESIGNATION (coding, i)
3626               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
3627           }
3628
3629         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
3630           {
3631             /* REG 1 can be used only by locking shift in 7-bit env.  */
3632             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
3633               reg_bits &= ~2;
3634             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3635               /* Without any shifting, only REG 0 and 1 can be used.  */
3636               reg_bits &= 3;
3637           }
3638
3639         if (reg_bits)
3640           for (charset = 0; charset <= MAX_CHARSET; charset++)
3641             {
3642               if (CHARSET_DEFINED_P (charset)
3643                   && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3644                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
3645                 {
3646                   /* There exist some default graphic registers to be
3647                      used by CHARSET.  */
3648
3649                   /* We had better avoid designating a charset of
3650                      CHARS96 to REG 0 as far as possible.  */
3651                   if (CHARSET_CHARS (charset) == 96)
3652                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3653                       = (reg_bits & 2
3654                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3655                   else
3656                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3657                       = (reg_bits & 1
3658                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3659                 }
3660             }
3661       }
3662       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3663       coding->spec.iso2022.last_invalid_designation_register = -1;
3664       break;
3665
3666     case 3:
3667       coding->type = coding_type_big5;
3668       coding->common_flags
3669         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3670       coding->flags
3671         = (NILP (XVECTOR (coding_spec)->contents[4])
3672            ? CODING_FLAG_BIG5_HKU
3673            : CODING_FLAG_BIG5_ETEN);
3674       break;
3675
3676     case 4:
3677       coding->type = coding_type_ccl;
3678       coding->common_flags
3679         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3680       {
3681         val = XVECTOR (coding_spec)->contents[4];
3682         if (! CONSP (val)
3683             || setup_ccl_program (&(coding->spec.ccl.decoder),
3684                                   XCAR (val)) < 0
3685             || setup_ccl_program (&(coding->spec.ccl.encoder),
3686                                   XCDR (val)) < 0)
3687           goto label_invalid_coding_system;
3688
3689         bzero (coding->spec.ccl.valid_codes, 256);
3690         val = Fplist_get (plist, Qvalid_codes);
3691         if (CONSP (val))
3692           {
3693             Lisp_Object this;
3694
3695             for (; CONSP (val); val = XCDR (val))
3696               {
3697                 this = XCAR (val);
3698                 if (INTEGERP (this)
3699                     && XINT (this) >= 0 && XINT (this) < 256)
3700                   coding->spec.ccl.valid_codes[XINT (this)] = 1;
3701                 else if (CONSP (this)
3702                          && INTEGERP (XCAR (this))
3703                          && INTEGERP (XCDR (this)))
3704                   {
3705                     int start = XINT (XCAR (this));
3706                     int end = XINT (XCDR (this));
3707
3708                     if (start >= 0 && start <= end && end < 256)
3709                       while (start <= end)
3710                         coding->spec.ccl.valid_codes[start++] = 1;
3711                   }
3712               }
3713           }
3714       }
3715       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3716       coding->spec.ccl.cr_carryover = 0;
3717       coding->spec.ccl.eight_bit_carryover[0] = 0;
3718       break;
3719
3720     case 5:
3721       coding->type = coding_type_raw_text;
3722       break;
3723
3724     default:
3725       goto label_invalid_coding_system;
3726     }
3727   return 0;
3728
3729  label_invalid_coding_system:
3730   coding->type = coding_type_no_conversion;
3731   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3732   coding->common_flags = 0;
3733   coding->eol_type = CODING_EOL_LF;
3734   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3735   return -1;
3736 }
3737
3738 /* Free memory blocks allocated for storing composition information.  */
3739
3740 void
3741 coding_free_composition_data (coding)
3742      struct coding_system *coding;
3743 {
3744   struct composition_data *cmp_data = coding->cmp_data, *next;
3745
3746   if (!cmp_data)
3747     return;
3748   /* Memory blocks are chained.  At first, rewind to the first, then,
3749      free blocks one by one.  */
3750   while (cmp_data->prev)
3751     cmp_data = cmp_data->prev;
3752   while (cmp_data)
3753     {
3754       next = cmp_data->next;
3755       xfree (cmp_data);
3756       cmp_data = next;
3757     }
3758   coding->cmp_data = NULL;
3759 }
3760
3761 /* Set `char_offset' member of all memory blocks pointed by
3762    coding->cmp_data to POS.  */
3763
3764 void
3765 coding_adjust_composition_offset (coding, pos)
3766      struct coding_system *coding;
3767      int pos;
3768 {
3769   struct composition_data *cmp_data;
3770
3771   for (cmp_data = coding->cmp_data; cmp_data; cmp_data = cmp_data->next)
3772     cmp_data->char_offset = pos;
3773 }
3774
3775 /* Setup raw-text or one of its subsidiaries in the structure
3776    coding_system CODING according to the already setup value eol_type
3777    in CODING.  CODING should be setup for some coding system in
3778    advance.  */
3779
3780 void
3781 setup_raw_text_coding_system (coding)
3782      struct coding_system *coding;
3783 {
3784   if (coding->type != coding_type_raw_text)
3785     {
3786       coding->symbol = Qraw_text;
3787       coding->type = coding_type_raw_text;
3788       if (coding->eol_type != CODING_EOL_UNDECIDED)
3789         {
3790           Lisp_Object subsidiaries;
3791           subsidiaries = Fget (Qraw_text, Qeol_type);
3792
3793           if (VECTORP (subsidiaries)
3794               && XVECTOR (subsidiaries)->size == 3)
3795             coding->symbol
3796               = XVECTOR (subsidiaries)->contents[coding->eol_type];
3797         }
3798       setup_coding_system (coding->symbol, coding);
3799     }
3800   return;
3801 }
3802
3803 /* Emacs has a mechanism to automatically detect a coding system if it
3804    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
3805    it's impossible to distinguish some coding systems accurately
3806    because they use the same range of codes.  So, at first, coding
3807    systems are categorized into the following categories:
3808
3809    o coding-category-emacs-mule
3810
3811         The category for a coding system which has the same code range
3812         as Emacs' internal format.  Assigned the coding-system (Lisp
3813         symbol) `emacs-mule' by default.
3814
3815    o coding-category-sjis
3816
3817         The category for a coding system which has the same code range
3818         as SJIS.  Assigned the coding-system (Lisp
3819         symbol) `japanese-shift-jis' by default.
3820
3821    o coding-category-iso-7
3822
3823         The category for a coding system which has the same code range
3824         as ISO2022 of 7-bit environment.  This doesn't use any locking
3825         shift and single shift functions.  This can encode/decode all
3826         charsets.  Assigned the coding-system (Lisp symbol)
3827         `iso-2022-7bit' by default.
3828
3829    o coding-category-iso-7-tight
3830
3831         Same as coding-category-iso-7 except that this can
3832         encode/decode only the specified charsets.
3833
3834    o coding-category-iso-8-1
3835
3836         The category for a coding system which has the same code range
3837         as ISO2022 of 8-bit environment and graphic plane 1 used only
3838         for DIMENSION1 charset.  This doesn't use any locking shift
3839         and single shift functions.  Assigned the coding-system (Lisp
3840         symbol) `iso-latin-1' by default.
3841
3842    o coding-category-iso-8-2
3843
3844         The category for a coding system which has the same code range
3845         as ISO2022 of 8-bit environment and graphic plane 1 used only
3846         for DIMENSION2 charset.  This doesn't use any locking shift
3847         and single shift functions.  Assigned the coding-system (Lisp
3848         symbol) `japanese-iso-8bit' by default.
3849
3850    o coding-category-iso-7-else
3851
3852         The category for a coding system which has the same code range
3853         as ISO2022 of 7-bit environment but uses locking shift or
3854         single shift functions.  Assigned the coding-system (Lisp
3855         symbol) `iso-2022-7bit-lock' by default.
3856
3857    o coding-category-iso-8-else
3858
3859         The category for a coding system which has the same code range
3860         as ISO2022 of 8-bit environment but uses locking shift or
3861         single shift functions.  Assigned the coding-system (Lisp
3862         symbol) `iso-2022-8bit-ss2' by default.
3863
3864    o coding-category-big5
3865
3866         The category for a coding system which has the same code range
3867         as BIG5.  Assigned the coding-system (Lisp symbol)
3868         `cn-big5' by default.
3869
3870    o coding-category-utf-8
3871
3872         The category for a coding system which has the same code range
3873         as UTF-8 (cf. RFC2279).  Assigned the coding-system (Lisp
3874         symbol) `utf-8' by default.
3875
3876    o coding-category-utf-16-be
3877
3878         The category for a coding system in which a text has an
3879         Unicode signature (cf. Unicode Standard) in the order of BIG
3880         endian at the head.  Assigned the coding-system (Lisp symbol)
3881         `utf-16-be' by default.
3882
3883    o coding-category-utf-16-le
3884
3885         The category for a coding system in which a text has an
3886         Unicode signature (cf. Unicode Standard) in the order of
3887         LITTLE endian at the head.  Assigned the coding-system (Lisp
3888         symbol) `utf-16-le' by default.
3889
3890    o coding-category-ccl
3891
3892         The category for a coding system of which encoder/decoder is
3893         written in CCL programs.  The default value is nil, i.e., no
3894         coding system is assigned.
3895
3896    o coding-category-binary
3897
3898         The category for a coding system not categorized in any of the
3899         above.  Assigned the coding-system (Lisp symbol)
3900         `no-conversion' by default.
3901
3902    Each of them is a Lisp symbol and the value is an actual
3903    `coding-system' (this is also a Lisp symbol) assigned by a user.
3904    What Emacs does actually is to detect a category of coding system.
3905    Then, it uses a `coding-system' assigned to it.  If Emacs can't
3906    decide a single possible category, it selects a category of the
3907    highest priority.  Priorities of categories are also specified by a
3908    user in a Lisp variable `coding-category-list'.
3909
3910 */
3911
/* Table indexed by byte value.  A nonzero entry means that
   detect_coding_mask steps over that byte while scanning the head of
   a text.  detect_coding_mask clears the entries for ISO_CODE_SO,
   ISO_CODE_SI and ISO_CODE_ESC on entry, and sets them again when
   such a byte turns out not to start valid ISO2022 code.  */
static int ascii_skip_code[256];
3914
3915 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
3916    If it detects possible coding systems, return an integer in which
3917    appropriate flag bits are set.  Flag bits are defined by macros
3918    CODING_CATEGORY_MASK_XXX in `coding.h'.  If PRIORITIES is non-NULL,
3919    it should point the table `coding_priorities'.  In that case, only
3920    the flag bit for a coding system of the highest priority is set in
3921    the returned value.  If MULTIBYTEP is nonzero, 8-bit codes of the
3922    range 0x80..0x9F are in multibyte form.
3923
3924    How many ASCII characters are at the head is returned as *SKIP.  */
3925
3926 static int
3927 detect_coding_mask (source, src_bytes, priorities, skip, multibytep)
3928      unsigned char *source;
3929      int src_bytes, *priorities, *skip;
3930      int multibytep;
3931 {
3932   register unsigned char c;
3933   unsigned char *src = source, *src_end = source + src_bytes;
3934   unsigned int mask, utf16_examined_p, iso2022_examined_p;
3935   int i;
3936
3937   /* At first, skip all ASCII characters and control characters except
3938      for three ISO2022 specific control characters.  */
3939   ascii_skip_code[ISO_CODE_SO] = 0;
3940   ascii_skip_code[ISO_CODE_SI] = 0;
3941   ascii_skip_code[ISO_CODE_ESC] = 0;
3942
3943  label_loop_detect_coding:
3944   while (src < src_end && ascii_skip_code[*src]) src++;
3945   *skip = src - source;
3946
3947   if (src >= src_end)
3948     /* We found nothing other than ASCII.  There's nothing to do.  */
3949     return 0;
3950
3951   c = *src;
3952   /* The text seems to be encoded in some multilingual coding system.
3953      Now, try to find in which coding system the text is encoded.  */
3954   if (c < 0x80)
3955     {
3956       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
3957       /* C is an ISO2022 specific control code of C0.  */
3958       mask = detect_coding_iso2022 (src, src_end, multibytep);
3959       if (mask == 0)
3960         {
3961           /* No valid ISO2022 code follows C.  Try again.  */
3962           src++;
3963           if (c == ISO_CODE_ESC)
3964             ascii_skip_code[ISO_CODE_ESC] = 1;
3965           else
3966             ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
3967           goto label_loop_detect_coding;
3968         }
3969       if (priorities)
3970         {
3971           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3972             {
3973               if (mask & priorities[i])
3974                 return priorities[i];
3975             }
3976           return CODING_CATEGORY_MASK_RAW_TEXT;
3977         }
3978     }
3979   else
3980     {
3981       int try;
3982
3983       if (multibytep && c == LEADING_CODE_8_BIT_CONTROL)
3984         c = src[1] - 0x20;
3985
3986       if (c < 0xA0)
3987         {
3988           /* C is the first byte of SJIS character code,
3989              or a leading-code of Emacs' internal format (emacs-mule),
3990              or the first byte of UTF-16.  */
3991           try = (CODING_CATEGORY_MASK_SJIS
3992                   | CODING_CATEGORY_MASK_EMACS_MULE
3993                   | CODING_CATEGORY_MASK_UTF_16_BE
3994                   | CODING_CATEGORY_MASK_UTF_16_LE);
3995
3996           /* Or, if C is a special latin extra code,
3997              or is an ISO2022 specific control code of C1 (SS2 or SS3),
3998              or is an ISO2022 control-sequence-introducer (CSI),
3999              we should also consider the possibility of ISO2022 codings.  */
4000           if ((VECTORP (Vlatin_extra_code_table)
4001                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
4002               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
4003               || (c == ISO_CODE_CSI
4004                   && (src < src_end
4005                       && (*src == ']'
4006                           || ((*src == '0' || *src == '1' || *src == '2')
4007                               && src + 1 < src_end
4008                               && src[1] == ']')))))
4009             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
4010                      | CODING_CATEGORY_MASK_ISO_8BIT);
4011         }
4012       else
4013         /* C is a character of ISO2022 in graphic plane right,
4014            or a SJIS's 1-byte character code (i.e. JISX0201),
4015            or the first byte of BIG5's 2-byte code,
4016            or the first byte of UTF-8/16.  */
4017         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
4018                 | CODING_CATEGORY_MASK_ISO_8BIT
4019                 | CODING_CATEGORY_MASK_SJIS
4020                 | CODING_CATEGORY_MASK_BIG5
4021                 | CODING_CATEGORY_MASK_UTF_8
4022                 | CODING_CATEGORY_MASK_UTF_16_BE
4023                 | CODING_CATEGORY_MASK_UTF_16_LE);
4024
4025       /* Or, we may have to consider the possibility of CCL.  */
4026       if (coding_system_table[CODING_CATEGORY_IDX_CCL]
4027           && (coding_system_table[CODING_CATEGORY_IDX_CCL]
4028               ->spec.ccl.valid_codes)[c])
4029         try |= CODING_CATEGORY_MASK_CCL;
4030
4031       mask = 0;
4032       utf16_examined_p = iso2022_examined_p = 0;
4033       if (priorities)
4034         {
4035           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
4036             {
4037               if (!iso2022_examined_p
4038                   && (priorities[i] & try & CODING_CATEGORY_MASK_ISO))
4039                 {
4040                   mask |= detect_coding_iso2022 (src, src_end);
4041                   iso2022_examined_p = 1;
4042                 }
4043               else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
4044                 mask |= detect_coding_sjis (src, src_end, multibytep);
4045               else if (priorities[i] & try & CODING_CATEGORY_MASK_UTF_8)
4046                 mask |= detect_coding_utf_8 (src, src_end, multibytep);
4047               else if (!utf16_examined_p
4048                        && (priorities[i] & try &
4049                            CODING_CATEGORY_MASK_UTF_16_BE_LE))
4050                 {
4051                   mask |= detect_coding_utf_16 (src, src_end, multibytep);
4052                   utf16_examined_p = 1;
4053                 }
4054               else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
4055                 mask |= detect_coding_big5 (src, src_end, multibytep);
4056               else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
4057                 mask |= detect_coding_emacs_mule (src, src_end, multibytep);
4058               else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
4059                 mask |= detect_coding_ccl (src, src_end, multibytep);
4060               else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
4061                 mask |= CODING_CATEGORY_MASK_RAW_TEXT;
4062               else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
4063                 mask |= CODING_CATEGORY_MASK_BINARY;
4064               if (mask & priorities[i])
4065                 return priorities[i];
4066             }
4067           return CODING_CATEGORY_MASK_RAW_TEXT;
4068         }
4069       if (try & CODING_CATEGORY_MASK_ISO)
4070         mask |= detect_coding_iso2022 (src, src_end, multibytep);
4071       if (try & CODING_CATEGORY_MASK_SJIS)
4072         mask |= detect_coding_sjis (src, src_end, multibytep);
4073       if (try & CODING_CATEGORY_MASK_BIG5)
4074         mask |= detect_coding_big5 (src, src_end, multibytep);
4075       if (try & CODING_CATEGORY_MASK_UTF_8)
4076         mask |= detect_coding_utf_8 (src, src_end, multibytep);
4077       if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE)
4078         mask |= detect_coding_utf_16 (src, src_end, multibytep);
4079       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
4080         mask |= detect_coding_emacs_mule (src, src_end, multibytep);
4081       if (try & CODING_CATEGORY_MASK_CCL)
4082         mask |= detect_coding_ccl (src, src_end, multibytep);
4083     }
4084   return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
4085 }
4086
4087 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
4088    The information of the detected coding system is set in CODING.  */
4089
4090 void
4091 detect_coding (coding, src, src_bytes)
4092      struct coding_system *coding;
4093      unsigned char *src;
4094      int src_bytes;
4095 {
4096   unsigned int idx;
4097   int skip, mask;
4098   Lisp_Object val;
4099
4100   val = Vcoding_category_list;
4101   mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip,
4102                              coding->src_multibyte);
4103   coding->heading_ascii = skip;
4104
4105   if (!mask) return;
4106
4107   /* We found a single coding system of the highest priority in MASK.  */
4108   idx = 0;
4109   while (mask && ! (mask & 1)) mask >>= 1, idx++;
4110   if (! mask)
4111     idx = CODING_CATEGORY_IDX_RAW_TEXT;
4112
4113   val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[idx])->value;
4114
4115   if (coding->eol_type != CODING_EOL_UNDECIDED)
4116     {
4117       Lisp_Object tmp;
4118
4119       tmp = Fget (val, Qeol_type);
4120       if (VECTORP (tmp))
4121         val = XVECTOR (tmp)->contents[coding->eol_type];
4122     }
4123
4124   /* Setup this new coding system while preserving some slots.  */
4125   {
4126     int src_multibyte = coding->src_multibyte;
4127     int dst_multibyte = coding->dst_multibyte;
4128
4129     setup_coding_system (val, coding);
4130     coding->src_multibyte = src_multibyte;
4131     coding->dst_multibyte = dst_multibyte;
4132     coding->heading_ascii = skip;
4133   }
4134 }
4135
4136 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
4137    SOURCE is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
4138    CODING_EOL_CR, and CODING_EOL_UNDECIDED.
4139
4140    How many non-eol characters are at the head is returned as *SKIP.  */
4141
4142 #define MAX_EOL_CHECK_COUNT 3
4143
4144 static int
4145 detect_eol_type (source, src_bytes, skip)
4146      unsigned char *source;
4147      int src_bytes, *skip;
4148 {
4149   unsigned char *src = source, *src_end = src + src_bytes;
4150   unsigned char c;
4151   int total = 0;                /* How many end-of-lines are found so far.  */
4152   int eol_type = CODING_EOL_UNDECIDED;
4153   int this_eol_type;
4154
4155   *skip = 0;
4156
4157   while (src < src_end && total < MAX_EOL_CHECK_COUNT)
4158     {
4159       c = *src++;
4160       if (c == '\n' || c == '\r')
4161         {
4162           if (*skip == 0)
4163             *skip = src - 1 - source;
4164           total++;
4165           if (c == '\n')
4166             this_eol_type = CODING_EOL_LF;
4167           else if (src >= src_end || *src != '\n')
4168             this_eol_type = CODING_EOL_CR;
4169           else
4170             this_eol_type = CODING_EOL_CRLF, src++;
4171
4172           if (eol_type == CODING_EOL_UNDECIDED)
4173             /* This is the first end-of-line.  */
4174             eol_type = this_eol_type;
4175           else if (eol_type != this_eol_type)
4176             {
4177               /* The found type is different from what found before.  */
4178               eol_type = CODING_EOL_INCONSISTENT;
4179               break;
4180             }
4181         }
4182     }
4183
4184   if (*skip == 0)
4185     *skip = src_end - source;
4186   return eol_type;
4187 }
4188
4189 /* Like detect_eol_type, but detect EOL type in 2-octet
4190    big-endian/little-endian format for coding systems utf-16-be and
4191    utf-16-le.  */
4192
4193 static int
4194 detect_eol_type_in_2_octet_form (source, src_bytes, skip, big_endian_p)
4195      unsigned char *source;
4196      int src_bytes, *skip, big_endian_p;
4197 {
4198   unsigned char *src = source, *src_end = src + src_bytes;
4199   unsigned int c1, c2;
4200   int total = 0;                /* How many end-of-lines are found so far.  */
4201   int eol_type = CODING_EOL_UNDECIDED;
4202   int this_eol_type;
4203   int msb, lsb;
4204
4205   if (big_endian_p)
4206     msb = 0, lsb = 1;
4207   else
4208     msb = 1, lsb = 0;
4209
4210   *skip = 0;
4211
4212   while ((src + 1) < src_end && total < MAX_EOL_CHECK_COUNT)
4213     {
4214       c1 = (src[msb] << 8) | (src[lsb]);
4215       src += 2;
4216
4217       if (c1 == '\n' || c1 == '\r')
4218         {
4219           if (*skip == 0)
4220             *skip = src - 2 - source;
4221           total++;
4222           if (c1 == '\n')
4223             {
4224               this_eol_type = CODING_EOL_LF;
4225             }
4226           else
4227             {
4228               if ((src + 1) >= src_end)
4229                 {
4230                   this_eol_type = CODING_EOL_CR;
4231                 }
4232               else
4233                 {
4234                   c2 = (src[msb] << 8) | (src[lsb]);
4235                   if (c2 == '\n')
4236                     this_eol_type = CODING_EOL_CRLF, src += 2;
4237                   else
4238                     this_eol_type = CODING_EOL_CR;
4239                 }
4240             }
4241
4242           if (eol_type == CODING_EOL_UNDECIDED)
4243             /* This is the first end-of-line.  */
4244             eol_type = this_eol_type;
4245           else if (eol_type != this_eol_type)
4246             {
4247               /* The found type is different from what found before.  */
4248               eol_type = CODING_EOL_INCONSISTENT;
4249               break;
4250             }
4251         }
4252     }
4253
4254   if (*skip == 0)
4255     *skip = src_end - source;
4256   return eol_type;
4257 }
4258
4259 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
4260    is encoded.  If it detects an appropriate format of end-of-line, it
4261    sets the information in *CODING.  */
4262
4263 void
4264 detect_eol (coding, src, src_bytes)
4265      struct coding_system *coding;
4266      unsigned char *src;
4267      int src_bytes;
4268 {
4269   Lisp_Object val;
4270   int skip;
4271   int eol_type;
4272
4273   switch (coding->category_idx)
4274     {
4275     case CODING_CATEGORY_IDX_UTF_16_BE:
4276       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 1);
4277       break;
4278     case CODING_CATEGORY_IDX_UTF_16_LE:
4279       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 0);
4280       break;
4281     default:
4282       eol_type = detect_eol_type (src, src_bytes, &skip);
4283       break;
4284     }
4285
4286   if (coding->heading_ascii > skip)
4287     coding->heading_ascii = skip;
4288   else
4289     skip = coding->heading_ascii;
4290
4291   if (eol_type == CODING_EOL_UNDECIDED)
4292     return;
4293   if (eol_type == CODING_EOL_INCONSISTENT)
4294     {
4295 #if 0
4296       /* This code is suppressed until we find a better way to
4297          distinguish raw text file and binary file.  */
4298
4299       /* If we have already detected that the coding is raw-text, the
4300          coding should actually be no-conversion.  */
4301       if (coding->type == coding_type_raw_text)
4302         {
4303           setup_coding_system (Qno_conversion, coding);
4304           return;
4305         }
4306       /* Else, let's decode only text code anyway.  */
4307 #endif /* 0 */
4308       eol_type = CODING_EOL_LF;
4309     }
4310
4311   val = Fget (coding->symbol, Qeol_type);
4312   if (VECTORP (val) && XVECTOR (val)->size == 3)
4313     {
4314       int src_multibyte = coding->src_multibyte;
4315       int dst_multibyte = coding->dst_multibyte;
4316
4317       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
4318       coding->src_multibyte = src_multibyte;
4319       coding->dst_multibyte = dst_multibyte;
4320       coding->heading_ascii = skip;
4321     }
4322 }
4323
/* Fixed number of bytes added on top of the scaled source size when
   the size of a conversion buffer is computed.  */
#define CONVERSION_BUFFER_EXTRA_ROOM 256

/* Factor by which the source size is multiplied to get the size of a
   decoding buffer for CODING (a pointer to struct coding_system):
   3 for ISO2022, the decoder's own buf_magnification for CCL, and 2
   otherwise.  CODING is parenthesized so that any pointer expression
   can be passed; note that it may be evaluated more than once.  */
#define DECODING_BUFFER_MAG(coding)				\
  ((coding)->type == coding_type_iso2022			\
   ? 3								\
   : ((coding)->type == coding_type_ccl				\
      ? (coding)->spec.ccl.decoder.buf_magnification		\
      : 2))
4332
4333 /* Return maximum size (bytes) of a buffer enough for decoding
4334    SRC_BYTES of text encoded in CODING.  */
4335
4336 int
4337 decoding_buffer_size (coding, src_bytes)
4338      struct coding_system *coding;
4339      int src_bytes;
4340 {
4341   return (src_bytes * DECODING_BUFFER_MAG (coding)
4342           + CONVERSION_BUFFER_EXTRA_ROOM);
4343 }
4344
4345 /* Return maximum size (bytes) of a buffer enough for encoding
4346    SRC_BYTES of text to CODING.  */
4347
4348 int
4349 encoding_buffer_size (coding, src_bytes)
4350      struct coding_system *coding;
4351      int src_bytes;
4352 {
4353   int magnification;
4354
4355   if (coding->type == coding_type_ccl)
4356     magnification = coding->spec.ccl.encoder.buf_magnification;
4357   else if (CODING_REQUIRE_ENCODING (coding))
4358     magnification = 3;
4359   else
4360     magnification = 1;
4361
4362   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
4363 }
4364
/* Scratch buffer used while converting between coding systems.  It
   is set up by allocate_conversion_buffer, grown by
   extend_conversion_buffer and released by free_conversion_buffer.  */
struct conversion_buffer
{
  /* Number of bytes DATA points to.  */
  int size;
  /* 1 if DATA was obtained with alloca, 0 if it was obtained from
     the heap and therefore has to be freed.  */
  int on_stack;
  /* The storage itself.  */
  unsigned char *data;
};
4372
/* Don't use alloca for allocating memory space larger than this, lest
   we overflow the stack.  The value is parenthesized so that it
   stays a single operand wherever the macro is used.  */
#define MAX_ALLOCA (16*1024)

/* Allocate LEN bytes of memory for BUF (struct conversion_buffer):
   with alloca when LEN is below MAX_ALLOCA, with xmalloc otherwise.
   This must stay a macro, because memory obtained with alloca
   belongs to the function in which alloca is called.  BUF and LEN
   are evaluated more than once, so they must be free of side
   effects.  */
#define allocate_conversion_buffer(buf, len)		\
  do {							\
    if ((len) < MAX_ALLOCA)				\
      {							\
	(buf).data = (unsigned char *) alloca (len);	\
	(buf).on_stack = 1;				\
      }							\
    else						\
      {							\
	(buf).data = (unsigned char *) xmalloc (len);	\
	(buf).on_stack = 0;				\
      }							\
    (buf).size = (len);					\
  } while (0)
4392
4393 /* Double the allocated memory for *BUF.  */
4394 static void
4395 extend_conversion_buffer (buf)
4396      struct conversion_buffer *buf;
4397 {
4398   if (buf->on_stack)
4399     {
4400       unsigned char *save = buf->data;
4401       buf->data = (unsigned char *) xmalloc (buf->size * 2);
4402       bcopy (save, buf->data, buf->size);
4403       buf->on_stack = 0;
4404     }
4405   else
4406     {
4407       buf->data = (unsigned char *) xrealloc (buf->data, buf->size * 2);
4408     }
4409   buf->size *= 2;
4410 }
4411
4412 /* Free the allocated memory for BUF if it is not on stack.  */
4413 static void
4414 free_conversion_buffer (buf)
4415      struct conversion_buffer *buf;
4416 {
4417   if (!buf->on_stack)
4418     xfree (buf->data);
4419 }
4420
4421 int
4422 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
4423      struct coding_system *coding;
4424      unsigned char *source, *destination;
4425      int src_bytes, dst_bytes, encodep;
4426 {
4427   struct ccl_program *ccl
4428     = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
4429   unsigned char *dst = destination;
4430
4431   ccl->suppress_error = coding->suppress_error;
4432   ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4433   if (encodep)
4434     {
4435       /* On encoding, EOL format is converted within ccl_driver.  For
4436          that, setup proper information in the structure CCL.  */
4437       ccl->eol_type = coding->eol_type;
4438       if (ccl->eol_type ==CODING_EOL_UNDECIDED)
4439         ccl->eol_type = CODING_EOL_LF;
4440       ccl->cr_consumed = coding->spec.ccl.cr_carryover;
4441     }
4442   ccl->multibyte = coding->src_multibyte;
4443   if (coding->spec.ccl.eight_bit_carryover[0] != 0)
4444     {
4445       /* Move carryover bytes to DESTINATION.  */
4446       unsigned char *p = coding->spec.ccl.eight_bit_carryover;
4447       while (*p)
4448         *dst++ = *p++;
4449       coding->spec.ccl.eight_bit_carryover[0] = 0;
4450       if (dst_bytes)
4451         dst_bytes -= dst - destination;
4452     }
4453
4454   coding->produced = (ccl_driver (ccl, source, dst, src_bytes, dst_bytes,
4455                                   &(coding->consumed))
4456                       + dst - destination);
4457
4458   if (encodep)
4459     {
4460       coding->produced_char = coding->produced;
4461       coding->spec.ccl.cr_carryover = ccl->cr_consumed;
4462     }
4463   else if (!ccl->eight_bit_control)
4464     {
4465       /* The produced bytes forms a valid multibyte sequence. */
4466       coding->produced_char
4467         = multibyte_chars_in_text (destination, coding->produced);
4468       coding->spec.ccl.eight_bit_carryover[0] = 0;
4469     }
4470   else
4471     {
4472       /* On decoding, the destination should always multibyte.  But,
4473          CCL program might have been generated an invalid multibyte
4474          sequence.  Here we make such a sequence valid as
4475          multibyte.  */
4476       int bytes
4477         = dst_bytes ? dst_bytes : source + coding->consumed - destination;
4478
4479       if ((coding->consumed < src_bytes
4480            || !ccl->last_block)
4481           && coding->produced >= 1
4482           && destination[coding->produced - 1] >= 0x80)
4483         {
4484           /* We should not convert the tailing 8-bit codes to
4485              multibyte form even if they doesn't form a valid
4486              multibyte sequence.  They may form a valid sequence in
4487              the next call.  */
4488           int carryover = 0;
4489
4490           if (destination[coding->produced - 1] < 0xA0)
4491             carryover = 1;
4492           else if (coding->produced >= 2)
4493             {
4494               if (destination[coding->produced - 2] >= 0x80)
4495                 {
4496                   if (destination[coding->produced - 2] < 0xA0)
4497                     carryover = 2;
4498                   else if (coding->produced >= 3
4499                            && destination[coding->produced - 3] >= 0x80
4500                            && destination[coding->produced - 3] < 0xA0)
4501                     carryover = 3;
4502                 }
4503             }
4504           if (carryover > 0)
4505             {
4506               BCOPY_SHORT (destination + coding->produced - carryover,
4507                            coding->spec.ccl.eight_bit_carryover,
4508                            carryover);
4509               coding->spec.ccl.eight_bit_carryover[carryover] = 0;
4510               coding->produced -= carryover;
4511             }
4512         }
4513       coding->produced = str_as_multibyte (destination, bytes,
4514                                            coding->produced,
4515                                            &(coding->produced_char));
4516     }
4517
4518   switch (ccl->status)
4519     {
4520     case CCL_STAT_SUSPEND_BY_SRC:
4521       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
4522       break;
4523     case CCL_STAT_SUSPEND_BY_DST:
4524       coding->result = CODING_FINISH_INSUFFICIENT_DST;
4525       break;
4526     case CCL_STAT_QUIT:
4527     case CCL_STAT_INVALID_CMD:
4528       coding->result = CODING_FINISH_INTERRUPT;
4529       break;
4530     default:
4531       coding->result = CODING_FINISH_NORMAL;
4532       break;
4533     }
4534   return coding->result;
4535 }
4536
4537 /* Decode EOL format of the text at PTR of BYTES length destructively
4538    according to CODING->eol_type.  This is called after the CCL
4539    program produced a decoded text at PTR.  If we do CRLF->LF
4540    conversion, update CODING->produced and CODING->produced_char.  */
4541
4542 static void
4543 decode_eol_post_ccl (coding, ptr, bytes)
4544      struct coding_system *coding;
4545      unsigned char *ptr;
4546      int bytes;
4547 {
4548   Lisp_Object val, saved_coding_symbol;
4549   unsigned char *pend = ptr + bytes;
4550   int dummy;
4551
4552   /* Remember the current coding system symbol.  We set it back when
4553      an inconsistent EOL is found so that `last-coding-system-used' is
4554      set to the coding system that doesn't specify EOL conversion.  */
4555   saved_coding_symbol = coding->symbol;
4556
4557   coding->spec.ccl.cr_carryover = 0;
4558   if (coding->eol_type == CODING_EOL_UNDECIDED)
4559     {
4560       /* Here, to avoid the call of setup_coding_system, we directly
4561          call detect_eol_type.  */
4562       coding->eol_type = detect_eol_type (ptr, bytes, &dummy);
4563       if (coding->eol_type == CODING_EOL_INCONSISTENT)
4564         coding->eol_type = CODING_EOL_LF;
4565       if (coding->eol_type != CODING_EOL_UNDECIDED)
4566         {
4567           val = Fget (coding->symbol, Qeol_type);
4568           if (VECTORP (val) && XVECTOR (val)->size == 3)
4569             coding->symbol = XVECTOR (val)->contents[coding->eol_type];
4570         }
4571       coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4572     }
4573
4574   if (coding->eol_type == CODING_EOL_LF
4575       || coding->eol_type == CODING_EOL_UNDECIDED)
4576     {
4577       /* We have nothing to do.  */
4578       ptr = pend;
4579     }
4580   else if (coding->eol_type == CODING_EOL_CRLF)
4581     {
4582       unsigned char *pstart = ptr, *p = ptr;
4583
4584       if (! (coding->mode & CODING_MODE_LAST_BLOCK)
4585           && *(pend - 1) == '\r')
4586         {
4587           /* If the last character is CR, we can't handle it here
4588              because LF will be in the not-yet-decoded source text.
4589              Recorded that the CR is not yet processed.  */
4590           coding->spec.ccl.cr_carryover = 1;
4591           coding->produced--;
4592           coding->produced_char--;
4593           pend--;
4594         }
4595       while (ptr < pend)
4596         {
4597           if (*ptr == '\r')
4598             {
4599               if (ptr + 1 < pend && *(ptr + 1) == '\n')
4600                 {
4601                   *p++ = '\n';
4602                   ptr += 2;
4603                 }
4604               else
4605                 {
4606                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4607                     goto undo_eol_conversion;
4608                   *p++ = *ptr++;
4609                 }
4610             }
4611           else if (*ptr == '\n'
4612                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4613             goto undo_eol_conversion;
4614           else
4615             *p++ = *ptr++;
4616           continue;
4617
4618         undo_eol_conversion:
4619           /* We have faced with inconsistent EOL format at PTR.
4620              Convert all LFs before PTR back to CRLFs.  */
4621           for (p--, ptr--; p >= pstart; p--)
4622             {
4623               if (*p == '\n')
4624                 *ptr-- = '\n', *ptr-- = '\r';
4625               else
4626                 *ptr-- = *p;
4627             }
4628           /*  If carryover is recorded, cancel it because we don't
4629               convert CRLF anymore.  */
4630           if (coding->spec.ccl.cr_carryover)
4631             {
4632               coding->spec.ccl.cr_carryover = 0;
4633               coding->produced++;
4634               coding->produced_char++;
4635               pend++;
4636             }
4637           p = ptr = pend;
4638           coding->eol_type = CODING_EOL_LF;
4639           coding->symbol = saved_coding_symbol;
4640         }
4641       if (p < pend)
4642         {
4643           /* As each two-byte sequence CRLF was converted to LF, (PEND
4644              - P) is the number of deleted characters.  */
4645           coding->produced -= pend - p;
4646           coding->produced_char -= pend - p;
4647         }
4648     }
4649   else                  /* i.e. coding->eol_type == CODING_EOL_CR */
4650     {
4651       unsigned char *p = ptr;
4652
4653       for (; ptr < pend; ptr++)
4654         {
4655           if (*ptr == '\r')
4656             *ptr = '\n';
4657           else if (*ptr == '\n'
4658                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4659             {
4660               for (; p < ptr; p++)
4661                 {
4662                   if (*p == '\n')
4663                     *p = '\r';
4664                 }
4665               ptr = pend;
4666               coding->eol_type = CODING_EOL_LF;
4667               coding->symbol = saved_coding_symbol;
4668             }
4669         }
4670     }
4671 }
4672
4673 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
4674    decoding, it may detect coding system and format of end-of-line if
4675    those are not yet decided.  The source should be unibyte, the
4676    result is multibyte if CODING->dst_multibyte is nonzero, else
4677    unibyte.  */
4678
4679 int
4680 decode_coding (coding, source, destination, src_bytes, dst_bytes)
4681      struct coding_system *coding;
4682      unsigned char *source, *destination;
4683      int src_bytes, dst_bytes;
4684 {
4685   if (coding->type == coding_type_undecided)
4686     detect_coding (coding, source, src_bytes);
4687
4688   if (coding->eol_type == CODING_EOL_UNDECIDED
4689       && coding->type != coding_type_ccl)
4690     {
4691       detect_eol (coding, source, src_bytes);
4692       /* We had better recover the original eol format if we
4693          encounter an inconsistent eol format while decoding.  */
4694       coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4695     }
4696
4697   coding->produced = coding->produced_char = 0;
4698   coding->consumed = coding->consumed_char = 0;
4699   coding->errors = 0;
4700   coding->result = CODING_FINISH_NORMAL;
4701
4702   switch (coding->type)
4703     {
4704     case coding_type_sjis:
4705       decode_coding_sjis_big5 (coding, source, destination,
4706                                src_bytes, dst_bytes, 1);
4707       break;
4708
4709     case coding_type_iso2022:
4710       decode_coding_iso2022 (coding, source, destination,
4711                              src_bytes, dst_bytes);
4712       break;
4713
4714     case coding_type_big5:
4715       decode_coding_sjis_big5 (coding, source, destination,
4716                                src_bytes, dst_bytes, 0);
4717       break;
4718
4719     case coding_type_emacs_mule:
4720       decode_coding_emacs_mule (coding, source, destination,
4721                                 src_bytes, dst_bytes);
4722       break;
4723
4724     case coding_type_ccl:
4725       if (coding->spec.ccl.cr_carryover)
4726         {
4727           /* Set the CR which is not processed by the previous call of
4728              decode_eol_post_ccl in DESTINATION.  */
4729           *destination = '\r';
4730           coding->produced++;
4731           coding->produced_char++;
4732           dst_bytes--;
4733         }
4734       ccl_coding_driver (coding, source,
4735                          destination + coding->spec.ccl.cr_carryover,
4736                          src_bytes, dst_bytes, 0);
4737       if (coding->eol_type != CODING_EOL_LF)
4738         decode_eol_post_ccl (coding, destination, coding->produced);
4739       break;
4740
4741     default:
4742       decode_eol (coding, source, destination, src_bytes, dst_bytes);
4743     }
4744
4745   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
4746       && coding->mode & CODING_MODE_LAST_BLOCK
4747       && coding->consumed == src_bytes)
4748     coding->result = CODING_FINISH_NORMAL;
4749
4750   if (coding->mode & CODING_MODE_LAST_BLOCK
4751       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4752     {
4753       unsigned char *src = source + coding->consumed;
4754       unsigned char *dst = destination + coding->produced;
4755
4756       src_bytes -= coding->consumed;
4757       coding->errors++;
4758       if (COMPOSING_P (coding))
4759         DECODE_COMPOSITION_END ('1');
4760       while (src_bytes--)
4761         {
4762           int c = *src++;
4763           dst += CHAR_STRING (c, dst);
4764           coding->produced_char++;
4765         }
4766       coding->consumed = coding->consumed_char = src - source;
4767       coding->produced = dst - destination;
4768       coding->result = CODING_FINISH_NORMAL;
4769     }
4770
4771   if (!coding->dst_multibyte)
4772     {
4773       coding->produced = str_as_unibyte (destination, coding->produced);
4774       coding->produced_char = coding->produced;
4775     }
4776
4777   return coding->result;
4778 }
4779
4780 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  The
4781    multibyteness of the source is CODING->src_multibyte, the
4782    multibyteness of the result is always unibyte.  */
4783
4784 int
4785 encode_coding (coding, source, destination, src_bytes, dst_bytes)
4786      struct coding_system *coding;
4787      unsigned char *source, *destination;
4788      int src_bytes, dst_bytes;
4789 {
4790   coding->produced = coding->produced_char = 0;
4791   coding->consumed = coding->consumed_char = 0;
4792   coding->errors = 0;
4793   coding->result = CODING_FINISH_NORMAL;
4794
4795   switch (coding->type)
4796     {
4797     case coding_type_sjis:
4798       encode_coding_sjis_big5 (coding, source, destination,
4799                                src_bytes, dst_bytes, 1);
4800       break;
4801
4802     case coding_type_iso2022:
4803       encode_coding_iso2022 (coding, source, destination,
4804                              src_bytes, dst_bytes);
4805       break;
4806
4807     case coding_type_big5:
4808       encode_coding_sjis_big5 (coding, source, destination,
4809                                src_bytes, dst_bytes, 0);
4810       break;
4811
4812     case coding_type_emacs_mule:
4813       encode_coding_emacs_mule (coding, source, destination,
4814                                 src_bytes, dst_bytes);
4815       break;
4816
4817     case coding_type_ccl:
4818       ccl_coding_driver (coding, source, destination,
4819                          src_bytes, dst_bytes, 1);
4820       break;
4821
4822     default:
4823       encode_eol (coding, source, destination, src_bytes, dst_bytes);
4824     }
4825
4826   if (coding->mode & CODING_MODE_LAST_BLOCK
4827       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4828     {
4829       unsigned char *src = source + coding->consumed;
4830       unsigned char *src_end = src + src_bytes;
4831       unsigned char *dst = destination + coding->produced;
4832
4833       if (coding->type == coding_type_iso2022)
4834         ENCODE_RESET_PLANE_AND_REGISTER;
4835       if (COMPOSING_P (coding))
4836         *dst++ = ISO_CODE_ESC, *dst++ = '1';
4837       if (coding->consumed < src_bytes)
4838         {
4839           int len = src_bytes - coding->consumed;
4840
4841           BCOPY_SHORT (source + coding->consumed, dst, len);
4842           if (coding->src_multibyte)
4843             len = str_as_unibyte (dst, len);
4844           dst += len;
4845           coding->consumed = src_bytes;
4846         }
4847       coding->produced = coding->produced_char = dst - destination;
4848       coding->result = CODING_FINISH_NORMAL;
4849     }
4850
4851   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
4852       && coding->consumed == src_bytes)
4853     coding->result = CODING_FINISH_NORMAL;
4854
4855   return coding->result;
4856 }
4857
4858 /* Scan text in the region between *BEG and *END (byte positions),
4859    skip characters which we don't have to decode by coding system
4860    CODING at the head and tail, then set *BEG and *END to the region
4861    of the text we actually have to convert.  The caller should move
4862    the gap out of the region in advance if the region is from a
4863    buffer.
4864
4865    If STR is not NULL, *BEG and *END are indices into STR.  */
4866
4867 static void
4868 shrink_decoding_region (beg, end, coding, str)
4869      int *beg, *end;
4870      struct coding_system *coding;
4871      unsigned char *str;
4872 {
4873   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
4874   int eol_conversion;
4875   Lisp_Object translation_table;
4876
4877   if (coding->type == coding_type_ccl
4878       || coding->type == coding_type_undecided
4879       || coding->eol_type != CODING_EOL_LF
4880       || !NILP (coding->post_read_conversion)
4881       || coding->composing != COMPOSITION_DISABLED)
4882     {
4883       /* We can't skip any data.  */
4884       return;
4885     }
4886   if (coding->type == coding_type_no_conversion
4887       || coding->type == coding_type_raw_text
4888       || coding->type == coding_type_emacs_mule)
4889     {
4890       /* We need no conversion, but don't have to skip any data here.
4891          Decoding routine handles them effectively anyway.  */
4892       return;
4893     }
4894
4895   translation_table = coding->translation_table_for_decode;
4896   if (NILP (translation_table) && !NILP (Venable_character_translation))
4897     translation_table = Vstandard_translation_table_for_decode;
4898   if (CHAR_TABLE_P (translation_table))
4899     {
4900       int i;
4901       for (i = 0; i < 128; i++)
4902         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
4903           break;
4904       if (i < 128)
4905         /* Some ASCII character should be translated.  We give up
4906            shrinking.  */
4907         return;
4908     }
4909
4910   if (coding->heading_ascii >= 0)
4911     /* Detection routine has already found how much we can skip at the
4912        head.  */
4913     *beg += coding->heading_ascii;
4914
4915   if (str)
4916     {
4917       begp_orig = begp = str + *beg;
4918       endp_orig = endp = str + *end;
4919     }
4920   else
4921     {
4922       begp_orig = begp = BYTE_POS_ADDR (*beg);
4923       endp_orig = endp = begp + *end - *beg;
4924     }
4925
4926   eol_conversion = (coding->eol_type == CODING_EOL_CR
4927                     || coding->eol_type == CODING_EOL_CRLF);
4928
4929   switch (coding->type)
4930     {
4931     case coding_type_sjis:
4932     case coding_type_big5:
4933       /* We can skip all ASCII characters at the head.  */
4934       if (coding->heading_ascii < 0)
4935         {
4936           if (eol_conversion)
4937             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
4938           else
4939             while (begp < endp && *begp < 0x80) begp++;
4940         }
4941       /* We can skip all ASCII characters at the tail except for the
4942          second byte of SJIS or BIG5 code.  */
4943       if (eol_conversion)
4944         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
4945       else
4946         while (begp < endp && endp[-1] < 0x80) endp--;
4947       /* Do not consider LF as ascii if preceded by CR, since that
4948          confuses eol decoding. */
4949       if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4950         endp++;
4951       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
4952         endp++;
4953       break;
4954
4955     case coding_type_iso2022:
4956       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
4957         /* We can't skip any data.  */
4958         break;
4959       if (coding->heading_ascii < 0)
4960         {
4961           /* We can skip all ASCII characters at the head except for a
4962              few control codes.  */
4963           while (begp < endp && (c = *begp) < 0x80
4964                  && c != ISO_CODE_CR && c != ISO_CODE_SO
4965                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
4966                  && (!eol_conversion || c != ISO_CODE_LF))
4967             begp++;
4968         }
4969       switch (coding->category_idx)
4970         {
4971         case CODING_CATEGORY_IDX_ISO_8_1:
4972         case CODING_CATEGORY_IDX_ISO_8_2:
4973           /* We can skip all ASCII characters at the tail.  */
4974           if (eol_conversion)
4975             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
4976           else
4977             while (begp < endp && endp[-1] < 0x80) endp--;
4978           /* Do not consider LF as ascii if preceded by CR, since that
4979              confuses eol decoding. */
4980           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4981             endp++;
4982           break;
4983
4984         case CODING_CATEGORY_IDX_ISO_7:
4985         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
4986           {
4987             /* We can skip all characters at the tail except for 8-bit
4988                codes and ESC and the following 2-byte at the tail.  */
4989             unsigned char *eight_bit = NULL;
4990
4991             if (eol_conversion)
4992               while (begp < endp
4993                      && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
4994                 {
4995                   if (!eight_bit && c & 0x80) eight_bit = endp;
4996                   endp--;
4997                 }
4998             else
4999               while (begp < endp
5000                      && (c = endp[-1]) != ISO_CODE_ESC)
5001                 {
5002                   if (!eight_bit && c & 0x80) eight_bit = endp;
5003                   endp--;
5004                 }
5005             /* Do not consider LF as ascii if preceded by CR, since that
5006                confuses eol decoding. */
5007             if (begp < endp && endp < endp_orig
5008                 && endp[-1] == '\r' && endp[0] == '\n')
5009               endp++;
5010             if (begp < endp && endp[-1] == ISO_CODE_ESC)
5011               {
5012                 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
5013                   /* This is an ASCII designation sequence.  We can
5014                      surely skip the tail.  But, if we have
5015                      encountered an 8-bit code, skip only the codes
5016                      after that.  */
5017                   endp = eight_bit ? eight_bit : endp + 2;
5018                 else
5019                   /* Hmmm, we can't skip the tail.  */
5020                   endp = endp_orig;
5021               }
5022             else if (eight_bit)
5023               endp = eight_bit;
5024           }
5025         }
5026       break;
5027
5028     default:
5029       abort ();
5030     }
5031   *beg += begp - begp_orig;
5032   *end += endp - endp_orig;
5033   return;
5034 }
5035
5036 /* Like shrink_decoding_region but for encoding.  */
5037
5038 static void
5039 shrink_encoding_region (beg, end, coding, str)
5040      int *beg, *end;
5041      struct coding_system *coding;
5042      unsigned char *str;
5043 {
5044   unsigned char *begp_orig, *begp, *endp_orig, *endp;
5045   int eol_conversion;
5046   Lisp_Object translation_table;
5047
5048   if (coding->type == coding_type_ccl
5049       || coding->eol_type == CODING_EOL_CRLF
5050       || coding->eol_type == CODING_EOL_CR
5051       || coding->cmp_data && coding->cmp_data->used > 0)
5052     {
5053       /* We can't skip any data.  */
5054       return;
5055     }
5056   if (coding->type == coding_type_no_conversion
5057       || coding->type == coding_type_raw_text
5058       || coding->type == coding_type_emacs_mule
5059       || coding->type == coding_type_undecided)
5060     {
5061       /* We need no conversion, but don't have to skip any data here.
5062          Encoding routine handles them effectively anyway.  */
5063       return;
5064     }
5065
5066   translation_table = coding->translation_table_for_encode;
5067   if (NILP (translation_table) && !NILP (Venable_character_translation))
5068     translation_table = Vstandard_translation_table_for_encode;
5069   if (CHAR_TABLE_P (translation_table))
5070     {
5071       int i;
5072       for (i = 0; i < 128; i++)
5073         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
5074           break;
5075       if (i < 128)
5076         /* Some ASCII character should be translated.  We give up
5077            shrinking.  */
5078         return;
5079     }
5080
5081   if (str)
5082     {
5083       begp_orig = begp = str + *beg;
5084       endp_orig = endp = str + *end;
5085     }
5086   else
5087     {
5088       begp_orig = begp = BYTE_POS_ADDR (*beg);
5089       endp_orig = endp = begp + *end - *beg;
5090     }
5091
5092   eol_conversion = (coding->eol_type == CODING_EOL_CR
5093                     || coding->eol_type == CODING_EOL_CRLF);
5094
5095   /* Here, we don't have to check coding->pre_write_conversion because
5096      the caller is expected to have handled it already.  */
5097   switch (coding->type)
5098     {
5099     case coding_type_iso2022:
5100       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
5101         /* We can't skip any data.  */
5102         break;
5103       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
5104         {
5105           unsigned char *bol = begp;
5106           while (begp < endp && *begp < 0x80)
5107             {
5108               begp++;
5109               if (begp[-1] == '\n')
5110                 bol = begp;
5111             }
5112           begp = bol;
5113           goto label_skip_tail;
5114         }
5115       /* fall down ... */
5116
5117     case coding_type_sjis:
5118     case coding_type_big5:
5119       /* We can skip all ASCII characters at the head and tail.  */
5120       if (eol_conversion)
5121         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
5122       else
5123         while (begp < endp && *begp < 0x80) begp++;
5124     label_skip_tail:
5125       if (eol_conversion)
5126         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
5127       else
5128         while (begp < endp && *(endp - 1) < 0x80) endp--;
5129       break;
5130
5131     default:
5132       abort ();
5133     }
5134
5135   *beg += begp - begp_orig;
5136   *end += endp - endp_orig;
5137   return;
5138 }
5139
/* Shrinking a conversion region has some overhead of its own, so it
   is attempted only for regions longer than this many bytes.  */
static int shrink_conversion_region_threshhold = 1024;

/* If the region between *BEG and *END is longer than the threshold
   above, shrink it by shrink_encoding_region when ENCODEP is nonzero
   and by shrink_decoding_region otherwise.  BEG and END are pointers
   to the positions; CODING and STR are handed to those functions as
   they are.  */
#define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep)        \
  do {                                                                  \
    if (*(end) - *(beg) > shrink_conversion_region_threshhold)          \
      {                                                                 \
        if (!(encodep))                                                 \
          shrink_decoding_region (beg, end, coding, str);               \
        else                                                            \
          shrink_encoding_region (beg, end, coding, str);               \
      }                                                                 \
  } while (0)
5153
5154 static Lisp_Object
5155 code_convert_region_unwind (dummy)
5156      Lisp_Object dummy;
5157 {
5158   inhibit_pre_post_conversion = 0;
5159   return Qnil;
5160 }
5161
5162 /* Store information about all compositions in the range FROM and TO
5163    of OBJ in memory blocks pointed by CODING->cmp_data.  OBJ is a
5164    buffer or a string, defaults to the current buffer.  */
5165
5166 void
5167 coding_save_composition (coding, from, to, obj)
5168      struct coding_system *coding;
5169      int from, to;
5170      Lisp_Object obj;
5171 {
5172   Lisp_Object prop;
5173   int start, end;
5174
5175   if (coding->composing == COMPOSITION_DISABLED)
5176     return;
5177   if (!coding->cmp_data)
5178     coding_allocate_composition_data (coding, from);
5179   if (!find_composition (from, to, &start, &end, &prop, obj)
5180       || end > to)
5181     return;
5182   if (start < from
5183       && (!find_composition (end, to, &start, &end, &prop, obj)
5184           || end > to))
5185     return;
5186   coding->composing = COMPOSITION_NO;
5187   do
5188     {
5189       if (COMPOSITION_VALID_P (start, end, prop))
5190         {
5191           enum composition_method method = COMPOSITION_METHOD (prop);
5192           if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
5193               >= COMPOSITION_DATA_SIZE)
5194             coding_allocate_composition_data (coding, from);
5195           /* For relative composition, we remember start and end
5196              positions, for the other compositions, we also remember
5197              components.  */
5198           CODING_ADD_COMPOSITION_START (coding, start - from, method);
5199           if (method != COMPOSITION_RELATIVE)
5200             {
5201               /* We must store a*/
5202               Lisp_Object val, ch;
5203
5204               val = COMPOSITION_COMPONENTS (prop);
5205               if (CONSP (val))
5206                 while (CONSP (val))
5207                   {
5208                     ch = XCAR (val), val = XCDR (val);
5209                     CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
5210                   }
5211               else if (VECTORP (val) || STRINGP (val))
5212                 {
5213                   int len = (VECTORP (val)
5214                              ? XVECTOR (val)->size : XSTRING (val)->size);
5215                   int i;
5216                   for (i = 0; i < len; i++)
5217                     {
5218                       ch = (STRINGP (val)
5219                             ? Faref (val, make_number (i))
5220                             : XVECTOR (val)->contents[i]);
5221                       CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
5222                     }
5223                 }
5224               else              /* INTEGERP (val) */
5225                 CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (val));
5226             }
5227           CODING_ADD_COMPOSITION_END (coding, end - from);
5228         }
5229       start = end;
5230     }
5231   while (start < to
5232          && find_composition (start, to, &start, &end, &prop, obj)
5233          && end <= to);
5234
5235   /* Make coding->cmp_data point to the first memory block.  */
5236   while (coding->cmp_data->prev)
5237     coding->cmp_data = coding->cmp_data->prev;
5238   coding->cmp_data_start = 0;
5239 }
5240
5241 /* Re-create on OBJ the compositions recorded in CODING->cmp_data (a
5242    chain of memory blocks; a null pointer means there is nothing to do).
5243    OBJ is a buffer or a string, defaults to the current buffer.  */
5244
5245 void
5246 coding_restore_composition (coding, obj)
5247      struct coding_system *coding;      /* Its cmp_data chain holds the records.  */
5248      Lisp_Object obj;                   /* Handed unchanged to compose_text.  */
5249 {
5250   struct composition_data *cmp_data = coding->cmp_data;
5251
5252   if (!cmp_data)                /* Nothing was recorded.  */
5253     return;
5254
5255   while (cmp_data->prev)        /* Rewind to the first block of the chain.  */
5256     cmp_data = cmp_data->prev;
5257
5258   while (cmp_data)              /* Visit every block, following the next links.  */
5259     {
5260       int i;
5261
5262       for (i = 0; i < cmp_data->used && cmp_data->data[i] > 0;
5263            i += cmp_data->data[i])     /* data[i] is the length of the record at I.  */
5264         {
5265           int *data = cmp_data->data + i;  /* [0] length, [1] [2] positions, [3] method, [4..] components.  */
5266           enum composition_method method = (enum composition_method) data[3];
5267           Lisp_Object components;
5268
5269           if (method == COMPOSITION_RELATIVE)
5270             components = Qnil;   /* No component list is built for this method.  */
5271           else
5272             {
5273               int len = data[0] - 4, j;        /* Record length minus the four header slots.  */
5274               Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];  /* NOTE(review): LEN is not checked against this size here.  */
5275
5276               for (j = 0; j < len; j++)
5277                 args[j] = make_number (data[4 + j]);
5278               components = (method == COMPOSITION_WITH_ALTCHARS
5279                             ? Fstring (len, args) : Fvector (len, args));  /* String for altchars, vector otherwise.  */
5280             }
5281           compose_text (data[1], data[2], components, Qnil, obj);
5282         }
5283       cmp_data = cmp_data->next;
5284     }
5285 }
5286
5287 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
5288    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
5289    coding system CODING, and return the status code of code conversion
5290    (currently, this value has no meaning; it is always 0).
5291
5292    How many characters (and bytes) are converted to how many
5293    characters (and bytes) are recorded in members of the structure
5294    CODING.  CODING's composition data is freed before each return below.
5295
5296    If REPLACE is nonzero, we do various things as if the original text
5297    is deleted and a new text is inserted.  See the comments in
5298    replace_range (insdel.c) to know what we are doing.
5299
5300    If REPLACE is zero, it is assumed that the source text is unibyte.
5301    Otherwise, it is assumed that the source text is multibyte.  */
5302
5303 int
5304 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
5305      int from, from_byte, to, to_byte, encodep, replace;
5306      struct coding_system *coding;
5307 {
5308   int len = to - from, len_byte = to_byte - from_byte;
5309   int require, inserted, inserted_byte;
5310   int head_skip, tail_skip, total_skip = 0;
5311   Lisp_Object saved_coding_symbol;
5312   int first = 1;
5313   unsigned char *src, *dst;
5314   Lisp_Object deletion;
5315   int orig_point = PT, orig_len = len;
5316   int prev_Z;
5317   int multibyte_p = !NILP (current_buffer->enable_multibyte_characters);
5318
5319   deletion = Qnil;
5320   saved_coding_symbol = coding->symbol;
5321
5322   if (from < PT && PT < to)     /* Point is strictly inside the region: park it at FROM.  */
5323     {
5324       TEMP_SET_PT_BOTH (from, from_byte);
5325       orig_point = from;
5326     }
5327
5328   if (replace)
5329     {
5330       int saved_from = from;
5331       int saved_inhibit_modification_hooks;
5332
5333       prepare_to_modify_buffer (from, to, &from);
5334       if (saved_from != from)   /* FROM was moved: recompute everything derived from it.  */
5335         {
5336           to = from + len;
5337           from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
5338           len_byte = to_byte - from_byte;
5339         }
5340
5341       /* The code conversion routine can not preserve text properties
5342          for now.  So, we must remove all text properties in the
5343          region.  Here, we must suppress all modification hooks.  */
5344       saved_inhibit_modification_hooks = inhibit_modification_hooks;
5345       inhibit_modification_hooks = 1;
5346       Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
5347       inhibit_modification_hooks = saved_inhibit_modification_hooks;
5348     }
5349
5350   if (! encodep && CODING_REQUIRE_DETECTION (coding))
5351     {
5352       /* We must detect encoding of text and eol format.  */
5353
5354       if (from < GPT && to > GPT)
5355         move_gap_both (from, from_byte);
5356       if (coding->type == coding_type_undecided)
5357         {
5358           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
5359           if (coding->type == coding_type_undecided)
5360             {
5361               /* It seems that the text contains only ASCII, but we
5362                  should not leave it undecided because the deeper
5363                  decoding routine (decode_coding) tries to detect the
5364                  encodings again in vain.  */
5365               coding->type = coding_type_emacs_mule;
5366               coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
5367               /* As emacs-mule decoder will handle composition, we
5368                  need this setting to allocate coding->cmp_data
5369                  later.  */
5370               coding->composing = COMPOSITION_NO;
5371             }
5372         }
5373       if (coding->eol_type == CODING_EOL_UNDECIDED
5374           && coding->type != coding_type_ccl)
5375         {
5376           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
5377           if (coding->eol_type == CODING_EOL_UNDECIDED)
5378             coding->eol_type = CODING_EOL_LF;
5379           /* We had better recover the original eol format if we
5380              encounter an inconsistent eol format while decoding.  */
5381           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
5382         }
5383     }
5384
5385   /* Now we convert the text.  */
5386
5387   /* For encoding, we must process pre-write-conversion in advance.  */
5388   if (! inhibit_pre_post_conversion
5389       && encodep
5390       && SYMBOLP (coding->pre_write_conversion)
5391       && ! NILP (Ffboundp (coding->pre_write_conversion)))
5392     {
5393       /* The function in pre-write-conversion may put a new text in a
5394          new buffer.  */
5395       struct buffer *prev = current_buffer;
5396       Lisp_Object new;
5397       int count = specpdl_ptr - specpdl;
5398
5399       record_unwind_protect (code_convert_region_unwind, Qnil);
5400       /* We should not call any more pre-write/post-read-conversion
5401          functions while this pre-write-conversion is running.  */
5402       inhibit_pre_post_conversion = 1;
5403       call2 (coding->pre_write_conversion,
5404              make_number (from), make_number (to));
5405       inhibit_pre_post_conversion = 0;
5406       /* Discard the unwind protect.  */
5407       specpdl_ptr--;
5408
5409       if (current_buffer != prev)
5410         {
5411           /* The function left us in another buffer: replace the
                 region in PREV by that buffer's accessible text.  */
5412           len = ZV - BEGV;
5413           new = Fcurrent_buffer ();
5414           set_buffer_internal_1 (prev);
5415           del_range_2 (from, from_byte, to, to_byte, 0);
5416           TEMP_SET_PT_BOTH (from, from_byte);
5417           insert_from_buffer (XBUFFER (new), 1, len, 0);
5418           Fkill_buffer (new);
5419           if (orig_point >= to)
5420             orig_point += len - orig_len;
5421           else if (orig_point > from)
5422             orig_point = from;
5423           orig_len = len;
5424           to = from + len;
5425           from_byte = CHAR_TO_BYTE (from);
5426           to_byte = CHAR_TO_BYTE (to);
5427           len_byte = to_byte - from_byte;
5428           TEMP_SET_PT_BOTH (from, from_byte);
5429         }
5430     }
5431
5432   if (replace)
5433     deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
5434
5435   if (coding->composing != COMPOSITION_DISABLED)
5436     {
5437       if (encodep)
5438         coding_save_composition (coding, from, to, Fcurrent_buffer ());
5439       else
5440         coding_allocate_composition_data (coding, from);
5441     }
5442
5443   /* Try to skip the heading and tailing ASCIIs.  */
5444   if (coding->type != coding_type_ccl)
5445     {
5446       int from_byte_orig = from_byte, to_byte_orig = to_byte;
5447
5448       if (from < GPT && GPT < to)
5449         move_gap_both (from, from_byte);
5450       SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
5451       if (from_byte == to_byte
5452           && (encodep || NILP (coding->post_read_conversion))
5453           && ! CODING_REQUIRE_FLUSHING (coding))
5454         {
5455           /* Everything was skipped: there is nothing to convert.  */
5456           coding->produced = len_byte;
5457           coding->produced_char = len;
5458           if (!replace) /* We must record and adjust for this new text now.  */
5459             adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
5460           coding_free_composition_data (coding); /* Set up above; don't leak it.  */
5461           return 0;
5462         }
5463
5464       head_skip = from_byte - from_byte_orig;
5465       tail_skip = to_byte_orig - to_byte;
5466       total_skip = head_skip + tail_skip;
5467       from += head_skip;
5468       to -= tail_skip;
5469       len -= total_skip; len_byte -= total_skip;
5470     }
5471
5472   /* For conversion, we must put the gap before the text in addition to
5473      making the gap larger for efficient decoding.  The required gap
5474      size starts from 2000 which is the magic number used in make_gap.
5475      But, after one batch of conversion, it will be incremented if we
5476      find that it is not enough.  */
5477   require = 2000;
5478
5479   if (GAP_SIZE  < require)
5480     make_gap (require - GAP_SIZE);
5481   move_gap_both (from, from_byte);
5482
5483   inserted = inserted_byte = 0;
5484
5485   /* Account the source text as part of the gap: the buffer shrinks
         by LEN characters and LEN_BYTE bytes while we convert.  */
5486   GAP_SIZE += len_byte;
5487   ZV -= len;
5488   Z -= len;
5489   ZV_BYTE -= len_byte;
5490   Z_BYTE -= len_byte;
5491
5492   if (GPT - BEG < BEG_UNCHANGED)
5493     BEG_UNCHANGED = GPT - BEG;
5494   if (Z - GPT < END_UNCHANGED)
5495     END_UNCHANGED = Z - GPT;
5496
5497   if (!encodep && coding->src_multibyte)
5498     {
5499       /* Decoding routines expect that the source text is unibyte.
5500          We must convert 8-bit characters of multibyte form to
5501          unibyte.  */
5502       int len_byte_orig = len_byte;
5503       len_byte = str_as_unibyte (GAP_END_ADDR - len_byte, len_byte);
5504       if (len_byte < len_byte_orig)
5505         safe_bcopy (GAP_END_ADDR - len_byte_orig, GAP_END_ADDR - len_byte,
5506                     len_byte);
5507       coding->src_multibyte = 0;
5508     }
5509
5510   for (;;)
5511     {
5512       int result;
5513
5514       /* The buffer memory is now:
5515          +--------+converted-text+---------+-------original-text-------+---+
5516          |<-from->|<--inserted-->|---------|<--------len_byte--------->|---|
5517                   |<---------------------- GAP ----------------------->|  */
5518       src = GAP_END_ADDR - len_byte;
5519       dst = GPT_ADDR + inserted_byte;
5520
5521       if (encodep)
5522         result = encode_coding (coding, src, dst, len_byte, 0);
5523       else
5524         result = decode_coding (coding, src, dst, len_byte, 0);
5525
5526       /* The buffer memory is now:
5527          +--------+-------converted-text----+--+------original-text----+---+
5528          |<-from->|<-inserted->|<-produced->|--|<-(len_byte-consumed)->|---|
5529                   |<---------------------- GAP ----------------------->|  */
5530
5531       inserted += coding->produced_char;
5532       inserted_byte += coding->produced;
5533       len_byte -= coding->consumed;
5534
5535       if (result == CODING_FINISH_INSUFFICIENT_CMP)
5536         {
5537           coding_allocate_composition_data (coding, from + inserted);
5538           continue;
5539         }
5540
5541       src += coding->consumed;
5542       dst += coding->produced;
5543
5544       if (result == CODING_FINISH_NORMAL)
5545         {
5546           src += len_byte;
5547           break;
5548         }
5549       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
5550         {
5551           unsigned char *pend = dst, *p = pend - inserted_byte;
5552           Lisp_Object eol_type;
5553
5554           /* Encode LFs back to the original eol format (CR or CRLF).  */
5555           if (coding->eol_type == CODING_EOL_CR)
5556             {
5557               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
5558             }
5559           else
5560             {
5561               int count = 0;
5562
5563               while (p < pend) if (*p++ == '\n') count++;
5564               if (src - dst < count)
5565                 {
5566                   /* We don't have sufficient room for encoding LFs
5567                      back to CRLF.  We must record converted and
5568                      not-yet-converted text back to the buffer
5569                      content, enlarge the gap, then record them out of
5570                      the buffer contents again.  */
5571                   int add = len_byte + inserted_byte;
5572
5573                   GAP_SIZE -= add;
5574                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5575                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
5576                   make_gap (count - GAP_SIZE);
5577                   GAP_SIZE += add;
5578                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5579                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5580                   /* Don't forget to update SRC, DST, and PEND.  */
5581                   src = GAP_END_ADDR - len_byte;
5582                   dst = GPT_ADDR + inserted_byte;
5583                   pend = dst;
5584                 }
5585               inserted += count;
5586               inserted_byte += count;
5587               coding->produced += count;
5588               p = dst = pend + count;
5589               while (count)     /* Copy backward, inserting a CR before each LF.  */
5590                 {
5591                   *--p = *--pend;
5592                   if (*p == '\n') count--, *--p = '\r';
5593                 }
5594             }
5595
5596           /* Suppress eol-format conversion in the further conversion.  */
5597           coding->eol_type = CODING_EOL_LF;
5598
5599           /* Set the coding system symbol to that for Unix-like EOL.  */
5600           eol_type = Fget (saved_coding_symbol, Qeol_type);
5601           if (VECTORP (eol_type)
5602               && XVECTOR (eol_type)->size == 3
5603               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
5604             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
5605           else
5606             coding->symbol = saved_coding_symbol;
5607
5608           continue;
5609         }
5610       if (len_byte <= 0)
5611         {
5612           /* No source is left.  A CCL coding system gets one more
                 round with CODING_MODE_LAST_BLOCK set before we stop.  */
5613           if (coding->type != coding_type_ccl
5614               || coding->mode & CODING_MODE_LAST_BLOCK)
5615             break;
5616           coding->mode |= CODING_MODE_LAST_BLOCK;
5617           continue;
5618         }
5619       if (result == CODING_FINISH_INSUFFICIENT_SRC)
5620         {
5621           /* The source text ends in invalid codes.  Let's just
5622              make them valid buffer contents, and finish conversion.  */
5623           if (multibyte_p)
5624             {
5625               unsigned char *start = dst;
5626
5627               inserted += len_byte;
5628               while (len_byte--)
5629                 {
5630                   int c = *src++;
5631                   dst += CHAR_STRING (c, dst);
5632                 }
5633
5634               inserted_byte += dst - start;
5635             }
5636           else
5637             {
5638               inserted += len_byte;
5639               inserted_byte += len_byte;
5640               while (len_byte--)
5641                 *dst++ = *src++;
5642             }
5643           break;
5644         }
5645       if (result == CODING_FINISH_INTERRUPT)
5646         {
5647           /* The conversion procedure was interrupted by a user.  */
5648           break;
5649         }
5650       /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST  */
5651       if (coding->consumed < 1)
5652         {
5653           /* It's quite strange to require more memory without
5654              consuming any bytes.  Perhaps CCL program bug.  */
5655           break;
5656         }
5657       if (first)
5658         {
5659           /* We have just done the first batch of conversion which was
5660              stopped because of insufficient gap.  Let's reconsider the
5661              required gap size (i.e. SRC - DST) now.
5662
5663              We have converted ORIG bytes (== coding->consumed) into
5664              NEW bytes (coding->produced).  To convert the remaining
5665              LEN bytes, we may need REQUIRE bytes of gap, where:
5666                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
5667                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
5668              Here, we are sure that NEW >= ORIG.  */
5669           float ratio = coding->produced - coding->consumed;
5670           ratio /= coding->consumed;
5671           require = len_byte * ratio;
5672           first = 0;
5673         }
5674       if ((src - dst) < (require + 2000))
5675         {
5676           /* See the comment above the previous call of make_gap.  */
5677           int add = len_byte + inserted_byte;
5678
5679           GAP_SIZE -= add;
5680           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5681           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5682           make_gap (require + 2000);
5683           GAP_SIZE += add;
5684           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5685           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5686         }
5687     }
5688   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
5689
5690   if (encodep && coding->dst_multibyte)
5691     {
5692       /* The output is unibyte.  We must convert 8-bit characters to
5693          multibyte form.  */
5694       if (inserted_byte * 2 > GAP_SIZE)
5695         {
5696           GAP_SIZE -= inserted_byte;
5697           ZV += inserted_byte; Z += inserted_byte;
5698           ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte;
5699           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5700           make_gap (inserted_byte - GAP_SIZE);
5701           GAP_SIZE += inserted_byte;
5702           ZV -= inserted_byte; Z -= inserted_byte;
5703           ZV_BYTE -= inserted_byte; Z_BYTE -= inserted_byte;
5704           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5705         }
5706       inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte);
5707     }
5708
5709   /* If we shrank the conversion area, adjust it now.  */
5710   if (total_skip > 0)
5711     {
5712       if (tail_skip > 0)
5713         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
5714       inserted += total_skip; inserted_byte += total_skip;
5715       GAP_SIZE += total_skip;
5716       GPT -= head_skip; GPT_BYTE -= head_skip;
5717       ZV -= total_skip; ZV_BYTE -= total_skip;
5718       Z -= total_skip; Z_BYTE -= total_skip;
5719       from -= head_skip; from_byte -= head_skip;
5720       to += tail_skip; to_byte += tail_skip;
5721     }
5722
5723   prev_Z = Z;
5724   adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
5725   inserted = Z - prev_Z;
5726
5727   if (!encodep && coding->cmp_data && coding->cmp_data->used)
5728     coding_restore_composition (coding, Fcurrent_buffer ());
5729   coding_free_composition_data (coding);
5730
5731   if (! inhibit_pre_post_conversion
5732       && ! encodep && ! NILP (coding->post_read_conversion))
5733     {
5734       Lisp_Object val;
5735       int count = specpdl_ptr - specpdl;
5736
5737       if (from != PT)
5738         TEMP_SET_PT_BOTH (from, from_byte);
5739       prev_Z = Z;
5740       record_unwind_protect (code_convert_region_unwind, Qnil);
5741       /* We should not call any more pre-write/post-read-conversion
5742          functions while this post-read-conversion is running.  */
5743       inhibit_pre_post_conversion = 1;
5744       val = call1 (coding->post_read_conversion, make_number (inserted));
5745       inhibit_pre_post_conversion = 0;
5746       /* Discard the unwind protect.  */
5747       specpdl_ptr--;
5748       CHECK_NUMBER (val, 0);
5749       inserted += Z - prev_Z;
5750     }
5751
5752   if (orig_point >= from)
5753     {
5754       if (orig_point >= from + orig_len)
5755         orig_point += inserted - orig_len;
5756       else
5757         orig_point = from;
5758       TEMP_SET_PT (orig_point);
5759     }
5760
5761   if (replace)
5762     {
5763       signal_after_change (from, to - from, inserted);
5764       update_compositions (from, from + inserted, CHECK_BORDER);
5765     }
5766
5767   {
5768     coding->consumed = to_byte - from_byte;
5769     coding->consumed_char = to - from;
5770     coding->produced = inserted_byte;
5771     coding->produced_char = inserted;
5772   }
5773
5774   return 0;
5775 }
5772 /* Run CODING's pre-write-conversion (ENCODEP nonzero) or post-read-conversion (ENCODEP zero) on STR in a work buffer, and return the resulting text as a string.  */
5773 Lisp_Object
5774 run_pre_post_conversion_on_str (str, coding, encodep)
5775      Lisp_Object str;                   /* Text handed to the conversion function.  */
5776      struct coding_system *coding;      /* Supplies pre_write_conversion / post_read_conversion.  */
5777      int encodep;                       /* Nonzero selects pre_write_conversion.  */
5778 {
5779   int count = specpdl_ptr - specpdl;     /* Unwind depth on entry, for unbind_to below.  */
5780   struct gcpro gcpro1;
5781   int multibyte = STRING_MULTIBYTE (str);
5782
5783   record_unwind_protect (Fset_buffer, Fcurrent_buffer ());  /* Go back to the caller's buffer when unwinding.  */
5784   record_unwind_protect (code_convert_region_unwind, Qnil); /* Defined elsewhere; presumably resets inhibit_pre_post_conversion -- confirm.  */
5785   GCPRO1 (str);         /* STR stays protected until it has been inserted.  */
5786   temp_output_buffer_setup (" *code-converting-work*");
5787   set_buffer_internal (XBUFFER (Vstandard_output));
5788   /* We must insert the contents of STR as is without
5789      unibyte<->multibyte conversion.  For that, we adjust the
5790      multibyteness of the working buffer to that of STR.  */
5791   Ferase_buffer ();
5792   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
5793   insert_from_string (str, 0, 0,
5794                       XSTRING (str)->size, STRING_BYTES (XSTRING (str)), 0);
5795   UNGCPRO;
5796   inhibit_pre_post_conversion = 1;       /* No nested pre-write/post-read conversion during the call.  */
5797   if (encodep)
5798     call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));  /* Called with the bounds of the whole text.  */
5799   else
5800     {
5801       TEMP_SET_PT_BOTH (BEG, BEG_BYTE);  /* Point goes to the start before the call.  */
5802       call1 (coding->post_read_conversion, make_number (Z - BEG));  /* Called with the text length in characters.  */
5803     }
5804   inhibit_pre_post_conversion = 0;
5805   str = make_buffer_string (BEG, Z, 1);  /* Text from BEG to Z of the buffer that is current now.  */
5806   return unbind_to (count, str);         /* Run the unwind forms recorded above and return STR.  */
5807 }
5808 /* Decode the string STR by coding system CODING and return the result as a string; the consumed/produced counts are stored in CODING.  */
5809 Lisp_Object
5810 decode_coding_string (str, coding, nocopy)
5811      Lisp_Object str;                   /* Source string, unibyte or multibyte.  */
5812      struct coding_system *coding;      /* Updated with detection results and counts.  */
5813      int nocopy;                        /* Nonzero: STR itself may be returned when no decoding is needed.  */
5814 {
5815   int len;                      /* Initial size of the conversion buffer.  */
5816   struct conversion_buffer buf;
5817   int from, to_byte;            /* Byte range of STR that is actually decoded.  */
5818   struct gcpro gcpro1;          /* NOTE(review): not referenced in this function.  */
5819   Lisp_Object saved_coding_symbol;
5820   int result;
5821   int require_decoding;
5822   int shrinked_bytes = 0;       /* Bytes skipped at the head and tail of STR.  */
5823   Lisp_Object newstr;
5824   int consumed, consumed_char, produced, produced_char;  /* Totals over all decode_coding calls.  */
5825
5826   from = 0;
5827   to_byte = STRING_BYTES (XSTRING (str));
5828
5829   saved_coding_symbol = coding->symbol;
5830   coding->src_multibyte = STRING_MULTIBYTE (str);
5831   coding->dst_multibyte = 1;
5832   if (CODING_REQUIRE_DETECTION (coding))
5833     {
5834       /* See the comments in code_convert_region.  */
5835       if (coding->type == coding_type_undecided)
5836         {
5837           detect_coding (coding, XSTRING (str)->data, to_byte);
5838           if (coding->type == coding_type_undecided)
5839             {
5840               coding->type = coding_type_emacs_mule;
5841               coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
5842               /* As emacs-mule decoder will handle composition, we
5843                  need this setting to allocate coding->cmp_data
5844                  later.  */
5845               coding->composing = COMPOSITION_NO;
5846             }
5847         }
5848       if (coding->eol_type == CODING_EOL_UNDECIDED
5849           && coding->type != coding_type_ccl)
5850         {
5851           saved_coding_symbol = coding->symbol;  /* Saved again; presumably detect_coding may have changed it -- confirm.  */
5852           detect_eol (coding, XSTRING (str)->data, to_byte);
5853           if (coding->eol_type == CODING_EOL_UNDECIDED)
5854             coding->eol_type = CODING_EOL_LF;
5855           /* We had better recover the original eol format if we
5856              encounter an inconsistent eol format while decoding.  */
5857           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
5858         }
5859     }
5860
5861   if (coding->type == coding_type_no_conversion
5862       || coding->type == coding_type_raw_text)
5863     coding->dst_multibyte = 0;   /* These two types yield a unibyte result.  */
5864
5865   require_decoding = CODING_REQUIRE_DECODING (coding);
5866
5867   if (STRING_MULTIBYTE (str))
5868     {
5869       /* Decoding routines expect the source text to be unibyte.  */
5870       str = Fstring_as_unibyte (str);
5871       to_byte = STRING_BYTES (XSTRING (str));
5872       nocopy = 1;               /* STR is no longer the caller's object, so no extra copy is made.  */
5873       coding->src_multibyte = 0;
5874     }
5875
5876   /* Try to skip the heading and tailing ASCIIs.  */
5877   if (require_decoding && coding->type != coding_type_ccl)
5878     {
5879       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
5880                                 0);
5881       if (from == to_byte)      /* Nothing is left between the skipped head and tail.  */
5882         require_decoding = 0;
5883       shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
5884     }
5885
5886   if (!require_decoding)
5887     {
5888       /* No conversion: return STR, at most relabelled as multibyte.  */
5889       coding->consumed = STRING_BYTES (XSTRING (str));
5890       coding->consumed_char = XSTRING (str)->size;
5891       if (coding->dst_multibyte)
5892         {
5893           str = Fstring_as_multibyte (str);
5894           nocopy = 1;
5895         }
5896       coding->produced = STRING_BYTES (XSTRING (str));
5897       coding->produced_char = XSTRING (str)->size;
5898       return (nocopy ? str : Fcopy_sequence (str));
5899     }
5900
5901   if (coding->composing != COMPOSITION_DISABLED)
5902     coding_allocate_composition_data (coding, from);
5903   len = decoding_buffer_size (coding, to_byte - from);
5904   allocate_conversion_buffer (buf, len);
5905
5906   consumed = consumed_char = produced = produced_char = 0;
5907   while (1)
5908     {
5909       /* Decode what is left of STR[FROM, TO_BYTE) into the free part of BUF.  */
5910       result = decode_coding (coding, XSTRING (str)->data + from + consumed,
5911                               buf.data + produced, to_byte - from - consumed,
5912                               buf.size - produced);
5913       consumed += coding->consumed;
5914       consumed_char += coding->consumed_char;
5915       produced += coding->produced;
5916       produced_char += coding->produced_char;
5917       if (result == CODING_FINISH_NORMAL
5918           || (result == CODING_FINISH_INSUFFICIENT_SRC
5919               && coding->consumed == 0))
5920         break;                  /* Finished, or the last call consumed nothing.  */
5921       if (result == CODING_FINISH_INSUFFICIENT_CMP)
5922         coding_allocate_composition_data (coding, from + produced_char);
5923       else if (result == CODING_FINISH_INSUFFICIENT_DST)
5924         extend_conversion_buffer (&buf);
5925       else if (result == CODING_FINISH_INCONSISTENT_EOL)
5926         {
5927           Lisp_Object eol_type;
5928
5929           /* Recover the original EOL format.  */
5930           if (coding->eol_type == CODING_EOL_CR)
5931             {
5932               unsigned char *p;
5933               for (p = buf.data; p < buf.data + produced; p++)
5934                 if (*p == '\n') *p = '\r';
5935             }
5936           else if (coding->eol_type == CODING_EOL_CRLF)
5937             {
5938               int num_eol = 0;
5939               unsigned char *p0, *p1;
5940               for (p0 = buf.data, p1 = p0 + produced; p0 < p1; p0++)
5941                 if (*p0 == '\n') num_eol++;
5942               if (produced + num_eol >= buf.size)
5943                 extend_conversion_buffer (&buf);  /* NOTE(review): extended once only; assumes that makes room for NUM_EOL more bytes -- confirm.  */
5944               for (p0 = buf.data + produced, p1 = p0 + num_eol; p0 > buf.data;)
5945                 {
5946                   /* Copy backward in place, inserting a CR before each LF.  */
5947                   *--p1 = *--p0;
5948                   if (*p0 == '\n') *--p1 = '\r';
5949                 }
5950               produced += num_eol;
5951               produced_char += num_eol;
5952             }
5953           /* Suppress eol-format conversion in the further conversion.  */
5954           coding->eol_type = CODING_EOL_LF;
5955
5956           /* Set the coding system symbol to that for Unix-like EOL.  */
5957           eol_type = Fget (saved_coding_symbol, Qeol_type);
5958           if (VECTORP (eol_type)
5959               && XVECTOR (eol_type)->size == 3
5960               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
5961             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
5962           else
5963             coding->symbol = saved_coding_symbol;
5964
5965
5966         }
5967     }
5968
5969   /* Report the accumulated totals, not just those of the last call.  */
5970   coding->consumed = consumed;
5971   coding->consumed_char = consumed_char;
5972   coding->produced = produced;
5973   coding->produced_char = produced_char;
5974
5975   /* Build the result: skipped head, decoded text, skipped tail.  */
5976   if (coding->dst_multibyte)
5977     newstr = make_uninit_multibyte_string (produced_char + shrinked_bytes,
5978                                            produced + shrinked_bytes);
5979   else
5980     newstr = make_uninit_string (produced + shrinked_bytes);
5981   if (from > 0)
5982     bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
5983   bcopy (buf.data, XSTRING (newstr)->data + from, produced);
5984   if (shrinked_bytes > from)    /* Some bytes were skipped at the tail.  */
5985     bcopy (XSTRING (str)->data + to_byte,
5986            XSTRING (newstr)->data + from + produced,
5987            shrinked_bytes - from);
5988   free_conversion_buffer (&buf);
5989
5990   if (coding->cmp_data && coding->cmp_data->used)
5991     coding_restore_composition (coding, newstr);
5992   coding_free_composition_data (coding);
5993
5994   if (SYMBOLP (coding->post_read_conversion)
5995       && !NILP (Ffboundp (coding->post_read_conversion)))
5996     newstr = run_pre_post_conversion_on_str (newstr, coding, 0);  /* 0: run post_read_conversion.  */
5997
5998   return newstr;
5999 }
5995
5996 Lisp_Object
5997 encode_coding_string (str, coding, nocopy)
5998      Lisp_Object str;
5999      struct coding_system *coding;
6000      int nocopy;
6001 {
6002   int len;
6003   struct conversion_buffer buf;
6004   int from, to, to_byte;
6005   int result;
6006   int shrinked_bytes = 0;
6007   Lisp_Object newstr;
6008   int consumed, consumed_char, produced, produced_char;
6009
6010   if (SYMBOLP (coding->pre_write_conversion)
6011       && !NILP (Ffboundp (coding->pre_write_conversion)))
6012     str = run_pre_post_conversion_on_str (str, coding, 1);
6013
6014   from = 0;
6015   to = XSTRING (str)->size;
6016   to_byte = STRING_BYTES (XSTRING (str));
6017
6018   /* Encoding routines determine the multibyteness of the source text
6019      by coding->src_multibyte.  */
6020   coding->src_multibyte = STRING_MULTIBYTE (str);
6021   coding->dst_multibyte = 0;
6022   if (! CODING_REQUIRE_ENCODING (coding))
6023     {
6024       coding->consumed = STRING_BYTES (XSTRING (str));
6025       coding->consumed_char = XSTRING (str)->size;
6026       if (STRING_MULTIBYTE (str))
6027         {
6028           str = Fstring_as_unibyte (str);
6029           nocopy = 1;
6030         }
6031       coding->produced = STRING_BYTES (XSTRING (str));
6032       coding->produced_char = XSTRING (str)->size;
6033       return (nocopy ? str : Fcopy_sequence (str));
6034     }
6035
6036   if (coding->composing != COMPOSITION_DISABLED)
6037     coding_save_composition (coding, from, to, str);
6038
6039   /* Try to skip the heading and tailing ASCIIs.  */
6040   if (coding->type != coding_type_ccl)
6041     {
6042       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
6043                                 1);
6044       if (from == to_byte)
6045         return (nocopy ? str : Fcopy_sequence (str));
6046       shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
6047     }
6048
6049   len = encoding_buffer_size (coding, to_byte - from);
6050   allocate_conversion_buffer (buf, len);
6051
6052   consumed = consumed_char = produced = produced_char = 0;
6053   while (1)
6054     {
6055       result = encode_coding (coding, XSTRING (str)->data + from + consumed,
6056                               buf.data + produced, to_byte - from - consumed,
6057                               buf.size - produced);
6058       consumed += coding->consumed;
6059       consumed_char += coding->consumed_char;
6060       produced += coding->produced;
6061       produced_char += coding->produced_char;
6062       if (result == CODING_FINISH_NORMAL
6063           || (result == CODING_FINISH_INSUFFICIENT_SRC
6064               && coding->consumed == 0))
6065         break;
6066       /* Now result should be CODING_FINISH_INSUFFICIENT_DST.  */
6067       extend_conversion_buffer (&buf);
6068     }
6069
6070   coding->consumed = consumed;
6071   coding->consumed_char = consumed_char;
6072   coding->produced = produced;
6073   coding->produced_char = produced_char;
6074
6075   newstr = make_uninit_string (produced + shrinked_bytes);
6076   if (from > 0)
6077     bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
6078   bcopy (buf.data, XSTRING (newstr)->data + from, produced);
6079   if (shrinked_bytes > from)
6080     bcopy (XSTRING (str)->data + to_byte,
6081            XSTRING (newstr)->data + from + produced,
6082            shrinked_bytes - from);
6083
6084   free_conversion_buffer (&buf);
6085   coding_free_composition_data (coding);
6086
6087   return newstr;
6088 }
6089
6090 \f
6091 #ifdef emacs
6092 /*** 8. Emacs Lisp library functions ***/
6093
6094 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6095   "Return t if OBJECT is nil or a coding-system.\n\
6096 See the documentation of `make-coding-system' for information\n\
6097 about coding-system objects.")
6098   (obj)
6099      Lisp_Object obj;
6100 {
6101   if (NILP (obj))
6102     return Qt;
6103   if (!SYMBOLP (obj))
6104     return Qnil;
6105   /* Get coding-spec vector for OBJ.  */
6106   obj = Fget (obj, Qcoding_system);
6107   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
6108           ? Qt : Qnil);
6109 }
6110
6111 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6112        Sread_non_nil_coding_system, 1, 1, 0,
6113   "Read a coding system from the minibuffer, prompting with string PROMPT.")
6114   (prompt)
6115      Lisp_Object prompt;
6116 {
6117   Lisp_Object val;
6118   do
6119     {
6120       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6121                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
6122     }
6123   while (XSTRING (val)->size == 0);
6124   return (Fintern (val, Qnil));
6125 }
6126
6127 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
6128   "Read a coding system from the minibuffer, prompting with string PROMPT.\n\
6129 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.")
6130   (prompt, default_coding_system)
6131      Lisp_Object prompt, default_coding_system;
6132 {
6133   Lisp_Object val;
6134   if (SYMBOLP (default_coding_system))
6135     XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
6136   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6137                           Qt, Qnil, Qcoding_system_history,
6138                           default_coding_system, Qnil);
6139   return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
6140 }
6141
6142 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6143        1, 1, 0,
6144   "Check validity of CODING-SYSTEM.\n\
6145 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.\n\
6146 It is valid if it is a symbol with a non-nil `coding-system' property.\n\
6147 The value of property should be a vector of length 5.")
6148   (coding_system)
6149      Lisp_Object coding_system;
6150 {
6151   CHECK_SYMBOL (coding_system, 0);
6152   if (!NILP (Fcoding_system_p (coding_system)))
6153     return coding_system;
6154   while (1)
6155     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
6156 }
6157 \f
6158 Lisp_Object
6159 detect_coding_system (src, src_bytes, highest, multibytep)
6160      unsigned char *src;
6161      int src_bytes, highest;
6162      int multibytep;
6163 {
6164   int coding_mask, eol_type;
6165   Lisp_Object val, tmp;
6166   int dummy;
6167
6168   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy, multibytep);
6169   eol_type  = detect_eol_type (src, src_bytes, &dummy);
6170   if (eol_type == CODING_EOL_INCONSISTENT)
6171     eol_type = CODING_EOL_UNDECIDED;
6172
6173   if (!coding_mask)
6174     {
6175       val = Qundecided;
6176       if (eol_type != CODING_EOL_UNDECIDED)
6177         {
6178           Lisp_Object val2;
6179           val2 = Fget (Qundecided, Qeol_type);
6180           if (VECTORP (val2))
6181             val = XVECTOR (val2)->contents[eol_type];
6182         }
6183       return (highest ? val : Fcons (val, Qnil));
6184     }
6185
6186   /* At first, gather possible coding systems in VAL.  */
6187   val = Qnil;
6188   for (tmp = Vcoding_category_list; CONSP (tmp); tmp = XCDR (tmp))
6189     {
6190       Lisp_Object category_val, category_index;
6191
6192       category_index = Fget (XCAR (tmp), Qcoding_category_index);
6193       category_val = Fsymbol_value (XCAR (tmp));
6194       if (!NILP (category_val)
6195           && NATNUMP (category_index)
6196           && (coding_mask & (1 << XFASTINT (category_index))))
6197         {
6198           val = Fcons (category_val, val);
6199           if (highest)
6200             break;
6201         }
6202     }
6203   if (!highest)
6204     val = Fnreverse (val);
6205
6206   /* Then, replace the elements with subsidiary coding systems.  */
6207   for (tmp = val; CONSP (tmp); tmp = XCDR (tmp))
6208     {
6209       if (eol_type != CODING_EOL_UNDECIDED
6210           && eol_type != CODING_EOL_INCONSISTENT)
6211         {
6212           Lisp_Object eol;
6213           eol = Fget (XCAR (tmp), Qeol_type);
6214           if (VECTORP (eol))
6215             XCAR (tmp) = XVECTOR (eol)->contents[eol_type];
6216         }
6217     }
6218   return (highest ? XCAR (val) : val);
6219 }
6220
6221 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6222        2, 3, 0,
6223   "Detect coding system of the text in the region between START and END.\n\
6224 Return a list of possible coding systems ordered by priority.\n\
6225 \n\
6226 If only ASCII characters are found, it returns a list of single element\n\
6227 `undecided' or its subsidiary coding system according to a detected\n\
6228 end-of-line format.\n\
6229 \n\
6230 If optional argument HIGHEST is non-nil, return the coding system of\n\
6231 highest priority.")
6232   (start, end, highest)
6233      Lisp_Object start, end, highest;
6234 {
6235   int from, to;
6236   int from_byte, to_byte;
6237   int include_anchor_byte = 0;
6238
6239   CHECK_NUMBER_COERCE_MARKER (start, 0);
6240   CHECK_NUMBER_COERCE_MARKER (end, 1);
6241
6242   validate_region (&start, &end);
6243   from = XINT (start), to = XINT (end);
6244   from_byte = CHAR_TO_BYTE (from);
6245   to_byte = CHAR_TO_BYTE (to);
6246
6247   if (from < GPT && to >= GPT)
6248     move_gap_both (to, to_byte);
6249   /* If we an anchor byte `\0' follows the region, we include it in
6250      the detecting source.  Then code detectors can handle the tailing
6251      byte sequence more accurately.
6252
6253      Fix me: This is not an perfect solution.  It is better that we
6254      add one more argument, say LAST_BLOCK, to all detect_coding_XXX.
6255   */
6256   if (to == Z || (to == GPT && GAP_SIZE > 0))
6257     include_anchor_byte = 1;
6258   return detect_coding_system (BYTE_POS_ADDR (from_byte),
6259                                to_byte - from_byte + include_anchor_byte,
6260                                !NILP (highest),
6261                                !NILP (current_buffer
6262                                       ->enable_multibyte_characters));
6263 }
6264
6265 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6266        1, 2, 0,
6267   "Detect coding system of the text in STRING.\n\
6268 Return a list of possible coding systems ordered by priority.\n\
6269 \n\
6270 If only ASCII characters are found, it returns a list of single element\n\
6271 `undecided' or its subsidiary coding system according to a detected\n\
6272 end-of-line format.\n\
6273 \n\
6274 If optional argument HIGHEST is non-nil, return the coding system of\n\
6275 highest priority.")
6276   (string, highest)
6277      Lisp_Object string, highest;
6278 {
6279   CHECK_STRING (string, 0);
6280
6281   return detect_coding_system (XSTRING (string)->data,
6282                                /* "+ 1" is to include the anchor byte
6283                                   `\0'.  With this, code detectors can
6284                                   handle the tailing bytes more
6285                                   accurately.  */
6286                                STRING_BYTES (XSTRING (string)) + 1,
6287                                !NILP (highest),
6288                                STRING_MULTIBYTE (string));
6289 }
6290
6291 /* Return an intersection of lists L1 and L2.  */
6292
6293 static Lisp_Object
6294 intersection (l1, l2)
6295      Lisp_Object l1, l2;
6296 {
6297   Lisp_Object val;
6298
6299   for (val = Qnil; CONSP (l1); l1 = XCDR (l1))
6300     {
6301       if (!NILP (Fmemq (XCAR (l1), l2)))
6302         val = Fcons (XCAR (l1), val);
6303     }
6304   return val;
6305 }
6306
6307
6308 /*  Subroutine for Fsafe_coding_systems_region_internal.
6309
6310     Return a list of coding systems that safely encode the multibyte
6311     text between P and PEND.  SAFE_CODINGS, if non-nil, is a list of
6312     possible coding systems.  If it is nil, it means that we have not
6313     yet found any coding systems.
6314
6315     WORK_TABLE is a copy of the char-table Vchar_coding_system_table.  An
6316     element of WORK_TABLE is set to t once the element is looked up.
6317
6318     If a non-ASCII single byte char is found, set
6319     *single_byte_char_found to 1.  */
6320
6321 static Lisp_Object
6322 find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
6323      unsigned char *p, *pend;
6324      Lisp_Object safe_codings, work_table;
6325      int *single_byte_char_found;
6326 {
6327   int c, len, idx;
6328   Lisp_Object val;
6329
6330   while (p < pend)
6331     {
6332       c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
6333       p += len;
6334       if (ASCII_BYTE_P (c))
6335         /* We can ignore ASCII characters here.  */
6336         continue;
6337       if (SINGLE_BYTE_CHAR_P (c))
6338         *single_byte_char_found = 1;
6339       if (NILP (safe_codings))
6340         continue;
6341       /* Check the safe coding systems for C.  */
6342       val = char_table_ref_and_index (work_table, c, &idx);
6343       if (EQ (val, Qt))
6344         /* This element was already checked.  Ignore it.  */
6345         continue;
6346       /* Remember that we checked this element.  */
6347       CHAR_TABLE_SET (work_table, make_number (idx), Qt);
6348
6349       /* If there are some safe coding systems for C and we have
6350          already found the other set of coding systems for the
6351          different characters, get the intersection of them.  */
6352       if (!EQ (safe_codings, Qt) && !NILP (val))
6353         val = intersection (safe_codings, val);
6354       safe_codings = val;
6355     }
6356   return safe_codings;
6357 }
6358
6359
6360 /* Return a list of coding systems that safely encode the text between
6361    START and END.  If the text contains only ASCII or is unibyte,
6362    return t.  */
6363
6364 DEFUN ("find-coding-systems-region-internal",
6365        Ffind_coding_systems_region_internal,
6366        Sfind_coding_systems_region_internal, 2, 2, 0,
6367   "Internal use only.")
6368   (start, end)
6369      Lisp_Object start, end;
6370 {
6371   Lisp_Object work_table, safe_codings;
6372   int non_ascii_p = 0;
6373   int single_byte_char_found = 0;
6374   unsigned char *p1, *p1end, *p2, *p2end, *p;
6375
6376   if (STRINGP (start))
6377     {
6378       if (!STRING_MULTIBYTE (start))
6379         return Qt;
6380       p1 = XSTRING (start)->data, p1end = p1 + STRING_BYTES (XSTRING (start));
6381       p2 = p2end = p1end;
6382       if (XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
6383         non_ascii_p = 1;
6384     }
6385   else
6386     {
6387       int from, to, stop;
6388
6389       CHECK_NUMBER_COERCE_MARKER (start, 0);
6390       CHECK_NUMBER_COERCE_MARKER (end, 1);
6391       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6392         args_out_of_range (start, end);
6393       if (NILP (current_buffer->enable_multibyte_characters))
6394         return Qt;
6395       from = CHAR_TO_BYTE (XINT (start));
6396       to = CHAR_TO_BYTE (XINT (end));
6397       stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to;
6398       p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from);
6399       if (stop == to)
6400         p2 = p2end = p1end;
6401       else
6402         p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop);
6403       if (XINT (end) - XINT (start) != to - from)
6404         non_ascii_p = 1;
6405     }
6406
6407   if (!non_ascii_p)
6408     {
6409       /* We are sure that the text contains no multibyte character.
6410          Check if it contains eight-bit-graphic.  */
6411       p = p1;
6412       for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++);
6413       if (p == p1end)
6414         {
6415           for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);
6416           if (p == p2end)
6417             return Qt;
6418         }
6419     }
6420
6421   /* The text contains non-ASCII characters.  */
6422   work_table = Fcopy_sequence (Vchar_coding_system_table);
6423   safe_codings = find_safe_codings (p1, p1end, Qt, work_table,
6424                                     &single_byte_char_found);
6425   if (p2 < p2end)
6426     safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
6427                                       &single_byte_char_found);
6428
6429   if (EQ (safe_codings, Qt))
6430     ; /* Nothing to be done.  */
6431   else if (!single_byte_char_found)
6432     {
6433       /* Append generic coding systems.  */
6434       Lisp_Object args[2];
6435       args[0] = safe_codings;
6436       args[1] = Fchar_table_extra_slot (Vchar_coding_system_table,
6437                                         make_number (0));
6438       safe_codings = Fappend (2, args);
6439     }
6440   else
6441     safe_codings = Fcons (Qraw_text,
6442                           Fcons (Qemacs_mule,
6443                                  Fcons (Qno_conversion, safe_codings)));
6444   return safe_codings;
6445 }
6446
6447
6448 Lisp_Object
6449 code_convert_region1 (start, end, coding_system, encodep)
6450      Lisp_Object start, end, coding_system;
6451      int encodep;
6452 {
6453   struct coding_system coding;
6454   int from, to;
6455
6456   CHECK_NUMBER_COERCE_MARKER (start, 0);
6457   CHECK_NUMBER_COERCE_MARKER (end, 1);
6458   CHECK_SYMBOL (coding_system, 2);
6459
6460   validate_region (&start, &end);
6461   from = XFASTINT (start);
6462   to = XFASTINT (end);
6463
6464   if (NILP (coding_system))
6465     return make_number (to - from);
6466
6467   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6468     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
6469
6470   coding.mode |= CODING_MODE_LAST_BLOCK;
6471   coding.src_multibyte = coding.dst_multibyte
6472     = !NILP (current_buffer->enable_multibyte_characters);
6473   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
6474                        &coding, encodep, 1);
6475   Vlast_coding_system_used = coding.symbol;
6476   return make_number (coding.produced_char);
6477 }
6478
6479 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
6480        3, 3, "r\nzCoding system: ",
6481   "Decode the current region from the specified coding system.\n\
6482 When called from a program, takes three arguments:\n\
6483 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
6484 This function sets `last-coding-system-used' to the precise coding system\n\
6485 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
6486 not fully specified.)\n\
6487 It returns the length of the decoded text.")
6488   (start, end, coding_system)
6489      Lisp_Object start, end, coding_system;
6490 {
6491   return code_convert_region1 (start, end, coding_system, 0);
6492 }
6493
6494 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
6495        3, 3, "r\nzCoding system: ",
6496   "Encode the current region into the specified coding system.\n\
6497 When called from a program, takes three arguments:\n\
6498 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
6499 This function sets `last-coding-system-used' to the precise coding system\n\
6500 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
6501 not fully specified.)\n\
6502 It returns the length of the encoded text.")
6503   (start, end, coding_system)
6504      Lisp_Object start, end, coding_system;
6505 {
6506   return code_convert_region1 (start, end, coding_system, 1);
6507 }
6508
6509 Lisp_Object
6510 code_convert_string1 (string, coding_system, nocopy, encodep)
6511      Lisp_Object string, coding_system, nocopy;
6512      int encodep;
6513 {
6514   struct coding_system coding;
6515
6516   CHECK_STRING (string, 0);
6517   CHECK_SYMBOL (coding_system, 1);
6518
6519   if (NILP (coding_system))
6520     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
6521
6522   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6523     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
6524
6525   coding.mode |= CODING_MODE_LAST_BLOCK;
6526   string = (encodep
6527             ? encode_coding_string (string, &coding, !NILP (nocopy))
6528             : decode_coding_string (string, &coding, !NILP (nocopy)));
6529   Vlast_coding_system_used = coding.symbol;
6530
6531   return string;
6532 }
6533
6534 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
6535        2, 3, 0,
6536   "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
6537 Optional arg NOCOPY non-nil means it is OK to return STRING itself\n\
6538 if the decoding operation is trivial.\n\
6539 This function sets `last-coding-system-used' to the precise coding system\n\
6540 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
6541 not fully specified.)")
6542   (string, coding_system, nocopy)
6543      Lisp_Object string, coding_system, nocopy;
6544 {
6545   return code_convert_string1 (string, coding_system, nocopy, 0);
6546 }
6547
6548 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
6549        2, 3, 0,
6550   "Encode STRING to CODING-SYSTEM, and return the result.\n\
6551 Optional arg NOCOPY non-nil means it is OK to return STRING itself\n\
6552 if the encoding operation is trivial.\n\
6553 This function sets `last-coding-system-used' to the precise coding system\n\
6554 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
6555 not fully specified.)")
6556   (string, coding_system, nocopy)
6557      Lisp_Object string, coding_system, nocopy;
6558 {
6559   return code_convert_string1 (string, coding_system, nocopy, 1);
6560 }
6561
6562 /* Encode or decode STRING according to CODING_SYSTEM.
6563    Do not set Vlast_coding_system_used.
6564
6565    This function is called only from macros DECODE_FILE and
6566    ENCODE_FILE, thus we ignore character composition.  */
6567
6568 Lisp_Object
6569 code_convert_string_norecord (string, coding_system, encodep)
6570      Lisp_Object string, coding_system;
6571      int encodep;
6572 {
6573   struct coding_system coding;
6574
6575   CHECK_STRING (string, 0);
6576   CHECK_SYMBOL (coding_system, 1);
6577
6578   if (NILP (coding_system))
6579     return string;
6580
6581   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6582     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
6583
6584   coding.composing = COMPOSITION_DISABLED;
6585   coding.mode |= CODING_MODE_LAST_BLOCK;
6586   return (encodep
6587           ? encode_coding_string (string, &coding, 1)
6588           : decode_coding_string (string, &coding, 1));
6589 }
6590 \f
6591 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
6592   "Decode a Japanese character which has CODE in shift_jis encoding.\n\
6593 Return the corresponding character.")
6594   (code)
6595      Lisp_Object code;
6596 {
6597   unsigned char c1, c2, s1, s2;
6598   Lisp_Object val;
6599
6600   CHECK_NUMBER (code, 0);
6601   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
6602   if (s1 == 0)
6603     {
6604       if (s2 < 0x80)
6605         XSETFASTINT (val, s2);
6606       else if (s2 >= 0xA0 || s2 <= 0xDF)
6607         XSETFASTINT (val, MAKE_CHAR (charset_katakana_jisx0201, s2, 0));
6608       else
6609         error ("Invalid Shift JIS code: %x", XFASTINT (code));
6610     }
6611   else
6612     {
6613       if ((s1 < 0x80 || s1 > 0x9F && s1 < 0xE0 || s1 > 0xEF)
6614           || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC))
6615         error ("Invalid Shift JIS code: %x", XFASTINT (code));
6616       DECODE_SJIS (s1, s2, c1, c2);
6617       XSETFASTINT (val, MAKE_CHAR (charset_jisx0208, c1, c2));
6618     }
6619   return val;
6620 }
6621
6622 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
6623   "Encode a Japanese character CHAR to shift_jis encoding.\n\
6624 Return the corresponding code in SJIS.")
6625   (ch)
6626      Lisp_Object ch;
6627 {
6628   int charset, c1, c2, s1, s2;
6629   Lisp_Object val;
6630
6631   CHECK_NUMBER (ch, 0);
6632   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
6633   if (charset == CHARSET_ASCII)
6634     {
6635       val = ch;
6636     }
6637   else if (charset == charset_jisx0208
6638            && c1 > 0x20 && c1 < 0x7F && c2 > 0x20 && c2 < 0x7F)
6639     {
6640       ENCODE_SJIS (c1, c2, s1, s2);
6641       XSETFASTINT (val, (s1 << 8) | s2);
6642     }
6643   else if (charset == charset_katakana_jisx0201
6644            && c1 > 0x20 && c2 < 0xE0)
6645     {
6646       XSETFASTINT (val, c1 | 0x80);
6647     }
6648   else
6649     error ("Can't encode to shift_jis: %d", XFASTINT (ch));
6650   return val;
6651 }
6652
6653 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
6654   "Decode a Big5 character which has CODE in BIG5 coding system.\n\
6655 Return the corresponding character.")
6656   (code)
6657      Lisp_Object code;
6658 {
6659   int charset;
6660   unsigned char b1, b2, c1, c2;
6661   Lisp_Object val;
6662
6663   CHECK_NUMBER (code, 0);
6664   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
6665   if (b1 == 0)
6666     {
6667       if (b2 >= 0x80)
6668         error ("Invalid BIG5 code: %x", XFASTINT (code));
6669       val = code;
6670     }
6671   else
6672     {
6673       if ((b1 < 0xA1 || b1 > 0xFE)
6674           || (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE))
6675         error ("Invalid BIG5 code: %x", XFASTINT (code));
6676       DECODE_BIG5 (b1, b2, charset, c1, c2);
6677       XSETFASTINT (val, MAKE_CHAR (charset, c1, c2));
6678     }
6679   return val;
6680 }
6681
6682 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
6683   "Encode the Big5 character CHAR to BIG5 coding system.\n\
6684 Return the corresponding character code in Big5.")
6685   (ch)
6686      Lisp_Object ch;
6687 {
6688   int charset, c1, c2, b1, b2;
6689   Lisp_Object val;
6690
6691   CHECK_NUMBER (ch, 0);
6692   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
6693   if (charset == CHARSET_ASCII)
6694     {
6695       val = ch;
6696     }
6697   else if ((charset == charset_big5_1
6698             && (XFASTINT (ch) >= 0x250a1 && XFASTINT (ch) <= 0x271ec))
6699            || (charset == charset_big5_2
6700                && XFASTINT (ch) >= 0x290a1 && XFASTINT (ch) <= 0x2bdb2))
6701     {
6702       ENCODE_BIG5 (charset, c1, c2, b1, b2);
6703       XSETFASTINT (val, (b1 << 8) | b2);
6704     }
6705   else
6706     error ("Can't encode to Big5: %d", XFASTINT (ch));
6707   return val;
6708 }
6709 \f
6710 DEFUN ("set-terminal-coding-system-internal",
6711        Fset_terminal_coding_system_internal,
6712        Sset_terminal_coding_system_internal, 1, 1, 0, "")
6713   (coding_system)
6714      Lisp_Object coding_system;
6715 {
6716   CHECK_SYMBOL (coding_system, 0);
6717   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
6718   /* We had better not send unsafe characters to terminal.  */
6719   terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
6720   /* Character composition should be disabled.  */
6721   terminal_coding.composing = COMPOSITION_DISABLED;
6722   /* Error notification should be suppressed.  */
6723   terminal_coding.suppress_error = 1;
6724   terminal_coding.src_multibyte = 1;
6725   terminal_coding.dst_multibyte = 0;
6726   return Qnil;
6727 }
6728
6729 DEFUN ("set-safe-terminal-coding-system-internal",
6730        Fset_safe_terminal_coding_system_internal,
6731        Sset_safe_terminal_coding_system_internal, 1, 1, 0, "")
6732   (coding_system)
6733      Lisp_Object coding_system;
6734 {
6735   CHECK_SYMBOL (coding_system, 0);
6736   setup_coding_system (Fcheck_coding_system (coding_system),
6737                        &safe_terminal_coding);
6738   /* Character composition should be disabled.  */
6739   safe_terminal_coding.composing = COMPOSITION_DISABLED;
6740   /* Error notification should be suppressed.  */
6741   terminal_coding.suppress_error = 1;
6742   safe_terminal_coding.src_multibyte = 1;
6743   safe_terminal_coding.dst_multibyte = 0;
6744   return Qnil;
6745 }
6746
6747 DEFUN ("terminal-coding-system",
6748        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
6749   "Return coding system specified for terminal output.")
6750   ()
6751 {
6752   return terminal_coding.symbol;
6753 }
6754
6755 DEFUN ("set-keyboard-coding-system-internal",
6756        Fset_keyboard_coding_system_internal,
6757        Sset_keyboard_coding_system_internal, 1, 1, 0, "")
6758   (coding_system)
6759      Lisp_Object coding_system;
6760 {
6761   CHECK_SYMBOL (coding_system, 0);
6762   setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
6763   /* Character composition should be disabled.  */
6764   keyboard_coding.composing = COMPOSITION_DISABLED;
6765   return Qnil;
6766 }
6767
6768 DEFUN ("keyboard-coding-system",
6769        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
6770   "Return coding system specified for decoding keyboard input.")
6771   ()
6772 {
6773   return keyboard_coding.symbol;
6774 }
6775
6776 \f
6777 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
6778        Sfind_operation_coding_system,  1, MANY, 0,
6779   "Choose a coding system for an operation based on the target name.\n\
6780 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).\n\
6781 DECODING-SYSTEM is the coding system to use for decoding\n\
6782 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
6783 for encoding (in case OPERATION does encoding).\n\
6784 \n\
6785 The first argument OPERATION specifies an I/O primitive:\n\
6786   For file I/O, `insert-file-contents' or `write-region'.\n\
6787   For process I/O, `call-process', `call-process-region', or `start-process'.\n\
6788   For network I/O, `open-network-stream'.\n\
6789 \n\
6790 The remaining arguments should be the same arguments that were passed\n\
6791 to the primitive.  Depending on which primitive, one of those arguments\n\
6792 is selected as the TARGET.  For example, if OPERATION does file I/O,\n\
6793 whichever argument specifies the file name is TARGET.\n\
6794 \n\
6795 TARGET has a meaning which depends on OPERATION:\n\
6796   For file I/O, TARGET is a file name.\n\
6797   For process I/O, TARGET is a process name.\n\
6798   For network I/O, TARGET is a service name or a port number\n\
6799 \n\
6800 This function looks up what specified for TARGET in,\n\
6801 `file-coding-system-alist', `process-coding-system-alist',\n\
6802 or `network-coding-system-alist' depending on OPERATION.\n\
6803 They may specify a coding system, a cons of coding systems,\n\
6804 or a function symbol to call.\n\
6805 In the last case, we call the function with one argument,\n\
6806 which is a list of all the arguments given to this function.")
6807   (nargs, args)
6808      int nargs;
6809      Lisp_Object *args;
6810 {
6811   Lisp_Object operation, target_idx, target, val;
6812   register Lisp_Object chain;
6813
6814   if (nargs < 2)
6815     error ("Too few arguments");
6816   operation = args[0];
6817   if (!SYMBOLP (operation)
6818       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
6819     error ("Invalid first argument");
6820   if (nargs < 1 + XINT (target_idx))
6821     error ("Too few arguments for operation: %s",
6822            XSYMBOL (operation)->name->data);
6823   target = args[XINT (target_idx) + 1];
6824   if (!(STRINGP (target)
6825         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
6826     error ("Invalid argument %d", XINT (target_idx) + 1);
6827
6828   chain = ((EQ (operation, Qinsert_file_contents)
6829             || EQ (operation, Qwrite_region))
6830            ? Vfile_coding_system_alist
6831            : (EQ (operation, Qopen_network_stream)
6832               ? Vnetwork_coding_system_alist
6833               : Vprocess_coding_system_alist));
6834   if (NILP (chain))
6835     return Qnil;
6836
6837   for (; CONSP (chain); chain = XCDR (chain))
6838     {
6839       Lisp_Object elt;
6840       elt = XCAR (chain);
6841
6842       if (CONSP (elt)
6843           && ((STRINGP (target)
6844                && STRINGP (XCAR (elt))
6845                && fast_string_match (XCAR (elt), target) >= 0)
6846               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
6847         {
6848           val = XCDR (elt);
6849           /* Here, if VAL is both a valid coding system and a valid
6850              function symbol, we return VAL as a coding system.  */
6851           if (CONSP (val))
6852             return val;
6853           if (! SYMBOLP (val))
6854             return Qnil;
6855           if (! NILP (Fcoding_system_p (val)))
6856             return Fcons (val, val);
6857           if (! NILP (Ffboundp (val)))
6858             {
6859               val = call1 (val, Flist (nargs, args));
6860               if (CONSP (val))
6861                 return val;
6862               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
6863                 return Fcons (val, val);
6864             }
6865           return Qnil;
6866         }
6867     }
6868   return Qnil;
6869 }
6870
6871 DEFUN ("update-coding-systems-internal",  Fupdate_coding_systems_internal,
6872        Supdate_coding_systems_internal, 0, 0, 0,
6873   "Update internal database for ISO2022 and CCL based coding systems.\n\
6874 When values of any coding categories are changed, you must\n\
6875 call this function")
6876   ()
6877 {
6878   int i;
6879
6880   for (i = CODING_CATEGORY_IDX_EMACS_MULE; i < CODING_CATEGORY_IDX_MAX; i++)
6881     {
6882       Lisp_Object val;
6883
6884       val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value;
6885       if (!NILP (val))
6886         {
6887           if (! coding_system_table[i])
6888             coding_system_table[i] = ((struct coding_system *)
6889                                       xmalloc (sizeof (struct coding_system)));
6890           setup_coding_system (val, coding_system_table[i]);
6891         }
6892       else if (coding_system_table[i])
6893         {
6894           xfree (coding_system_table[i]);
6895           coding_system_table[i] = NULL;
6896         }
6897     }
6898
6899   return Qnil;
6900 }
6901
6902 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
6903        Sset_coding_priority_internal, 0, 0, 0,
6904   "Update internal database for the current value of `coding-category-list'.\n\
6905 This function is internal use only.")
6906   ()
6907 {
6908   int i = 0, idx;
6909   Lisp_Object val;
6910
6911   val = Vcoding_category_list;
6912
6913   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
6914     {
6915       if (! SYMBOLP (XCAR (val)))
6916         break;
6917       idx = XFASTINT (Fget (XCAR (val), Qcoding_category_index));
6918       if (idx >= CODING_CATEGORY_IDX_MAX)
6919         break;
6920       coding_priorities[i++] = (1 << idx);
6921       val = XCDR (val);
6922     }
6923   /* If coding-category-list is valid and contains all coding
6924      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
6925      the following code saves Emacs from crashing.  */
6926   while (i < CODING_CATEGORY_IDX_MAX)
6927     coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
6928
6929   return Qnil;
6930 }
6931
6932 #endif /* emacs */
6933
6934 \f
6935 /*** 9. Post-amble ***/
6936
6937 void
6938 init_coding_once ()
6939 {
6940   int i;
6941
6942   /* Emacs' internal format specific initialize routine.  */
6943   for (i = 0; i <= 0x20; i++)
6944     emacs_code_class[i] = EMACS_control_code;
6945   emacs_code_class[0x0A] = EMACS_linefeed_code;
6946   emacs_code_class[0x0D] = EMACS_carriage_return_code;
6947   for (i = 0x21 ; i < 0x7F; i++)
6948     emacs_code_class[i] = EMACS_ascii_code;
6949   emacs_code_class[0x7F] = EMACS_control_code;
6950   for (i = 0x80; i < 0xFF; i++)
6951     emacs_code_class[i] = EMACS_invalid_code;
6952   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
6953   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
6954   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
6955   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
6956
6957   /* ISO2022 specific initialize routine.  */
6958   for (i = 0; i < 0x20; i++)
6959     iso_code_class[i] = ISO_control_0;
6960   for (i = 0x21; i < 0x7F; i++)
6961     iso_code_class[i] = ISO_graphic_plane_0;
6962   for (i = 0x80; i < 0xA0; i++)
6963     iso_code_class[i] = ISO_control_1;
6964   for (i = 0xA1; i < 0xFF; i++)
6965     iso_code_class[i] = ISO_graphic_plane_1;
6966   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
6967   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
6968   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
6969   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
6970   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
6971   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
6972   iso_code_class[ISO_CODE_ESC] = ISO_escape;
6973   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
6974   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
6975   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
6976
6977   setup_coding_system (Qnil, &keyboard_coding);
6978   setup_coding_system (Qnil, &terminal_coding);
6979   setup_coding_system (Qnil, &safe_terminal_coding);
6980   setup_coding_system (Qnil, &default_buffer_file_coding);
6981
6982   bzero (coding_system_table, sizeof coding_system_table);
6983
6984   bzero (ascii_skip_code, sizeof ascii_skip_code);
6985   for (i = 0; i < 128; i++)
6986     ascii_skip_code[i] = 1;
6987
6988 #if defined (MSDOS) || defined (WINDOWSNT)
6989   system_eol_type = CODING_EOL_CRLF;
6990 #else
6991   system_eol_type = CODING_EOL_LF;
6992 #endif
6993
6994   inhibit_pre_post_conversion = 0;
6995 }
6996
6997 #ifdef emacs
6998
6999 void
7000 syms_of_coding ()
7001 {
7002   Qtarget_idx = intern ("target-idx");
7003   staticpro (&Qtarget_idx);
7004
7005   Qcoding_system_history = intern ("coding-system-history");
7006   staticpro (&Qcoding_system_history);
7007   Fset (Qcoding_system_history, Qnil);
7008
7009   /* Target FILENAME is the first argument.  */
7010   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
7011   /* Target FILENAME is the third argument.  */
7012   Fput (Qwrite_region, Qtarget_idx, make_number (2));
7013
7014   Qcall_process = intern ("call-process");
7015   staticpro (&Qcall_process);
7016   /* Target PROGRAM is the first argument.  */
7017   Fput (Qcall_process, Qtarget_idx, make_number (0));
7018
7019   Qcall_process_region = intern ("call-process-region");
7020   staticpro (&Qcall_process_region);
7021   /* Target PROGRAM is the third argument.  */
7022   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
7023
7024   Qstart_process = intern ("start-process");
7025   staticpro (&Qstart_process);
7026   /* Target PROGRAM is the third argument.  */
7027   Fput (Qstart_process, Qtarget_idx, make_number (2));
7028
7029   Qopen_network_stream = intern ("open-network-stream");
7030   staticpro (&Qopen_network_stream);
7031   /* Target SERVICE is the fourth argument.  */
7032   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
7033
7034   Qcoding_system = intern ("coding-system");
7035   staticpro (&Qcoding_system);
7036
7037   Qeol_type = intern ("eol-type");
7038   staticpro (&Qeol_type);
7039
7040   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
7041   staticpro (&Qbuffer_file_coding_system);
7042
7043   Qpost_read_conversion = intern ("post-read-conversion");
7044   staticpro (&Qpost_read_conversion);
7045
7046   Qpre_write_conversion = intern ("pre-write-conversion");
7047   staticpro (&Qpre_write_conversion);
7048
7049   Qno_conversion = intern ("no-conversion");
7050   staticpro (&Qno_conversion);
7051
7052   Qundecided = intern ("undecided");
7053   staticpro (&Qundecided);
7054
7055   Qcoding_system_p = intern ("coding-system-p");
7056   staticpro (&Qcoding_system_p);
7057
7058   Qcoding_system_error = intern ("coding-system-error");
7059   staticpro (&Qcoding_system_error);
7060
7061   Fput (Qcoding_system_error, Qerror_conditions,
7062         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
7063   Fput (Qcoding_system_error, Qerror_message,
7064         build_string ("Invalid coding system"));
7065
7066   Qcoding_category = intern ("coding-category");
7067   staticpro (&Qcoding_category);
7068   Qcoding_category_index = intern ("coding-category-index");
7069   staticpro (&Qcoding_category_index);
7070
7071   Vcoding_category_table
7072     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
7073   staticpro (&Vcoding_category_table);
7074   {
7075     int i;
7076     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
7077       {
7078         XVECTOR (Vcoding_category_table)->contents[i]
7079           = intern (coding_category_name[i]);
7080         Fput (XVECTOR (Vcoding_category_table)->contents[i],
7081               Qcoding_category_index, make_number (i));
7082       }
7083   }
7084
7085   Qtranslation_table = intern ("translation-table");
7086   staticpro (&Qtranslation_table);
7087   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
7088
7089   Qtranslation_table_id = intern ("translation-table-id");
7090   staticpro (&Qtranslation_table_id);
7091
7092   Qtranslation_table_for_decode = intern ("translation-table-for-decode");
7093   staticpro (&Qtranslation_table_for_decode);
7094
7095   Qtranslation_table_for_encode = intern ("translation-table-for-encode");
7096   staticpro (&Qtranslation_table_for_encode);
7097
7098   Qsafe_chars = intern ("safe-chars");
7099   staticpro (&Qsafe_chars);
7100
7101   Qchar_coding_system = intern ("char-coding-system");
7102   staticpro (&Qchar_coding_system);
7103
7104   /* Intern this now in case it isn't already done.
7105      Setting this variable twice is harmless.
7106      But don't staticpro it here--that is done in alloc.c.  */
7107   Qchar_table_extra_slots = intern ("char-table-extra-slots");
7108   Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
7109   Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (1));
7110
7111   Qvalid_codes = intern ("valid-codes");
7112   staticpro (&Qvalid_codes);
7113
7114   Qemacs_mule = intern ("emacs-mule");
7115   staticpro (&Qemacs_mule);
7116
7117   Qraw_text = intern ("raw-text");
7118   staticpro (&Qraw_text);
7119
7120   defsubr (&Scoding_system_p);
7121   defsubr (&Sread_coding_system);
7122   defsubr (&Sread_non_nil_coding_system);
7123   defsubr (&Scheck_coding_system);
7124   defsubr (&Sdetect_coding_region);
7125   defsubr (&Sdetect_coding_string);
7126   defsubr (&Sfind_coding_systems_region_internal);
7127   defsubr (&Sdecode_coding_region);
7128   defsubr (&Sencode_coding_region);
7129   defsubr (&Sdecode_coding_string);
7130   defsubr (&Sencode_coding_string);
7131   defsubr (&Sdecode_sjis_char);
7132   defsubr (&Sencode_sjis_char);
7133   defsubr (&Sdecode_big5_char);
7134   defsubr (&Sencode_big5_char);
7135   defsubr (&Sset_terminal_coding_system_internal);
7136   defsubr (&Sset_safe_terminal_coding_system_internal);
7137   defsubr (&Sterminal_coding_system);
7138   defsubr (&Sset_keyboard_coding_system_internal);
7139   defsubr (&Skeyboard_coding_system);
7140   defsubr (&Sfind_operation_coding_system);
7141   defsubr (&Supdate_coding_systems_internal);
7142   defsubr (&Sset_coding_priority_internal);
7143
7144   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
7145     "List of coding systems.\n\
7146 \n\
7147 Do not alter the value of this variable manually.  This variable should be\n\
7148 updated by the functions `make-coding-system' and\n\
7149 `define-coding-system-alias'.");
7150   Vcoding_system_list = Qnil;
7151
7152   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
7153     "Alist of coding system names.\n\
7154 Each element is one element list of coding system name.\n\
7155 This variable is given to `completing-read' as TABLE argument.\n\
7156 \n\
7157 Do not alter the value of this variable manually.  This variable should be\n\
7158 updated by the functions `make-coding-system' and\n\
7159 `define-coding-system-alias'.");
7160   Vcoding_system_alist = Qnil;
7161
7162   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
7163     "List of coding-categories (symbols) ordered by priority.\n\
7164 \n\
7165 On detecting a coding system, Emacs tries code detection algorithms\n\
7166 associated with each coding-category one by one in this order.  When\n\
7167 one algorithm agrees with a byte sequence of source text, the coding\n\
7168 system bound to the corresponding coding-category is selected.");
7169   {
7170     int i;
7171
7172     Vcoding_category_list = Qnil;
7173     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
7174       Vcoding_category_list
7175         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
7176                  Vcoding_category_list);
7177   }
7178
7179   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
7180     "Specify the coding system for read operations.\n\
7181 It is useful to bind this variable with `let', but do not set it globally.\n\
7182 If the value is a coding system, it is used for decoding on read operation.\n\
7183 If not, an appropriate element is used from one of the coding system alists:\n\
7184 There are three such tables, `file-coding-system-alist',\n\
7185 `process-coding-system-alist', and `network-coding-system-alist'.");
7186   Vcoding_system_for_read = Qnil;
7187
7188   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
7189     "Specify the coding system for write operations.\n\
7190 Programs bind this variable with `let', but you should not set it globally.\n\
7191 If the value is a coding system, it is used for encoding of output,\n\
7192 when writing it to a file and when sending it to a file or subprocess.\n\
7193 \n\
7194 If this does not specify a coding system, an appropriate element\n\
7195 is used from one of the coding system alists:\n\
7196 There are three such tables, `file-coding-system-alist',\n\
7197 `process-coding-system-alist', and `network-coding-system-alist'.\n\
7198 For output to files, if the above procedure does not specify a coding system,\n\
7199 the value of `buffer-file-coding-system' is used.");
7200   Vcoding_system_for_write = Qnil;
7201
7202   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
7203     "Coding system used in the latest file or process I/O.");
7204   Vlast_coding_system_used = Qnil;
7205
7206   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
7207     "*Non-nil means always inhibit code conversion of end-of-line format.\n\
7208 See info node `Coding Systems' and info node `Text and Binary' concerning\n\
7209 such conversion.");
7210   inhibit_eol_conversion = 0;
7211
7212   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
7213     "Non-nil means process buffer inherits coding system of process output.\n\
7214 Bind it to t if the process output is to be treated as if it were a file\n\
7215 read from some filesystem.");
7216   inherit_process_coding_system = 0;
7217
7218   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
7219     "Alist to decide a coding system to use for a file I/O operation.\n\
7220 The format is ((PATTERN . VAL) ...),\n\
7221 where PATTERN is a regular expression matching a file name,\n\
7222 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
7223 If VAL is a coding system, it is used for both decoding and encoding\n\
7224 the file contents.\n\
7225 If VAL is a cons of coding systems, the car part is used for decoding,\n\
7226 and the cdr part is used for encoding.\n\
7227 If VAL is a function symbol, the function must return a coding system\n\
7228 or a cons of coding systems which are used as above.\n\
7229 \n\
7230 See also the function `find-operation-coding-system'\n\
7231 and the variable `auto-coding-alist'.");
7232   Vfile_coding_system_alist = Qnil;
7233
7234   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
7235     "Alist to decide a coding system to use for a process I/O operation.\n\
7236 The format is ((PATTERN . VAL) ...),\n\
7237 where PATTERN is a regular expression matching a program name,\n\
7238 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
7239 If VAL is a coding system, it is used for both decoding what received\n\
7240 from the program and encoding what sent to the program.\n\
7241 If VAL is a cons of coding systems, the car part is used for decoding,\n\
7242 and the cdr part is used for encoding.\n\
7243 If VAL is a function symbol, the function must return a coding system\n\
7244 or a cons of coding systems which are used as above.\n\
7245 \n\
7246 See also the function `find-operation-coding-system'.");
7247   Vprocess_coding_system_alist = Qnil;
7248
7249   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
7250     "Alist to decide a coding system to use for a network I/O operation.\n\
7251 The format is ((PATTERN . VAL) ...),\n\
7252 where PATTERN is a regular expression matching a network service name\n\
7253 or is a port number to connect to,\n\
7254 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
7255 If VAL is a coding system, it is used for both decoding what received\n\
7256 from the network stream and encoding what sent to the network stream.\n\
7257 If VAL is a cons of coding systems, the car part is used for decoding,\n\
7258 and the cdr part is used for encoding.\n\
7259 If VAL is a function symbol, the function must return a coding system\n\
7260 or a cons of coding systems which are used as above.\n\
7261 \n\
7262 See also the function `find-operation-coding-system'.");
7263   Vnetwork_coding_system_alist = Qnil;
7264
7265   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
7266     "Coding system to use with system messages.");
7267   Vlocale_coding_system = Qnil;
7268
7269   /* The eol mnemonics are reset in startup.el system-dependently.  */
7270   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
7271     "*String displayed in mode line for UNIX-like (LF) end-of-line format.");
7272   eol_mnemonic_unix = build_string (":");
7273
7274   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
7275     "*String displayed in mode line for DOS-like (CRLF) end-of-line format.");
7276   eol_mnemonic_dos = build_string ("\\");
7277
7278   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
7279     "*String displayed in mode line for MAC-like (CR) end-of-line format.");
7280   eol_mnemonic_mac = build_string ("/");
7281
7282   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
7283     "*String displayed in mode line when end-of-line format is not yet determined.");
7284   eol_mnemonic_undecided = build_string (":");
7285
7286   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
7287     "*Non-nil enables character translation while encoding and decoding.");
7288   Venable_character_translation = Qt;
7289
7290   DEFVAR_LISP ("standard-translation-table-for-decode",
7291     &Vstandard_translation_table_for_decode,
7292     "Table for translating characters while decoding.");
7293   Vstandard_translation_table_for_decode = Qnil;
7294
7295   DEFVAR_LISP ("standard-translation-table-for-encode",
7296     &Vstandard_translation_table_for_encode,
7297     "Table for translating characters while encoding.");
7298   Vstandard_translation_table_for_encode = Qnil;
7299
7300   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
7301     "Alist of charsets vs revision numbers.\n\
7302 While encoding, if a charset (car part of an element) is found,\n\
7303 designate it with the escape sequence identifying revision (cdr part of the element).");
7304   Vcharset_revision_alist = Qnil;
7305
7306   DEFVAR_LISP ("default-process-coding-system",
7307                &Vdefault_process_coding_system,
7308     "Cons of coding systems used for process I/O by default.\n\
7309 The car part is used for decoding a process output,\n\
7310 the cdr part is used for encoding a text to be sent to a process.");
7311   Vdefault_process_coding_system = Qnil;
7312
7313   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
7314     "Table of extra Latin codes in the range 128..159 (inclusive).\n\
7315 This is a vector of length 256.\n\
7316 If Nth element is non-nil, the existence of code N in a file\n\
7317 \(or output of subprocess) doesn't prevent it to be detected as\n\
7318 a coding system of ISO 2022 variant which has a flag\n\
7319 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file\n\
7320 or reading output of a subprocess.\n\
7321 Only 128th through 159th elements has a meaning.");
7322   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
7323
7324   DEFVAR_LISP ("select-safe-coding-system-function",
7325                &Vselect_safe_coding_system_function,
7326     "Function to call to select safe coding system for encoding a text.\n\
7327 \n\
7328 If set, this function is called to force a user to select a proper\n\
7329 coding system which can encode the text in the case that a default\n\
7330 coding system used in each operation can't encode the text.\n\
7331 \n\
7332 The default value is `select-safe-coding-system' (which see).");
7333   Vselect_safe_coding_system_function = Qnil;
7334
7335   DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table,
7336     "Char-table containing safe coding systems of each characters.\n\
7337 Each element doesn't include such generic coding systems that can\n\
7338 encode any characters.   They are in the first extra slot.");
7339   Vchar_coding_system_table = Fmake_char_table (Qchar_coding_system, Qnil);
7340
7341   DEFVAR_BOOL ("inhibit-iso-escape-detection",
7342                &inhibit_iso_escape_detection,
7343     "If non-nil, Emacs ignores ISO2022's escape sequence on code detection.\n\
7344 \n\
7345 By default, on reading a file, Emacs tries to detect how the text is\n\
7346 encoded.  This code detection is sensitive to escape sequences.  If\n\
7347 the sequence is valid as ISO2022, the code is determined as one of\n\
7348 the ISO2022 encodings, and the file is decoded by the corresponding\n\
7349 coding system (e.g. `iso-2022-7bit').\n\
7350 \n\
7351 However, there may be a case that you want to read escape sequences in\n\
7352 a file as is.  In such a case, you can set this variable to non-nil.\n\
7353 Then, as the code detection ignores any escape sequences, no file is\n\
7354 detected as encoded in some ISO2022 encoding.  The result is that all\n\
7355 escape sequences become visible in a buffer.\n\
7356 \n\
7357 The default value is nil, and it is strongly recommended not to change\n\
7358 it.  That is because many Emacs Lisp source files that contain\n\
7359 non-ASCII characters are encoded by the coding system `iso-2022-7bit'\n\
7360 in Emacs's distribution, and they won't be decoded correctly on\n\
7361 reading if you suppress escape sequence detection.\n\
7362 \n\
7363 The other way to read escape sequences in a file without decoding is\n\
7364 to explicitly specify some coding system that doesn't use ISO2022's\n\
7365 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].");
7366   inhibit_iso_escape_detection = 0;
7367 }
7368
7369 char *
7370 emacs_strerror (error_number)
7371      int error_number;
7372 {
7373   char *str;
7374
7375   synchronize_system_messages_locale ();
7376   str = strerror (error_number);
7377
7378   if (! NILP (Vlocale_coding_system))
7379     {
7380       Lisp_Object dec = code_convert_string_norecord (build_string (str),
7381                                                       Vlocale_coding_system,
7382                                                       0);
7383       str = (char *) XSTRING (dec)->data;
7384     }
7385
7386   return str;
7387 }
7388
7389 #endif /* emacs */
7390