1 /* Coding system handler (conversion, detection, etc.).
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /*** TABLE OF CONTENTS ***
25 2. Emacs' internal format (emacs-mule) handlers
27 4. Shift-JIS and BIG5 handlers
29 6. End-of-line handlers
30 7. C library functions
31 8. Emacs Lisp library functions
36 /*** GENERAL NOTE on CODING SYSTEM ***
38 Coding system is an encoding mechanism of one or more character
39 sets. Here's a list of coding systems which Emacs can handle. When
40 we say "decode", it means converting some other coding system to
41 Emacs' internal format (emacs-mule), and when we say "encode",
42 it means converting the coding system emacs-mule to some other
45 0. Emacs' internal format (emacs-mule)
47 Emacs itself holds a multi-lingual character in a buffer and a string
48 in a special format. Details are described in section 2.
52 The most famous coding system for multiple character sets. X's
53 Compound Text, various EUCs (Extended Unix Code), and coding
54 systems used in Internet communication such as ISO-2022-JP are
55 all variants of ISO2022. Details are described in section 3.
57 2. SJIS (or Shift-JIS or MS-Kanji-Code)
59 A coding system to encode character sets: ASCII, JISX0201, and
60 JISX0208. Widely used for PC's in Japan. Details are described in
65 A coding system to encode character sets: ASCII and Big5. Widely
66 used by Chinese (mainly in Taiwan and Hong Kong). Details are
67 described in section 4. In this file, when we write "BIG5"
68 (all uppercase), we mean the coding system, and when we write
69 "Big5" (capitalized), we mean the character set.
73 A coding system for a text containing random 8-bit code. Emacs does
74 no code conversion on such a text except for end-of-line format.
78 If a user wants to read/write a text encoded in a coding system not
79 listed above, he can supply a decoder and an encoder for it in CCL
80 (Code Conversion Language) programs. Emacs executes the CCL program
81 while reading/writing.
83 Emacs represents a coding system by a Lisp symbol that has a property
84 `coding-system'. But, before actually using the coding system, the
85 information about it is set in a structure of type `struct
86 coding_system' for rapid processing. See section 6 for more details.
90 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
92 How end-of-line of a text is encoded depends on a system. For
93 instance, Unix's format is just one byte of `line-feed' code,
94 whereas DOS's format is two-byte sequence of `carriage-return' and
95 `line-feed' codes. MacOS's format is usually one byte of
98 Since text characters encoding and end-of-line encoding are
99 independent, any coding system described above can take
100 any format of end-of-line. So, Emacs has information of format of
101 end-of-line in each coding-system. See section 6 for more details.
105 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
107 These functions check if a text between SRC and SRC_END is encoded
108 in the coding system category XXX. Each returns an integer value in
109 which appropriate flag bits for the category XXX are set. The flag
110 bits are defined in macros CODING_CATEGORY_MASK_XXX. Below is the
111 template of these functions. */
114 detect_coding_emacs_mule (src
, src_end
)
115 unsigned char *src
, *src_end
;
121 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
123 These functions decode SRC_BYTES length text at SOURCE encoded in
124 CODING to Emacs' internal format (emacs-mule). The resulting text
125 goes to a place pointed to by DESTINATION, the length of which
126 should not exceed DST_BYTES. These functions set the information of
127 original and decoded texts in the members produced, produced_char,
128 consumed, and consumed_char of the structure *CODING.
130 The return value is an integer (CODING_FINISH_XXX) indicating how
131 the decoding finished.
133 DST_BYTES zero means that source area and destination area are
134 overlapped, which means that we can produce a decoded text until it
135 reaches the head of not-yet-decoded source text.
137 Below is a template of these functions. */
139 decode_coding_XXX (coding
, source
, destination
, src_bytes
, dst_bytes
)
140 struct coding_system
*coding
;
141 unsigned char *source
, *destination
;
142 int src_bytes
, dst_bytes
;
148 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
150 These functions encode SRC_BYTES length text at SOURCE of Emacs'
151 internal format (emacs-mule) to CODING. The resulting text goes to
152 a place pointed to by DESTINATION, the length of which should not
153 exceed DST_BYTES. These functions set the information of
154 original and encoded texts in the members produced, produced_char,
155 consumed, and consumed_char of the structure *CODING.
157 The return value is an integer (CODING_FINISH_XXX) indicating how
158 the encoding finished.
160 DST_BYTES zero means that source area and destination area are
161 overlapped, which means that we can produce an encoded text until it
162 reaches the head of not-yet-encoded source text.
164 Below is a template of these functions. */
166 encode_coding_XXX (coding
, source
, destination
, src_bytes
, dst_bytes
)
167 struct coding_system
*coding
;
168 unsigned char *source
, *destination
;
169 int src_bytes
, dst_bytes
;
175 /*** COMMONLY USED MACROS ***/
177 /* The following three macros ONE_MORE_BYTE, TWO_MORE_BYTES, and
178 THREE_MORE_BYTES safely get one, two, and three bytes from the
179 source text respectively. If there are not enough bytes in the
180 source, they jump to `label_end_of_loop'. The caller should set
181 variables `src' and `src_end' to appropriate areas in advance. */
183 #define ONE_MORE_BYTE(c1) \
188 goto label_end_of_loop; \
191 #define TWO_MORE_BYTES(c1, c2) \
193 if (src + 1 < src_end) \
194 c1 = *src++, c2 = *src++; \
196 goto label_end_of_loop; \
199 #define THREE_MORE_BYTES(c1, c2, c3) \
201 if (src + 2 < src_end) \
202 c1 = *src++, c2 = *src++, c3 = *src++; \
204 goto label_end_of_loop; \
207 /* The following three macros DECODE_CHARACTER_ASCII,
208 DECODE_CHARACTER_DIMENSION1, and DECODE_CHARACTER_DIMENSION2 put
209 the multi-byte form of a character of each class at the place
210 pointed by `dst'. The caller should set the variable `dst' to
211 point to an appropriate area and the variable `coding' to point to
212 the coding-system of the currently decoding text in advance. */
214 /* Decode one ASCII character C. */
216 #define DECODE_CHARACTER_ASCII(c) \
218 *dst++ = (c) & 0x7F; \
219 coding->produced_char++; \
222 /* Decode one DIMENSION1 character whose charset is CHARSET and whose
223 position-code is C. */
225 #define DECODE_CHARACTER_DIMENSION1(charset, c) \
227 unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset); \
229 *dst++ = leading_code; \
230 if ((leading_code = CHARSET_LEADING_CODE_EXT (charset)) > 0) \
231 *dst++ = leading_code; \
232 *dst++ = (c) | 0x80; \
233 coding->produced_char++; \
236 /* Decode one DIMENSION2 character whose charset is CHARSET and whose
237 position-codes are C1 and C2. */
239 #define DECODE_CHARACTER_DIMENSION2(charset, c1, c2) \
241 DECODE_CHARACTER_DIMENSION1 (charset, c1); \
242 *dst++ = (c2) | 0x80; \
246 /*** 1. Preamble ***/
259 #include "composite.h"
264 #else /* not emacs */
268 #endif /* not emacs */
270 Lisp_Object Qcoding_system
, Qeol_type
;
271 Lisp_Object Qbuffer_file_coding_system
;
272 Lisp_Object Qpost_read_conversion
, Qpre_write_conversion
;
273 Lisp_Object Qno_conversion
, Qundecided
;
274 Lisp_Object Qcoding_system_history
;
275 Lisp_Object Qsafe_charsets
;
276 Lisp_Object Qvalid_codes
;
278 extern Lisp_Object Qinsert_file_contents
, Qwrite_region
;
279 Lisp_Object Qcall_process
, Qcall_process_region
, Qprocess_argument
;
280 Lisp_Object Qstart_process
, Qopen_network_stream
;
281 Lisp_Object Qtarget_idx
;
283 Lisp_Object Vselect_safe_coding_system_function
;
285 /* Mnemonic string for each format of end-of-line. */
286 Lisp_Object eol_mnemonic_unix
, eol_mnemonic_dos
, eol_mnemonic_mac
;
287 /* Mnemonic string to indicate format of end-of-line is not yet
289 Lisp_Object eol_mnemonic_undecided
;
291 /* Format of end-of-line decided by system. This is CODING_EOL_LF on
292 Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac. */
297 Lisp_Object Vcoding_system_list
, Vcoding_system_alist
;
299 Lisp_Object Qcoding_system_p
, Qcoding_system_error
;
301 /* Coding system emacs-mule and raw-text are for converting only
302 end-of-line format. */
303 Lisp_Object Qemacs_mule
, Qraw_text
;
305 /* Coding-systems are handed between Emacs Lisp programs and C internal
306 routines by the following three variables. */
307 /* Coding-system for reading files and receiving data from process. */
308 Lisp_Object Vcoding_system_for_read
;
309 /* Coding-system for writing files and sending data to process. */
310 Lisp_Object Vcoding_system_for_write
;
311 /* Coding-system actually used in the latest I/O. */
312 Lisp_Object Vlast_coding_system_used
;
314 /* A vector of length 256 which contains information about special
315 Latin codes (especially for dealing with Microsoft codes). */
316 Lisp_Object Vlatin_extra_code_table
;
318 /* Flag to inhibit code conversion of end-of-line format. */
319 int inhibit_eol_conversion
;
321 /* Flag to make buffer-file-coding-system inherit from process-coding. */
322 int inherit_process_coding_system
;
324 /* Coding system to be used to encode text for terminal display. */
325 struct coding_system terminal_coding
;
327 /* Coding system to be used to encode text for terminal display when
328 terminal coding system is nil. */
329 struct coding_system safe_terminal_coding
;
331 /* Coding system of what is sent from terminal keyboard. */
332 struct coding_system keyboard_coding
;
334 /* Default coding system to be used to write a file. */
335 struct coding_system default_buffer_file_coding
;
337 Lisp_Object Vfile_coding_system_alist
;
338 Lisp_Object Vprocess_coding_system_alist
;
339 Lisp_Object Vnetwork_coding_system_alist
;
341 Lisp_Object Vlocale_coding_system
;
345 Lisp_Object Qcoding_category
, Qcoding_category_index
;
347 /* List of symbols `coding-category-xxx' ordered by priority. */
348 Lisp_Object Vcoding_category_list
;
350 /* Table of coding categories (Lisp symbols). */
351 Lisp_Object Vcoding_category_table
;
353 /* Table of names of symbol for each coding-category. */
354 char *coding_category_name
[CODING_CATEGORY_IDX_MAX
] = {
355 "coding-category-emacs-mule",
356 "coding-category-sjis",
357 "coding-category-iso-7",
358 "coding-category-iso-7-tight",
359 "coding-category-iso-8-1",
360 "coding-category-iso-8-2",
361 "coding-category-iso-7-else",
362 "coding-category-iso-8-else",
363 "coding-category-ccl",
364 "coding-category-big5",
365 "coding-category-utf-8",
366 "coding-category-utf-16-be",
367 "coding-category-utf-16-le",
368 "coding-category-raw-text",
369 "coding-category-binary"
372 /* Table of pointers to coding systems corresponding to each coding
374 struct coding_system
*coding_system_table
[CODING_CATEGORY_IDX_MAX
];
376 /* Table of coding category masks. Nth element is a mask for a coding
377 category of which priority is Nth. */
379 int coding_priorities
[CODING_CATEGORY_IDX_MAX
];
381 /* Flag to tell if we look up translation table on character code
383 Lisp_Object Venable_character_translation
;
384 /* Standard translation table to look up on decoding (reading). */
385 Lisp_Object Vstandard_translation_table_for_decode
;
386 /* Standard translation table to look up on encoding (writing). */
387 Lisp_Object Vstandard_translation_table_for_encode
;
389 Lisp_Object Qtranslation_table
;
390 Lisp_Object Qtranslation_table_id
;
391 Lisp_Object Qtranslation_table_for_decode
;
392 Lisp_Object Qtranslation_table_for_encode
;
394 /* Alist of charsets vs revision number. */
395 Lisp_Object Vcharset_revision_alist
;
397 /* Default coding systems used for process I/O. */
398 Lisp_Object Vdefault_process_coding_system
;
400 /* Global flag to tell that we can't call post-read-conversion and
401 pre-write-conversion functions. Usually the value is zero, but it
402 is set to 1 temporarily while such functions are running. This is
403 to avoid infinite recursive call. */
404 static int inhibit_pre_post_conversion
;
407 /*** 2. Emacs internal format (emacs-mule) handlers ***/
409 /* Emacs' internal format for encoding multiple character sets is a
410 kind of multi-byte encoding, i.e. characters are encoded by
411 variable-length sequences of one-byte codes. ASCII characters
412 and control characters (e.g. `tab', `newline') are represented by
413 one-byte sequences which are their ASCII codes, in the range 0x00
414 through 0x7F. The other characters are represented by a sequence
415 of `base leading-code', optional `extended leading-code', and one
416 or two `position-code's. The length of the sequence is determined
417 by the base leading-code. Leading-code takes the range 0x80
418 through 0x9F, whereas extended leading-code and position-code take
419 the range 0xA0 through 0xFF. See `charset.h' for more details
420 about leading-code and position-code.
422 --- CODE RANGE of Emacs' internal format ---
423 (character set) (range)
425 ELSE (1st byte) 0x81 .. 0x9F
426 (rest bytes) 0xA0 .. 0xFF
427 ---------------------------------------------
431 enum emacs_code_class_type emacs_code_class
[256];
433 /* Go to the next statement only if *SRC is accessible and the code is
434 greater than or equal to 0xA0. */
435 #define CHECK_CODE_RANGE_A0_FF \
437 if (src >= src_end) \
438 goto label_end_of_switch; \
439 else if (*src++ < 0xA0) \
443 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
444 Check if a text is encoded in Emacs' internal format. If it is,
445 return CODING_CATEGORY_MASK_EMACS_MULE, else return 0. */
448 detect_coding_emacs_mule (src
, src_end
)
449 unsigned char *src
, *src_end
;
454 while (src
< src_end
)
466 switch (emacs_code_class
[c
])
468 case EMACS_ascii_code
:
469 case EMACS_linefeed_code
:
472 case EMACS_control_code
:
473 if (c
== ISO_CODE_ESC
|| c
== ISO_CODE_SI
|| c
== ISO_CODE_SO
)
477 case EMACS_invalid_code
:
480 case EMACS_leading_code_4
:
481 CHECK_CODE_RANGE_A0_FF
;
482 /* fall down to check it two more times ... */
484 case EMACS_leading_code_3
:
485 CHECK_CODE_RANGE_A0_FF
;
486 /* fall down to check it one more time ... */
488 case EMACS_leading_code_2
:
489 CHECK_CODE_RANGE_A0_FF
;
492 case 0x80: /* Old leading code for a composite character. */
494 CHECK_CODE_RANGE_A0_FF
;
504 return CODING_CATEGORY_MASK_EMACS_MULE
;
508 /*** 3. ISO2022 handlers ***/
510 /* The following note describes the coding system ISO2022 briefly.
511 Since the intention of this note is to help understand the
512 functions in this file, some parts are NOT ACCURATE or OVERLY
513 SIMPLIFIED. For thorough understanding, please refer to the
514 original document of ISO2022.
516 ISO2022 provides many mechanisms to encode several character sets
517 in 7-bit and 8-bit environments. For 7-bit environments, all text
518 is encoded using bytes less than 128. This may make the encoded
519 text a little bit longer, but the text passes more easily through
520 several gateways, some of which strip off MSB (Most Significant Bit).
522 There are two kinds of character sets: control character set and
523 graphic character set. The former contains control characters such
524 as `newline' and `escape' to provide control functions (control
525 functions are also provided by escape sequences). The latter
526 contains graphic characters such as 'A' and '-'. Emacs recognizes
527 two control character sets and many graphic character sets.
529 Graphic character sets are classified into one of the following
530 four classes, according to the number of bytes (DIMENSION) and
531 number of characters in one dimension (CHARS) of the set:
537 In addition, each character set is assigned an identification tag,
538 unique for each set, called "final character" (denoted as <F>
539 hereafter). The <F> of each character set is decided by ECMA(*)
540 when it is registered in ISO. The code range of <F> is 0x30..0x7F
541 (0x30..0x3F are for private use only).
543 Note (*): ECMA = European Computer Manufacturers Association
545 Here are examples of graphic character set [NAME(<F>)]:
546 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
547 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
548 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
549 o DIMENSION2_CHARS96 -- none for the moment
551 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
552 C0 [0x00..0x1F] -- control character plane 0
553 GL [0x20..0x7F] -- graphic character plane 0
554 C1 [0x80..0x9F] -- control character plane 1
555 GR [0xA0..0xFF] -- graphic character plane 1
557 A control character set is directly designated and invoked to C0 or
558 C1 by an escape sequence. The most common case is that:
559 - ISO646's control character set is designated/invoked to C0, and
560 - ISO6429's control character set is designated/invoked to C1,
561 and usually these designations/invocations are omitted in encoded
562 text. In a 7-bit environment, only C0 can be used, and a control
563 character for C1 is encoded by an appropriate escape sequence to
564 fit into the environment. All control characters for C1 are
565 defined to have corresponding escape sequences.
567 A graphic character set is at first designated to one of four
568 graphic registers (G0 through G3), then these graphic registers are
569 invoked to GL or GR. These designations and invocations can be
570 done independently. The most common case is that G0 is invoked to
571 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
572 these invocations and designations are omitted in encoded text.
573 In a 7-bit environment, only GL can be used.
575 When a graphic character set of CHARS94 is invoked to GL, codes
576 0x20 and 0x7F of the GL area work as control characters SPACE and
577 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
580 There are two ways of invocation: locking-shift and single-shift.
581 With locking-shift, the invocation lasts until the next different
582 invocation, whereas with single-shift, the invocation affects the
583 following character only and doesn't affect the locking-shift
584 state. Invocations are done by the following control characters or
587 ----------------------------------------------------------------------
588 abbrev function cntrl escape seq description
589 ----------------------------------------------------------------------
590 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
591 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
592 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
593 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
594 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
595 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
596 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
597 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
598 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
599 ----------------------------------------------------------------------
600 (*) These are not used by any known coding system.
602 Control characters for these functions are defined by macros
603 ISO_CODE_XXX in `coding.h'.
605 Designations are done by the following escape sequences:
606 ----------------------------------------------------------------------
607 escape sequence description
608 ----------------------------------------------------------------------
609 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
610 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
611 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
612 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
613 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
614 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
615 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
616 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
617 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
618 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
619 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
620 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
621 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
622 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
623 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
624 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
625 ----------------------------------------------------------------------
627 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
628 of dimension 1, chars 94, and final character <F>, etc...
630 Note (*): Although these designations are not allowed in ISO2022,
631 Emacs accepts them on decoding, and produces them on encoding
632 CHARS96 character sets in a coding system which is characterized as
633 7-bit environment, non-locking-shift, and non-single-shift.
635 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
636 '(' can be omitted. We refer to this as "short-form" hereafter.
638 Now you may notice that there are a lot of ways for encoding the
639 same multilingual text in ISO2022. Actually, there exist many
640 coding systems such as Compound Text (used in X11's inter client
641 communication), ISO-2022-JP (used in Japanese internet), ISO-2022-KR
642 (used in Korean internet), EUC (Extended UNIX Code, used in Asian
643 localized platforms), and all of these are variants of ISO2022.
645 In addition to the above, Emacs handles two more kinds of escape
646 sequences: ISO6429's direction specification and Emacs' private
647 sequence for specifying character composition.
649 ISO6429's direction specification takes the following form:
650 o CSI ']' -- end of the current direction
651 o CSI '0' ']' -- end of the current direction
652 o CSI '1' ']' -- start of left-to-right text
653 o CSI '2' ']' -- start of right-to-left text
654 The control character CSI (0x9B: control sequence introducer) is
655 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
657 Character composition specification takes the following form:
658 o ESC '0' -- start relative composition
659 o ESC '1' -- end composition
660 o ESC '2' -- start rule-base composition (*)
661 o ESC '3' -- start relative composition with alternate chars (**)
662 o ESC '4' -- start rule-base composition with alternate chars (**)
663 Since these are not standard escape sequences of any ISO standard,
664 the use of them for these meanings is restricted to Emacs only.
666 (*) This form is used only in Emacs 20.5 and the older versions,
667 but the newer versions can safely decode it.
668 (**) This form is used only in Emacs 21.1 and the newer versions,
669 and the older versions can't decode it.
671 Here's a list of example usages of these composition escape
672 sequences (categorized by `enum composition_method').
674 COMPOSITION_RELATIVE:
675 ESC 0 CHAR [ CHAR ] ESC 1
676 COMPOSITION_WITH_RULE:
677 ESC 2 CHAR [ RULE CHAR ] ESC 1
678 COMPOSITION_WITH_ALTCHARS:
679 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
680 COMPOSITION_WITH_RULE_ALTCHARS:
681 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
683 enum iso_code_class_type iso_code_class
[256];
/* CHARSET_OK (IDX, CHARSET) is non-zero when coding_system_table[IDX]
   is non-null and that coding system either has a non-zero
   safe_charsets[CHARSET] entry or reports, via
   CODING_SPEC_ISO_REQUESTED_DESIGNATION, something other than
   CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION for CHARSET.  Both
   arguments are expanded more than once, so they should be free of
   side effects.  */
685 #define CHARSET_OK(idx, charset) \
686 (coding_system_table[idx] \
687 && (coding_system_table[idx]->safe_charsets[charset] \
688 || (CODING_SPEC_ISO_REQUESTED_DESIGNATION \
689 (coding_system_table[idx], charset) \
690 != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)))
/* SHIFT_OUT_OK (IDX) is non-zero when
   CODING_SPEC_ISO_INITIAL_DESIGNATION for register index 1 of
   coding_system_table[IDX] is non-negative.  Unlike CHARSET_OK, this
   macro does not test coding_system_table[IDX] for null first.
   NOTE(review): index 1 presumably denotes graphic register G1 --
   confirm against coding.h.  */
692 #define SHIFT_OUT_OK(idx) \
693 (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
695 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
696 Check if a text is encoded in ISO2022. If it is, return an
697 integer in which the appropriate flag bits, any of:
698 CODING_CATEGORY_MASK_ISO_7
699 CODING_CATEGORY_MASK_ISO_7_TIGHT
700 CODING_CATEGORY_MASK_ISO_8_1
701 CODING_CATEGORY_MASK_ISO_8_2
702 CODING_CATEGORY_MASK_ISO_7_ELSE
703 CODING_CATEGORY_MASK_ISO_8_ELSE
704 are set. If a code which should never appear in ISO2022 is found,
708 detect_coding_iso2022 (src
, src_end
)
709 unsigned char *src
, *src_end
;
711 int mask
= CODING_CATEGORY_MASK_ISO
;
713 int reg
[4], shift_out
= 0, single_shifting
= 0;
714 int c
, c1
, i
, charset
;
716 reg
[0] = CHARSET_ASCII
, reg
[1] = reg
[2] = reg
[3] = -1;
717 while (mask
&& src
< src_end
)
727 if (c
>= '(' && c
<= '/')
729 /* Designation sequence for a charset of dimension 1. */
733 if (c1
< ' ' || c1
>= 0x80
734 || (charset
= iso_charset_table
[0][c
>= ','][c1
]) < 0)
735 /* Invalid designation sequence. Just ignore. */
737 reg
[(c
- '(') % 4] = charset
;
741 /* Designation sequence for a charset of dimension 2. */
745 if (c
>= '@' && c
<= 'B')
746 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
747 reg
[0] = charset
= iso_charset_table
[1][0][c
];
748 else if (c
>= '(' && c
<= '/')
753 if (c1
< ' ' || c1
>= 0x80
754 || (charset
= iso_charset_table
[1][c
>= ','][c1
]) < 0)
755 /* Invalid designation sequence. Just ignore. */
757 reg
[(c
- '(') % 4] = charset
;
760 /* Invalid designation sequence. Just ignore. */
763 else if (c
== 'N' || c
== 'O')
765 /* ESC <Fe> for SS2 or SS3. */
766 mask
&= CODING_CATEGORY_MASK_ISO_7_ELSE
;
769 else if (c
>= '0' && c
<= '4')
771 /* ESC <Fp> for start/end composition. */
772 mask_found
|= CODING_CATEGORY_MASK_ISO
;
776 /* Invalid escape sequence. Just ignore. */
779 /* We found a valid designation sequence for CHARSET. */
780 mask
&= ~CODING_CATEGORY_MASK_ISO_8BIT
;
781 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7
, charset
))
782 mask_found
|= CODING_CATEGORY_MASK_ISO_7
;
784 mask
&= ~CODING_CATEGORY_MASK_ISO_7
;
785 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT
, charset
))
786 mask_found
|= CODING_CATEGORY_MASK_ISO_7_TIGHT
;
788 mask
&= ~CODING_CATEGORY_MASK_ISO_7_TIGHT
;
789 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE
, charset
))
790 mask_found
|= CODING_CATEGORY_MASK_ISO_7_ELSE
;
792 mask
&= ~CODING_CATEGORY_MASK_ISO_7_ELSE
;
793 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE
, charset
))
794 mask_found
|= CODING_CATEGORY_MASK_ISO_8_ELSE
;
796 mask
&= ~CODING_CATEGORY_MASK_ISO_8_ELSE
;
803 || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE
)
804 || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE
)))
806 /* Locking shift out. */
807 mask
&= ~CODING_CATEGORY_MASK_ISO_7BIT
;
808 mask_found
|= CODING_CATEGORY_MASK_ISO_SHIFT
;
816 /* Locking shift in. */
817 mask
&= ~CODING_CATEGORY_MASK_ISO_7BIT
;
818 mask_found
|= CODING_CATEGORY_MASK_ISO_SHIFT
;
827 int newmask
= CODING_CATEGORY_MASK_ISO_8_ELSE
;
829 if (c
!= ISO_CODE_CSI
)
831 if (coding_system_table
[CODING_CATEGORY_IDX_ISO_8_1
]->flags
832 & CODING_FLAG_ISO_SINGLE_SHIFT
)
833 newmask
|= CODING_CATEGORY_MASK_ISO_8_1
;
834 if (coding_system_table
[CODING_CATEGORY_IDX_ISO_8_2
]->flags
835 & CODING_FLAG_ISO_SINGLE_SHIFT
)
836 newmask
|= CODING_CATEGORY_MASK_ISO_8_2
;
839 if (VECTORP (Vlatin_extra_code_table
)
840 && !NILP (XVECTOR (Vlatin_extra_code_table
)->contents
[c
]))
842 if (coding_system_table
[CODING_CATEGORY_IDX_ISO_8_1
]->flags
843 & CODING_FLAG_ISO_LATIN_EXTRA
)
844 newmask
|= CODING_CATEGORY_MASK_ISO_8_1
;
845 if (coding_system_table
[CODING_CATEGORY_IDX_ISO_8_2
]->flags
846 & CODING_FLAG_ISO_LATIN_EXTRA
)
847 newmask
|= CODING_CATEGORY_MASK_ISO_8_2
;
850 mask_found
|= newmask
;
863 if (VECTORP (Vlatin_extra_code_table
)
864 && !NILP (XVECTOR (Vlatin_extra_code_table
)->contents
[c
]))
868 if (coding_system_table
[CODING_CATEGORY_IDX_ISO_8_1
]->flags
869 & CODING_FLAG_ISO_LATIN_EXTRA
)
870 newmask
|= CODING_CATEGORY_MASK_ISO_8_1
;
871 if (coding_system_table
[CODING_CATEGORY_IDX_ISO_8_2
]->flags
872 & CODING_FLAG_ISO_LATIN_EXTRA
)
873 newmask
|= CODING_CATEGORY_MASK_ISO_8_2
;
875 mask_found
|= newmask
;
882 unsigned char *src_begin
= src
;
884 mask
&= ~(CODING_CATEGORY_MASK_ISO_7BIT
885 | CODING_CATEGORY_MASK_ISO_7_ELSE
);
886 mask_found
|= CODING_CATEGORY_MASK_ISO_8_1
;
887 /* Check the length of succeeding codes of the range
888 0xA0..0xFF. If the byte length is odd, we exclude
889 CODING_CATEGORY_MASK_ISO_8_2. We can check this only
890 when we are not single shifting. */
891 if (!single_shifting
)
893 while (src
< src_end
&& *src
>= 0xA0)
895 if ((src
- src_begin
- 1) & 1 && src
< src_end
)
896 mask
&= ~CODING_CATEGORY_MASK_ISO_8_2
;
898 mask_found
|= CODING_CATEGORY_MASK_ISO_8_2
;
905 return (mask
& mask_found
);
908 /* Decode a character of which charset is CHARSET and the 1st position
909 code is C1. If dimension of CHARSET is 2, the 2nd position code is
910 fetched from SRC and set to C2. If CHARSET is negative, it means
911 that we are decoding ill formed text, and what we can do is just to
914 If we are now in the middle of composition sequence, the decoded
915 character may be ALTCHAR (see the comment above). In that case,
916 the character goes to coding->cmp_data->data instead of DST. */
918 #define DECODE_ISO_CHARACTER(charset, c1) \
920 int c_alt = -1, charset_alt = (charset); \
921 if (charset_alt >= 0) \
923 if (CHARSET_DIMENSION (charset_alt) == 2) \
925 ONE_MORE_BYTE (c2); \
926 if (iso_code_class[(c2) & 0x7F] != ISO_0x20_or_0x7F \
927 && iso_code_class[(c2) & 0x7F] != ISO_graphic_plane_0) \
930 charset_alt = CHARSET_ASCII; \
933 if (!NILP (translation_table) \
934 && ((c_alt = translate_char (translation_table, \
935 -1, charset_alt, c1, c2)) >= 0)) \
936 SPLIT_CHAR (c_alt, charset_alt, c1, c2); \
938 if (! COMPOSING_P (coding) \
939 || coding->composing == COMPOSITION_RELATIVE \
940 || coding->composing == COMPOSITION_WITH_RULE) \
942 if (charset_alt == CHARSET_ASCII || charset_alt < 0) \
943 DECODE_CHARACTER_ASCII (c1); \
944 else if (CHARSET_DIMENSION (charset_alt) == 1) \
945 DECODE_CHARACTER_DIMENSION1 (charset_alt, c1); \
947 DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2); \
949 if (COMPOSING_P (coding) \
950 && coding->composing != COMPOSITION_RELATIVE) \
953 c_alt = MAKE_CHAR (charset_alt, c1, c2); \
954 CODING_ADD_COMPOSITION_COMPONENT (coding, c_alt); \
955 coding->composition_rule_follows \
956 = coding->composing != COMPOSITION_WITH_ALTCHARS; \
960 /* Set designation state into CODING. */
961 #define DECODE_DESIGNATION(reg, dimension, chars, final_char) \
965 if (final_char < '0' || final_char >= 128) \
966 goto label_invalid_code; \
967 charset = ISO_CHARSET_TABLE (make_number (dimension), \
968 make_number (chars), \
969 make_number (final_char)); \
971 && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
972 || coding->safe_charsets[charset])) \
974 if (coding->spec.iso2022.last_invalid_designation_register == 0 \
976 && charset == CHARSET_ASCII) \
978 /* We should insert this designation sequence as is so \
979 that it is surely written back to a file. */ \
980 coding->spec.iso2022.last_invalid_designation_register = -1; \
981 goto label_invalid_code; \
983 coding->spec.iso2022.last_invalid_designation_register = -1; \
984 if ((coding->mode & CODING_MODE_DIRECTION) \
985 && CHARSET_REVERSE_CHARSET (charset) >= 0) \
986 charset = CHARSET_REVERSE_CHARSET (charset); \
987 CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset; \
991 coding->spec.iso2022.last_invalid_designation_register = reg; \
992 goto label_invalid_code; \
996 /* Allocate a memory block for storing information about compositions.
997 The block is chained to the already allocated blocks. */
1000 coding_allocate_composition_data (coding
, char_offset
)
1001 struct coding_system
*coding
;
1004 struct composition_data
*cmp_data
1005 = (struct composition_data
*) xmalloc (sizeof *cmp_data
);
1007 cmp_data
->char_offset
= char_offset
;
1009 cmp_data
->prev
= coding
->cmp_data
;
1010 cmp_data
->next
= NULL
;
1011 if (coding
->cmp_data
)
1012 coding
->cmp_data
->next
= cmp_data
;
1013 coding
->cmp_data
= cmp_data
;
1014 coding
->cmp_data_start
= 0;
1017 /* Record the starting position START and METHOD of one composition. */
1019 #define CODING_ADD_COMPOSITION_START(coding, start, method) \
1021 struct composition_data *cmp_data = coding->cmp_data; \
1022 int *data = cmp_data->data + cmp_data->used; \
1023 coding->cmp_data_start = cmp_data->used; \
1025 data[1] = cmp_data->char_offset + start; \
1026 data[3] = (int) method; \
1027 cmp_data->used += 4; \
1030 /* Record the ending position END of the current composition. */
1032 #define CODING_ADD_COMPOSITION_END(coding, end) \
1034 struct composition_data *cmp_data = coding->cmp_data; \
1035 int *data = cmp_data->data + coding->cmp_data_start; \
1036 data[0] = cmp_data->used - coding->cmp_data_start; \
1037 data[2] = cmp_data->char_offset + end; \
/* Record one COMPONENT (alternate character or composition rule) in
   the current composition data block of CODING and advance that
   block's `used' count by one.  The macro evaluates to the stored
   value.

   CODING is evaluated twice, so it must not have side effects.  No
   bounds check is done here; the caller must make sure the block has
   room.  Both arguments are parenthesized so that any expression
   (e.g. `&cs' or `a ? b : c') can be passed safely.  */
#define CODING_ADD_COMPOSITION_COMPONENT(coding, component)		\
  ((coding)->cmp_data->data[(coding)->cmp_data->used++] = (component))
1045 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4. */
1047 #define DECODE_COMPOSITION_START(c1) \
1049 if (coding->composing == COMPOSITION_DISABLED) \
1051 *dst++ = ISO_CODE_ESC; \
1052 *dst++ = c1 & 0x7f; \
1053 coding->produced_char += 2; \
1055 else if (!COMPOSING_P (coding)) \
1057 /* This is surely the start of a composition. We must be sure \
1058 that coding->cmp_data has enough space to store the \
1059 information about the composition. If not, terminate the \
1060 current decoding loop, allocate one more memory block for \
1061 coding->cmp_data in the caller, then start the decoding \
1062 loop again. We can't allocate memory here directly because \
1063 it may cause buffer/string relocation. */ \
1064 if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \
1065 >= COMPOSITION_DATA_SIZE) \
1067 result = CODING_FINISH_INSUFFICIENT_CMP; \
1068 goto label_end_of_loop_2; \
1070 coding->composing = (c1 == '0' ? COMPOSITION_RELATIVE \
1071 : c1 == '2' ? COMPOSITION_WITH_RULE \
1072 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
1073 : COMPOSITION_WITH_RULE_ALTCHARS); \
1074 CODING_ADD_COMPOSITION_START (coding, coding->produced_char, \
1075 coding->composing); \
1076 coding->composition_rule_follows = 0; \
1080 /* We are already handling a composition. If the method is \
1081 the following two, the codes following the current escape \
1082 sequence are actual characters stored in a buffer. */ \
1083 if (coding->composing == COMPOSITION_WITH_ALTCHARS \
1084 || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS) \
1086 coding->composing = COMPOSITION_RELATIVE; \
1087 coding->composition_rule_follows = 0; \
1092 /* Handle composition end sequence ESC 1. */
1094 #define DECODE_COMPOSITION_END(c1) \
1096 if (coding->composing == COMPOSITION_DISABLED) \
1098 *dst++ = ISO_CODE_ESC; \
1100 coding->produced_char += 2; \
1104 CODING_ADD_COMPOSITION_END (coding, coding->produced_char); \
1105 coding->composing = COMPOSITION_NO; \
1109 /* Decode a composition rule from the byte C1 (and maybe one more byte
1110 from SRC, read into C2) and store one encoded composition rule in
1111 coding->cmp_data, then clear coding->composition_rule_follows. */
1113 #define DECODE_COMPOSITION_RULE(c1) \
1117 if (c1 < 81) /* old format (before ver.21): C1 packs gref * 9 + nref */ \
1119 int gref = (c1) / 9; \
1120 int nref = (c1) % 9; \
1121 if (gref == 4) gref = 10; \
1122 if (nref == 4) nref = 10; \
1123 rule = COMPOSITION_ENCODE_RULE (gref, nref); \
1125 else if (c1 < 93) /* new format (after ver.21): gref from C1, nref from a second byte */ \
1127 ONE_MORE_BYTE (c2); \
1128 rule = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
1130 CODING_ADD_COMPOSITION_COMPONENT (coding, rule); /* append RULE as the next component */ \
1131 coding->composition_rule_follows = 0; /* the next byte is not taken as a rule */ \
1135 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1138 decode_coding_iso2022 (coding
, source
, destination
, src_bytes
, dst_bytes
)
1139 struct coding_system
*coding
;
1140 unsigned char *source
, *destination
;
1141 int src_bytes
, dst_bytes
;
1143 unsigned char *src
= source
;
1144 unsigned char *src_end
= source
+ src_bytes
;
1145 unsigned char *dst
= destination
;
1146 unsigned char *dst_end
= destination
+ dst_bytes
;
1147 /* Since the maximum bytes produced by each loop is 7, we subtract 6
1148 from DST_END to assure that overflow checking is necessary only
1149 at the head of loop. */
1150 unsigned char *adjusted_dst_end
= dst_end
- 6;
1152 /* Charsets invoked to graphic plane 0 and 1 respectively. */
1153 int charset0
= CODING_SPEC_ISO_PLANE_CHARSET (coding
, 0);
1154 int charset1
= CODING_SPEC_ISO_PLANE_CHARSET (coding
, 1);
1155 Lisp_Object translation_table
1156 = coding
->translation_table_for_decode
;
1157 int result
= CODING_FINISH_NORMAL
;
1159 if (!NILP (Venable_character_translation
) && NILP (translation_table
))
1160 translation_table
= Vstandard_translation_table_for_decode
;
1162 coding
->produced_char
= 0;
1163 coding
->fake_multibyte
= 0;
1164 while (src
< src_end
&& (dst_bytes
1165 ? (dst
< adjusted_dst_end
)
1168 /* SRC_BASE remembers the start position in source in each loop.
1169 The loop will be exited when there's not enough source text
1170 to analyze long escape sequence or 2-byte code (within macros
1171 ONE_MORE_BYTE or TWO_MORE_BYTES). In that case, SRC is reset
1172 to SRC_BASE before exiting. */
1173 unsigned char *src_base
= src
;
1174 int c1
= *src
++, c2
;
1176 /* We produce no character or one character. */
1177 switch (iso_code_class
[c1
])
1179 case ISO_0x20_or_0x7F
:
1180 if (COMPOSING_P (coding
) && coding
->composition_rule_follows
)
1182 DECODE_COMPOSITION_RULE (c1
);
1185 if (charset0
< 0 || CHARSET_CHARS (charset0
) == 94)
1187 /* This is SPACE or DEL. */
1189 coding
->produced_char
++;
1192 /* This is a graphic character, we fall down ... */
1194 case ISO_graphic_plane_0
:
1195 if (COMPOSING_P (coding
) && coding
->composition_rule_follows
)
1196 DECODE_COMPOSITION_RULE (c1
);
1198 DECODE_ISO_CHARACTER (charset0
, c1
);
1201 case ISO_0xA0_or_0xFF
:
1202 if (charset1
< 0 || CHARSET_CHARS (charset1
) == 94
1203 || coding
->flags
& CODING_FLAG_ISO_SEVEN_BITS
)
1204 goto label_invalid_code
;
1205 /* This is a graphic character, we fall down ... */
1207 case ISO_graphic_plane_1
:
1208 if (coding
->flags
& CODING_FLAG_ISO_SEVEN_BITS
)
1209 goto label_invalid_code
;
1210 DECODE_ISO_CHARACTER (charset1
, c1
);
1213 case ISO_control_code
:
1214 if (COMPOSING_P (coding
))
1215 DECODE_COMPOSITION_END ('1');
1217 /* All ISO2022 control characters in this class have the
1218 same representation in Emacs internal format. */
1220 && (coding
->mode
& CODING_MODE_INHIBIT_INCONSISTENT_EOL
)
1221 && (coding
->eol_type
== CODING_EOL_CR
1222 || coding
->eol_type
== CODING_EOL_CRLF
))
1224 result
= CODING_FINISH_INCONSISTENT_EOL
;
1225 goto label_end_of_loop_2
;
1228 coding
->produced_char
++;
1231 case ISO_carriage_return
:
1232 if (COMPOSING_P (coding
))
1233 DECODE_COMPOSITION_END ('1');
1235 if (coding
->eol_type
== CODING_EOL_CR
)
1237 else if (coding
->eol_type
== CODING_EOL_CRLF
)
1240 if (c1
== ISO_CODE_LF
)
1244 if (coding
->mode
& CODING_MODE_INHIBIT_INCONSISTENT_EOL
)
1246 result
= CODING_FINISH_INCONSISTENT_EOL
;
1247 goto label_end_of_loop_2
;
1255 coding
->produced_char
++;
1259 if (! (coding
->flags
& CODING_FLAG_ISO_LOCKING_SHIFT
)
1260 || CODING_SPEC_ISO_DESIGNATION (coding
, 1) < 0)
1261 goto label_invalid_code
;
1262 CODING_SPEC_ISO_INVOCATION (coding
, 0) = 1;
1263 charset0
= CODING_SPEC_ISO_PLANE_CHARSET (coding
, 0);
1267 if (! (coding
->flags
& CODING_FLAG_ISO_LOCKING_SHIFT
))
1268 goto label_invalid_code
;
1269 CODING_SPEC_ISO_INVOCATION (coding
, 0) = 0;
1270 charset0
= CODING_SPEC_ISO_PLANE_CHARSET (coding
, 0);
1273 case ISO_single_shift_2_7
:
1274 case ISO_single_shift_2
:
1275 if (! (coding
->flags
& CODING_FLAG_ISO_SINGLE_SHIFT
))
1276 goto label_invalid_code
;
1277 /* SS2 is handled as an escape sequence of ESC 'N' */
1279 goto label_escape_sequence
;
1281 case ISO_single_shift_3
:
1282 if (! (coding
->flags
& CODING_FLAG_ISO_SINGLE_SHIFT
))
1283 goto label_invalid_code
;
1284 /* SS3 is handled as an escape sequence of ESC 'O' */
1286 goto label_escape_sequence
;
1288 case ISO_control_sequence_introducer
:
1289 /* CSI is handled as an escape sequence of ESC '[' ... */
1291 goto label_escape_sequence
;
1295 label_escape_sequence
:
1296 /* Escape sequences handled by Emacs are invocation,
1297 designation, direction specification, and character
1298 composition specification. */
1301 case '&': /* revision of following character set */
1303 if (!(c1
>= '@' && c1
<= '~'))
1304 goto label_invalid_code
;
1306 if (c1
!= ISO_CODE_ESC
)
1307 goto label_invalid_code
;
1309 goto label_escape_sequence
;
1311 case '$': /* designation of 2-byte character set */
1312 if (! (coding
->flags
& CODING_FLAG_ISO_DESIGNATION
))
1313 goto label_invalid_code
;
1315 if (c1
>= '@' && c1
<= 'B')
1316 { /* designation of JISX0208.1978, GB2312.1980,
1318 DECODE_DESIGNATION (0, 2, 94, c1
);
1320 else if (c1
>= 0x28 && c1
<= 0x2B)
1321 { /* designation of DIMENSION2_CHARS94 character set */
1323 DECODE_DESIGNATION (c1
- 0x28, 2, 94, c2
);
1325 else if (c1
>= 0x2C && c1
<= 0x2F)
1326 { /* designation of DIMENSION2_CHARS96 character set */
1328 DECODE_DESIGNATION (c1
- 0x2C, 2, 96, c2
);
1331 goto label_invalid_code
;
1334 case 'n': /* invocation of locking-shift-2 */
1335 if (! (coding
->flags
& CODING_FLAG_ISO_LOCKING_SHIFT
)
1336 || CODING_SPEC_ISO_DESIGNATION (coding
, 2) < 0)
1337 goto label_invalid_code
;
1338 CODING_SPEC_ISO_INVOCATION (coding
, 0) = 2;
1339 charset0
= CODING_SPEC_ISO_PLANE_CHARSET (coding
, 0);
1342 case 'o': /* invocation of locking-shift-3 */
1343 if (! (coding
->flags
& CODING_FLAG_ISO_LOCKING_SHIFT
)
1344 || CODING_SPEC_ISO_DESIGNATION (coding
, 3) < 0)
1345 goto label_invalid_code
;
1346 CODING_SPEC_ISO_INVOCATION (coding
, 0) = 3;
1347 charset0
= CODING_SPEC_ISO_PLANE_CHARSET (coding
, 0);
1350 case 'N': /* invocation of single-shift-2 */
1351 if (! (coding
->flags
& CODING_FLAG_ISO_SINGLE_SHIFT
)
1352 || CODING_SPEC_ISO_DESIGNATION (coding
, 2) < 0)
1353 goto label_invalid_code
;
1355 charset
= CODING_SPEC_ISO_DESIGNATION (coding
, 2);
1356 DECODE_ISO_CHARACTER (charset
, c1
);
1359 case 'O': /* invocation of single-shift-3 */
1360 if (! (coding
->flags
& CODING_FLAG_ISO_SINGLE_SHIFT
)
1361 || CODING_SPEC_ISO_DESIGNATION (coding
, 3) < 0)
1362 goto label_invalid_code
;
1364 charset
= CODING_SPEC_ISO_DESIGNATION (coding
, 3);
1365 DECODE_ISO_CHARACTER (charset
, c1
);
1368 case '0': case '2': case '3': case '4': /* start composition */
1369 DECODE_COMPOSITION_START (c1
);
1372 case '1': /* end composition */
1373 DECODE_COMPOSITION_END (c1
);
1376 case '[': /* specification of direction */
1377 if (coding
->flags
& CODING_FLAG_ISO_NO_DIRECTION
)
1378 goto label_invalid_code
;
1379 /* For the moment, nested direction is not supported.
1380 So, `coding->mode & CODING_MODE_DIRECTION' zero means
1381 left-to-right, and nonzero means right-to-left. */
1385 case ']': /* end of the current direction */
1386 coding
->mode
&= ~CODING_MODE_DIRECTION
;
1388 case '0': /* end of the current direction */
1389 case '1': /* start of left-to-right direction */
1392 coding
->mode
&= ~CODING_MODE_DIRECTION
;
1394 goto label_invalid_code
;
1397 case '2': /* start of right-to-left direction */
1400 coding
->mode
|= CODING_MODE_DIRECTION
;
1402 goto label_invalid_code
;
1406 goto label_invalid_code
;
1411 if (! (coding
->flags
& CODING_FLAG_ISO_DESIGNATION
))
1412 goto label_invalid_code
;
1413 if (c1
>= 0x28 && c1
<= 0x2B)
1414 { /* designation of DIMENSION1_CHARS94 character set */
1416 DECODE_DESIGNATION (c1
- 0x28, 1, 94, c2
);
1418 else if (c1
>= 0x2C && c1
<= 0x2F)
1419 { /* designation of DIMENSION1_CHARS96 character set */
1421 DECODE_DESIGNATION (c1
- 0x2C, 1, 96, c2
);
1425 goto label_invalid_code
;
1428 /* We must update these variables now. */
1429 charset0
= CODING_SPEC_ISO_PLANE_CHARSET (coding
, 0);
1430 charset1
= CODING_SPEC_ISO_PLANE_CHARSET (coding
, 1);
1434 if (COMPOSING_P (coding
))
1435 DECODE_COMPOSITION_END ('1');
1436 coding
->produced_char
+= src
- src_base
;
1437 while (src_base
< src
)
1438 *dst
++ = (*src_base
++) & 0x7F;
1443 result
= CODING_FINISH_INSUFFICIENT_SRC
;
1444 label_end_of_loop_2
:
1451 if (result
== CODING_FINISH_NORMAL
)
1452 result
= CODING_FINISH_INSUFFICIENT_DST
;
1453 else if (result
!= CODING_FINISH_INCONSISTENT_EOL
1454 && coding
->mode
& CODING_MODE_LAST_BLOCK
)
1456 /* This is the last block of the text to be decoded. We had
1457 better just flush out all remaining codes in the text
1458 although they are not valid characters. */
1459 if (COMPOSING_P (coding
))
1460 DECODE_COMPOSITION_END ('1');
1461 src_bytes
= src_end
- src
;
1462 if (dst_bytes
&& (dst_end
- dst
< src_end
- src
))
1463 src_end
= src
+ (dst_end
- dst
);
1464 coding
->produced_char
+= src_end
- src
;
1465 while (src
< src_end
)
1466 *dst
++ = (*src
++) & 0x7F;
1470 coding
->consumed
= coding
->consumed_char
= src
- source
;
1471 coding
->produced
= dst
- destination
;
1475 /* ISO2022 encoding stuff. */
1478 It is not enough to say just "ISO2022" on encoding, we have to
1479 specify more details. In Emacs, each coding system of ISO2022
1480 variant has the following specifications:
1481 1. Initial designation to G0 thru G3.
1482 2. Allows short-form designation?
1483 3. ASCII should be designated to G0 before control characters?
1484 4. ASCII should be designated to G0 at end of line?
1485 5. 7-bit environment or 8-bit environment?
1486 6. Use locking-shift?
1487 7. Use Single-shift?
1488 And the following two are only for Japanese:
1489 8. Use ASCII in place of JIS0201-1976-Roman?
1490 9. Use JISX0208-1983 in place of JISX0208-1978?
1491 These specifications are encoded in `coding->flags' as flag bits
1492 defined by macros CODING_FLAG_ISO_XXX. See `coding.h' for more
1496 /* Produce codes (escape sequence) for designating CHARSET to graphic
1497 register REG. If <final-char> of CHARSET is '@', 'A', or 'B' and
1498 the coding system CODING allows, produce designation sequence of
1501 #define ENCODE_DESIGNATION(charset, reg, coding) \
1503 unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset); \
1504 char *intermediate_char_94 = "()*+"; \
1505 char *intermediate_char_96 = ",-./"; \
1506 int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset); \
1507 if (revision < 255) \
1509 *dst++ = ISO_CODE_ESC; \
1511 *dst++ = '@' + revision; \
1513 *dst++ = ISO_CODE_ESC; \
1514 if (CHARSET_DIMENSION (charset) == 1) \
1516 if (CHARSET_CHARS (charset) == 94) \
1517 *dst++ = (unsigned char) (intermediate_char_94[reg]); \
1519 *dst++ = (unsigned char) (intermediate_char_96[reg]); \
1524 if (CHARSET_CHARS (charset) == 94) \
1526 if (! (coding->flags & CODING_FLAG_ISO_SHORT_FORM) \
1528 || final_char < '@' || final_char > 'B') \
1529 *dst++ = (unsigned char) (intermediate_char_94[reg]); \
1532 *dst++ = (unsigned char) (intermediate_char_96[reg]); \
1534 *dst++ = final_char; \
1535 CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset; \
1538 /* The following two macros produce codes (control character or escape
1539 sequence) for ISO2022 single-shift functions (single-shift-2 and
1542 #define ENCODE_SINGLE_SHIFT_2 \
1544 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
1545 *dst++ = ISO_CODE_ESC, *dst++ = 'N'; \
1548 *dst++ = ISO_CODE_SS2; \
1549 coding->fake_multibyte = 1; \
1551 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \
1554 #define ENCODE_SINGLE_SHIFT_3 \
1556 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
1557 *dst++ = ISO_CODE_ESC, *dst++ = 'O'; \
1560 *dst++ = ISO_CODE_SS3; \
1561 coding->fake_multibyte = 1; \
1563 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \
1566 /* The following four macros produce codes (control character or
1567 escape sequence) for ISO2022 locking-shift functions (shift-in,
1568 shift-out, locking-shift-2, and locking-shift-3). */
1570 #define ENCODE_SHIFT_IN \
1572 *dst++ = ISO_CODE_SI; \
1573 CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; \
1576 #define ENCODE_SHIFT_OUT \
1578 *dst++ = ISO_CODE_SO; \
1579 CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; \
1582 #define ENCODE_LOCKING_SHIFT_2 \
1584 *dst++ = ISO_CODE_ESC, *dst++ = 'n'; \
1585 CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; \
1588 #define ENCODE_LOCKING_SHIFT_3 \
1590 *dst++ = ISO_CODE_ESC, *dst++ = 'o'; \
1591 CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; \
1594 /* Produce codes for a DIMENSION1 character whose character set is
1595 CHARSET and whose position-code is C1. Designation and invocation
1596 sequences are also produced in advance if necessary. */
1599 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
1601 if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding)) \
1603 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
1604 *dst++ = c1 & 0x7F; \
1606 *dst++ = c1 | 0x80; \
1607 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0; \
1610 else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0)) \
1612 *dst++ = c1 & 0x7F; \
1615 else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1)) \
1617 *dst++ = c1 | 0x80; \
1620 else if (coding->flags & CODING_FLAG_ISO_SAFE \
1621 && !coding->safe_charsets[charset]) \
1623 /* We should not encode this character, instead produce one or \
1625 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
1626 if (CHARSET_WIDTH (charset) == 2) \
1627 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
1631 /* Since CHARSET is not yet invoked to any graphic planes, we \
1632 must invoke it, or, at first, designate it to some graphic \
1633 register. Then repeat the loop to actually produce the \
1635 dst = encode_invocation_designation (charset, coding, dst); \
1638 /* Produce codes for a DIMENSION2 character whose character set is
1639 CHARSET and whose position-codes are C1 and C2. Designation and
1640 invocation codes are also produced in advance if necessary. */
1642 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
1644 if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding)) \
1646 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
1647 *dst++ = c1 & 0x7F, *dst++ = c2 & 0x7F; \
1649 *dst++ = c1 | 0x80, *dst++ = c2 | 0x80; \
1650 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0; \
1653 else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0)) \
1655 *dst++ = c1 & 0x7F, *dst++= c2 & 0x7F; \
1658 else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1)) \
1660 *dst++ = c1 | 0x80, *dst++= c2 | 0x80; \
1663 else if (coding->flags & CODING_FLAG_ISO_SAFE \
1664 && !coding->safe_charsets[charset]) \
1666 /* We should not encode this character, instead produce one or \
1668 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
1669 if (CHARSET_WIDTH (charset) == 2) \
1670 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
1674 /* Since CHARSET is not yet invoked to any graphic planes, we \
1675 must invoke it, or, at first, designate it to some graphic \
1676 register. Then repeat the loop to actually produce the \
1678 dst = encode_invocation_designation (charset, coding, dst); \
1681 #define ENCODE_ISO_CHARACTER(charset, c1, c2) \
1683 int c_alt, charset_alt; \
1685 if (!NILP (translation_table) \
1686 && ((c_alt = translate_char (translation_table, -1, \
1689 SPLIT_CHAR (c_alt, charset_alt, c1, c2); \
1691 charset_alt = charset; \
1692 if (CHARSET_DEFINED_P (charset_alt)) \
1694 if (CHARSET_DIMENSION (charset_alt) == 1) \
1696 if (charset == CHARSET_ASCII \
1697 && coding->flags & CODING_FLAG_ISO_USE_ROMAN) \
1698 charset_alt = charset_latin_jisx0201; \
1699 ENCODE_ISO_CHARACTER_DIMENSION1 (charset_alt, c1); \
1703 if (charset == charset_jisx0208 \
1704 && coding->flags & CODING_FLAG_ISO_USE_OLDJIS) \
1705 charset_alt = charset_jisx0208_1978; \
1706 ENCODE_ISO_CHARACTER_DIMENSION2 (charset_alt, c1, c2); \
1711 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
1713 *dst++ = charset & 0x7f; \
1714 *dst++ = c1 & 0x7f; \
1716 *dst++ = c2 & 0x7f; \
1726 coding->consumed_char++; \
1729 /* Produce designation and invocation codes at a place pointed by DST
1730 to use CHARSET. The element `spec.iso2022' of *CODING is updated.
1734 encode_invocation_designation (charset
, coding
, dst
)
1736 struct coding_system
*coding
;
1739 int reg
; /* graphic register number */
1741 /* At first, check designations. */
1742 for (reg
= 0; reg
< 4; reg
++)
1743 if (charset
== CODING_SPEC_ISO_DESIGNATION (coding
, reg
))
1748 /* CHARSET is not yet designated to any graphic registers. */
1749 /* At first check the requested designation. */
1750 reg
= CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding
, charset
);
1751 if (reg
== CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION
)
1752 /* Since CHARSET requests no special designation, designate it
1753 to graphic register 0. */
1756 ENCODE_DESIGNATION (charset
, reg
, coding
);
1759 if (CODING_SPEC_ISO_INVOCATION (coding
, 0) != reg
1760 && CODING_SPEC_ISO_INVOCATION (coding
, 1) != reg
)
1762 /* Since the graphic register REG is not invoked to any graphic
1763 planes, invoke it to graphic plane 0. */
1766 case 0: /* graphic register 0 */
1770 case 1: /* graphic register 1 */
1774 case 2: /* graphic register 2 */
1775 if (coding
->flags
& CODING_FLAG_ISO_SINGLE_SHIFT
)
1776 ENCODE_SINGLE_SHIFT_2
;
1778 ENCODE_LOCKING_SHIFT_2
;
1781 case 3: /* graphic register 3 */
1782 if (coding
->flags
& CODING_FLAG_ISO_SINGLE_SHIFT
)
1783 ENCODE_SINGLE_SHIFT_3
;
1785 ENCODE_LOCKING_SHIFT_3
;
1792 /* Produce 2-byte codes for encoded composition rule RULE. */
1794 #define ENCODE_COMPOSITION_RULE(rule) \
1797 COMPOSITION_DECODE_RULE (rule, gref, nref); \
1798 *dst++ = 32 + 81 + gref; \
1799 *dst++ = 32 + nref; \
1802 /* Produce codes for indicating the start of a composition sequence
1803 (ESC 0, ESC 3, or ESC 4). DATA points to an array of integers
1804 which specify information about the composition. See the comment
1805 in coding.h for the format of DATA. */
1807 #define ENCODE_COMPOSITION_START(coding, data) \
1809 coding->composing = data[3]; \
1810 *dst++ = ISO_CODE_ESC; \
1811 if (coding->composing == COMPOSITION_RELATIVE) \
1815 *dst++ = (coding->composing == COMPOSITION_WITH_ALTCHARS \
1817 coding->cmp_data_index = coding->cmp_data_start + 4; \
1818 coding->composition_rule_follows = 0; \
1822 /* Produce codes for indicating the end of the current composition. */
1824 #define ENCODE_COMPOSITION_END(coding, data) \
1826 *dst++ = ISO_CODE_ESC; \
1828 coding->cmp_data_start += data[0]; \
1829 coding->composing = COMPOSITION_NO; \
1830 if (coding->cmp_data_start == coding->cmp_data->used \
1831 && coding->cmp_data->next) \
1833 coding->cmp_data = coding->cmp_data->next; \
1834 coding->cmp_data_start = 0; \
1838 /* Produce composition start sequence ESC 0. Here, this sequence
1839 doesn't mean the start of a new composition but means that we have
1840 just produced components (alternate chars and composition rules) of
1841 the composition and the actual text follows in SRC. */
1843 #define ENCODE_COMPOSITION_FAKE_START(coding) \
1845 *dst++ = ISO_CODE_ESC; \
1847 coding->composing = COMPOSITION_RELATIVE; \
/* The following three macros produce codes for indicating the
   direction of text.  All of them write through the local pointer
   DST and read the local variable CODING.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER emits CSI: the two bytes ESC '['
   when CODING has the CODING_FLAG_ISO_SEVEN_BITS flag set, otherwise
   the single byte ISO_CODE_CSI.  The flag is tested with `&' because
   coding->flags is a bit mask that may have other bits set as well.
   The macro is an expression (not a statement) so that it can be used
   both as `ENCODE_CONTROL_SEQUENCE_INTRODUCER;' and as an operand of
   the comma operator in the two macros below.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER			\
  ((coding->flags & CODING_FLAG_ISO_SEVEN_BITS)			\
   ? (*dst++ = ISO_CODE_ESC, *dst++ = '[')			\
   : (*dst++ = ISO_CODE_CSI))

/* Emit CSI '2' ']' (start of right-to-left text).  */
#define ENCODE_DIRECTION_R2L	\
  (ENCODE_CONTROL_SEQUENCE_INTRODUCER, *dst++ = '2', *dst++ = ']')

/* Emit CSI '0' ']' (end of the current direction).  */
#define ENCODE_DIRECTION_L2R	\
  (ENCODE_CONTROL_SEQUENCE_INTRODUCER, *dst++ = '0', *dst++ = ']')
1866 /* Produce codes for designation and invocation to reset the graphic
1867 planes and registers to initial state. */
1868 #define ENCODE_RESET_PLANE_AND_REGISTER \
1871 if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0) \
1873 for (reg = 0; reg < 4; reg++) \
1874 if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0 \
1875 && (CODING_SPEC_ISO_DESIGNATION (coding, reg) \
1876 != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg))) \
1877 ENCODE_DESIGNATION \
1878 (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
1881 /* Produce designation sequences of charsets in the line started from
1882 SRC to a place pointed by *DSTP, and update DSTP.
1884 If the current block ends before any end-of-line, we may fail to
1885 find all the necessary designations. */
1888 encode_designation_at_bol (coding
, table
, src
, src_end
, dstp
)
1889 struct coding_system
*coding
;
1891 unsigned char *src
, *src_end
, **dstp
;
1893 int charset
, c
, found
= 0, reg
;
1894 /* Table of charsets to be designated to each graphic register. */
1896 unsigned char *dst
= *dstp
;
1898 for (reg
= 0; reg
< 4; reg
++)
1901 while (src
< src_end
&& *src
!= '\n' && found
< 4)
1903 int bytes
= BYTES_BY_CHAR_HEAD (*src
);
1906 charset
= CHARSET_AT (src
);
1910 unsigned char c1
, c2
;
1912 SPLIT_STRING(src
, bytes
, charset
, c1
, c2
);
1913 if ((c_alt
= translate_char (table
, -1, charset
, c1
, c2
)) >= 0)
1914 charset
= CHAR_CHARSET (c_alt
);
1917 reg
= CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding
, charset
);
1918 if (reg
!= CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION
&& r
[reg
] < 0)
1929 for (reg
= 0; reg
< 4; reg
++)
1931 && CODING_SPEC_ISO_DESIGNATION (coding
, reg
) != r
[reg
])
1932 ENCODE_DESIGNATION (r
[reg
], reg
, coding
);
1937 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
1940 encode_coding_iso2022 (coding
, source
, destination
, src_bytes
, dst_bytes
)
1941 struct coding_system
*coding
;
1942 unsigned char *source
, *destination
;
1943 int src_bytes
, dst_bytes
;
1945 unsigned char *src
= source
;
1946 unsigned char *src_end
= source
+ src_bytes
;
1947 unsigned char *dst
= destination
;
1948 unsigned char *dst_end
= destination
+ dst_bytes
;
1949 /* Since the maximum bytes produced by each loop is 14, we subtract 13
1950 from DST_END to assure overflow checking is necessary only at the
1952 unsigned char *adjusted_dst_end
= dst_end
- 13;
1953 Lisp_Object translation_table
1954 = coding
->translation_table_for_encode
;
1955 int result
= CODING_FINISH_NORMAL
;
1957 if (!NILP (Venable_character_translation
) && NILP (translation_table
))
1958 translation_table
= Vstandard_translation_table_for_encode
;
1960 coding
->consumed_char
= 0;
1961 coding
->fake_multibyte
= 0;
1962 while (src
< src_end
&& (dst_bytes
1963 ? (dst
< adjusted_dst_end
)
1964 : (dst
< src
- 13)))
1966 /* SRC_BASE remembers the start position in source in each loop.
1967 The loop will be exited when there's not enough source text
1968 to analyze multi-byte codes (within macros ONE_MORE_BYTE,
1969 TWO_MORE_BYTES, and THREE_MORE_BYTES). In that case, SRC is
1970 reset to SRC_BASE before exiting. */
1971 unsigned char *src_base
= src
;
1972 int charset
, c1
, c2
, c3
, c4
;
1974 if (coding
->flags
& CODING_FLAG_ISO_DESIGNATE_AT_BOL
1975 && CODING_SPEC_ISO_BOL (coding
))
1977 /* We have to produce designation sequences if any now. */
1978 encode_designation_at_bol (coding
, translation_table
,
1979 src
, src_end
, &dst
);
1980 CODING_SPEC_ISO_BOL (coding
) = 0;
1983 /* Check composition start and end. */
1984 if (coding
->composing
!= COMPOSITION_DISABLED
1985 && coding
->cmp_data_start
< coding
->cmp_data
->used
)
1987 struct composition_data
*cmp_data
= coding
->cmp_data
;
1988 int *data
= cmp_data
->data
+ coding
->cmp_data_start
;
1989 int this_pos
= cmp_data
->char_offset
+ coding
->consumed_char
;
1991 if (coding
->composing
== COMPOSITION_RELATIVE
)
1993 if (this_pos
== data
[2])
1995 ENCODE_COMPOSITION_END (coding
, data
);
1996 cmp_data
= coding
->cmp_data
;
1997 data
= cmp_data
->data
+ coding
->cmp_data_start
;
2000 else if (COMPOSING_P (coding
))
2002 /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHARS */
2003 if (coding
->cmp_data_index
== coding
->cmp_data_start
+ data
[0])
2004 /* We have consumed components of the composition.
2005 What follows in SRC is the composition's base
2007 ENCODE_COMPOSITION_FAKE_START (coding
);
2010 int c
= cmp_data
->data
[coding
->cmp_data_index
++];
2011 if (coding
->composition_rule_follows
)
2013 ENCODE_COMPOSITION_RULE (c
);
2014 coding
->composition_rule_follows
= 0;
2018 SPLIT_CHAR (c
, charset
, c1
, c2
);
2019 ENCODE_ISO_CHARACTER (charset
, c1
, c2
);
2020 /* But, we didn't consume a character in SRC. */
2021 coding
->consumed_char
--;
2022 if (coding
->composing
== COMPOSITION_WITH_RULE_ALTCHARS
)
2023 coding
->composition_rule_follows
= 1;
2028 if (!COMPOSING_P (coding
))
2030 if (this_pos
== data
[1])
2032 ENCODE_COMPOSITION_START (coding
, data
);
2039 /* Now encode one character. C1 is a control character, an
2040 ASCII character, or a leading-code of multi-byte character. */
2041 switch (emacs_code_class
[c1
])
2043 case EMACS_ascii_code
:
2045 ENCODE_ISO_CHARACTER (CHARSET_ASCII
, c1
, /* dummy */ c2
);
2048 case EMACS_control_code
:
2049 if (coding
->flags
& CODING_FLAG_ISO_RESET_AT_CNTL
)
2050 ENCODE_RESET_PLANE_AND_REGISTER
;
2052 coding
->consumed_char
++;
2055 case EMACS_carriage_return_code
:
2056 if (! (coding
->mode
& CODING_MODE_SELECTIVE_DISPLAY
))
2058 if (coding
->flags
& CODING_FLAG_ISO_RESET_AT_CNTL
)
2059 ENCODE_RESET_PLANE_AND_REGISTER
;
2061 coding
->consumed_char
++;
2064 /* fall down to treat '\r' as '\n' ... */
2066 case EMACS_linefeed_code
:
2067 if (coding
->flags
& CODING_FLAG_ISO_RESET_AT_EOL
)
2068 ENCODE_RESET_PLANE_AND_REGISTER
;
2069 if (coding
->flags
& CODING_FLAG_ISO_INIT_AT_BOL
)
2070 bcopy (coding
->spec
.iso2022
.initial_designation
,
2071 coding
->spec
.iso2022
.current_designation
,
2072 sizeof coding
->spec
.iso2022
.initial_designation
);
2073 if (coding
->eol_type
== CODING_EOL_LF
2074 || coding
->eol_type
== CODING_EOL_UNDECIDED
)
2075 *dst
++ = ISO_CODE_LF
;
2076 else if (coding
->eol_type
== CODING_EOL_CRLF
)
2077 *dst
++ = ISO_CODE_CR
, *dst
++ = ISO_CODE_LF
;
2079 *dst
++ = ISO_CODE_CR
;
2080 CODING_SPEC_ISO_BOL (coding
) = 1;
2081 coding
->consumed_char
++;
2084 case EMACS_leading_code_2
:
2089 /* invalid sequence */
2092 coding
->consumed_char
++;
2095 ENCODE_ISO_CHARACTER (c1
, c2
, /* dummy */ c3
);
2098 case EMACS_leading_code_3
:
2099 TWO_MORE_BYTES (c2
, c3
);
2101 if (c2
< 0xA0 || c3
< 0xA0)
2103 /* invalid sequence */
2106 coding
->consumed_char
++;
2108 else if (c1
< LEADING_CODE_PRIVATE_11
)
2109 ENCODE_ISO_CHARACTER (c1
, c2
, c3
);
2111 ENCODE_ISO_CHARACTER (c2
, c3
, /* dummy */ c4
);
2114 case EMACS_leading_code_4
:
2115 THREE_MORE_BYTES (c2
, c3
, c4
);
2116 if (c2
< 0xA0 || c3
< 0xA0 || c4
< 0xA0)
2118 /* invalid sequence */
2121 coding
->consumed_char
++;
2124 ENCODE_ISO_CHARACTER (c2
, c3
, c4
);
2127 case EMACS_invalid_code
:
2128 if (coding
->flags
& CODING_FLAG_ISO_RESET_AT_CNTL
)
2129 ENCODE_RESET_PLANE_AND_REGISTER
;
2131 coding
->consumed_char
++;
2136 result
= CODING_FINISH_INSUFFICIENT_SRC
;
2141 if (src
< src_end
&& result
== CODING_FINISH_NORMAL
)
2142 result
= CODING_FINISH_INSUFFICIENT_DST
;
2144 /* If this is the last block of the text to be encoded, we must
2145 reset graphic planes and registers to the initial state, and
2146 flush out the carryover if any. */
2147 if (coding
->mode
& CODING_MODE_LAST_BLOCK
)
2149 ENCODE_RESET_PLANE_AND_REGISTER
;
2150 if (COMPOSING_P (coding
))
2151 *dst
++ = ISO_CODE_ESC
, *dst
++ = '1';
2152 if (result
== CODING_FINISH_INSUFFICIENT_SRC
)
2154 while (src
< src_end
&& dst
< dst_end
)
2158 coding
->consumed
= src
- source
;
2159 coding
->produced
= coding
->produced_char
= dst
- destination
;
2164 /*** 4. SJIS and BIG5 handlers ***/
2166 /* Although SJIS and BIG5 are not ISO's coding system, they are used
2167 quite widely. So, for the moment, Emacs supports them in the bare
2168 C code. But, in the future, they may be supported only by CCL. */
/* SJIS is a coding system encoding three character sets: ASCII, right
   half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
   as is.  A character of charset katakana-jisx0201 is encoded by
   "position-code + 0x80".  A character of charset japanese-jisx0208
   is encoded in two bytes, but its two position-codes are divided and
   shifted so that they fit in the ranges below.

   --- CODE RANGE of SJIS ---
   (character set)	(range)
   ASCII		0x00 .. 0x7F
   KATAKANA-JISX0201	0xA0 .. 0xDF
   JISX0208 (1st byte)	0x81 .. 0x9F and 0xE0 .. 0xEF
	    (2nd byte)	0x40 .. 0x7E and 0x80 .. 0xFC
   -------------------------------

*/
2187 /* BIG5 is a coding system encoding two character sets: ASCII and
2188 Big5. An ASCII character is encoded as is. Big5 is a two-byte
2189 character set and is encoded in two-byte.
2191 --- CODE RANGE of BIG5 ---
2192 (character set) (range)
2194 Big5 (1st byte) 0xA1 .. 0xFE
2195 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
2196 --------------------------
2198 Since the number of characters in Big5 is larger than maximum
2199 characters in Emacs' charset (96x96), it can't be handled as one
2200 charset. So, in Emacs, Big5 is divided into two: `charset-big5-1'
2201 and `charset-big5-2'. Both are DIMENSION2 and CHARS94. The former
2202 contains frequently used characters and the latter contains less
2203 frequently used characters. */
/* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
   are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
   C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
   format.  CHARSET is `charset_big5_1' or `charset_big5_2'.  */
/* Number of Big5 characters which have the same code in 1st byte:
   the 2nd byte takes 63 values in 0x40 .. 0x7E plus 94 values in
   0xA1 .. 0xFE.  */
#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)

/* Decode the BIG5 byte pair B1, B2.  Store `charset_big5_1' or
   `charset_big5_2' in CHARSET and the two position-codes in C1 and C2.
   Every parameter is parenthesized so that an arbitrary expression
   (e.g. a conditional expression) can be passed safely.  B1 and B2
   are evaluated more than once, so they must not have side effects.  */
#define DECODE_BIG5(b1, b2, charset, c1, c2)				\
  do {									\
    unsigned int temp							\
      = (((b1) - 0xA1) * BIG5_SAME_ROW					\
	 + (b2) - ((b2) < 0x7F ? 0x40 : 0x62));				\
    /* Lead bytes below 0xC9 belong to the first charset.  */		\
    if ((b1) < 0xC9)							\
      (charset) = charset_big5_1;					\
    else								\
      {									\
	(charset) = charset_big5_2;					\
	temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW;				\
      }									\
    (c1) = temp / (0xFF - 0xA1) + 0x21;					\
    (c2) = temp % (0xFF - 0xA1) + 0x21;					\
  } while (0)

/* Inverse of DECODE_BIG5: encode position-codes C1, C2 of CHARSET
   (`charset_big5_1' or `charset_big5_2') into the BIG5 byte pair B1,
   B2.  B2 is used as scratch storage and must be an lvalue.  */
#define ENCODE_BIG5(charset, c1, c2, b1, b2)				\
  do {									\
    unsigned int temp = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);	\
    if ((charset) == charset_big5_2)					\
      temp += BIG5_SAME_ROW * (0xC9 - 0xA1);				\
    (b1) = temp / BIG5_SAME_ROW + 0xA1;					\
    (b2) = temp % BIG5_SAME_ROW;					\
    /* The first 63 columns map to 0x40 .. 0x7E, the rest to		\
       0xA1 .. 0xFE.  */						\
    (b2) += (b2) < 0x3F ? 0x40 : 0x62;					\
  } while (0)
/* Produce the character (CHARSET, C1, C2) in Emacs' internal format,
   after mapping it through `translation_table' when that table is
   non-nil.  A translation result that is negative leaves the
   character untouched.  C1 and C2 must be lvalues; they are
   overwritten when a translation applies.  The macro relies on the
   variable `translation_table' and on whatever the
   DECODE_CHARACTER_XXX macros use from the enclosing function.  */
#define DECODE_SJIS_BIG5_CHARACTER(charset, c1, c2)			\
  do {									\
    int charset_alt = (charset);					\
    if (!NILP (translation_table))					\
      {									\
	int c_alt = translate_char (translation_table,			\
				    -1, (charset), c1, c2);		\
	if (c_alt >= 0)							\
	  {								\
	    SPLIT_CHAR (c_alt, charset_alt, c1, c2);			\
	  }								\
      }									\
    if (charset_alt < 0 || charset_alt == CHARSET_ASCII)		\
      DECODE_CHARACTER_ASCII (c1);					\
    else if (CHARSET_DIMENSION (charset_alt) != 1)			\
      DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);		\
    else								\
      DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);			\
  } while (0)
/* Encode the character (CHARSET, C1, C2) at DST in SJIS (when `sjis_p'
   is nonzero) or BIG5 (otherwise), after mapping it through
   `translation_table' when that table is non-nil.  Characters of
   charsets that the target encoding cannot express are passed
   through as the charset id followed by the position-codes, and
   `coding->fake_multibyte' is set.  At most three bytes are written.
   C1 and C2 must be lvalues.  The macro uses the variables `dst',
   `coding', `sjis_p' and `translation_table' of the enclosing
   function and counts one consumed character.  */
#define ENCODE_SJIS_BIG5_CHARACTER(charset, c1, c2)			\
  do {									\
    int c_alt = -1, charset_alt;					\
    if (!NILP (translation_table))					\
      c_alt = translate_char (translation_table, -1,			\
			      charset, c1, c2);				\
    if (c_alt >= 0)							\
      {									\
	SPLIT_CHAR (c_alt, charset_alt, c1, c2);			\
      }									\
    else								\
      charset_alt = charset;						\
    if (charset_alt == charset_ascii)					\
      *dst++ = c1;							\
    else if (CHARSET_DIMENSION (charset_alt) == 1)			\
      {									\
	if (sjis_p && charset_alt == charset_katakana_jisx0201)		\
	  *dst++ = c1;							\
	else if (sjis_p && charset_alt == charset_latin_jisx0201)	\
	  *dst++ = c1 & 0x7F;						\
	else								\
	  {								\
	    /* Not expressible: pass through unencoded.  */		\
	    *dst++ = charset_alt;					\
	    *dst++ = c1;						\
	    coding->fake_multibyte = 1;					\
	  }								\
      }									\
    else								\
      {									\
	c1 &= 0x7F;							\
	c2 &= 0x7F;							\
	if (sjis_p && (charset_alt == charset_jisx0208			\
		       || charset_alt == charset_jisx0208_1978))	\
	  {								\
	    unsigned char s1, s2;					\
									\
	    ENCODE_SJIS (c1, c2, s1, s2);				\
	    *dst++ = s1;						\
	    *dst++ = s2;						\
	    coding->fake_multibyte = 1;					\
	  }								\
	else if (!sjis_p						\
		 && (charset_alt == charset_big5_1			\
		     || charset_alt == charset_big5_2))			\
	  {								\
	    unsigned char b1, b2;					\
									\
	    ENCODE_BIG5 (charset_alt, c1, c2, b1, b2);			\
	    *dst++ = b1;						\
	    *dst++ = b2;						\
	  }								\
	else								\
	  {								\
	    /* Not expressible: pass through unencoded.  */		\
	    *dst++ = charset_alt;					\
	    *dst++ = c1;						\
	    *dst++ = c2;						\
	    coding->fake_multibyte = 1;					\
	  }								\
      }									\
    coding->consumed_char++;						\
  } while (0)
2307 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2308 Check if a text is encoded in SJIS. If it is, return
2309 CODING_CATEGORY_MASK_SJIS, else return 0. */
2312 detect_coding_sjis (src
, src_end
)
2313 unsigned char *src
, *src_end
;
2317 while (src
< src_end
)
2320 if ((c
>= 0x80 && c
< 0xA0) || c
>= 0xE0)
2322 if (src
< src_end
&& *src
++ < 0x40)
2326 return CODING_CATEGORY_MASK_SJIS
;
2329 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2330 Check if a text is encoded in BIG5. If it is, return
2331 CODING_CATEGORY_MASK_BIG5, else return 0. */
2334 detect_coding_big5 (src
, src_end
)
2335 unsigned char *src
, *src_end
;
2339 while (src
< src_end
)
2347 if (c
< 0x40 || (c
>= 0x7F && c
<= 0xA0))
2351 return CODING_CATEGORY_MASK_BIG5
;
2354 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2355 Check if a text is encoded in UTF-8. If it is, return
2356 CODING_CATEGORY_MASK_UTF_8, else return 0. */
/* Nonzero if the bits of C selected by MASK equal PREFIX.  */
#define UTF_8_PREFIX_P(c, mask, prefix) (((c) & (mask)) == (prefix))

/* Classification of one octet C of a UTF-8 sequence: a single-octet
   character, a trailing octet, or the leading octet of a sequence of
   2 to 6 octets.  */
#define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
#define UTF_8_EXTRA_OCTET_P(c)     UTF_8_PREFIX_P (c, 0xC0, 0x80)
#define UTF_8_2_OCTET_LEADING_P(c) UTF_8_PREFIX_P (c, 0xE0, 0xC0)
#define UTF_8_3_OCTET_LEADING_P(c) UTF_8_PREFIX_P (c, 0xF0, 0xE0)
#define UTF_8_4_OCTET_LEADING_P(c) UTF_8_PREFIX_P (c, 0xF8, 0xF0)
#define UTF_8_5_OCTET_LEADING_P(c) UTF_8_PREFIX_P (c, 0xFC, 0xF8)
#define UTF_8_6_OCTET_LEADING_P(c) UTF_8_PREFIX_P (c, 0xFE, 0xFC)
2367 detect_coding_utf_8 (src
, src_end
)
2368 unsigned char *src
, *src_end
;
2371 int seq_maybe_bytes
;
2373 while (src
< src_end
)
2376 if (UTF_8_1_OCTET_P (c
))
2378 else if (UTF_8_2_OCTET_LEADING_P (c
))
2379 seq_maybe_bytes
= 1;
2380 else if (UTF_8_3_OCTET_LEADING_P (c
))
2381 seq_maybe_bytes
= 2;
2382 else if (UTF_8_4_OCTET_LEADING_P (c
))
2383 seq_maybe_bytes
= 3;
2384 else if (UTF_8_5_OCTET_LEADING_P (c
))
2385 seq_maybe_bytes
= 4;
2386 else if (UTF_8_6_OCTET_LEADING_P (c
))
2387 seq_maybe_bytes
= 5;
2394 return CODING_CATEGORY_MASK_UTF_8
;
2397 if (!UTF_8_EXTRA_OCTET_P (c
))
2401 while (seq_maybe_bytes
> 0);
2404 return CODING_CATEGORY_MASK_UTF_8
;
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
   Check if a text is encoded in UTF-16 Big Endian or Little Endian by
   looking at its first two bytes.  If it is, return
   CODING_CATEGORY_MASK_UTF_16_BE or CODING_CATEGORY_MASK_UTF_16_LE,
   else return 0.  */

/* Nonzero if the 16-bit code unit VAL is one of the two values that
   are never valid in UTF-16 text.  */
#define UTF_16_INVALID_P(val)	\
  (((val) == 0xFFFE)		\
   || ((val) == 0xFFFF))

/* Nonzero if VAL is a high (leading) surrogate, 0xD800 .. 0xDBFF.
   The top six bits identify the surrogate kind, so the mask must be
   0xFC00; masking with 0xD800 would also accept low surrogates and
   every code unit from 0xF800 up.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* Nonzero if VAL is a low (trailing) surrogate, 0xDC00 .. 0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val)  \
  (((val) & 0xFC00) == 0xDC00)
2424 detect_coding_utf_16 (src
, src_end
)
2425 unsigned char *src
, *src_end
;
2427 if ((src
+ 1) >= src_end
) return 0;
2429 if ((src
[0] == 0xFF) && (src
[1] == 0xFE))
2430 return CODING_CATEGORY_MASK_UTF_16_LE
;
2431 else if ((src
[0] == 0xFE) && (src
[1] == 0xFF))
2432 return CODING_CATEGORY_MASK_UTF_16_BE
;
/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
   If SJIS_P is 1, decode SJIS text, else decode BIG5 text.  */
2441 decode_coding_sjis_big5 (coding
, source
, destination
,
2442 src_bytes
, dst_bytes
, sjis_p
)
2443 struct coding_system
*coding
;
2444 unsigned char *source
, *destination
;
2445 int src_bytes
, dst_bytes
;
2448 unsigned char *src
= source
;
2449 unsigned char *src_end
= source
+ src_bytes
;
2450 unsigned char *dst
= destination
;
2451 unsigned char *dst_end
= destination
+ dst_bytes
;
2452 /* Since the maximum bytes produced by each loop is 4, we subtract 3
2453 from DST_END to assure overflow checking is necessary only at the
2455 unsigned char *adjusted_dst_end
= dst_end
- 3;
2456 Lisp_Object translation_table
2457 = coding
->translation_table_for_decode
;
2458 int result
= CODING_FINISH_NORMAL
;
2460 if (!NILP (Venable_character_translation
) && NILP (translation_table
))
2461 translation_table
= Vstandard_translation_table_for_decode
;
2463 coding
->produced_char
= 0;
2464 coding
->fake_multibyte
= 0;
2465 while (src
< src_end
&& (dst_bytes
2466 ? (dst
< adjusted_dst_end
)
2469 /* SRC_BASE remembers the start position in source in each loop.
2470 The loop will be exited when there's not enough source text
2471 to analyze two-byte character (within macro ONE_MORE_BYTE).
2472 In that case, SRC is reset to SRC_BASE before exiting. */
2473 unsigned char *src_base
= src
;
2474 unsigned char c1
= *src
++, c2
, c3
, c4
;
2480 if (coding
->eol_type
== CODING_EOL_CRLF
)
2485 else if (coding
->mode
& CODING_MODE_INHIBIT_INCONSISTENT_EOL
)
2487 result
= CODING_FINISH_INCONSISTENT_EOL
;
2488 goto label_end_of_loop_2
;
2491 /* To process C2 again, SRC is subtracted by 1. */
2494 else if (coding
->eol_type
== CODING_EOL_CR
)
2500 && (coding
->mode
& CODING_MODE_INHIBIT_INCONSISTENT_EOL
)
2501 && (coding
->eol_type
== CODING_EOL_CR
2502 || coding
->eol_type
== CODING_EOL_CRLF
))
2504 result
= CODING_FINISH_INCONSISTENT_EOL
;
2505 goto label_end_of_loop_2
;
2509 coding
->produced_char
++;
2513 c2
= 0; /* avoid warning */
2514 DECODE_SJIS_BIG5_CHARACTER (charset_ascii
, c1
, /* dummy */ c2
);
2520 if (c1
< 0xA0 || (c1
>= 0xE0 && c1
< 0xF0))
2522 /* SJIS -> JISX0208 */
2524 if (c2
>= 0x40 && c2
!= 0x7F && c2
<= 0xFC)
2526 DECODE_SJIS (c1
, c2
, c3
, c4
);
2527 DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208
, c3
, c4
);
2530 goto label_invalid_code_2
;
2533 /* SJIS -> JISX0201-Kana */
2535 c2
= 0; /* avoid warning */
2536 DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201
, c1
,
2540 goto label_invalid_code_1
;
2545 if (c1
>= 0xA1 && c1
<= 0xFE)
2548 if ((c2
>= 0x40 && c2
<= 0x7E) || (c2
>= 0xA1 && c2
<= 0xFE))
2552 DECODE_BIG5 (c1
, c2
, charset
, c3
, c4
);
2553 DECODE_SJIS_BIG5_CHARACTER (charset
, c3
, c4
);
2556 goto label_invalid_code_2
;
2559 goto label_invalid_code_1
;
2564 label_invalid_code_1
:
2566 coding
->produced_char
++;
2567 coding
->fake_multibyte
= 1;
2570 label_invalid_code_2
:
2571 *dst
++ = c1
; *dst
++= c2
;
2572 coding
->produced_char
+= 2;
2573 coding
->fake_multibyte
= 1;
2577 result
= CODING_FINISH_INSUFFICIENT_SRC
;
2578 label_end_of_loop_2
:
2585 if (result
== CODING_FINISH_NORMAL
)
2586 result
= CODING_FINISH_INSUFFICIENT_DST
;
2587 else if (result
!= CODING_FINISH_INCONSISTENT_EOL
2588 && coding
->mode
& CODING_MODE_LAST_BLOCK
)
2590 src_bytes
= src_end
- src
;
2591 if (dst_bytes
&& (dst_end
- dst
< src_bytes
))
2592 src_bytes
= dst_end
- dst
;
2593 bcopy (dst
, src
, src_bytes
);
2596 coding
->fake_multibyte
= 1;
2600 coding
->consumed
= coding
->consumed_char
= src
- source
;
2601 coding
->produced
= dst
- destination
;
/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
   This function can encode `charset_ascii', `charset_katakana_jisx0201',
   `charset_jisx0208', `charset_big5_1', and `charset_big5_2'.  We are
   sure that all these charsets are registered as official charsets
   (i.e. do not have extended leading-codes).  Characters of other
   charsets are produced without any encoding.  If SJIS_P is 1, encode
   SJIS text, else encode BIG5 text.  */
2614 encode_coding_sjis_big5 (coding
, source
, destination
,
2615 src_bytes
, dst_bytes
, sjis_p
)
2616 struct coding_system
*coding
;
2617 unsigned char *source
, *destination
;
2618 int src_bytes
, dst_bytes
;
2621 unsigned char *src
= source
;
2622 unsigned char *src_end
= source
+ src_bytes
;
2623 unsigned char *dst
= destination
;
2624 unsigned char *dst_end
= destination
+ dst_bytes
;
2625 /* Since the maximum bytes produced by each loop is 2, we subtract 1
2626 from DST_END to assure overflow checking is necessary only at the
2628 unsigned char *adjusted_dst_end
= dst_end
- 1;
2629 Lisp_Object translation_table
2630 = coding
->translation_table_for_encode
;
2631 int result
= CODING_FINISH_NORMAL
;
2633 if (!NILP (Venable_character_translation
) && NILP (translation_table
))
2634 translation_table
= Vstandard_translation_table_for_encode
;
2636 coding
->consumed_char
= 0;
2637 coding
->fake_multibyte
= 0;
2638 while (src
< src_end
&& (dst_bytes
2639 ? (dst
< adjusted_dst_end
)
2642 /* SRC_BASE remembers the start position in source in each loop.
2643 The loop will be exited when there's not enough source text
2644 to analyze multi-byte codes (within macros ONE_MORE_BYTE and
2645 TWO_MORE_BYTES). In that case, SRC is reset to SRC_BASE
2647 unsigned char *src_base
= src
;
2648 unsigned char c1
= *src
++, c2
, c3
, c4
;
2650 switch (emacs_code_class
[c1
])
2652 case EMACS_ascii_code
:
2653 ENCODE_SJIS_BIG5_CHARACTER (charset_ascii
, c1
, /* dummy */ c2
);
2656 case EMACS_control_code
:
2658 coding
->consumed_char
++;
2661 case EMACS_carriage_return_code
:
2662 if (! (coding
->mode
& CODING_MODE_SELECTIVE_DISPLAY
))
2665 coding
->consumed_char
++;
2668 /* fall down to treat '\r' as '\n' ... */
2670 case EMACS_linefeed_code
:
2671 if (coding
->eol_type
== CODING_EOL_LF
2672 || coding
->eol_type
== CODING_EOL_UNDECIDED
)
2674 else if (coding
->eol_type
== CODING_EOL_CRLF
)
2675 *dst
++ = '\r', *dst
++ = '\n';
2678 coding
->consumed_char
++;
2681 case EMACS_leading_code_2
:
2683 ENCODE_SJIS_BIG5_CHARACTER (c1
, c2
, /* dummy */ c3
);
2686 case EMACS_leading_code_3
:
2687 TWO_MORE_BYTES (c2
, c3
);
2688 ENCODE_SJIS_BIG5_CHARACTER (c1
, c2
, c3
);
2691 case EMACS_leading_code_4
:
2692 THREE_MORE_BYTES (c2
, c3
, c4
);
2693 ENCODE_SJIS_BIG5_CHARACTER (c2
, c3
, c4
);
2696 default: /* i.e. case EMACS_invalid_code: */
2698 coding
->consumed_char
++;
2703 result
= CODING_FINISH_INSUFFICIENT_SRC
;
2708 if (result
== CODING_FINISH_NORMAL
2710 result
= CODING_FINISH_INSUFFICIENT_DST
;
2711 coding
->consumed
= src
- source
;
2712 coding
->produced
= coding
->produced_char
= dst
- destination
;
2717 /*** 5. CCL handlers ***/
2719 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2720 Check if a text is encoded in a coding system of which
2721 encoder/decoder are written in CCL program. If it is, return
2722 CODING_CATEGORY_MASK_CCL, else return 0. */
2725 detect_coding_ccl (src
, src_end
)
2726 unsigned char *src
, *src_end
;
2728 unsigned char *valid
;
2730 /* No coding system is assigned to coding-category-ccl. */
2731 if (!coding_system_table
[CODING_CATEGORY_IDX_CCL
])
2734 valid
= coding_system_table
[CODING_CATEGORY_IDX_CCL
]->spec
.ccl
.valid_codes
;
2735 while (src
< src_end
)
2737 if (! valid
[*src
]) return 0;
2740 return CODING_CATEGORY_MASK_CCL
;
2744 /*** 6. End-of-line handlers ***/
2746 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2747 This function is called only when `coding->eol_type' is
2748 CODING_EOL_CRLF or CODING_EOL_CR. */
2751 decode_eol (coding
, source
, destination
, src_bytes
, dst_bytes
)
2752 struct coding_system
*coding
;
2753 unsigned char *source
, *destination
;
2754 int src_bytes
, dst_bytes
;
2756 unsigned char *src
= source
;
2757 unsigned char *src_end
= source
+ src_bytes
;
2758 unsigned char *dst
= destination
;
2759 unsigned char *dst_end
= destination
+ dst_bytes
;
2761 int result
= CODING_FINISH_NORMAL
;
2763 coding
->fake_multibyte
= 0;
2767 coding
->produced
= coding
->produced_char
= 0;
2768 coding
->consumed
= coding
->consumed_char
= 0;
2772 switch (coding
->eol_type
)
2774 case CODING_EOL_CRLF
:
2776 /* Since the maximum bytes produced by each loop is 2, we
2777 subtract 1 from DST_END to assure overflow checking is
2778 necessary only at the head of loop. */
2779 unsigned char *adjusted_dst_end
= dst_end
- 1;
2781 while (src
< src_end
&& (dst_bytes
2782 ? (dst
< adjusted_dst_end
)
2785 unsigned char *src_base
= src
;
2795 if (coding
->mode
& CODING_MODE_INHIBIT_INCONSISTENT_EOL
)
2797 result
= CODING_FINISH_INCONSISTENT_EOL
;
2798 goto label_end_of_loop_2
;
2802 if (BASE_LEADING_CODE_P (c
))
2803 coding
->fake_multibyte
= 1;
2807 && (coding
->mode
& CODING_MODE_INHIBIT_INCONSISTENT_EOL
))
2809 result
= CODING_FINISH_INCONSISTENT_EOL
;
2810 goto label_end_of_loop_2
;
2815 if (BASE_LEADING_CODE_P (c
))
2816 coding
->fake_multibyte
= 1;
2821 result
= CODING_FINISH_INSUFFICIENT_SRC
;
2822 label_end_of_loop_2
:
2828 if (result
== CODING_FINISH_NORMAL
)
2829 result
= CODING_FINISH_INSUFFICIENT_DST
;
2830 else if (result
!= CODING_FINISH_INCONSISTENT_EOL
2831 && coding
->mode
& CODING_MODE_LAST_BLOCK
)
2833 /* This is the last block of the text to be decoded.
2834 We flush out all remaining codes. */
2835 src_bytes
= src_end
- src
;
2836 if (dst_bytes
&& (dst_end
- dst
< src_bytes
))
2837 src_bytes
= dst_end
- dst
;
2838 bcopy (src
, dst
, src_bytes
);
2847 if (coding
->mode
& CODING_MODE_INHIBIT_INCONSISTENT_EOL
)
2849 while (src
< src_end
)
2851 if ((c
= *src
++) == '\n')
2853 if (BASE_LEADING_CODE_P (c
))
2854 coding
->fake_multibyte
= 1;
2858 src_bytes
= src
- source
;
2859 result
= CODING_FINISH_INCONSISTENT_EOL
;
2862 if (dst_bytes
&& src_bytes
> dst_bytes
)
2864 result
= CODING_FINISH_INSUFFICIENT_DST
;
2865 src_bytes
= dst_bytes
;
2868 bcopy (source
, destination
, src_bytes
);
2870 safe_bcopy (source
, destination
, src_bytes
);
2871 src
= source
+ src_bytes
;
2872 while (src_bytes
--) if (*dst
++ == '\r') dst
[-1] = '\n';
2875 default: /* i.e. case: CODING_EOL_LF */
2876 if (dst_bytes
&& src_bytes
> dst_bytes
)
2878 result
= CODING_FINISH_INSUFFICIENT_DST
;
2879 src_bytes
= dst_bytes
;
2882 bcopy (source
, destination
, src_bytes
);
2884 safe_bcopy (source
, destination
, src_bytes
);
2887 coding
->fake_multibyte
= 1;
2891 coding
->consumed
= coding
->consumed_char
= src
- source
;
2892 coding
->produced
= coding
->produced_char
= dst
- destination
;
2896 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions". Encode
2897 format of end-of-line according to `coding->eol_type'. If
2898 `coding->mode & CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code
2899 '\r' in source text also means end-of-line. */
2902 encode_eol (coding
, source
, destination
, src_bytes
, dst_bytes
)
2903 struct coding_system
*coding
;
2904 unsigned char *source
, *destination
;
2905 int src_bytes
, dst_bytes
;
2907 unsigned char *src
= source
;
2908 unsigned char *dst
= destination
;
2909 int result
= CODING_FINISH_NORMAL
;
2911 coding
->fake_multibyte
= 0;
2913 if (coding
->eol_type
== CODING_EOL_CRLF
)
2916 unsigned char *src_end
= source
+ src_bytes
;
2917 unsigned char *dst_end
= destination
+ dst_bytes
;
2918 /* Since the maximum bytes produced by each loop is 2, we
2919 subtract 1 from DST_END to assure overflow checking is
2920 necessary only at the head of loop. */
2921 unsigned char *adjusted_dst_end
= dst_end
- 1;
2923 while (src
< src_end
&& (dst_bytes
2924 ? (dst
< adjusted_dst_end
)
2929 || (c
== '\r' && (coding
->mode
& CODING_MODE_SELECTIVE_DISPLAY
)))
2930 *dst
++ = '\r', *dst
++ = '\n';
2934 if (BASE_LEADING_CODE_P (c
))
2935 coding
->fake_multibyte
= 1;
2939 result
= CODING_FINISH_INSUFFICIENT_DST
;
2945 if (dst_bytes
&& src_bytes
> dst_bytes
)
2947 src_bytes
= dst_bytes
;
2948 result
= CODING_FINISH_INSUFFICIENT_DST
;
2951 bcopy (source
, destination
, src_bytes
);
2953 safe_bcopy (source
, destination
, src_bytes
);
2954 dst_bytes
= src_bytes
;
2955 if (coding
->eol_type
== CODING_EOL_CR
)
2959 if ((c
= *dst
++) == '\n')
2961 else if (BASE_LEADING_CODE_P (c
))
2962 coding
->fake_multibyte
= 1;
2967 if (coding
->mode
& CODING_MODE_SELECTIVE_DISPLAY
)
2970 if (*dst
++ == '\r') dst
[-1] = '\n';
2972 coding
->fake_multibyte
= 1;
2974 src
= source
+ dst_bytes
;
2975 dst
= destination
+ dst_bytes
;
2978 coding
->consumed
= coding
->consumed_char
= src
- source
;
2979 coding
->produced
= coding
->produced_char
= dst
- destination
;
2984 /*** 7. C library functions ***/
2986 /* In Emacs Lisp, coding system is represented by a Lisp symbol which
2987 has a property `coding-system'. The value of this property is a
2988 vector of length 5 (called as coding-vector). Among elements of
2989 this vector, the first (element[0]) and the fifth (element[4])
2990 carry important information for decoding/encoding. Before
2991 decoding/encoding, this information should be set in fields of a
2992 structure of type `coding_system'.
2994 A value of property `coding-system' can be a symbol of another
2995 subsidiary coding-system. In that case, Emacs gets coding-vector
2998 `element[0]' contains information to be set in `coding->type'. The
2999 value and its meaning is as follows:
3001 0 -- coding_type_emacs_mule
3002 1 -- coding_type_sjis
3003 2 -- coding_type_iso2022
3004 3 -- coding_type_big5
3005 4 -- coding_type_ccl encoder/decoder written in CCL
3006 nil -- coding_type_no_conversion
3007 t -- coding_type_undecided (automatic conversion on decoding,
3008 no-conversion on encoding)
3010 `element[4]' contains information to be set in `coding->flags' and
3011 `coding->spec'. The meaning varies by `coding->type'.
3013 If `coding->type' is `coding_type_iso2022', element[4] is a vector
3014 of length 32 (of which the first 13 sub-elements are used now).
3015 Meanings of these sub-elements are:
3017 sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
3018 If the value is an integer of valid charset, the charset is
3019 assumed to be designated to graphic register N initially.
3021 If the value is minus, it is a minus value of charset which
3022 reserves graphic register N, which means that the charset is
3023 not designated initially but should be designated to graphic
3024 register N just before encoding a character in that charset.
3026 If the value is nil, graphic register N is never used on
3029 sub-element[N] where N is 4 through 11: to be set in `coding->flags'
3030 Each value takes t or nil. See the section ISO2022 of
3031 `coding.h' for more information.
3033 If `coding->type' is `coding_type_big5', element[4] is t to denote
3034 BIG5-ETen or nil to denote BIG5-HKU.
3036 If `coding->type' takes the other value, element[4] is ignored.
3038 Emacs Lisp's coding system also carries information about format of
3039 end-of-line in a value of property `eol-type'. If the value is
3040 integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
3041 means CODING_EOL_CR. If it is not integer, it should be a vector
3042 of subsidiary coding systems of which property `eol-type' has one
3047 /* Extract information for decoding/encoding from CODING_SYSTEM_SYMBOL
3048 and set it in CODING. If CODING_SYSTEM_SYMBOL is invalid, CODING
3049 is setup so that no conversion is necessary and return -1, else
3053 setup_coding_system (coding_system
, coding
)
3054 Lisp_Object coding_system
;
3055 struct coding_system
*coding
;
3057 Lisp_Object coding_spec
, coding_type
, eol_type
, plist
;
3061 /* Initialize some fields required for all kinds of coding systems. */
3062 coding
->symbol
= coding_system
;
3063 coding
->common_flags
= 0;
3065 coding
->heading_ascii
= -1;
3066 coding
->post_read_conversion
= coding
->pre_write_conversion
= Qnil
;
3067 coding
->composing
= COMPOSITION_DISABLED
;
3068 coding
->cmp_data
= NULL
;
3070 if (NILP (coding_system
))
3071 goto label_invalid_coding_system
;
3073 coding_spec
= Fget (coding_system
, Qcoding_system
);
3075 if (!VECTORP (coding_spec
)
3076 || XVECTOR (coding_spec
)->size
!= 5
3077 || !CONSP (XVECTOR (coding_spec
)->contents
[3]))
3078 goto label_invalid_coding_system
;
3080 eol_type
= inhibit_eol_conversion
? Qnil
: Fget (coding_system
, Qeol_type
);
3081 if (VECTORP (eol_type
))
3083 coding
->eol_type
= CODING_EOL_UNDECIDED
;
3084 coding
->common_flags
= CODING_REQUIRE_DETECTION_MASK
;
3086 else if (XFASTINT (eol_type
) == 1)
3088 coding
->eol_type
= CODING_EOL_CRLF
;
3089 coding
->common_flags
3090 = CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
;
3092 else if (XFASTINT (eol_type
) == 2)
3094 coding
->eol_type
= CODING_EOL_CR
;
3095 coding
->common_flags
3096 = CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
;
3099 coding
->eol_type
= CODING_EOL_LF
;
3101 coding_type
= XVECTOR (coding_spec
)->contents
[0];
3102 /* Try short cut. */
3103 if (SYMBOLP (coding_type
))
3105 if (EQ (coding_type
, Qt
))
3107 coding
->type
= coding_type_undecided
;
3108 coding
->common_flags
|= CODING_REQUIRE_DETECTION_MASK
;
3111 coding
->type
= coding_type_no_conversion
;
3115 /* Get values of coding system properties:
3116 `post-read-conversion', `pre-write-conversion',
3117 `translation-table-for-decode', `translation-table-for-encode'. */
3118 plist
= XVECTOR (coding_spec
)->contents
[3];
3119 /* Pre & post conversion functions should be disabled if
3120 inhibit_eol_conversion is nozero. This is the case that a code
3121 conversion function is called while those functions are running. */
3122 if (! inhibit_pre_post_conversion
)
3124 coding
->post_read_conversion
= Fplist_get (plist
, Qpost_read_conversion
);
3125 coding
->pre_write_conversion
= Fplist_get (plist
, Qpre_write_conversion
);
3127 val
= Fplist_get (plist
, Qtranslation_table_for_decode
);
3129 val
= Fget (val
, Qtranslation_table_for_decode
);
3130 coding
->translation_table_for_decode
= CHAR_TABLE_P (val
) ? val
: Qnil
;
3131 val
= Fplist_get (plist
, Qtranslation_table_for_encode
);
3133 val
= Fget (val
, Qtranslation_table_for_encode
);
3134 coding
->translation_table_for_encode
= CHAR_TABLE_P (val
) ? val
: Qnil
;
3135 val
= Fplist_get (plist
, Qcoding_category
);
3138 val
= Fget (val
, Qcoding_category_index
);
3140 coding
->category_idx
= XINT (val
);
3142 goto label_invalid_coding_system
;
3145 goto label_invalid_coding_system
;
3147 val
= Fplist_get (plist
, Qsafe_charsets
);
3150 for (i
= 0; i
<= MAX_CHARSET
; i
++)
3151 coding
->safe_charsets
[i
] = 1;
3155 bzero (coding
->safe_charsets
, MAX_CHARSET
+ 1);
3158 if ((i
= get_charset_id (XCAR (val
))) >= 0)
3159 coding
->safe_charsets
[i
] = 1;
3164 /* If the coding system has non-nil `composition' property, enable
3165 composition handling. */
3166 val
= Fplist_get (plist
, Qcomposition
);
3168 coding
->composing
= COMPOSITION_NO
;
3170 switch (XFASTINT (coding_type
))
3173 coding
->type
= coding_type_emacs_mule
;
3174 if (!NILP (coding
->post_read_conversion
))
3175 coding
->common_flags
|= CODING_REQUIRE_DECODING_MASK
;
3176 if (!NILP (coding
->pre_write_conversion
))
3177 coding
->common_flags
|= CODING_REQUIRE_ENCODING_MASK
;
3181 coding
->type
= coding_type_sjis
;
3182 coding
->common_flags
3183 |= CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
;
3187 coding
->type
= coding_type_iso2022
;
3188 coding
->common_flags
3189 |= CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
;
3191 Lisp_Object val
, temp
;
3193 int i
, charset
, reg_bits
= 0;
3195 val
= XVECTOR (coding_spec
)->contents
[4];
3197 if (!VECTORP (val
) || XVECTOR (val
)->size
!= 32)
3198 goto label_invalid_coding_system
;
3200 flags
= XVECTOR (val
)->contents
;
3202 = ((NILP (flags
[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM
)
3203 | (NILP (flags
[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL
)
3204 | (NILP (flags
[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL
)
3205 | (NILP (flags
[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS
)
3206 | (NILP (flags
[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT
)
3207 | (NILP (flags
[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT
)
3208 | (NILP (flags
[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN
)
3209 | (NILP (flags
[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS
)
3210 | (NILP (flags
[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION
)
3211 | (NILP (flags
[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL
)
3212 | (NILP (flags
[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL
)
3213 | (NILP (flags
[15]) ? 0 : CODING_FLAG_ISO_SAFE
)
3214 | (NILP (flags
[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA
)
3217 /* Invoke graphic register 0 to plane 0. */
3218 CODING_SPEC_ISO_INVOCATION (coding
, 0) = 0;
3219 /* Invoke graphic register 1 to plane 1 if we can use full 8-bit. */
3220 CODING_SPEC_ISO_INVOCATION (coding
, 1)
3221 = (coding
->flags
& CODING_FLAG_ISO_SEVEN_BITS
? -1 : 1);
3222 /* Not single shifting at first. */
3223 CODING_SPEC_ISO_SINGLE_SHIFTING (coding
) = 0;
3224 /* Beginning of buffer should also be regarded as bol. */
3225 CODING_SPEC_ISO_BOL (coding
) = 1;
3227 for (charset
= 0; charset
<= MAX_CHARSET
; charset
++)
3228 CODING_SPEC_ISO_REVISION_NUMBER (coding
, charset
) = 255;
3229 val
= Vcharset_revision_alist
;
3232 charset
= get_charset_id (Fcar_safe (XCAR (val
)));
3234 && (temp
= Fcdr_safe (XCAR (val
)), INTEGERP (temp
))
3235 && (i
= XINT (temp
), (i
>= 0 && (i
+ '@') < 128)))
3236 CODING_SPEC_ISO_REVISION_NUMBER (coding
, charset
) = i
;
3240 /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
3241 FLAGS[REG] can be one of below:
3242 integer CHARSET: CHARSET occupies register I,
3243 t: designate nothing to REG initially, but can be used
3245 list of integer, nil, or t: designate the first
3246 element (if integer) to REG initially, the remaining
3247 elements (if integer) is designated to REG on request,
3248 if an element is t, REG can be used by any charsets,
3249 nil: REG is never used. */
3250 for (charset
= 0; charset
<= MAX_CHARSET
; charset
++)
3251 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding
, charset
)
3252 = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION
;
3253 for (i
= 0; i
< 4; i
++)
3255 if (INTEGERP (flags
[i
])
3256 && (charset
= XINT (flags
[i
]), CHARSET_VALID_P (charset
))
3257 || (charset
= get_charset_id (flags
[i
])) >= 0)
3259 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding
, i
) = charset
;
3260 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding
, charset
) = i
;
3262 else if (EQ (flags
[i
], Qt
))
3264 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding
, i
) = -1;
3266 coding
->flags
|= CODING_FLAG_ISO_DESIGNATION
;
3268 else if (CONSP (flags
[i
]))
3273 coding
->flags
|= CODING_FLAG_ISO_DESIGNATION
;
3274 if (INTEGERP (XCAR (tail
))
3275 && (charset
= XINT (XCAR (tail
)),
3276 CHARSET_VALID_P (charset
))
3277 || (charset
= get_charset_id (XCAR (tail
))) >= 0)
3279 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding
, i
) = charset
;
3280 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding
, charset
) =i
;
3283 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding
, i
) = -1;
3285 while (CONSP (tail
))
3287 if (INTEGERP (XCAR (tail
))
3288 && (charset
= XINT (XCAR (tail
)),
3289 CHARSET_VALID_P (charset
))
3290 || (charset
= get_charset_id (XCAR (tail
))) >= 0)
3291 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding
, charset
)
3293 else if (EQ (XCAR (tail
), Qt
))
3299 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding
, i
) = -1;
3301 CODING_SPEC_ISO_DESIGNATION (coding
, i
)
3302 = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding
, i
);
3305 if (reg_bits
&& ! (coding
->flags
& CODING_FLAG_ISO_LOCKING_SHIFT
))
3307 /* REG 1 can be used only by locking shift in 7-bit env. */
3308 if (coding
->flags
& CODING_FLAG_ISO_SEVEN_BITS
)
3310 if (! (coding
->flags
& CODING_FLAG_ISO_SINGLE_SHIFT
))
3311 /* Without any shifting, only REG 0 and 1 can be used. */
3316 for (charset
= 0; charset
<= MAX_CHARSET
; charset
++)
3318 if (CHARSET_VALID_P (charset
))
3320 /* There exist some default graphic registers to be
3323 /* We had better avoid designating a charset of
3324 CHARS96 to REG 0 as far as possible. */
3325 if (CHARSET_CHARS (charset
) == 96)
3326 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding
, charset
)
3328 ? 1 : (reg_bits
& 4 ? 2 : (reg_bits
& 8 ? 3 : 0)));
3330 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding
, charset
)
3332 ? 0 : (reg_bits
& 2 ? 1 : (reg_bits
& 4 ? 2 : 3)));
3336 coding
->common_flags
|= CODING_REQUIRE_FLUSHING_MASK
;
3337 coding
->spec
.iso2022
.last_invalid_designation_register
= -1;
3341 coding
->type
= coding_type_big5
;
3342 coding
->common_flags
3343 |= CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
;
3345 = (NILP (XVECTOR (coding_spec
)->contents
[4])
3346 ? CODING_FLAG_BIG5_HKU
3347 : CODING_FLAG_BIG5_ETEN
);
3351 coding
->type
= coding_type_ccl
;
3352 coding
->common_flags
3353 |= CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
;
3355 val
= XVECTOR (coding_spec
)->contents
[4];
3357 || setup_ccl_program (&(coding
->spec
.ccl
.decoder
),
3359 || setup_ccl_program (&(coding
->spec
.ccl
.encoder
),
3361 goto label_invalid_coding_system
;
3363 bzero (coding
->spec
.ccl
.valid_codes
, 256);
3364 val
= Fplist_get (plist
, Qvalid_codes
);
3369 for (; CONSP (val
); val
= XCDR (val
))
3373 && XINT (this) >= 0 && XINT (this) < 256)
3374 coding
->spec
.ccl
.valid_codes
[XINT (this)] = 1;
3375 else if (CONSP (this)
3376 && INTEGERP (XCAR (this))
3377 && INTEGERP (XCDR (this)))
3379 int start
= XINT (XCAR (this));
3380 int end
= XINT (XCDR (this));
3382 if (start
>= 0 && start
<= end
&& end
< 256)
3383 while (start
<= end
)
3384 coding
->spec
.ccl
.valid_codes
[start
++] = 1;
3389 coding
->common_flags
|= CODING_REQUIRE_FLUSHING_MASK
;
3393 coding
->type
= coding_type_raw_text
;
3397 goto label_invalid_coding_system
;
3401 label_invalid_coding_system
:
3402 coding
->type
= coding_type_no_conversion
;
3403 coding
->category_idx
= CODING_CATEGORY_IDX_BINARY
;
3404 coding
->common_flags
= 0;
3405 coding
->eol_type
= CODING_EOL_LF
;
3406 coding
->pre_write_conversion
= coding
->post_read_conversion
= Qnil
;
3410 /* Free memory blocks allocated for storing composition information. */
3413 coding_free_composition_data (coding
)
3414 struct coding_system
*coding
;
3416 struct composition_data
*cmp_data
= coding
->cmp_data
, *next
;
3420 /* Memory blocks are chained. At first, rewind to the first, then,
3421 free blocks one by one. */
3422 while (cmp_data
->prev
)
3423 cmp_data
= cmp_data
->prev
;
3426 next
= cmp_data
->next
;
3430 coding
->cmp_data
= NULL
;
3433 /* Set `char_offset' member of all memory blocks pointed by
3434 coding->cmp_data to POS. */
3437 coding_adjust_composition_offset (coding
, pos
)
3438 struct coding_system
*coding
;
3441 struct composition_data
*cmp_data
;
3443 for (cmp_data
= coding
->cmp_data
; cmp_data
; cmp_data
= cmp_data
->next
)
3444 cmp_data
->char_offset
= pos
;
3447 /* Setup raw-text or one of its subsidiaries in the structure
3448 coding_system CODING according to the already setup value eol_type
3449 in CODING. CODING should be setup for some coding system in
3453 setup_raw_text_coding_system (coding
)
3454 struct coding_system
*coding
;
3456 if (coding
->type
!= coding_type_raw_text
)
3458 coding
->symbol
= Qraw_text
;
3459 coding
->type
= coding_type_raw_text
;
3460 if (coding
->eol_type
!= CODING_EOL_UNDECIDED
)
3462 Lisp_Object subsidiaries
;
3463 subsidiaries
= Fget (Qraw_text
, Qeol_type
);
3465 if (VECTORP (subsidiaries
)
3466 && XVECTOR (subsidiaries
)->size
== 3)
3468 = XVECTOR (subsidiaries
)->contents
[coding
->eol_type
];
3470 setup_coding_system (coding
->symbol
, coding
);
3475 /* Emacs has a mechanism to automatically detect a coding system if it
3476 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
3477 it's impossible to distinguish some coding systems accurately
3478 because they use the same range of codes. So, at first, coding
3479 systems are categorized as follows:
3481 o coding-category-emacs-mule
3483 The category for a coding system which has the same code range
3484 as Emacs' internal format. Assigned the coding-system (Lisp
3485 symbol) `emacs-mule' by default.
3487 o coding-category-sjis
3489 The category for a coding system which has the same code range
3490 as SJIS. Assigned the coding-system (Lisp
3491 symbol) `japanese-shift-jis' by default.
3493 o coding-category-iso-7
3495 The category for a coding system which has the same code range
3496 as ISO2022 of 7-bit environment. This doesn't use any locking
3497 shift and single shift functions. This can encode/decode all
3498 charsets. Assigned the coding-system (Lisp symbol)
3499 `iso-2022-7bit' by default.
3501 o coding-category-iso-7-tight
3503 Same as coding-category-iso-7 except that this can
3504 encode/decode only the specified charsets.
3506 o coding-category-iso-8-1
3508 The category for a coding system which has the same code range
3509 as ISO2022 of 8-bit environment and graphic plane 1 used only
3510 for DIMENSION1 charset. This doesn't use any locking shift
3511 and single shift functions. Assigned the coding-system (Lisp
3512 symbol) `iso-latin-1' by default.
3514 o coding-category-iso-8-2
3516 The category for a coding system which has the same code range
3517 as ISO2022 of 8-bit environment and graphic plane 1 used only
3518 for DIMENSION2 charset. This doesn't use any locking shift
3519 and single shift functions. Assigned the coding-system (Lisp
3520 symbol) `japanese-iso-8bit' by default.
3522 o coding-category-iso-7-else
3524 The category for a coding system which has the same code range
3525 as ISO2022 of 7-bit environment but uses locking shift or
3526 single shift functions. Assigned the coding-system (Lisp
3527 symbol) `iso-2022-7bit-lock' by default.
3529 o coding-category-iso-8-else
3531 The category for a coding system which has the same code range
3532 as ISO2022 of 8-bit environment but uses locking shift or
3533 single shift functions. Assigned the coding-system (Lisp
3534 symbol) `iso-2022-8bit-ss2' by default.
3536 o coding-category-big5
3538 The category for a coding system which has the same code range
3539 as BIG5. Assigned the coding-system (Lisp symbol)
3540 `cn-big5' by default.
3542 o coding-category-utf-8
3544 The category for a coding system which has the same code range
3545 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
3546 symbol) `utf-8' by default.
3548 o coding-category-utf-16-be
3550 The category for a coding system in which a text has an
3551 Unicode signature (cf. Unicode Standard) in the order of BIG
3552 endian at the head. Assigned the coding-system (Lisp symbol)
3553 `utf-16-be' by default.
3555 o coding-category-utf-16-le
3557 The category for a coding system in which a text has an
3558 Unicode signature (cf. Unicode Standard) in the order of
3559 LITTLE endian at the head. Assigned the coding-system (Lisp
3560 symbol) `utf-16-le' by default.
3562 o coding-category-ccl
3564 The category for a coding system of which encoder/decoder is
3565 written in CCL programs. The default value is nil, i.e., no
3566 coding system is assigned.
3568 o coding-category-binary
3570 The category for a coding system not categorized in any of the
3571 above. Assigned the coding-system (Lisp symbol)
3572 `no-conversion' by default.
3574 Each of them is a Lisp symbol and the value is an actual
3575 `coding-system' (this is also a Lisp symbol) assigned by a user.
3576 What Emacs does actually is to detect a category of coding system.
3577 Then, it uses a `coding-system' assigned to it. If Emacs can't
3578 decide only one possible category, it selects a category of the
3579 highest priority. Priorities of categories are also specified by a
3580 user in a Lisp variable `coding-category-list'.
3585 int ascii_skip_code
[256];
3587 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
3588 If it detects possible coding systems, return an integer in which
3589 appropriate flag bits are set. Flag bits are defined by macros
3590 CODING_CATEGORY_MASK_XXX in `coding.h'. If PRIORITIES is non-NULL,
3591 it should point to the table `coding_priorities'. In that case, only
3592 the flag bit for a coding system of the highest priority is set in
3595 How many ASCII characters are at the head is returned as *SKIP. */
3598 detect_coding_mask (source
, src_bytes
, priorities
, skip
)
3599 unsigned char *source
;
3600 int src_bytes
, *priorities
, *skip
;
3602 register unsigned char c
;
3603 unsigned char *src
= source
, *src_end
= source
+ src_bytes
;
3604 unsigned int mask
, utf16_examined_p
, iso2022_examined_p
;
3607 /* At first, skip all ASCII characters and control characters except
3608 for three ISO2022 specific control characters. */
3609 ascii_skip_code
[ISO_CODE_SO
] = 0;
3610 ascii_skip_code
[ISO_CODE_SI
] = 0;
3611 ascii_skip_code
[ISO_CODE_ESC
] = 0;
3613 label_loop_detect_coding
:
3614 while (src
< src_end
&& ascii_skip_code
[*src
]) src
++;
3615 *skip
= src
- source
;
3618 /* We found nothing other than ASCII. There's nothing to do. */
3622 /* The text seems to be encoded in some multilingual coding system.
3623 Now, try to find in which coding system the text is encoded. */
3626 /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
3627 /* C is an ISO2022 specific control code of C0. */
3628 mask
= detect_coding_iso2022 (src
, src_end
);
3631 /* No valid ISO2022 code follows C. Try again. */
3633 if (c
== ISO_CODE_ESC
)
3634 ascii_skip_code
[ISO_CODE_ESC
] = 1;
3636 ascii_skip_code
[ISO_CODE_SO
] = ascii_skip_code
[ISO_CODE_SI
] = 1;
3637 goto label_loop_detect_coding
;
3641 for (i
= 0; i
< CODING_CATEGORY_IDX_MAX
; i
++)
3643 if (mask
& priorities
[i
])
3644 return priorities
[i
];
3646 return CODING_CATEGORY_MASK_RAW_TEXT
;
3655 /* C is the first byte of SJIS character code,
3656 or a leading-code of Emacs' internal format (emacs-mule),
3657 or the first byte of UTF-16. */
3658 try = (CODING_CATEGORY_MASK_SJIS
3659 | CODING_CATEGORY_MASK_EMACS_MULE
3660 | CODING_CATEGORY_MASK_UTF_16_BE
3661 | CODING_CATEGORY_MASK_UTF_16_LE
);
3663 /* Or, if C is a special latin extra code,
3664 or is an ISO2022 specific control code of C1 (SS2 or SS3),
3665 or is an ISO2022 control-sequence-introducer (CSI),
3666 we should also consider the possibility of ISO2022 codings. */
3667 if ((VECTORP (Vlatin_extra_code_table
)
3668 && !NILP (XVECTOR (Vlatin_extra_code_table
)->contents
[c
]))
3669 || (c
== ISO_CODE_SS2
|| c
== ISO_CODE_SS3
)
3670 || (c
== ISO_CODE_CSI
3673 || ((*src
== '0' || *src
== '1' || *src
== '2')
3674 && src
+ 1 < src_end
3675 && src
[1] == ']')))))
3676 try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
3677 | CODING_CATEGORY_MASK_ISO_8BIT
);
3680 /* C is a character of ISO2022 in graphic plane right,
3681 or a SJIS's 1-byte character code (i.e. JISX0201),
3682 or the first byte of BIG5's 2-byte code,
3683 or the first byte of UTF-8/16. */
3684 try = (CODING_CATEGORY_MASK_ISO_8_ELSE
3685 | CODING_CATEGORY_MASK_ISO_8BIT
3686 | CODING_CATEGORY_MASK_SJIS
3687 | CODING_CATEGORY_MASK_BIG5
3688 | CODING_CATEGORY_MASK_UTF_8
3689 | CODING_CATEGORY_MASK_UTF_16_BE
3690 | CODING_CATEGORY_MASK_UTF_16_LE
);
3692 /* Or, we may have to consider the possibility of CCL. */
3693 if (coding_system_table
[CODING_CATEGORY_IDX_CCL
]
3694 && (coding_system_table
[CODING_CATEGORY_IDX_CCL
]
3695 ->spec
.ccl
.valid_codes
)[c
])
3696 try |= CODING_CATEGORY_MASK_CCL
;
3699 utf16_examined_p
= iso2022_examined_p
= 0;
3702 for (i
= 0; i
< CODING_CATEGORY_IDX_MAX
; i
++)
3704 if (!iso2022_examined_p
3705 && (priorities
[i
] & try & CODING_CATEGORY_MASK_ISO
))
3707 mask
|= detect_coding_iso2022 (src
, src_end
);
3708 iso2022_examined_p
= 1;
3710 else if (priorities
[i
] & try & CODING_CATEGORY_MASK_SJIS
)
3711 mask
|= detect_coding_sjis (src
, src_end
);
3712 else if (priorities
[i
] & try & CODING_CATEGORY_MASK_UTF_8
)
3713 mask
|= detect_coding_utf_8 (src
, src_end
);
3714 else if (!utf16_examined_p
3715 && (priorities
[i
] & try &
3716 CODING_CATEGORY_MASK_UTF_16_BE_LE
))
3718 mask
|= detect_coding_utf_16 (src
, src_end
);
3719 utf16_examined_p
= 1;
3721 else if (priorities
[i
] & try & CODING_CATEGORY_MASK_BIG5
)
3722 mask
|= detect_coding_big5 (src
, src_end
);
3723 else if (priorities
[i
] & try & CODING_CATEGORY_MASK_EMACS_MULE
)
3724 mask
|= detect_coding_emacs_mule (src
, src_end
);
3725 else if (priorities
[i
] & try & CODING_CATEGORY_MASK_CCL
)
3726 mask
|= detect_coding_ccl (src
, src_end
);
3727 else if (priorities
[i
] & CODING_CATEGORY_MASK_RAW_TEXT
)
3728 mask
|= CODING_CATEGORY_MASK_RAW_TEXT
;
3729 else if (priorities
[i
] & CODING_CATEGORY_MASK_BINARY
)
3730 mask
|= CODING_CATEGORY_MASK_BINARY
;
3731 if (mask
& priorities
[i
])
3732 return priorities
[i
];
3734 return CODING_CATEGORY_MASK_RAW_TEXT
;
3736 if (try & CODING_CATEGORY_MASK_ISO
)
3737 mask
|= detect_coding_iso2022 (src
, src_end
);
3738 if (try & CODING_CATEGORY_MASK_SJIS
)
3739 mask
|= detect_coding_sjis (src
, src_end
);
3740 if (try & CODING_CATEGORY_MASK_BIG5
)
3741 mask
|= detect_coding_big5 (src
, src_end
);
3742 if (try & CODING_CATEGORY_MASK_UTF_8
)
3743 mask
|= detect_coding_utf_8 (src
, src_end
);
3744 if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE
)
3745 mask
|= detect_coding_utf_16 (src
, src_end
);
3746 if (try & CODING_CATEGORY_MASK_EMACS_MULE
)
3747 mask
|= detect_coding_emacs_mule (src
, src_end
);
3748 if (try & CODING_CATEGORY_MASK_CCL
)
3749 mask
|= detect_coding_ccl (src
, src_end
);
3751 return (mask
| CODING_CATEGORY_MASK_RAW_TEXT
| CODING_CATEGORY_MASK_BINARY
);
3754 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
3755 The information of the detected coding system is set in CODING. */
3758 detect_coding (coding
, src
, src_bytes
)
3759 struct coding_system
*coding
;
3767 val
= Vcoding_category_list
;
3768 mask
= detect_coding_mask (src
, src_bytes
, coding_priorities
, &skip
);
3769 coding
->heading_ascii
= skip
;
3773 /* We found a single coding system of the highest priority in MASK. */
3775 while (mask
&& ! (mask
& 1)) mask
>>= 1, idx
++;
3777 idx
= CODING_CATEGORY_IDX_RAW_TEXT
;
3779 val
= XSYMBOL (XVECTOR (Vcoding_category_table
)->contents
[idx
])->value
;
3781 if (coding
->eol_type
!= CODING_EOL_UNDECIDED
)
3785 tmp
= Fget (val
, Qeol_type
);
3787 val
= XVECTOR (tmp
)->contents
[coding
->eol_type
];
3789 setup_coding_system (val
, coding
);
3790 /* Set this again because setup_coding_system reset this member. */
3791 coding
->heading_ascii
= skip
;
3794 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
3795 SOURCE is encoded. Return one of CODING_EOL_LF, CODING_EOL_CRLF,
3796 CODING_EOL_CR, and CODING_EOL_UNDECIDED.
3798 How many non-eol characters are at the head is returned as *SKIP. */
3800 #define MAX_EOL_CHECK_COUNT 3
3803 detect_eol_type (source
, src_bytes
, skip
)
3804 unsigned char *source
;
3805 int src_bytes
, *skip
;
3807 unsigned char *src
= source
, *src_end
= src
+ src_bytes
;
3809 int total
= 0; /* How many end-of-lines are found so far. */
3810 int eol_type
= CODING_EOL_UNDECIDED
;
3815 while (src
< src_end
&& total
< MAX_EOL_CHECK_COUNT
)
3818 if (c
== '\n' || c
== '\r')
3821 *skip
= src
- 1 - source
;
3824 this_eol_type
= CODING_EOL_LF
;
3825 else if (src
>= src_end
|| *src
!= '\n')
3826 this_eol_type
= CODING_EOL_CR
;
3828 this_eol_type
= CODING_EOL_CRLF
, src
++;
3830 if (eol_type
== CODING_EOL_UNDECIDED
)
3831 /* This is the first end-of-line. */
3832 eol_type
= this_eol_type
;
3833 else if (eol_type
!= this_eol_type
)
3835 /* The found type is different from what was found before. */
3836 eol_type
= CODING_EOL_INCONSISTENT
;
3843 *skip
= src_end
- source
;
3847 /* Like detect_eol_type, but detect EOL type in 2-octet
3848 big-endian/little-endian format for coding systems utf-16-be and
3852 detect_eol_type_in_2_octet_form (source
, src_bytes
, skip
, big_endian_p
)
3853 unsigned char *source
;
3854 int src_bytes
, *skip
;
3856 unsigned char *src
= source
, *src_end
= src
+ src_bytes
;
3857 unsigned int c1
, c2
;
3858 int total
= 0; /* How many end-of-lines are found so far. */
3859 int eol_type
= CODING_EOL_UNDECIDED
;
3870 while ((src
+ 1) < src_end
&& total
< MAX_EOL_CHECK_COUNT
)
3872 c1
= (src
[msb
] << 8) | (src
[lsb
]);
3875 if (c1
== '\n' || c1
== '\r')
3878 *skip
= src
- 2 - source
;
3882 this_eol_type
= CODING_EOL_LF
;
3886 if ((src
+ 1) >= src_end
)
3888 this_eol_type
= CODING_EOL_CR
;
3892 c2
= (src
[msb
] << 8) | (src
[lsb
]);
3894 this_eol_type
= CODING_EOL_CRLF
, src
+= 2;
3896 this_eol_type
= CODING_EOL_CR
;
3900 if (eol_type
== CODING_EOL_UNDECIDED
)
3901 /* This is the first end-of-line. */
3902 eol_type
= this_eol_type
;
3903 else if (eol_type
!= this_eol_type
)
3905 /* The found type is different from what was found before. */
3906 eol_type
= CODING_EOL_INCONSISTENT
;
3913 *skip
= src_end
- source
;
3917 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
3918 is encoded. If it detects an appropriate format of end-of-line, it
3919 sets the information in *CODING. */
3922 detect_eol (coding
, src
, src_bytes
)
3923 struct coding_system
*coding
;
3931 switch (coding
->category_idx
)
3933 case CODING_CATEGORY_IDX_UTF_16_BE
:
3934 eol_type
= detect_eol_type_in_2_octet_form (src
, src_bytes
, &skip
, 1);
3936 case CODING_CATEGORY_IDX_UTF_16_LE
:
3937 eol_type
= detect_eol_type_in_2_octet_form (src
, src_bytes
, &skip
, 0);
3940 eol_type
= detect_eol_type (src
, src_bytes
, &skip
);
3944 if (coding
->heading_ascii
> skip
)
3945 coding
->heading_ascii
= skip
;
3947 skip
= coding
->heading_ascii
;
3949 if (eol_type
== CODING_EOL_UNDECIDED
)
3951 if (eol_type
== CODING_EOL_INCONSISTENT
)
3954 /* This code is suppressed until we find a better way to
3955 distinguish raw text file and binary file. */
3957 /* If we have already detected that the coding is raw-text, the
3958 coding should actually be no-conversion. */
3959 if (coding
->type
== coding_type_raw_text
)
3961 setup_coding_system (Qno_conversion
, coding
);
3964 /* Else, let's decode only text code anyway. */
3966 eol_type
= CODING_EOL_LF
;
3969 val
= Fget (coding
->symbol
, Qeol_type
);
3970 if (VECTORP (val
) && XVECTOR (val
)->size
== 3)
3972 setup_coding_system (XVECTOR (val
)->contents
[eol_type
], coding
);
3973 coding
->heading_ascii
= skip
;
3977 #define CONVERSION_BUFFER_EXTRA_ROOM 256
3979 #define DECODING_BUFFER_MAG(coding) \
3980 (coding->type == coding_type_iso2022 \
3982 : ((coding->type == coding_type_sjis || coding->type == coding_type_big5) \
3984 : (coding->type == coding_type_raw_text \
3986 : (coding->type == coding_type_ccl \
3987 ? coding->spec.ccl.decoder.buf_magnification \
3990 /* Return maximum size (bytes) of a buffer enough for decoding
3991 SRC_BYTES of text encoded in CODING. */
3994 decoding_buffer_size (coding
, src_bytes
)
3995 struct coding_system
*coding
;
3998 return (src_bytes
* DECODING_BUFFER_MAG (coding
)
3999 + CONVERSION_BUFFER_EXTRA_ROOM
);
4002 /* Return maximum size (bytes) of a buffer enough for encoding
4003 SRC_BYTES of text to CODING. */
4006 encoding_buffer_size (coding
, src_bytes
)
4007 struct coding_system
*coding
;
4012 if (coding
->type
== coding_type_ccl
)
4013 magnification
= coding
->spec
.ccl
.encoder
.buf_magnification
;
4017 return (src_bytes
* magnification
+ CONVERSION_BUFFER_EXTRA_ROOM
);
4020 #ifndef MINIMUM_CONVERSION_BUFFER_SIZE
4021 #define MINIMUM_CONVERSION_BUFFER_SIZE 1024
4024 char *conversion_buffer
;
4025 int conversion_buffer_size
;
4027 /* Return a pointer to a buffer of SIZE bytes to be used for encoding
4028 or decoding. Sufficient memory is allocated automatically. If we
4029 run out of memory, return NULL. */
4032 get_conversion_buffer (size
)
4035 if (size
> conversion_buffer_size
)
4038 int real_size
= conversion_buffer_size
* 2;
4040 while (real_size
< size
) real_size
*= 2;
4041 buf
= (char *) xmalloc (real_size
);
4042 xfree (conversion_buffer
);
4043 conversion_buffer
= buf
;
4044 conversion_buffer_size
= real_size
;
4046 return conversion_buffer
;
4050 ccl_coding_driver (coding
, source
, destination
, src_bytes
, dst_bytes
, encodep
)
4051 struct coding_system
*coding
;
4052 unsigned char *source
, *destination
;
4053 int src_bytes
, dst_bytes
, encodep
;
4055 struct ccl_program
*ccl
4056 = encodep
? &coding
->spec
.ccl
.encoder
: &coding
->spec
.ccl
.decoder
;
4059 ccl
->last_block
= coding
->mode
& CODING_MODE_LAST_BLOCK
;
4061 coding
->produced
= ccl_driver (ccl
, source
, destination
,
4062 src_bytes
, dst_bytes
, &(coding
->consumed
));
4063 coding
->produced_char
4066 : multibyte_chars_in_text (destination
, coding
->produced
));
4067 coding
->consumed_char
4068 = multibyte_chars_in_text (source
, coding
->consumed
);
4070 switch (ccl
->status
)
4072 case CCL_STAT_SUSPEND_BY_SRC
:
4073 result
= CODING_FINISH_INSUFFICIENT_SRC
;
4075 case CCL_STAT_SUSPEND_BY_DST
:
4076 result
= CODING_FINISH_INSUFFICIENT_DST
;
4079 case CCL_STAT_INVALID_CMD
:
4080 result
= CODING_FINISH_INTERRUPT
;
4083 result
= CODING_FINISH_NORMAL
;
4089 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions". Before
4090 decoding, it may detect coding system and format of end-of-line if
4091 those are not yet decided.
4093 This function does not make full use of DESTINATION buffer. For
4094 instance, if coding->type is coding_type_iso2022, it uses only
4095 (DST_BYTES - 7) bytes of DESTINATION buffer. In the case that
4096 DST_BYTES is decided by the function decoding_buffer_size, it
4097 contains extra 256 bytes (defined by CONVERSION_BUFFER_EXTRA_ROOM).
4098 So, this function can decode the full SOURCE. But, in the other
4099 case, if you want to avoid carry over, you must supply at least 7
4100 bytes more area in DESTINATION buffer than expected maximum bytes
4101 that will be produced by this function. */
4104 decode_coding (coding
, source
, destination
, src_bytes
, dst_bytes
)
4105 struct coding_system
*coding
;
4106 unsigned char *source
, *destination
;
4107 int src_bytes
, dst_bytes
;
4112 && coding
->type
!= coding_type_ccl
4113 && ! (coding
->mode
& CODING_MODE_LAST_BLOCK
4114 && CODING_REQUIRE_FLUSHING (coding
)))
4116 coding
->produced
= coding
->produced_char
= 0;
4117 coding
->consumed
= coding
->consumed_char
= 0;
4118 coding
->fake_multibyte
= 0;
4119 return CODING_FINISH_NORMAL
;
4122 if (coding
->type
== coding_type_undecided
)
4123 detect_coding (coding
, source
, src_bytes
);
4125 if (coding
->eol_type
== CODING_EOL_UNDECIDED
)
4126 detect_eol (coding
, source
, src_bytes
);
4128 switch (coding
->type
)
4130 case coding_type_emacs_mule
:
4131 case coding_type_undecided
:
4132 case coding_type_raw_text
:
4133 if (coding
->eol_type
== CODING_EOL_LF
4134 || coding
->eol_type
== CODING_EOL_UNDECIDED
)
4135 goto label_no_conversion
;
4136 result
= decode_eol (coding
, source
, destination
, src_bytes
, dst_bytes
);
4139 case coding_type_sjis
:
4140 result
= decode_coding_sjis_big5 (coding
, source
, destination
,
4141 src_bytes
, dst_bytes
, 1);
4144 case coding_type_iso2022
:
4145 result
= decode_coding_iso2022 (coding
, source
, destination
,
4146 src_bytes
, dst_bytes
);
4149 case coding_type_big5
:
4150 result
= decode_coding_sjis_big5 (coding
, source
, destination
,
4151 src_bytes
, dst_bytes
, 0);
4154 case coding_type_ccl
:
4155 result
= ccl_coding_driver (coding
, source
, destination
,
4156 src_bytes
, dst_bytes
, 0);
4159 default: /* i.e. case coding_type_no_conversion: */
4160 label_no_conversion
:
4161 if (dst_bytes
&& src_bytes
> dst_bytes
)
4163 coding
->produced
= dst_bytes
;
4164 result
= CODING_FINISH_INSUFFICIENT_DST
;
4168 coding
->produced
= src_bytes
;
4169 result
= CODING_FINISH_NORMAL
;
4172 bcopy (source
, destination
, coding
->produced
);
4174 safe_bcopy (source
, destination
, coding
->produced
);
4175 coding
->fake_multibyte
= 1;
4177 = coding
->consumed_char
= coding
->produced_char
= coding
->produced
;
4184 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".
4186 This function does not make full use of DESTINATION buffer. For
4187 instance, if coding->type is coding_type_iso2022, it uses only
4188 (DST_BYTES - 20) bytes of DESTINATION buffer. In the case that
4189 DST_BYTES is decided by the function encoding_buffer_size, it
4190 contains extra 256 bytes (defined by CONVERSION_BUFFER_EXTRA_ROOM).
4191 So, this function can encode the full SOURCE. But, in the other
4192 case, if you want to avoid carry over, you must supply at least 20
4193 bytes more area in DESTINATION buffer than expected maximum bytes
4194 that will be produced by this function. */
4197 encode_coding (coding
, source
, destination
, src_bytes
, dst_bytes
)
4198 struct coding_system
*coding
;
4199 unsigned char *source
, *destination
;
4200 int src_bytes
, dst_bytes
;
4205 && ! (coding
->mode
& CODING_MODE_LAST_BLOCK
4206 && CODING_REQUIRE_FLUSHING (coding
)))
4208 coding
->produced
= coding
->produced_char
= 0;
4209 coding
->consumed
= coding
->consumed_char
= 0;
4210 coding
->fake_multibyte
= 0;
4211 return CODING_FINISH_NORMAL
;
4214 switch (coding
->type
)
4216 case coding_type_emacs_mule
:
4217 case coding_type_undecided
:
4218 case coding_type_raw_text
:
4219 if (coding
->eol_type
== CODING_EOL_LF
4220 || coding
->eol_type
== CODING_EOL_UNDECIDED
)
4221 goto label_no_conversion
;
4222 result
= encode_eol (coding
, source
, destination
, src_bytes
, dst_bytes
);
4225 case coding_type_sjis
:
4226 result
= encode_coding_sjis_big5 (coding
, source
, destination
,
4227 src_bytes
, dst_bytes
, 1);
4230 case coding_type_iso2022
:
4231 result
= encode_coding_iso2022 (coding
, source
, destination
,
4232 src_bytes
, dst_bytes
);
4235 case coding_type_big5
:
4236 result
= encode_coding_sjis_big5 (coding
, source
, destination
,
4237 src_bytes
, dst_bytes
, 0);
4240 case coding_type_ccl
:
4241 result
= ccl_coding_driver (coding
, source
, destination
,
4242 src_bytes
, dst_bytes
, 1);
4245 default: /* i.e. case coding_type_no_conversion: */
4246 label_no_conversion
:
4247 if (dst_bytes
&& src_bytes
> dst_bytes
)
4249 coding
->produced
= dst_bytes
;
4250 result
= CODING_FINISH_INSUFFICIENT_DST
;
4254 coding
->produced
= src_bytes
;
4255 result
= CODING_FINISH_NORMAL
;
4258 bcopy (source
, destination
, coding
->produced
);
4260 safe_bcopy (source
, destination
, coding
->produced
);
4261 if (coding
->mode
& CODING_MODE_SELECTIVE_DISPLAY
)
4263 unsigned char *p
= destination
, *pend
= p
+ coding
->produced
;
4265 if (*p
++ == '\015') p
[-1] = '\n';
4267 coding
->fake_multibyte
= 1;
4269 = coding
->consumed_char
= coding
->produced_char
= coding
->produced
;
4276 /* Scan text in the region between *BEG and *END (byte positions),
4277 skip characters which we don't have to decode by coding system
4278 CODING at the head and tail, then set *BEG and *END to the region
4279 of the text we actually have to convert. The caller should move
4280 the gap out of the region in advance.
4282 If STR is not NULL, *BEG and *END are indices into STR. */
4285 shrink_decoding_region (beg
, end
, coding
, str
)
4287 struct coding_system
*coding
;
4290 unsigned char *begp_orig
, *begp
, *endp_orig
, *endp
, c
;
4292 Lisp_Object translation_table
;
4294 if (coding
->type
== coding_type_ccl
4295 || coding
->type
== coding_type_undecided
4296 || !NILP (coding
->post_read_conversion
))
4298 /* We can't skip any data. */
4301 else if (coding
->type
== coding_type_no_conversion
)
4303 /* We need no conversion, but don't have to skip any data here.
4304 Decoding routine handles them effectively anyway. */
4308 translation_table
= coding
->translation_table_for_decode
;
4309 if (NILP (translation_table
) && !NILP (Venable_character_translation
))
4310 translation_table
= Vstandard_translation_table_for_decode
;
4311 if (CHAR_TABLE_P (translation_table
))
4314 for (i
= 0; i
< 128; i
++)
4315 if (!NILP (CHAR_TABLE_REF (translation_table
, i
)))
4318 /* Some ASCII character should be translated. We give up
4323 eol_conversion
= (coding
->eol_type
!= CODING_EOL_LF
);
4325 if ((! eol_conversion
) && (coding
->heading_ascii
>= 0))
4326 /* Detection routine has already found how much we can skip at the
4328 *beg
+= coding
->heading_ascii
;
4332 begp_orig
= begp
= str
+ *beg
;
4333 endp_orig
= endp
= str
+ *end
;
4337 begp_orig
= begp
= BYTE_POS_ADDR (*beg
);
4338 endp_orig
= endp
= begp
+ *end
- *beg
;
4341 switch (coding
->type
)
4343 case coding_type_emacs_mule
:
4344 case coding_type_raw_text
:
4347 if (coding
->heading_ascii
< 0)
4348 while (begp
< endp
&& *begp
!= '\r' && *begp
< 0x80) begp
++;
4349 while (begp
< endp
&& endp
[-1] != '\r' && endp
[-1] < 0x80)
4351 /* Do not consider LF as ascii if preceded by CR, since that
4352 confuses eol decoding. */
4353 if (begp
< endp
&& endp
< endp_orig
&& endp
[-1] == '\r' && endp
[0] == '\n')
4360 case coding_type_sjis
:
4361 case coding_type_big5
:
4362 /* We can skip all ASCII characters at the head. */
4363 if (coding
->heading_ascii
< 0)
4366 while (begp
< endp
&& *begp
< 0x80 && *begp
!= '\r') begp
++;
4368 while (begp
< endp
&& *begp
< 0x80) begp
++;
4370 /* We can skip all ASCII characters at the tail except for the
4371 second byte of SJIS or BIG5 code. */
4373 while (begp
< endp
&& endp
[-1] < 0x80 && endp
[-1] != '\r') endp
--;
4375 while (begp
< endp
&& endp
[-1] < 0x80) endp
--;
4376 /* Do not consider LF as ascii if preceded by CR, since that
4377 confuses eol decoding. */
4378 if (begp
< endp
&& endp
< endp_orig
&& endp
[-1] == '\r' && endp
[0] == '\n')
4380 if (begp
< endp
&& endp
< endp_orig
&& endp
[-1] >= 0x80)
4384 default: /* i.e. case coding_type_iso2022: */
4385 if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding
, 0) != CHARSET_ASCII
)
4386 /* We can't skip any data. */
4388 if (coding
->heading_ascii
< 0)
4390 /* We can skip all ASCII characters at the head except for a
4391 few control codes. */
4392 while (begp
< endp
&& (c
= *begp
) < 0x80
4393 && c
!= ISO_CODE_CR
&& c
!= ISO_CODE_SO
4394 && c
!= ISO_CODE_SI
&& c
!= ISO_CODE_ESC
4395 && (!eol_conversion
|| c
!= ISO_CODE_LF
))
4398 switch (coding
->category_idx
)
4400 case CODING_CATEGORY_IDX_ISO_8_1
:
4401 case CODING_CATEGORY_IDX_ISO_8_2
:
4402 /* We can skip all ASCII characters at the tail. */
4404 while (begp
< endp
&& (c
= endp
[-1]) < 0x80 && c
!= '\r') endp
--;
4406 while (begp
< endp
&& endp
[-1] < 0x80) endp
--;
4407 /* Do not consider LF as ascii if preceded by CR, since that
4408 confuses eol decoding. */
4409 if (begp
< endp
&& endp
< endp_orig
&& endp
[-1] == '\r' && endp
[0] == '\n')
4413 case CODING_CATEGORY_IDX_ISO_7
:
4414 case CODING_CATEGORY_IDX_ISO_7_TIGHT
:
4416 /* We can skip all characters at the tail except for 8-bit
4417 codes and ESC and the following 2-byte at the tail. */
4418 unsigned char *eight_bit
= NULL
;
4422 && (c
= endp
[-1]) != ISO_CODE_ESC
&& c
!= '\r')
4424 if (!eight_bit
&& c
& 0x80) eight_bit
= endp
;
4429 && (c
= endp
[-1]) != ISO_CODE_ESC
)
4431 if (!eight_bit
&& c
& 0x80) eight_bit
= endp
;
4434 /* Do not consider LF as ascii if preceded by CR, since that
4435 confuses eol decoding. */
4436 if (begp
< endp
&& endp
< endp_orig
4437 && endp
[-1] == '\r' && endp
[0] == '\n')
4439 if (begp
< endp
&& endp
[-1] == ISO_CODE_ESC
)
/* Inspect the two bytes that follow ESC through ENDP.  END is the
   caller's pointer to an integer position, not a pointer into the
   text, so indexing it here would compare positions with characters.  */
4441 if (endp
+ 1 < endp_orig
&& endp
[0] == '(' && endp
[1] == 'B')
4442 /* This is an ASCII designation sequence. We can
4443 surely skip the tail. But, if we have
4444 encountered an 8-bit code, skip only the codes
4446 endp
= eight_bit
? eight_bit
: endp
+ 2;
4448 /* Hmmm, we can't skip the tail. */
4456 *beg
+= begp
- begp_orig
;
4457 *end
+= endp
- endp_orig
;
4461 /* Like shrink_decoding_region but for encoding. */
4464 shrink_encoding_region (beg
, end
, coding
, str
)
4466 struct coding_system
*coding
;
4469 unsigned char *begp_orig
, *begp
, *endp_orig
, *endp
;
4471 Lisp_Object translation_table
;
4473 if (coding
->type
== coding_type_ccl
)
4474 /* We can't skip any data. */
4476 else if (coding
->type
== coding_type_no_conversion
)
4478 /* We need no conversion. */
4483 translation_table
= coding
->translation_table_for_encode
;
4484 if (NILP (translation_table
) && !NILP (Venable_character_translation
))
4485 translation_table
= Vstandard_translation_table_for_encode
;
4486 if (CHAR_TABLE_P (translation_table
))
4489 for (i
= 0; i
< 128; i
++)
4490 if (!NILP (CHAR_TABLE_REF (translation_table
, i
)))
4493 /* Some ASCII character should be translated. We give up
4500 begp_orig
= begp
= str
+ *beg
;
4501 endp_orig
= endp
= str
+ *end
;
4505 begp_orig
= begp
= BYTE_POS_ADDR (*beg
);
4506 endp_orig
= endp
= begp
+ *end
- *beg
;
4509 eol_conversion
= (coding
->eol_type
== CODING_EOL_CR
4510 || coding
->eol_type
== CODING_EOL_CRLF
);
4512 /* Here, we don't have to check coding->pre_write_conversion because
4513 the caller is expected to have handled it already. */
4514 switch (coding
->type
)
4516 case coding_type_undecided
:
4517 case coding_type_emacs_mule
:
4518 case coding_type_raw_text
:
4521 while (begp
< endp
&& *begp
!= '\n') begp
++;
4522 while (begp
< endp
&& endp
[-1] != '\n') endp
--;
4528 case coding_type_iso2022
:
4529 if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding
, 0) != CHARSET_ASCII
)
4530 /* We can't skip any data. */
4532 if (coding
->flags
& CODING_FLAG_ISO_DESIGNATE_AT_BOL
)
4534 unsigned char *bol
= begp
;
4535 while (begp
< endp
&& *begp
< 0x80)
4538 if (begp
[-1] == '\n')
4542 goto label_skip_tail
;
4547 /* We can skip all ASCII characters at the head and tail. */
4549 while (begp
< endp
&& *begp
< 0x80 && *begp
!= '\n') begp
++;
4551 while (begp
< endp
&& *begp
< 0x80) begp
++;
4554 while (begp
< endp
&& endp
[-1] < 0x80 && endp
[-1] != '\n') endp
--;
4556 while (begp
< endp
&& *(endp
- 1) < 0x80) endp
--;
4560 *beg
+= begp
- begp_orig
;
4561 *end
+= endp
- endp_orig
;
4565 /* As shrinking conversion region requires some overhead, we don't try
4566 shrinking if the length of conversion region is less than this
4568 static int shrink_conversion_region_threshhold
= 1024;
4570 #define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep) \
4572 if (*(end) - *(beg) > shrink_conversion_region_threshhold) \
4574 if (encodep) shrink_encoding_region (beg, end, coding, str); \
4575 else shrink_decoding_region (beg, end, coding, str); \
/* Unwind handler: reset inhibit_pre_post_conversion to 0 so that
   pre-write/post-read conversion functions are allowed again.  DUMMY
   is not referenced in the visible body.  */
4580 code_convert_region_unwind (dummy
)
4583 inhibit_pre_post_conversion
= 0;
4587 /* Store information about all compositions in the range FROM and TO
4588 of OBJ in memory blocks pointed by CODING->cmp_data. OBJ is a
4589 buffer or a string, defaults to the current buffer. */
4592 coding_save_composition (coding
, from
, to
, obj
)
4593 struct coding_system
*coding
;
4600 if (coding
->composing
== COMPOSITION_DISABLED
)
4602 if (!coding
->cmp_data
)
4603 coding_allocate_composition_data (coding
, from
);
4604 if (!find_composition (from
, to
, &start
, &end
, &prop
, obj
)
4608 && (!find_composition (end
, to
, &start
, &end
, &prop
, obj
)
4611 coding
->composing
= COMPOSITION_NO
;
4614 if (COMPOSITION_VALID_P (start
, end
, prop
))
4616 enum composition_method method
= COMPOSITION_METHOD (prop
);
4617 if (coding
->cmp_data
->used
+ COMPOSITION_DATA_MAX_BUNCH_LENGTH
4618 >= COMPOSITION_DATA_SIZE
)
4619 coding_allocate_composition_data (coding
, from
);
4620 /* For relative composition, we remember start and end
4621 positions, for the other compositions, we also remember
4623 CODING_ADD_COMPOSITION_START (coding
, start
- from
, method
);
4624 if (method
!= COMPOSITION_RELATIVE
)
4626 /* We must store all components of the composition. */
4627 Lisp_Object val
, ch
;
4629 val
= COMPOSITION_COMPONENTS (prop
);
4633 ch
= XCAR (val
), val
= XCDR (val
);
4634 CODING_ADD_COMPOSITION_COMPONENT (coding
, XINT (ch
));
4636 else if (VECTORP (val
) || STRINGP (val
))
4638 int len
= (VECTORP (val
)
4639 ? XVECTOR (val
)->size
: XSTRING (val
)->size
);
4641 for (i
= 0; i
< len
; i
++)
4644 ? Faref (val
, make_number (i
))
4645 : XVECTOR (val
)->contents
[i
]);
4646 CODING_ADD_COMPOSITION_COMPONENT (coding
, XINT (ch
));
4649 else /* INTEGERP (val) */
4650 CODING_ADD_COMPOSITION_COMPONENT (coding
, XINT (val
));
4652 CODING_ADD_COMPOSITION_END (coding
, end
- from
);
4657 && find_composition (start
, to
, &start
, &end
, &prop
, obj
)
4660 /* Make coding->cmp_data point to the first memory block. */
4661 while (coding
->cmp_data
->prev
)
4662 coding
->cmp_data
= coding
->cmp_data
->prev
;
4663 coding
->cmp_data_start
= 0;
4666 /* Reflect the saved information about compositions to OBJ.
4667 CODING->cmp_data points to a memory block for the information. OBJ
4668 is a buffer or a string, defaults to the current buffer. */
4671 coding_restore_composition (coding
, obj
)
4672 struct coding_system
*coding
;
4675 struct composition_data
*cmp_data
= coding
->cmp_data
;
4680 while (cmp_data
->prev
)
4681 cmp_data
= cmp_data
->prev
;
4687 for (i
= 0; i
< cmp_data
->used
; i
+= cmp_data
->data
[i
])
4689 int *data
= cmp_data
->data
+ i
;
4690 enum composition_method method
= (enum composition_method
) data
[3];
4691 Lisp_Object components
;
4693 if (method
== COMPOSITION_RELATIVE
)
4697 int len
= data
[0] - 4, j
;
4698 Lisp_Object args
[MAX_COMPOSITION_COMPONENTS
* 2 - 1];
4700 for (j
= 0; j
< len
; j
++)
4701 args
[j
] = make_number (data
[4 + j
]);
4702 components
= (method
== COMPOSITION_WITH_ALTCHARS
4703 ? Fstring (len
, args
) : Fvector (len
, args
));
4705 compose_text (data
[1], data
[2], components
, Qnil
, obj
);
4707 cmp_data
= cmp_data
->next
;
4711 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
4712 text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
4713 coding system CODING, and return the status code of code conversion
4714 (currently, this value has no meaning).
4716 How many characters (and bytes) are converted to how many
4717 characters (and bytes) are recorded in members of the structure
4720 If REPLACE is nonzero, we do various things as if the original text
4721 is deleted and a new text is inserted. See the comments in
4722 replace_range (insdel.c) to know what we are doing. */
4725 code_convert_region (from
, from_byte
, to
, to_byte
, coding
, encodep
, replace
)
4726 int from
, from_byte
, to
, to_byte
, encodep
, replace
;
4727 struct coding_system
*coding
;
4729 int len
= to
- from
, len_byte
= to_byte
- from_byte
;
4730 int require
, inserted
, inserted_byte
;
4731 int head_skip
, tail_skip
, total_skip
= 0;
4732 Lisp_Object saved_coding_symbol
;
4733 int multibyte
= !NILP (current_buffer
->enable_multibyte_characters
);
4735 int fake_multibyte
= 0;
4736 unsigned char *src
, *dst
;
4737 Lisp_Object deletion
;
4738 int orig_point
= PT
, orig_len
= len
;
4742 saved_coding_symbol
= Qnil
;
4744 if (from
< PT
&& PT
< to
)
4746 TEMP_SET_PT_BOTH (from
, from_byte
);
4752 int saved_from
= from
;
4754 prepare_to_modify_buffer (from
, to
, &from
);
4755 if (saved_from
!= from
)
4759 from_byte
= CHAR_TO_BYTE (from
), to_byte
= CHAR_TO_BYTE (to
);
4761 from_byte
= from
, to_byte
= to
;
4762 len_byte
= to_byte
- from_byte
;
4766 if (! encodep
&& CODING_REQUIRE_DETECTION (coding
))
4768 /* We must detect encoding of text and eol format. */
4770 if (from
< GPT
&& to
> GPT
)
4771 move_gap_both (from
, from_byte
);
4772 if (coding
->type
== coding_type_undecided
)
4774 detect_coding (coding
, BYTE_POS_ADDR (from_byte
), len_byte
);
4775 if (coding
->type
== coding_type_undecided
)
4776 /* It seems that the text contains only ASCII, but we
4777 should not leave it undecided because the deeper
4778 decoding routine (decode_coding) tries to detect the
4779 encodings again in vain. */
4780 coding
->type
= coding_type_emacs_mule
;
4782 if (coding
->eol_type
== CODING_EOL_UNDECIDED
)
4784 saved_coding_symbol
= coding
->symbol
;
4785 detect_eol (coding
, BYTE_POS_ADDR (from_byte
), len_byte
);
4786 if (coding
->eol_type
== CODING_EOL_UNDECIDED
)
4787 coding
->eol_type
= CODING_EOL_LF
;
4788 /* We had better recover the original eol format if we
4789 encounter an inconsistent eol format while decoding. */
4790 coding
->mode
|= CODING_MODE_INHIBIT_INCONSISTENT_EOL
;
4795 ? ! CODING_REQUIRE_ENCODING (coding
)
4796 : ! CODING_REQUIRE_DECODING (coding
))
4798 coding
->consumed_char
= len
;
4799 coding
->consumed
= len_byte
;
4800 coding
->produced
= len_byte
;
4803 /* See the comment of the member heading_ascii in coding.h. */
4804 && coding
->heading_ascii
< len_byte
)
4806 /* We still may have to combine byte at the head and the
4807 tail of the text in the region. */
4808 if (from
< GPT
&& GPT
< to
)
4809 move_gap_both (to
, to_byte
);
4810 len
= multibyte_chars_in_text (BYTE_POS_ADDR (from_byte
), len_byte
);
4811 adjust_after_insert (from
, from_byte
, to
, to_byte
, len
);
4812 coding
->produced_char
= len
;
4817 adjust_after_insert (from
, from_byte
, to
, to_byte
, len_byte
);
4818 coding
->produced_char
= len_byte
;
4823 /* Now we convert the text. */
4825 /* For encoding, we must process pre-write-conversion in advance. */
4827 && ! NILP (coding
->pre_write_conversion
)
4828 && SYMBOLP (coding
->pre_write_conversion
)
4829 && ! NILP (Ffboundp (coding
->pre_write_conversion
)))
4831 /* The function in pre-write-conversion may put a new text in a
4833 struct buffer
*prev
= current_buffer
;
4835 int count
= specpdl_ptr
- specpdl
;
4837 record_unwind_protect (code_convert_region_unwind
, Qnil
);
4838 /* We should not call any more pre-write/post-read-conversion
4839 functions while this pre-write-conversion is running. */
4840 inhibit_pre_post_conversion
= 1;
4841 call2 (coding
->pre_write_conversion
,
4842 make_number (from
), make_number (to
));
4843 inhibit_pre_post_conversion
= 0;
4844 /* Discard the unwind protect. */
4847 if (current_buffer
!= prev
)
4850 new = Fcurrent_buffer ();
4851 set_buffer_internal_1 (prev
);
4852 del_range_2 (from
, from_byte
, to
, to_byte
, 0);
4853 TEMP_SET_PT_BOTH (from
, from_byte
);
4854 insert_from_buffer (XBUFFER (new), 1, len
, 0);
4856 if (orig_point
>= to
)
4857 orig_point
+= len
- orig_len
;
4858 else if (orig_point
> from
)
4862 from_byte
= multibyte
? CHAR_TO_BYTE (from
) : from_byte
;
4863 to_byte
= multibyte
? CHAR_TO_BYTE (to
) : to
;
4864 len_byte
= to_byte
- from_byte
;
4865 TEMP_SET_PT_BOTH (from
, from_byte
);
4870 deletion
= make_buffer_string_both (from
, from_byte
, to
, to_byte
, 1);
4872 if (coding
->composing
!= COMPOSITION_DISABLED
)
4875 coding_save_composition (coding
, from
, to
, Fcurrent_buffer ());
4877 coding_allocate_composition_data (coding
, from
);
4880 /* For conversion by CCL program and for encoding with composition
4881 handling, we can't skip any character because we may convert or
4882 compose even ASCII characters. */
4883 if (coding
->type
!= coding_type_ccl
4884 && (!encodep
|| coding
->cmp_data
== NULL
))
4886 /* Try to skip the heading and tailing ASCIIs. */
4887 int from_byte_orig
= from_byte
, to_byte_orig
= to_byte
;
4889 if (from
< GPT
&& GPT
< to
)
4890 move_gap_both (from
, from_byte
);
4891 SHRINK_CONVERSION_REGION (&from_byte
, &to_byte
, coding
, NULL
, encodep
);
4892 if (from_byte
== to_byte
4893 && (encodep
|| NILP (coding
->post_read_conversion
))
4894 && ! CODING_REQUIRE_FLUSHING (coding
))
4896 coding
->produced
= len_byte
;
4897 coding
->produced_char
= multibyte
? len
: len_byte
;
4899 /* We must record and adjust for this new text now. */
4900 adjust_after_insert (from
, from_byte_orig
, to
, to_byte_orig
, len
);
4904 head_skip
= from_byte
- from_byte_orig
;
4905 tail_skip
= to_byte_orig
- to_byte
;
4906 total_skip
= head_skip
+ tail_skip
;
4909 len
-= total_skip
; len_byte
-= total_skip
;
4911 if (coding
->cmp_data
)
4912 coding
->cmp_data
->char_offset
= from
;
4915 /* The code conversion routine can not preserve text properties for
4916 now. So, we must remove all text properties in the region.
4917 Here, we must suppress all modification hooks. */
4920 int saved_inhibit_modification_hooks
= inhibit_modification_hooks
;
4921 inhibit_modification_hooks
= 1;
4922 Fset_text_properties (make_number (from
), make_number (to
), Qnil
, Qnil
);
4923 inhibit_modification_hooks
= saved_inhibit_modification_hooks
;
4926 /* For conversion, we must put the gap before the text in addition to
4927 making the gap larger for efficient decoding. The required gap
4928 size starts from 2000 which is the magic number used in make_gap.
4929 But, after one batch of conversion, it will be incremented if we
4930 find that it is not enough. */
4933 if (GAP_SIZE
< require
)
4934 make_gap (require
- GAP_SIZE
);
4935 move_gap_both (from
, from_byte
);
4937 inserted
= inserted_byte
= 0;
4939 GAP_SIZE
+= len_byte
;
4942 ZV_BYTE
-= len_byte
;
4945 if (GPT
- BEG
< BEG_UNCHANGED
)
4946 BEG_UNCHANGED
= GPT
- BEG
;
4947 if (Z
- GPT
< END_UNCHANGED
)
4948 END_UNCHANGED
= Z
- GPT
;
4954 /* The buffer memory is now:
4955 +--------+converted-text+---------+-------original-text------+---+
4956 |<-from->|<--inserted-->|---------|<-----------len---------->|---|
4957 |<------------------- GAP_SIZE -------------------->| */
4958 src
= GAP_END_ADDR
- len_byte
;
4959 dst
= GPT_ADDR
+ inserted_byte
;
4962 result
= encode_coding (coding
, src
, dst
, len_byte
, 0);
4964 result
= decode_coding (coding
, src
, dst
, len_byte
, 0);
4966 /* The buffer memory is now:
4967 +--------+-------converted-text--------+--+---original-text--+---+
4968 |<-from->|<--inserted-->|<--produced-->|--|<-(len-consumed)->|---|
4969 |<------------------- GAP_SIZE -------------------->| */
4971 if (coding
->fake_multibyte
)
4974 if (!encodep
&& !multibyte
)
4975 coding
->produced_char
= coding
->produced
;
4976 inserted
+= coding
->produced_char
;
4977 inserted_byte
+= coding
->produced
;
4978 len_byte
-= coding
->consumed
;
4980 if (result
== CODING_FINISH_INSUFFICIENT_CMP
)
4982 coding_allocate_composition_data (coding
, from
+ inserted
);
4986 src
+= coding
->consumed
;
4987 dst
+= coding
->produced
;
4989 if (result
== CODING_FINISH_NORMAL
)
4994 if (! encodep
&& result
== CODING_FINISH_INCONSISTENT_EOL
)
4996 unsigned char *pend
= dst
, *p
= pend
- inserted_byte
;
4997 Lisp_Object eol_type
;
4999 /* Encode LFs back to the original eol format (CR or CRLF). */
5000 if (coding
->eol_type
== CODING_EOL_CR
)
5002 while (p
< pend
) if (*p
++ == '\n') p
[-1] = '\r';
5008 while (p
< pend
) if (*p
++ == '\n') count
++;
5009 if (src
- dst
< count
)
5011 /* We don't have sufficient room for encoding LFs
5012 back to CRLF. We must record converted and
5013 not-yet-converted text back to the buffer
5014 content, enlarge the gap, then record them out of
5015 the buffer contents again. */
5016 int add
= len_byte
+ inserted_byte
;
5019 ZV
+= add
; Z
+= add
; ZV_BYTE
+= add
; Z_BYTE
+= add
;
5020 GPT
+= inserted_byte
; GPT_BYTE
+= inserted_byte
;
5021 make_gap (count
- GAP_SIZE
);
5023 ZV
-= add
; Z
-= add
; ZV_BYTE
-= add
; Z_BYTE
-= add
;
5024 GPT
-= inserted_byte
; GPT_BYTE
-= inserted_byte
;
5025 /* Don't forget to update SRC, DST, and PEND. */
5026 src
= GAP_END_ADDR
- len_byte
;
5027 dst
= GPT_ADDR
+ inserted_byte
;
5031 inserted_byte
+= count
;
5032 coding
->produced
+= count
;
5033 p
= dst
= pend
+ count
;
5037 if (*p
== '\n') count
--, *--p
= '\r';
5041 /* Suppress eol-format conversion in the further conversion. */
5042 coding
->eol_type
= CODING_EOL_LF
;
5044 /* Set the coding system symbol to that for Unix-like EOL. */
5045 eol_type
= Fget (saved_coding_symbol
, Qeol_type
);
5046 if (VECTORP (eol_type
)
5047 && XVECTOR (eol_type
)->size
== 3
5048 && SYMBOLP (XVECTOR (eol_type
)->contents
[CODING_EOL_LF
]))
5049 coding
->symbol
= XVECTOR (eol_type
)->contents
[CODING_EOL_LF
];
5051 coding
->symbol
= saved_coding_symbol
;
5057 if (coding
->type
!= coding_type_ccl
5058 || coding
->mode
& CODING_MODE_LAST_BLOCK
)
5060 coding
->mode
|= CODING_MODE_LAST_BLOCK
;
5063 if (result
== CODING_FINISH_INSUFFICIENT_SRC
)
5065 /* The source text ends in invalid codes. Let's just
5066 make them valid buffer contents, and finish conversion. */
5067 inserted
+= len_byte
;
5068 inserted_byte
+= len_byte
;
5074 if (result
== CODING_FINISH_INTERRUPT
)
5076 /* The conversion procedure was interrupted by a user. */
5080 /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST */
5081 if (coding
->consumed
< 1)
5083 /* It's quite strange to require more memory without
5084 consuming any bytes. Perhaps CCL program bug. */
5090 /* We have just done the first batch of conversion which was
5091 stopped because of insufficient gap. Let's reconsider the
5092 required gap size (i.e. SRC - DST) now.
5094 We have converted ORIG bytes (== coding->consumed) into
5095 NEW bytes (coding->produced). To convert the remaining
5096 LEN bytes, we may need REQUIRE bytes of gap, where:
5097 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
5098 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
5099 Here, we are sure that NEW >= ORIG. */
5100 float ratio
= coding
->produced
- coding
->consumed
;
5101 ratio
/= coding
->consumed
;
5102 require
= len_byte
* ratio
;
5105 if ((src
- dst
) < (require
+ 2000))
5107 /* See the comment above the previous call of make_gap. */
5108 int add
= len_byte
+ inserted_byte
;
5111 ZV
+= add
; Z
+= add
; ZV_BYTE
+= add
; Z_BYTE
+= add
;
5112 GPT
+= inserted_byte
; GPT_BYTE
+= inserted_byte
;
5113 make_gap (require
+ 2000);
5115 ZV
-= add
; Z
-= add
; ZV_BYTE
-= add
; Z_BYTE
-= add
;
5116 GPT
-= inserted_byte
; GPT_BYTE
-= inserted_byte
;
5119 if (src
- dst
> 0) *dst
= 0; /* Put an anchor. */
5124 || (to
- from
) != (to_byte
- from_byte
)))
5125 inserted
= multibyte_chars_in_text (GPT_ADDR
, inserted_byte
);
5127 /* If we have shrunk the conversion area, adjust it now. */
5131 safe_bcopy (GAP_END_ADDR
, GPT_ADDR
+ inserted_byte
, tail_skip
);
5132 inserted
+= total_skip
; inserted_byte
+= total_skip
;
5133 GAP_SIZE
+= total_skip
;
5134 GPT
-= head_skip
; GPT_BYTE
-= head_skip
;
5135 ZV
-= total_skip
; ZV_BYTE
-= total_skip
;
5136 Z
-= total_skip
; Z_BYTE
-= total_skip
;
5137 from
-= head_skip
; from_byte
-= head_skip
;
5138 to
+= tail_skip
; to_byte
+= tail_skip
;
5142 adjust_after_replace (from
, from_byte
, deletion
, inserted
, inserted_byte
);
5143 inserted
= Z
- prev_Z
;
5145 if (!encodep
&& coding
->cmp_data
&& coding
->cmp_data
->used
)
5146 coding_restore_composition (coding
, Fcurrent_buffer ());
5147 coding_free_composition_data (coding
);
5149 if (! encodep
&& ! NILP (coding
->post_read_conversion
))
5152 int count
= specpdl_ptr
- specpdl
;
5155 TEMP_SET_PT_BOTH (from
, from_byte
);
5157 record_unwind_protect (code_convert_region_unwind
, Qnil
);
5158 /* We should not call any more pre-write/post-read-conversion
5159 functions while this post-read-conversion is running. */
5160 inhibit_pre_post_conversion
= 1;
5161 val
= call1 (coding
->post_read_conversion
, make_number (inserted
));
5162 inhibit_pre_post_conversion
= 0;
5163 /* Discard the unwind protect. */
5165 CHECK_NUMBER (val
, 0);
5166 inserted
+= Z
- prev_Z
;
5169 if (orig_point
>= from
)
5171 if (orig_point
>= from
+ orig_len
)
5172 orig_point
+= inserted
- orig_len
;
5175 TEMP_SET_PT (orig_point
);
5180 signal_after_change (from
, to
- from
, inserted
);
5181 update_compositions (from
, from
+ inserted
, CHECK_BORDER
);
5185 coding
->consumed
= to_byte
- from_byte
;
5186 coding
->consumed_char
= to
- from
;
5187 coding
->produced
= inserted_byte
;
5188 coding
->produced_char
= inserted
;
5195 code_convert_string (str
, coding
, encodep
, nocopy
)
5197 struct coding_system
*coding
;
5198 int encodep
, nocopy
;
5202 int from
= 0, to
= XSTRING (str
)->size
;
5203 int to_byte
= STRING_BYTES (XSTRING (str
));
5204 struct gcpro gcpro1
;
5205 Lisp_Object saved_coding_symbol
;
5208 saved_coding_symbol
= Qnil
;
5209 if ((encodep
&& !NILP (coding
->pre_write_conversion
)
5210 || !encodep
&& !NILP (coding
->post_read_conversion
)))
5212 /* Since we have to call Lisp functions which assume target text
5213 is in a buffer, after setting a temporary buffer, call
5214 code_convert_region. */
5215 int count
= specpdl_ptr
- specpdl
;
5216 struct buffer
*prev
= current_buffer
;
5217 int multibyte
= STRING_MULTIBYTE (str
);
5219 record_unwind_protect (Fset_buffer
, Fcurrent_buffer ());
5220 record_unwind_protect (code_convert_region_unwind
, Qnil
);
5221 inhibit_pre_post_conversion
= 1;
5223 temp_output_buffer_setup (" *code-converting-work*");
5224 set_buffer_internal (XBUFFER (Vstandard_output
));
5225 /* We must insert the contents of STR as is without
5226 unibyte<->multibyte conversion. For that, we adjust the
5227 multibyteness of the working buffer to that of STR. */
5228 Ferase_buffer (); /* for safety */
5229 current_buffer
->enable_multibyte_characters
= multibyte
? Qt
: Qnil
;
5230 insert_from_string (str
, 0, 0, to
, to_byte
, 0);
5232 code_convert_region (BEGV
, BEGV_BYTE
, ZV
, ZV_BYTE
, coding
, encodep
, 1);
5233 /* Make a unibyte string if we are encoding, otherwise make a
5234 multibyte string. */
5235 Fset_buffer_multibyte (encodep
? Qnil
: Qt
);
5236 str
= make_buffer_string (BEGV
, ZV
, 0);
5237 return unbind_to (count
, str
);
5240 if (! encodep
&& CODING_REQUIRE_DETECTION (coding
))
5242 /* See the comments in code_convert_region. */
5243 if (coding
->type
== coding_type_undecided
)
5245 detect_coding (coding
, XSTRING (str
)->data
, to_byte
);
5246 if (coding
->type
== coding_type_undecided
)
5247 coding
->type
= coding_type_emacs_mule
;
5249 if (coding
->eol_type
== CODING_EOL_UNDECIDED
)
5251 saved_coding_symbol
= coding
->symbol
;
5252 detect_eol (coding
, XSTRING (str
)->data
, to_byte
);
5253 if (coding
->eol_type
== CODING_EOL_UNDECIDED
)
5254 coding
->eol_type
= CODING_EOL_LF
;
5255 /* We had better recover the original eol format if we
5256 encounter an inconsistent eol format while decoding. */
5257 coding
->mode
|= CODING_MODE_INHIBIT_INCONSISTENT_EOL
;
5262 ? ! CODING_REQUIRE_ENCODING (coding
)
5263 : ! CODING_REQUIRE_DECODING (coding
))
5264 return (nocopy
? str
: Fcopy_sequence (str
));
5266 if (coding
->composing
!= COMPOSITION_DISABLED
)
5269 coding_save_composition (coding
, from
, to
, str
);
5271 coding_allocate_composition_data (coding
, from
);
5274 /* For conversion by CCL program and for encoding with composition
5275 handling, we can't skip any character because we may convert or
5276 compose even ASCII characters. */
5277 if (coding
->type
!= coding_type_ccl
5278 && (!encodep
|| coding
->cmp_data
== NULL
))
5280 /* Try to skip the heading and tailing ASCIIs. */
5281 int from_orig
= from
;
5283 SHRINK_CONVERSION_REGION (&from
, &to_byte
, coding
, XSTRING (str
)->data
,
5285 if (from
== to_byte
)
5286 return (nocopy
? str
: Fcopy_sequence (str
));
5288 if (coding
->cmp_data
)
5289 coding
->cmp_data
->char_offset
= from
;
5293 len
= encoding_buffer_size (coding
, to_byte
- from
);
5295 len
= decoding_buffer_size (coding
, to_byte
- from
);
5296 len
+= from
+ STRING_BYTES (XSTRING (str
)) - to_byte
;
5298 buf
= get_conversion_buffer (len
);
5302 bcopy (XSTRING (str
)->data
, buf
, from
);
5304 ? encode_coding (coding
, XSTRING (str
)->data
+ from
,
5305 buf
+ from
, to_byte
- from
, len
)
5306 : decode_coding (coding
, XSTRING (str
)->data
+ from
,
5307 buf
+ from
, to_byte
- from
, len
));
5308 if (! encodep
&& result
== CODING_FINISH_INCONSISTENT_EOL
)
5310 /* We simply try to decode the whole string again but without
5311 eol-conversion this time. */
5312 coding
->eol_type
= CODING_EOL_LF
;
5313 coding
->symbol
= saved_coding_symbol
;
5314 coding_free_composition_data (coding
);
5315 return code_convert_string (str
, coding
, encodep
, nocopy
);
5318 bcopy (XSTRING (str
)->data
+ to_byte
, buf
+ from
+ coding
->produced
,
5319 STRING_BYTES (XSTRING (str
)) - to_byte
);
5321 len
= from
+ STRING_BYTES (XSTRING (str
)) - to_byte
;
5323 str
= make_unibyte_string (buf
, len
+ coding
->produced
);
5326 int chars
= (coding
->fake_multibyte
5327 ? multibyte_chars_in_text (buf
+ from
, coding
->produced
)
5328 : coding
->produced_char
);
5329 str
= make_multibyte_string (buf
, len
+ chars
, len
+ coding
->produced
);
5332 if (!encodep
&& coding
->cmp_data
&& coding
->cmp_data
->used
)
5333 coding_restore_composition (coding
, str
);
5335 coding_free_composition_data (coding
);
5341 /*** 8. Emacs Lisp library functions ***/
5343 DEFUN ("coding-system-p", Fcoding_system_p
, Scoding_system_p
, 1, 1, 0,
5344 "Return t if OBJECT is nil or a coding-system.\n\
5345 See the documentation of `make-coding-system' for information\n\
5346 about coding-system objects.")
5354 /* Get coding-spec vector for OBJ. */
5355 obj
= Fget (obj
, Qcoding_system
);
5356 return ((VECTORP (obj
) && XVECTOR (obj
)->size
== 5)
5360 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system
,
5361 Sread_non_nil_coding_system
, 1, 1, 0,
5362 "Read a coding system from the minibuffer, prompting with string PROMPT.")
5369 val
= Fcompleting_read (prompt
, Vcoding_system_alist
, Qnil
,
5370 Qt
, Qnil
, Qcoding_system_history
, Qnil
, Qnil
);
5372 while (XSTRING (val
)->size
== 0);
5373 return (Fintern (val
, Qnil
));
5376 DEFUN ("read-coding-system", Fread_coding_system
, Sread_coding_system
, 1, 2, 0,
5377 "Read a coding system from the minibuffer, prompting with string PROMPT.\n\
5378 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.")
5379 (prompt
, default_coding_system
)
5380 Lisp_Object prompt
, default_coding_system
;
5383 if (SYMBOLP (default_coding_system
))
5384 XSETSTRING (default_coding_system
, XSYMBOL (default_coding_system
)->name
);
5385 val
= Fcompleting_read (prompt
, Vcoding_system_alist
, Qnil
,
5386 Qt
, Qnil
, Qcoding_system_history
,
5387 default_coding_system
, Qnil
);
5388 return (XSTRING (val
)->size
== 0 ? Qnil
: Fintern (val
, Qnil
));
5391 DEFUN ("check-coding-system", Fcheck_coding_system
, Scheck_coding_system
,
5393 "Check validity of CODING-SYSTEM.\n\
5394 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.\n\
5395 It is valid if it is a symbol with a non-nil `coding-system' property.\n\
5396 The value of property should be a vector of length 5.")
5398 Lisp_Object coding_system
;
5400 CHECK_SYMBOL (coding_system
, 0);
5401 if (!NILP (Fcoding_system_p (coding_system
)))
5402 return coding_system
;
5404 Fsignal (Qcoding_system_error
, Fcons (coding_system
, Qnil
));
/* Detect the coding systems that the SRC_BYTES bytes at SRC may be
   encoded in, using detect_coding_mask and detect_eol_type.  If
   HIGHEST is nonzero, return a single coding system; otherwise return
   a list of them.  */
5408 detect_coding_system (src
, src_bytes
, highest
)
5410 int src_bytes
, highest
;
5412 int coding_mask
, eol_type
;
5413 Lisp_Object val
, tmp
;
5416 coding_mask
= detect_coding_mask (src
, src_bytes
, NULL
, &dummy
);
5417 eol_type
= detect_eol_type (src
, src_bytes
, &dummy
);
/* An inconsistent end-of-line format is handled as an undecided one.  */
5418 if (eol_type
== CODING_EOL_INCONSISTENT
)
5419 eol_type
= CODING_EOL_UNDECIDED
;
/* NOTE(review): the condition guarding the early-return path below
   is not visible in this excerpt.  The path returns `undecided' or,
   when the eol format was detected, the element of its `eol-type'
   vector indexed by EOL_TYPE.  */
5424 if (eol_type
!= CODING_EOL_UNDECIDED
)
5427 val2
= Fget (Qundecided
, Qeol_type
);
5429 val
= XVECTOR (val2
)->contents
[eol_type
];
5431 return (highest
? val
: Fcons (val
, Qnil
));
5434 /* At first, gather possible coding systems in VAL. */
5436 for (tmp
= Vcoding_category_list
; CONSP (tmp
); tmp
= XCDR (tmp
))
5438 Lisp_Object category_val
, category_index
;
5440 category_index
= Fget (XCAR (tmp
), Qcoding_category_index
);
5441 category_val
= Fsymbol_value (XCAR (tmp
));
5442 if (!NILP (category_val
)
5443 && NATNUMP (category_index
)
5444 && (coding_mask
& (1 << XFASTINT (category_index
))))
5446 val
= Fcons (category_val
, val
);
/* VAL was built by consing onto its front; reverse it to restore
   the order of Vcoding_category_list.  */
5452 val
= Fnreverse (val
);
5454 /* Then, replace the elements with subsidiary coding systems. */
5455 for (tmp
= val
; CONSP (tmp
); tmp
= XCDR (tmp
))
/* EOL_TYPE can no longer be CODING_EOL_INCONSISTENT here because it
   was mapped to CODING_EOL_UNDECIDED above; the second test is
   redundant but harmless.  */
5457 if (eol_type
!= CODING_EOL_UNDECIDED
5458 && eol_type
!= CODING_EOL_INCONSISTENT
)
5461 eol
= Fget (XCAR (tmp
), Qeol_type
);
5463 XCAR (tmp
) = XVECTOR (eol
)->contents
[eol_type
];
5466 return (highest
? XCAR (val
) : val
);
5469 DEFUN ("detect-coding-region", Fdetect_coding_region
, Sdetect_coding_region
,
5471 "Detect coding system of the text in the region between START and END.\n\
5472 Return a list of possible coding systems ordered by priority.\n\
5474 If only ASCII characters are found, it returns a list of single element\n\
5475 `undecided' or its subsidiary coding system according to a detected\n\
5476 end-of-line format.\n\
5478 If optional argument HIGHEST is non-nil, return the coding system of\n\
5480 (start
, end
, highest
)
5481 Lisp_Object start
, end
, highest
;
5484 int from_byte
, to_byte
;
5486 CHECK_NUMBER_COERCE_MARKER (start
, 0);
5487 CHECK_NUMBER_COERCE_MARKER (end
, 1);
5489 validate_region (&start
, &end
);
5490 from
= XINT (start
), to
= XINT (end
);
5491 from_byte
= CHAR_TO_BYTE (from
);
5492 to_byte
= CHAR_TO_BYTE (to
);
5494 if (from
< GPT
&& to
>= GPT
)
5495 move_gap_both (to
, to_byte
);
5497 return detect_coding_system (BYTE_POS_ADDR (from_byte
),
5498 to_byte
- from_byte
,
5502 DEFUN ("detect-coding-string", Fdetect_coding_string
, Sdetect_coding_string
,
5504 "Detect coding system of the text in STRING.\n\
5505 Return a list of possible coding systems ordered by priority.\n\
5507 If only ASCII characters are found, it returns a list of single element\n\
5508 `undecided' or its subsidiary coding system according to a detected\n\
5509 end-of-line format.\n\
5511 If optional argument HIGHEST is non-nil, return the coding system of\n\
5514 Lisp_Object string
, highest
;
5516 CHECK_STRING (string
, 0);
5518 return detect_coding_system (XSTRING (string
)->data
,
5519 STRING_BYTES (XSTRING (string
)),
/* Common body of Fdecode_coding_region and Fencode_coding_region.
   Convert the text between START and END in the current buffer by
   CODING_SYSTEM; ENCODEP nonzero means encode, zero means decode.
   Set Vlast_coding_system_used to the symbol of the coding system
   after conversion, and return the number of characters produced.
   Signal an error if CODING_SYSTEM cannot be set up.  */
5524 code_convert_region1 (start
, end
, coding_system
, encodep
)
5525 Lisp_Object start
, end
, coding_system
;
5528 struct coding_system coding
;
5531 CHECK_NUMBER_COERCE_MARKER (start
, 0);
5532 CHECK_NUMBER_COERCE_MARKER (end
, 1);
5533 CHECK_SYMBOL (coding_system
, 2);
5535 validate_region (&start
, &end
);
5536 from
= XFASTINT (start
);
5537 to
= XFASTINT (end
);
/* A nil coding system means no conversion; the result length is
   simply the length of the region.  */
5539 if (NILP (coding_system
))
5540 return make_number (to
- from
);
5542 if (setup_coding_system (Fcheck_coding_system (coding_system
), &coding
) < 0)
5543 error ("Invalid coding system: %s", XSYMBOL (coding_system
)->name
->data
);
/* The whole region is converted in one call, so mark it as the last
   block.  The final argument 1 is the REPLACE flag of
   code_convert_region.  */
5545 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
5546 code_convert_region (from
, CHAR_TO_BYTE (from
), to
, CHAR_TO_BYTE (to
),
5547 &coding
, encodep
, 1);
5548 Vlast_coding_system_used
= coding
.symbol
;
5549 return make_number (coding
.produced_char
);
5552 DEFUN ("decode-coding-region", Fdecode_coding_region
, Sdecode_coding_region
,
5553 3, 3, "r\nzCoding system: ",
5554 "Decode the current region by specified coding system.\n\
5555 When called from a program, takes three arguments:\n\
5556 START, END, and CODING-SYSTEM. START and END are buffer positions.\n\
5557 This function sets `last-coding-system-used' to the precise coding system\n\
5558 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5559 not fully specified.)\n\
5560 It returns the length of the decoded text.")
5561 (start
, end
, coding_system
)
5562 Lisp_Object start
, end
, coding_system
;
5564 return code_convert_region1 (start
, end
, coding_system
, 0);
5567 DEFUN ("encode-coding-region", Fencode_coding_region
, Sencode_coding_region
,
5568 3, 3, "r\nzCoding system: ",
5569 "Encode the current region by specified coding system.\n\
5570 When called from a program, takes three arguments:\n\
5571 START, END, and CODING-SYSTEM. START and END are buffer positions.\n\
5572 This function sets `last-coding-system-used' to the precise coding system\n\
5573 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5574 not fully specified.)\n\
5575 It returns the length of the encoded text.")
5576 (start
, end
, coding_system
)
5577 Lisp_Object start
, end
, coding_system
;
5579 return code_convert_region1 (start
, end
, coding_system
, 1);
/* Common body of Fdecode_coding_string and Fencode_coding_string.
   Convert STRING by CODING_SYSTEM; ENCODEP nonzero means encode, zero
   means decode.  NOCOPY non-nil permits handing back STRING itself
   when CODING_SYSTEM is nil, and is forwarded (as a C boolean) to
   code_convert_string.  Set Vlast_coding_system_used to the symbol of
   the coding system after conversion.  Signal an error if
   CODING_SYSTEM cannot be set up.  */
5583 code_convert_string1 (string
, coding_system
, nocopy
, encodep
)
5584 Lisp_Object string
, coding_system
, nocopy
;
5587 struct coding_system coding
;
5589 CHECK_STRING (string
, 0);
5590 CHECK_SYMBOL (coding_system
, 1);
/* A nil coding system means no conversion; return STRING or a copy
   of it depending on NOCOPY.  */
5592 if (NILP (coding_system
))
5593 return (NILP (nocopy
) ? Fcopy_sequence (string
) : string
);
5595 if (setup_coding_system (Fcheck_coding_system (coding_system
), &coding
) < 0)
5596 error ("Invalid coding system: %s", XSYMBOL (coding_system
)->name
->data
);
/* The whole string is converted in one call, so mark it as the last
   block.  */
5598 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
5599 string
= code_convert_string (string
, &coding
, encodep
, !NILP (nocopy
));
5600 Vlast_coding_system_used
= coding
.symbol
;
5605 DEFUN ("decode-coding-string", Fdecode_coding_string
, Sdecode_coding_string
,
5607 "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
5608 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
5609 if the decoding operation is trivial.\n\
5610 This function sets `last-coding-system-used' to the precise coding system\n\
5611 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5612 not fully specified.)")
5613 (string
, coding_system
, nocopy
)
5614 Lisp_Object string
, coding_system
, nocopy
;
5616 return code_convert_string1 (string
, coding_system
, nocopy
, 0);
5619 DEFUN ("encode-coding-string", Fencode_coding_string
, Sencode_coding_string
,
5621 "Encode STRING to CODING-SYSTEM, and return the result.\n\
5622 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
5623 if the encoding operation is trivial.\n\
5624 This function sets `last-coding-system-used' to the precise coding system\n\
5625 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5626 not fully specified.)")
5627 (string
, coding_system
, nocopy
)
5628 Lisp_Object string
, coding_system
, nocopy
;
5630 return code_convert_string1 (string
, coding_system
, nocopy
, 1);
5633 /* Encode or decode STRING according to CODING_SYSTEM.
5634 Do not set Vlast_coding_system_used.
5636 This function is called only from macros DECODE_FILE and
5637 ENCODE_FILE, thus we ignore character composition. */
5640 code_convert_string_norecord (string
, coding_system
, encodep
)
5641 Lisp_Object string
, coding_system
;
5644 struct coding_system coding
;
5646 CHECK_STRING (string
, 0);
5647 CHECK_SYMBOL (coding_system
, 1);
5649 if (NILP (coding_system
))
5652 if (setup_coding_system (Fcheck_coding_system (coding_system
), &coding
) < 0)
5653 error ("Invalid coding system: %s", XSYMBOL (coding_system
)->name
->data
);
5655 coding
.composing
= COMPOSITION_DISABLED
;
5656 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
5657 return code_convert_string (string
, &coding
, encodep
, Qt
);
5660 DEFUN ("decode-sjis-char", Fdecode_sjis_char
, Sdecode_sjis_char
, 1, 1, 0,
5661 "Decode a Japanese character which has CODE in shift_jis encoding.\n\
5662 Return the corresponding character.")
5666 unsigned char c1
, c2
, s1
, s2
;
5669 CHECK_NUMBER (code
, 0);
5670 s1
= (XFASTINT (code
)) >> 8, s2
= (XFASTINT (code
)) & 0xFF;
5674 XSETFASTINT (val
, s2
);
5675 else if (s2
>= 0xA0 || s2
<= 0xDF)
5677 MAKE_NON_ASCII_CHAR (charset_katakana_jisx0201
, s2
, 0));
5679 error ("Invalid Shift JIS code: %x", XFASTINT (code
));
5683 if ((s1
< 0x80 || s1
> 0x9F && s1
< 0xE0 || s1
> 0xEF)
5684 || (s2
< 0x40 || s2
== 0x7F || s2
> 0xFC))
5685 error ("Invalid Shift JIS code: %x", XFASTINT (code
));
5686 DECODE_SJIS (s1
, s2
, c1
, c2
);
5687 XSETFASTINT (val
, MAKE_NON_ASCII_CHAR (charset_jisx0208
, c1
, c2
));
5692 DEFUN ("encode-sjis-char", Fencode_sjis_char
, Sencode_sjis_char
, 1, 1, 0,
5693 "Encode a Japanese character CHAR to shift_jis encoding.\n\
5694 Return the corresponding code in SJIS.")
5698 int charset
, c1
, c2
, s1
, s2
;
5701 CHECK_NUMBER (ch
, 0);
5702 SPLIT_CHAR (XFASTINT (ch
), charset
, c1
, c2
);
5703 if (charset
== CHARSET_ASCII
)
5707 else if (charset
== charset_jisx0208
5708 && c1
> 0x20 && c1
< 0x7F && c2
> 0x20 && c2
< 0x7F)
5710 ENCODE_SJIS (c1
, c2
, s1
, s2
);
5711 XSETFASTINT (val
, (s1
<< 8) | s2
);
5713 else if (charset
== charset_katakana_jisx0201
5714 && c1
> 0x20 && c2
< 0xE0)
5716 XSETFASTINT (val
, c1
| 0x80);
5719 error ("Can't encode to shift_jis: %d", XFASTINT (ch
));
5723 DEFUN ("decode-big5-char", Fdecode_big5_char
, Sdecode_big5_char
, 1, 1, 0,
5724 "Decode a Big5 character which has CODE in BIG5 coding system.\n\
5725 Return the corresponding character.")
5730 unsigned char b1
, b2
, c1
, c2
;
5733 CHECK_NUMBER (code
, 0);
5734 b1
= (XFASTINT (code
)) >> 8, b2
= (XFASTINT (code
)) & 0xFF;
5738 error ("Invalid BIG5 code: %x", XFASTINT (code
));
5743 if ((b1
< 0xA1 || b1
> 0xFE)
5744 || (b2
< 0x40 || (b2
> 0x7E && b2
< 0xA1) || b2
> 0xFE))
5745 error ("Invalid BIG5 code: %x", XFASTINT (code
));
5746 DECODE_BIG5 (b1
, b2
, charset
, c1
, c2
);
5747 XSETFASTINT (val
, MAKE_NON_ASCII_CHAR (charset
, c1
, c2
));
5752 DEFUN ("encode-big5-char", Fencode_big5_char
, Sencode_big5_char
, 1, 1, 0,
5753 "Encode the Big5 character CHAR to BIG5 coding system.\n\
5754 Return the corresponding character code in Big5.")
5758 int charset
, c1
, c2
, b1
, b2
;
5761 CHECK_NUMBER (ch
, 0);
5762 SPLIT_CHAR (XFASTINT (ch
), charset
, c1
, c2
);
5763 if (charset
== CHARSET_ASCII
)
5767 else if ((charset
== charset_big5_1
5768 && (XFASTINT (ch
) >= 0x250a1 && XFASTINT (ch
) <= 0x271ec))
5769 || (charset
== charset_big5_2
5770 && XFASTINT (ch
) >= 0x290a1 && XFASTINT (ch
) <= 0x2bdb2))
5772 ENCODE_BIG5 (charset
, c1
, c2
, b1
, b2
);
5773 XSETFASTINT (val
, (b1
<< 8) | b2
);
5776 error ("Can't encode to Big5: %d", XFASTINT (ch
));
5780 DEFUN ("set-terminal-coding-system-internal",
5781 Fset_terminal_coding_system_internal
,
5782 Sset_terminal_coding_system_internal
, 1, 1, 0, "")
5784 Lisp_Object coding_system
;
5786 CHECK_SYMBOL (coding_system
, 0);
5787 setup_coding_system (Fcheck_coding_system (coding_system
), &terminal_coding
);
5788 /* We had better not send unsafe characters to terminal. */
5789 terminal_coding
.flags
|= CODING_FLAG_ISO_SAFE
;
5790 /* Characer composition should be disabled. */
5791 terminal_coding
.composing
= COMPOSITION_DISABLED
;
5795 DEFUN ("set-safe-terminal-coding-system-internal",
5796 Fset_safe_terminal_coding_system_internal
,
5797 Sset_safe_terminal_coding_system_internal
, 1, 1, 0, "")
5799 Lisp_Object coding_system
;
5801 CHECK_SYMBOL (coding_system
, 0);
5802 setup_coding_system (Fcheck_coding_system (coding_system
),
5803 &safe_terminal_coding
);
5804 /* Characer composition should be disabled. */
5805 safe_terminal_coding
.composing
= COMPOSITION_DISABLED
;
5809 DEFUN ("terminal-coding-system",
5810 Fterminal_coding_system
, Sterminal_coding_system
, 0, 0, 0,
5811 "Return coding system specified for terminal output.")
5814 return terminal_coding
.symbol
;
5817 DEFUN ("set-keyboard-coding-system-internal",
5818 Fset_keyboard_coding_system_internal
,
5819 Sset_keyboard_coding_system_internal
, 1, 1, 0, "")
5821 Lisp_Object coding_system
;
5823 CHECK_SYMBOL (coding_system
, 0);
5824 setup_coding_system (Fcheck_coding_system (coding_system
), &keyboard_coding
);
5825 /* Characer composition should be disabled. */
5826 keyboard_coding
.composing
= COMPOSITION_DISABLED
;
5830 DEFUN ("keyboard-coding-system",
5831 Fkeyboard_coding_system
, Skeyboard_coding_system
, 0, 0, 0,
5832 "Return coding system specified for decoding keyboard input.")
5835 return keyboard_coding
.symbol
;
5839 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system
,
5840 Sfind_operation_coding_system
, 1, MANY
, 0,
5841 "Choose a coding system for an operation based on the target name.\n\
5842 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).\n\
5843 DECODING-SYSTEM is the coding system to use for decoding\n\
5844 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
5845 for encoding (in case OPERATION does encoding).\n\
5847 The first argument OPERATION specifies an I/O primitive:\n\
5848 For file I/O, `insert-file-contents' or `write-region'.\n\
5849 For process I/O, `call-process', `call-process-region', or `start-process'.\n\
5850 For network I/O, `open-network-stream'.\n\
5852 The remaining arguments should be the same arguments that were passed\n\
5853 to the primitive. Depending on which primitive, one of those arguments\n\
5854 is selected as the TARGET. For example, if OPERATION does file I/O,\n\
5855 whichever argument specifies the file name is TARGET.\n\
5857 TARGET has a meaning which depends on OPERATION:\n\
5858 For file I/O, TARGET is a file name.\n\
5859 For process I/O, TARGET is a process name.\n\
5860 For network I/O, TARGET is a service name or a port number\n\
5862 This function looks up what specified for TARGET in,\n\
5863 `file-coding-system-alist', `process-coding-system-alist',\n\
5864 or `network-coding-system-alist' depending on OPERATION.\n\
5865 They may specify a coding system, a cons of coding systems,\n\
5866 or a function symbol to call.\n\
5867 In the last case, we call the function with one argument,\n\
5868 which is a list of all the arguments given to this function.")
5873 Lisp_Object operation
, target_idx
, target
, val
;
5874 register Lisp_Object chain
;
5877 error ("Too few arguments");
5878 operation
= args
[0];
5879 if (!SYMBOLP (operation
)
5880 || !INTEGERP (target_idx
= Fget (operation
, Qtarget_idx
)))
5881 error ("Invalid first arguement");
5882 if (nargs
< 1 + XINT (target_idx
))
5883 error ("Too few arguments for operation: %s",
5884 XSYMBOL (operation
)->name
->data
);
5885 target
= args
[XINT (target_idx
) + 1];
5886 if (!(STRINGP (target
)
5887 || (EQ (operation
, Qopen_network_stream
) && INTEGERP (target
))))
5888 error ("Invalid %dth argument", XINT (target_idx
) + 1);
5890 chain
= ((EQ (operation
, Qinsert_file_contents
)
5891 || EQ (operation
, Qwrite_region
))
5892 ? Vfile_coding_system_alist
5893 : (EQ (operation
, Qopen_network_stream
)
5894 ? Vnetwork_coding_system_alist
5895 : Vprocess_coding_system_alist
));
5899 for (; CONSP (chain
); chain
= XCDR (chain
))
5905 && ((STRINGP (target
)
5906 && STRINGP (XCAR (elt
))
5907 && fast_string_match (XCAR (elt
), target
) >= 0)
5908 || (INTEGERP (target
) && EQ (target
, XCAR (elt
)))))
5911 /* Here, if VAL is both a valid coding system and a valid
5912 function symbol, we return VAL as a coding system. */
5915 if (! SYMBOLP (val
))
5917 if (! NILP (Fcoding_system_p (val
)))
5918 return Fcons (val
, val
);
5919 if (! NILP (Ffboundp (val
)))
5921 val
= call1 (val
, Flist (nargs
, args
));
5924 if (SYMBOLP (val
) && ! NILP (Fcoding_system_p (val
)))
5925 return Fcons (val
, val
);
5933 DEFUN ("update-coding-systems-internal", Fupdate_coding_systems_internal
,
5934 Supdate_coding_systems_internal
, 0, 0, 0,
5935 "Update internal database for ISO2022 and CCL based coding systems.\n\
5936 When values of any coding categories are changed, you must\n\
5937 call this function")
5942 for (i
= CODING_CATEGORY_IDX_EMACS_MULE
; i
< CODING_CATEGORY_IDX_MAX
; i
++)
5946 val
= XSYMBOL (XVECTOR (Vcoding_category_table
)->contents
[i
])->value
;
5949 if (! coding_system_table
[i
])
5950 coding_system_table
[i
] = ((struct coding_system
*)
5951 xmalloc (sizeof (struct coding_system
)));
5952 setup_coding_system (val
, coding_system_table
[i
]);
5954 else if (coding_system_table
[i
])
5956 xfree (coding_system_table
[i
]);
5957 coding_system_table
[i
] = NULL
;
5964 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal
,
5965 Sset_coding_priority_internal
, 0, 0, 0,
5966 "Update internal database for the current value of `coding-category-list'.\n\
5967 This function is internal use only.")
5973 val
= Vcoding_category_list
;
5975 while (CONSP (val
) && i
< CODING_CATEGORY_IDX_MAX
)
5977 if (! SYMBOLP (XCAR (val
)))
5979 idx
= XFASTINT (Fget (XCAR (val
), Qcoding_category_index
));
5980 if (idx
>= CODING_CATEGORY_IDX_MAX
)
5982 coding_priorities
[i
++] = (1 << idx
);
5985 /* If coding-category-list is valid and contains all coding
5986 categories, `i' should be CODING_CATEGORY_IDX_MAX now. If not,
5987 the following code saves Emacs from crashing. */
5988 while (i
< CODING_CATEGORY_IDX_MAX
)
5989 coding_priorities
[i
++] = CODING_CATEGORY_MASK_RAW_TEXT
;
5997 /*** 9. Post-amble ***/
6002 conversion_buffer
= (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE
);
6010 /* Emacs' internal format specific initialize routine. */
6011 for (i
= 0; i
<= 0x20; i
++)
6012 emacs_code_class
[i
] = EMACS_control_code
;
6013 emacs_code_class
[0x0A] = EMACS_linefeed_code
;
6014 emacs_code_class
[0x0D] = EMACS_carriage_return_code
;
6015 for (i
= 0x21 ; i
< 0x7F; i
++)
6016 emacs_code_class
[i
] = EMACS_ascii_code
;
6017 emacs_code_class
[0x7F] = EMACS_control_code
;
6018 for (i
= 0x80; i
< 0xFF; i
++)
6019 emacs_code_class
[i
] = EMACS_invalid_code
;
6020 emacs_code_class
[LEADING_CODE_PRIVATE_11
] = EMACS_leading_code_3
;
6021 emacs_code_class
[LEADING_CODE_PRIVATE_12
] = EMACS_leading_code_3
;
6022 emacs_code_class
[LEADING_CODE_PRIVATE_21
] = EMACS_leading_code_4
;
6023 emacs_code_class
[LEADING_CODE_PRIVATE_22
] = EMACS_leading_code_4
;
6025 /* ISO2022 specific initialize routine. */
6026 for (i
= 0; i
< 0x20; i
++)
6027 iso_code_class
[i
] = ISO_control_code
;
6028 for (i
= 0x21; i
< 0x7F; i
++)
6029 iso_code_class
[i
] = ISO_graphic_plane_0
;
6030 for (i
= 0x80; i
< 0xA0; i
++)
6031 iso_code_class
[i
] = ISO_control_code
;
6032 for (i
= 0xA1; i
< 0xFF; i
++)
6033 iso_code_class
[i
] = ISO_graphic_plane_1
;
6034 iso_code_class
[0x20] = iso_code_class
[0x7F] = ISO_0x20_or_0x7F
;
6035 iso_code_class
[0xA0] = iso_code_class
[0xFF] = ISO_0xA0_or_0xFF
;
6036 iso_code_class
[ISO_CODE_CR
] = ISO_carriage_return
;
6037 iso_code_class
[ISO_CODE_SO
] = ISO_shift_out
;
6038 iso_code_class
[ISO_CODE_SI
] = ISO_shift_in
;
6039 iso_code_class
[ISO_CODE_SS2_7
] = ISO_single_shift_2_7
;
6040 iso_code_class
[ISO_CODE_ESC
] = ISO_escape
;
6041 iso_code_class
[ISO_CODE_SS2
] = ISO_single_shift_2
;
6042 iso_code_class
[ISO_CODE_SS3
] = ISO_single_shift_3
;
6043 iso_code_class
[ISO_CODE_CSI
] = ISO_control_sequence_introducer
;
6045 conversion_buffer_size
= MINIMUM_CONVERSION_BUFFER_SIZE
;
6047 setup_coding_system (Qnil
, &keyboard_coding
);
6048 setup_coding_system (Qnil
, &terminal_coding
);
6049 setup_coding_system (Qnil
, &safe_terminal_coding
);
6050 setup_coding_system (Qnil
, &default_buffer_file_coding
);
6052 bzero (coding_system_table
, sizeof coding_system_table
);
6054 bzero (ascii_skip_code
, sizeof ascii_skip_code
);
6055 for (i
= 0; i
< 128; i
++)
6056 ascii_skip_code
[i
] = 1;
6058 #if defined (MSDOS) || defined (WINDOWSNT)
6059 system_eol_type
= CODING_EOL_CRLF
;
6061 system_eol_type
= CODING_EOL_LF
;
6064 inhibit_pre_post_conversion
= 0;
6072 Qtarget_idx
= intern ("target-idx");
6073 staticpro (&Qtarget_idx
);
6075 Qcoding_system_history
= intern ("coding-system-history");
6076 staticpro (&Qcoding_system_history
);
6077 Fset (Qcoding_system_history
, Qnil
);
6079 /* Target FILENAME is the first argument. */
6080 Fput (Qinsert_file_contents
, Qtarget_idx
, make_number (0));
6081 /* Target FILENAME is the third argument. */
6082 Fput (Qwrite_region
, Qtarget_idx
, make_number (2));
6084 Qcall_process
= intern ("call-process");
6085 staticpro (&Qcall_process
);
6086 /* Target PROGRAM is the first argument. */
6087 Fput (Qcall_process
, Qtarget_idx
, make_number (0));
6089 Qcall_process_region
= intern ("call-process-region");
6090 staticpro (&Qcall_process_region
);
6091 /* Target PROGRAM is the third argument. */
6092 Fput (Qcall_process_region
, Qtarget_idx
, make_number (2));
6094 Qstart_process
= intern ("start-process");
6095 staticpro (&Qstart_process
);
6096 /* Target PROGRAM is the third argument. */
6097 Fput (Qstart_process
, Qtarget_idx
, make_number (2));
6099 Qopen_network_stream
= intern ("open-network-stream");
6100 staticpro (&Qopen_network_stream
);
6101 /* Target SERVICE is the fourth argument. */
6102 Fput (Qopen_network_stream
, Qtarget_idx
, make_number (3));
6104 Qcoding_system
= intern ("coding-system");
6105 staticpro (&Qcoding_system
);
6107 Qeol_type
= intern ("eol-type");
6108 staticpro (&Qeol_type
);
6110 Qbuffer_file_coding_system
= intern ("buffer-file-coding-system");
6111 staticpro (&Qbuffer_file_coding_system
);
6113 Qpost_read_conversion
= intern ("post-read-conversion");
6114 staticpro (&Qpost_read_conversion
);
6116 Qpre_write_conversion
= intern ("pre-write-conversion");
6117 staticpro (&Qpre_write_conversion
);
6119 Qno_conversion
= intern ("no-conversion");
6120 staticpro (&Qno_conversion
);
6122 Qundecided
= intern ("undecided");
6123 staticpro (&Qundecided
);
6125 Qcoding_system_p
= intern ("coding-system-p");
6126 staticpro (&Qcoding_system_p
);
6128 Qcoding_system_error
= intern ("coding-system-error");
6129 staticpro (&Qcoding_system_error
);
6131 Fput (Qcoding_system_error
, Qerror_conditions
,
6132 Fcons (Qcoding_system_error
, Fcons (Qerror
, Qnil
)));
6133 Fput (Qcoding_system_error
, Qerror_message
,
6134 build_string ("Invalid coding system"));
6136 Qcoding_category
= intern ("coding-category");
6137 staticpro (&Qcoding_category
);
6138 Qcoding_category_index
= intern ("coding-category-index");
6139 staticpro (&Qcoding_category_index
);
6141 Vcoding_category_table
6142 = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX
), Qnil
);
6143 staticpro (&Vcoding_category_table
);
6146 for (i
= 0; i
< CODING_CATEGORY_IDX_MAX
; i
++)
6148 XVECTOR (Vcoding_category_table
)->contents
[i
]
6149 = intern (coding_category_name
[i
]);
6150 Fput (XVECTOR (Vcoding_category_table
)->contents
[i
],
6151 Qcoding_category_index
, make_number (i
));
6155 Qtranslation_table
= intern ("translation-table");
6156 staticpro (&Qtranslation_table
);
6157 Fput (Qtranslation_table
, Qchar_table_extra_slots
, make_number (1));
6159 Qtranslation_table_id
= intern ("translation-table-id");
6160 staticpro (&Qtranslation_table_id
);
6162 Qtranslation_table_for_decode
= intern ("translation-table-for-decode");
6163 staticpro (&Qtranslation_table_for_decode
);
6165 Qtranslation_table_for_encode
= intern ("translation-table-for-encode");
6166 staticpro (&Qtranslation_table_for_encode
);
6168 Qsafe_charsets
= intern ("safe-charsets");
6169 staticpro (&Qsafe_charsets
);
6171 Qvalid_codes
= intern ("valid-codes");
6172 staticpro (&Qvalid_codes
);
6174 Qemacs_mule
= intern ("emacs-mule");
6175 staticpro (&Qemacs_mule
);
6177 Qraw_text
= intern ("raw-text");
6178 staticpro (&Qraw_text
);
6180 defsubr (&Scoding_system_p
);
6181 defsubr (&Sread_coding_system
);
6182 defsubr (&Sread_non_nil_coding_system
);
6183 defsubr (&Scheck_coding_system
);
6184 defsubr (&Sdetect_coding_region
);
6185 defsubr (&Sdetect_coding_string
);
6186 defsubr (&Sdecode_coding_region
);
6187 defsubr (&Sencode_coding_region
);
6188 defsubr (&Sdecode_coding_string
);
6189 defsubr (&Sencode_coding_string
);
6190 defsubr (&Sdecode_sjis_char
);
6191 defsubr (&Sencode_sjis_char
);
6192 defsubr (&Sdecode_big5_char
);
6193 defsubr (&Sencode_big5_char
);
6194 defsubr (&Sset_terminal_coding_system_internal
);
6195 defsubr (&Sset_safe_terminal_coding_system_internal
);
6196 defsubr (&Sterminal_coding_system
);
6197 defsubr (&Sset_keyboard_coding_system_internal
);
6198 defsubr (&Skeyboard_coding_system
);
6199 defsubr (&Sfind_operation_coding_system
);
6200 defsubr (&Supdate_coding_systems_internal
);
6201 defsubr (&Sset_coding_priority_internal
);
6203 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list
,
6204 "List of coding systems.\n\
6206 Do not alter the value of this variable manually. This variable should be\n\
6207 updated by the functions `make-coding-system' and\n\
6208 `define-coding-system-alias'.");
6209 Vcoding_system_list
= Qnil
;
6211 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist
,
6212 "Alist of coding system names.\n\
6213 Each element is one element list of coding system name.\n\
6214 This variable is given to `completing-read' as TABLE argument.\n\
6216 Do not alter the value of this variable manually. This variable should be\n\
6217 updated by the functions `make-coding-system' and\n\
6218 `define-coding-system-alias'.");
6219 Vcoding_system_alist
= Qnil
;
6221 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list
,
6222 "List of coding-categories (symbols) ordered by priority.");
6226 Vcoding_category_list
= Qnil
;
6227 for (i
= CODING_CATEGORY_IDX_MAX
- 1; i
>= 0; i
--)
6228 Vcoding_category_list
6229 = Fcons (XVECTOR (Vcoding_category_table
)->contents
[i
],
6230 Vcoding_category_list
);
6233 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read
,
6234 "Specify the coding system for read operations.\n\
6235 It is useful to bind this variable with `let', but do not set it globally.\n\
6236 If the value is a coding system, it is used for decoding on read operation.\n\
6237 If not, an appropriate element is used from one of the coding system alists:\n\
6238 There are three such tables, `file-coding-system-alist',\n\
6239 `process-coding-system-alist', and `network-coding-system-alist'.");
6240 Vcoding_system_for_read
= Qnil
;
6242 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write
,
6243 "Specify the coding system for write operations.\n\
6244 Programs bind this variable with `let', but you should not set it globally.\n\
6245 If the value is a coding system, it is used for encoding of output,\n\
6246 when writing it to a file and when sending it to a file or subprocess.\n\
6248 If this does not specify a coding system, an appropriate element\n\
6249 is used from one of the coding system alists:\n\
6250 There are three such tables, `file-coding-system-alist',\n\
6251 `process-coding-system-alist', and `network-coding-system-alist'.\n\
6252 For output to files, if the above procedure does not specify a coding system,\n\
6253 the value of `buffer-file-coding-system' is used.");
6254 Vcoding_system_for_write
= Qnil
;
6256 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used
,
6257 "Coding system used in the latest file or process I/O.");
6258 Vlast_coding_system_used
= Qnil
;
6260 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion
,
6261 "*Non-nil means always inhibit code conversion of end-of-line format.\n\
6262 See info node `Coding Systems' and info node `Text and Binary' concerning\n\
6264 inhibit_eol_conversion
= 0;
6266 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system
,
6267 "Non-nil means process buffer inherits coding system of process output.\n\
6268 Bind it to t if the process output is to be treated as if it were a file\n\
6269 read from some filesystem.");
6270 inherit_process_coding_system
= 0;
6272 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist
,
6273 "Alist to decide a coding system to use for a file I/O operation.\n\
6274 The format is ((PATTERN . VAL) ...),\n\
6275 where PATTERN is a regular expression matching a file name,\n\
6276 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
6277 If VAL is a coding system, it is used for both decoding and encoding\n\
6278 the file contents.\n\
6279 If VAL is a cons of coding systems, the car part is used for decoding,\n\
6280 and the cdr part is used for encoding.\n\
6281 If VAL is a function symbol, the function must return a coding system\n\
6282 or a cons of coding systems which are used as above.\n\
6284 See also the function `find-operation-coding-system'\n\
6285 and the variable `auto-coding-alist'.");
6286 Vfile_coding_system_alist
= Qnil
;
6288 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist
,
6289 "Alist to decide a coding system to use for a process I/O operation.\n\
6290 The format is ((PATTERN . VAL) ...),\n\
6291 where PATTERN is a regular expression matching a program name,\n\
6292 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
6293 If VAL is a coding system, it is used for both decoding what received\n\
6294 from the program and encoding what sent to the program.\n\
6295 If VAL is a cons of coding systems, the car part is used for decoding,\n\
6296 and the cdr part is used for encoding.\n\
6297 If VAL is a function symbol, the function must return a coding system\n\
6298 or a cons of coding systems which are used as above.\n\
6300 See also the function `find-operation-coding-system'.");
6301 Vprocess_coding_system_alist
= Qnil
;
6303 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist
,
6304 "Alist to decide a coding system to use for a network I/O operation.\n\
6305 The format is ((PATTERN . VAL) ...),\n\
6306 where PATTERN is a regular expression matching a network service name\n\
6307 or is a port number to connect to,\n\
6308 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
6309 If VAL is a coding system, it is used for both decoding what received\n\
6310 from the network stream and encoding what sent to the network stream.\n\
6311 If VAL is a cons of coding systems, the car part is used for decoding,\n\
6312 and the cdr part is used for encoding.\n\
6313 If VAL is a function symbol, the function must return a coding system\n\
6314 or a cons of coding systems which are used as above.\n\
6316 See also the function `find-operation-coding-system'.");
6317 Vnetwork_coding_system_alist
= Qnil
;
6319 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system
,
6320 "Coding system to use with system messages.");
6321 Vlocale_coding_system
= Qnil
;
6323 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix
,
6324 "*String displayed in mode line for UNIX-like (LF) end-of-line format.");
6325 eol_mnemonic_unix
= build_string (":");
6327 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos
,
6328 "*String displayed in mode line for DOS-like (CRLF) end-of-line format.");
6329 eol_mnemonic_dos
= build_string ("\\");
6331 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac
,
6332 "*String displayed in mode line for MAC-like (CR) end-of-line format.");
6333 eol_mnemonic_mac
= build_string ("/");
6335 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided
,
6336 "*String displayed in mode line when end-of-line format is not yet determined.");
6337 eol_mnemonic_undecided
= build_string (":");
6339 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation
,
6340 "*Non-nil enables character translation while encoding and decoding.");
6341 Venable_character_translation
= Qt
;
6343 DEFVAR_LISP ("standard-translation-table-for-decode",
6344 &Vstandard_translation_table_for_decode
,
6345 "Table for translating characters while decoding.");
6346 Vstandard_translation_table_for_decode
= Qnil
;
6348 DEFVAR_LISP ("standard-translation-table-for-encode",
6349 &Vstandard_translation_table_for_encode
,
6350 "Table for translationg characters while encoding.");
6351 Vstandard_translation_table_for_encode
= Qnil
;
6353 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist
,
6354 "Alist of charsets vs revision numbers.\n\
6355 While encoding, if a charset (car part of an element) is found,\n\
6356 designate it with the escape sequence identifing revision (cdr part of the element).");
6357 Vcharset_revision_alist
= Qnil
;
6359 DEFVAR_LISP ("default-process-coding-system",
6360 &Vdefault_process_coding_system
,
6361 "Cons of coding systems used for process I/O by default.\n\
6362 The car part is used for decoding a process output,\n\
6363 the cdr part is used for encoding a text to be sent to a process.");
6364 Vdefault_process_coding_system
= Qnil
;
6366 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table
,
6367 "Table of extra Latin codes in the range 128..159 (inclusive).\n\
6368 This is a vector of length 256.\n\
6369 If Nth element is non-nil, the existence of code N in a file\n\
6370 \(or output of subprocess) doesn't prevent it to be detected as\n\
6371 a coding system of ISO 2022 variant which has a flag\n\
6372 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file\n\
6373 or reading output of a subprocess.\n\
6374 Only 128th through 159th elements has a meaning.");
6375 Vlatin_extra_code_table
= Fmake_vector (make_number (256), Qnil
);
6377 DEFVAR_LISP ("select-safe-coding-system-function",
6378 &Vselect_safe_coding_system_function
,
6379 "Function to call to select safe coding system for encoding a text.\n\
6381 If set, this function is called to force a user to select a proper\n\
6382 coding system which can encode the text in the case that a default\n\
6383 coding system used in each operation can't encode the text.\n\
6385 The default value is `select-safe-coding-system' (which see).");
6386 Vselect_safe_coding_system_function
= Qnil
;
6391 emacs_strerror (error_number
)
6396 synchronize_system_messages_locale ();
6397 str
= strerror (error_number
);
6399 if (! NILP (Vlocale_coding_system
))
6401 Lisp_Object dec
= code_convert_string_norecord (build_string (str
),
6402 Vlocale_coding_system
,
6404 str
= (char *) XSTRING (dec
)->data
;