; * lisp/ldefs-boot.el: Update.
[emacs.git] / src / coding.h
blobaab8c2d4380945fe53edb0ed572121814d8c63c9
1 /* Header for coding system handler.
2 Copyright (C) 2001-2019 Free Software Foundation, Inc.
3 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
4 2005, 2006, 2007, 2008, 2009, 2010, 2011
5 National Institute of Advanced Industrial Science and Technology (AIST)
6 Registration Number H14PRO021
7 Copyright (C) 2003
8 National Institute of Advanced Industrial Science and Technology (AIST)
9 Registration Number H13PRO009
11 This file is part of GNU Emacs.
13 GNU Emacs is free software: you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation, either version 3 of the License, or (at
16 your option) any later version.
18 GNU Emacs is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
26 #ifndef EMACS_CODING_H
27 #define EMACS_CODING_H
29 #include "lisp.h"
31 INLINE_HEADER_BEGIN
/* Index to arguments of Fdefine_coding_system_internal.  These are
   the argument positions shared by every coding-system type; the
   per-type enums that follow start their numbering at
   coding_arg_max.  */

enum define_coding_system_arg_index
  {
    coding_arg_name,
    coding_arg_mnemonic,
    coding_arg_coding_type,
    coding_arg_charset_list,
    coding_arg_ascii_compatible_p,
    coding_arg_decode_translation_table,
    coding_arg_encode_translation_table,
    coding_arg_post_read_conversion,
    coding_arg_pre_write_conversion,
    coding_arg_default_char,
    coding_arg_for_unibyte,
    coding_arg_plist,
    coding_arg_eol_type,
    /* Number of common arguments; first index of the per-type ones.  */
    coding_arg_max
  };
53 enum define_coding_iso2022_arg_index
55 coding_arg_iso2022_initial = coding_arg_max,
56 coding_arg_iso2022_reg_usage,
57 coding_arg_iso2022_request,
58 coding_arg_iso2022_flags,
59 coding_arg_iso2022_max
62 enum define_coding_utf8_arg_index
64 coding_arg_utf8_bom = coding_arg_max,
65 coding_arg_utf8_max
68 enum define_coding_utf16_arg_index
70 coding_arg_utf16_bom = coding_arg_max,
71 coding_arg_utf16_endian,
72 coding_arg_utf16_max
75 enum define_coding_ccl_arg_index
77 coding_arg_ccl_decoder = coding_arg_max,
78 coding_arg_ccl_encoder,
79 coding_arg_ccl_valids,
80 coding_arg_ccl_max
83 enum define_coding_undecided_arg_index
85 coding_arg_undecided_inhibit_null_byte_detection = coding_arg_max,
86 coding_arg_undecided_inhibit_iso_escape_detection,
87 coding_arg_undecided_prefer_utf_8,
88 coding_arg_undecided_max
91 /* Hash table for all coding systems. Keys are coding system symbols
92 and values are spec vectors of the corresponding coding system. A
93 spec vector has the form [ ATTRS ALIASES EOL-TYPE ]. ATTRS is a
94 vector of attribute of the coding system. ALIASES is a list of
95 aliases (symbols) of the coding system. EOL-TYPE is `unix', `dos',
96 `mac' or a vector of coding systems (symbols). */
98 extern Lisp_Object Vcoding_system_hash_table;
/* Enumeration of index to an attribute vector of a coding system.
   The order defines the vector layout, so members must not be
   reordered.  */

enum coding_attr_index
  {
    coding_attr_base_name,
    coding_attr_docstring,
    coding_attr_mnemonic,
    coding_attr_type,
    coding_attr_charset_list,
    coding_attr_ascii_compat,
    coding_attr_decode_tbl,
    coding_attr_encode_tbl,
    coding_attr_trans_tbl,
    coding_attr_post_read,
    coding_attr_pre_write,
    coding_attr_default_char,
    coding_attr_for_unibyte,
    coding_attr_plist,

    coding_attr_category,
    coding_attr_safe_charsets,

    /* The following are extra attributes for each type.  */
    coding_attr_charset_valids,

    coding_attr_ccl_decoder,
    coding_attr_ccl_encoder,
    coding_attr_ccl_valids,

    coding_attr_iso_initial,
    coding_attr_iso_usage,
    coding_attr_iso_request,
    coding_attr_iso_flags,

    coding_attr_utf_bom,
    coding_attr_utf_16_endian,

    coding_attr_emacs_mule_full,

    coding_attr_undecided_inhibit_null_byte_detection,
    coding_attr_undecided_inhibit_iso_escape_detection,
    coding_attr_undecided_prefer_utf_8,

    /* One past the last valid index.  */
    coding_attr_last_index
  };
/* Macros to access an element of an attribute vector.  Each one
   expands to AREF (ATTRS, <matching coding_attr_index member>).  */

#define CODING_ATTR_BASE_NAME(attrs)	AREF (attrs, coding_attr_base_name)
#define CODING_ATTR_TYPE(attrs)		AREF (attrs, coding_attr_type)
#define CODING_ATTR_CHARSET_LIST(attrs)	AREF (attrs, coding_attr_charset_list)
#define CODING_ATTR_MNEMONIC(attrs)	AREF (attrs, coding_attr_mnemonic)
#define CODING_ATTR_DOCSTRING(attrs)	AREF (attrs, coding_attr_docstring)
#define CODING_ATTR_ASCII_COMPAT(attrs)	AREF (attrs, coding_attr_ascii_compat)
#define CODING_ATTR_DECODE_TBL(attrs)	AREF (attrs, coding_attr_decode_tbl)
#define CODING_ATTR_ENCODE_TBL(attrs)	AREF (attrs, coding_attr_encode_tbl)
#define CODING_ATTR_TRANS_TBL(attrs)	AREF (attrs, coding_attr_trans_tbl)
#define CODING_ATTR_POST_READ(attrs)	AREF (attrs, coding_attr_post_read)
#define CODING_ATTR_PRE_WRITE(attrs)	AREF (attrs, coding_attr_pre_write)
#define CODING_ATTR_DEFAULT_CHAR(attrs)	AREF (attrs, coding_attr_default_char)
#define CODING_ATTR_FOR_UNIBYTE(attrs)	AREF (attrs, coding_attr_for_unibyte)
#define CODING_ATTR_PLIST(attrs)	AREF (attrs, coding_attr_plist)
#define CODING_ATTR_CATEGORY(attrs)	AREF (attrs, coding_attr_category)
#define CODING_ATTR_SAFE_CHARSETS(attrs) AREF (attrs, coding_attr_safe_charsets)
/* The macros below take ID, an index into Vcoding_system_hash_table,
   and pick one slot of the entry: the key (the name) or element 0, 1
   or 2 of the value (the spec vector).  */

/* Return the name of a coding system specified by ID.  */
#define CODING_ID_NAME(id) \
  (HASH_KEY (XHASH_TABLE (Vcoding_system_hash_table), id))

/* Return the attribute vector of a coding system specified by ID.  */

#define CODING_ID_ATTRS(id) \
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 0))

/* Return the list of aliases of a coding system specified by ID.  */

#define CODING_ID_ALIASES(id) \
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 1))

/* Return the eol-type of a coding system specified by ID.  */

#define CODING_ID_EOL_TYPE(id) \
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 2))
/* Return the spec vector of CODING_SYSTEM_SYMBOL, or Qnil (the
   default passed to Fgethash) if the symbol is not in
   Vcoding_system_hash_table.  */

#define CODING_SYSTEM_SPEC(coding_system_symbol)	\
  (Fgethash (coding_system_symbol, Vcoding_system_hash_table, Qnil))


/* Return the ID of CODING_SYSTEM_SYMBOL, i.e. the result of
   hash_lookup on Vcoding_system_hash_table.  The callers in this
   file treat a negative result as "not found".  */

#define CODING_SYSTEM_ID(coding_system_symbol)			\
  (hash_lookup (XHASH_TABLE (Vcoding_system_hash_table),	\
		coding_system_symbol, NULL))
/* Return true if CODING_SYSTEM_SYMBOL is a coding system.  The hash
   table is consulted first; only if that lookup fails, and the
   symbol is non-nil, is Fcoding_system_p asked.  Note that
   CODING_SYSTEM_SYMBOL may be evaluated more than once.  */

#define CODING_SYSTEM_P(coding_system_symbol)		\
  (CODING_SYSTEM_ID (coding_system_symbol) >= 0		\
   || (! NILP (coding_system_symbol)			\
       && ! NILP (Fcoding_system_p (coding_system_symbol))))
/* Check if X is a coding system or not.  If X has no ID in the hash
   table and Fcheck_coding_system (X) returns nil, call
   wrong_type_argument with Qcoding_system_p.  X may be evaluated
   more than once.  */

#define CHECK_CODING_SYSTEM(x)				\
  do {							\
    if (CODING_SYSTEM_ID (x) < 0			\
	&& NILP (Fcheck_coding_system (x)))		\
      wrong_type_argument (Qcoding_system_p, (x));	\
  } while (false)
/* Check if X is a coding system or not.  If it is, set SPEC to the
   spec vector of the coding system.  When the first lookup yields
   nil, Fcheck_coding_system (X) is called and the lookup is retried
   once (presumably so the coding system can get defined on demand --
   confirm against Fcheck_coding_system); if SPEC is still nil,
   wrong_type_argument is called.  SPEC must be an lvalue.  */

#define CHECK_CODING_SYSTEM_GET_SPEC(x, spec)		\
  do {							\
    spec = CODING_SYSTEM_SPEC (x);			\
    if (NILP (spec))					\
      {							\
	Fcheck_coding_system (x);			\
	spec = CODING_SYSTEM_SPEC (x);			\
      }							\
    if (NILP (spec))					\
      wrong_type_argument (Qcoding_system_p, (x));	\
  } while (false)
/* Check if X is a coding system or not.  If it is, set ID to the
   ID of the coding system.  When the first lookup is negative,
   Fcheck_coding_system (X) is called and the lookup is retried once;
   if ID is still negative, wrong_type_argument is called.  ID must
   be an lvalue.  */

#define CHECK_CODING_SYSTEM_GET_ID(x, id)			\
  do								\
    {								\
      id = CODING_SYSTEM_ID (x);				\
      if (id < 0)						\
	{							\
	  Fcheck_coding_system (x);				\
	  id = CODING_SYSTEM_ID (x);				\
	}							\
      if (id < 0)						\
	wrong_type_argument (Qcoding_system_p, (x));		\
    } while (false)
/*** GENERAL section ***/

/* Enumeration of result code of code conversion.  The values must
   fit in the 3-bit `result' field of struct coding_system.  */
enum coding_result_code
  {
    CODING_RESULT_SUCCESS,
    CODING_RESULT_INSUFFICIENT_SRC,
    CODING_RESULT_INSUFFICIENT_DST,
    CODING_RESULT_INVALID_SRC,
    CODING_RESULT_INTERRUPT
  };
/* Macros used for the member `mode' of the struct coding_system.
   Each is a distinct bit; together they occupy the low 5 bits.  */

/* If set, the decoding/encoding routines treat the current data as
   the last block of the whole text to be converted, and do the
   appropriate finishing job.  */
#define CODING_MODE_LAST_BLOCK			0x01

/* If set, it means that the current source text is in a buffer which
   enables selective display.  */
#define CODING_MODE_SELECTIVE_DISPLAY		0x02

/* This flag is used by the decoding/encoding routines on the fly.  If
   set, it means that right-to-left text is being processed.  */
#define CODING_MODE_DIRECTION			0x04

#define CODING_MODE_FIXED_DESTINATION		0x08

/* If set, it means that the encoding routines produce some safe
   ASCII characters (usually '?') for unsupported characters.  */
#define CODING_MODE_SAFE_ENCODING		0x10
284 /* For handling composition sequence. */
285 #include "composite.h"
/* Values for the `state' member of struct composition_status.  */
enum composition_state
  {
    COMPOSING_NO,
    COMPOSING_CHAR,
    COMPOSING_RULE,
    COMPOSING_COMPONENT_CHAR,
    COMPOSING_COMPONENT_RULE
  };
296 /* Structure for the current composition status. */
297 struct composition_status
299 enum composition_state state;
300 enum composition_method method;
301 bool old_form; /* true if pre-21 form */
302 int length; /* number of elements produced in charbuf */
303 int nchars; /* number of characters composed */
304 int ncomps; /* number of composition components */
305 /* Maximum carryover is for the case of COMPOSITION_WITH_RULE_ALTCHARS.
306 See the comment in coding.c. */
307 int carryover[4 /* annotation header */
308 + MAX_COMPOSITION_COMPONENTS * 3 - 2 /* ALTs and RULEs */
309 + 2 /* intermediate -1 -1 */
310 + MAX_COMPOSITION_COMPONENTS /* CHARs */
315 /* Structure of the field `spec.iso_2022' in the structure
316 `coding_system'. */
317 struct iso_2022_spec
319 /* Bit-wise-or of CODING_ISO_FLAG_XXX. */
320 unsigned flags;
322 /* The current graphic register invoked to each graphic plane. */
323 int current_invocation[2];
325 /* The current charset designated to each graphic register. The
326 value -1 means that not charset is designated, -2 means that
327 there was an invalid designation previously. */
328 int current_designation[4];
330 /* If positive, we are now scanning CTEXT extended segment. */
331 int ctext_extended_segment_len;
333 /* True temporarily only when graphic register 2 or 3 is invoked by
334 single-shift while encoding. */
335 bool_bf single_shifting : 1;
337 /* True temporarily only when processing at beginning of line. */
338 bool_bf bol : 1;
340 /* If true, we are now scanning embedded UTF-8 sequence. */
341 bool_bf embedded_utf_8 : 1;
343 /* The current composition. */
344 struct composition_status cmp_status;
347 struct emacs_mule_spec
349 struct composition_status cmp_status;
/* Structure of the field `spec.undecided' in the structure
   `coding_system'.  */
struct undecided_spec
{
  /* Inhibit null byte detection.  1 means always inhibit,
     -1 means do not inhibit, 0 means rely on user variable.  */
  int inhibit_nbd;

  /* Inhibit ISO escape detection.  -1, 0, 1 as above.  */
  int inhibit_ied;

  /* Prefer UTF-8 when the input could be other encodings.  */
  bool prefer_utf_8;
};
/* How a UTF coding system treats the byte order mark.  */
enum utf_bom_type
  {
    utf_detect_bom,
    utf_without_bom,
    utf_with_bom
  };
/* Byte order of a UTF-16 coding system.  */
enum utf_16_endian_type
  {
    utf_16_big_endian,
    utf_16_little_endian
  };
378 struct utf_16_spec
380 enum utf_bom_type bom;
381 enum utf_16_endian_type endian;
382 int surrogate;
/* Detection state handed to the `detector' function of a coding
   system.  */
struct coding_detection_info
{
  /* Values of these members are bitwise-OR of CATEGORY_MASK_XXXs.  */
  /* Which categories are already checked.  */
  int checked;
  /* Which categories are strongly found.  */
  int found;
  /* Which categories are rejected.  */
  int rejected;
};
397 struct coding_system
399 /* ID number of the coding system. This is an index to
400 Vcoding_system_hash_table. This value is set by
401 setup_coding_system. At the early stage of building time, this
402 value is -1 in the array coding_categories to indicate that no
403 coding-system of that category is yet defined. */
404 ptrdiff_t id;
406 /* Flag bits of the coding system. The meaning of each bit is common
407 to all types of coding systems. */
408 unsigned common_flags : 14;
410 /* Mode bits of the coding system. See the comments of the macros
411 CODING_MODE_XXX. */
412 unsigned mode : 5;
414 /* The following two members specify how binary 8-bit code 128..255
415 are represented in source and destination text respectively. True
416 means they are represented by 2-byte sequence, false means they are
417 represented by 1-byte as is (see the comment in character.h). */
418 bool_bf src_multibyte : 1;
419 bool_bf dst_multibyte : 1;
421 /* True if the source of conversion is not in the member
422 `charbuf', but at `src_object'. */
423 bool_bf chars_at_source : 1;
425 /* Nonzero if the result of conversion is in `destination'
426 buffer rather than in `dst_object'. */
427 bool_bf raw_destination : 1;
429 /* Set to true if charbuf contains an annotation. */
430 bool_bf annotated : 1;
432 /* Used internally in coding.c. See the comment of detect_ascii. */
433 unsigned eol_seen : 3;
435 /* Finish status of code conversion. */
436 ENUM_BF (coding_result_code) result : 3;
438 int max_charset_id;
440 /* Detailed information specific to each type of coding system. */
441 union
443 struct iso_2022_spec iso_2022;
444 struct ccl_spec *ccl; /* Defined in ccl.h. */
445 struct utf_16_spec utf_16;
446 enum utf_bom_type utf_8_bom;
447 struct emacs_mule_spec emacs_mule;
448 struct undecided_spec undecided;
449 } spec;
451 unsigned char *safe_charsets;
453 /* How many heading bytes we can skip for decoding. This is set to
454 -1 in setup_coding_system, and updated by detect_coding. So,
455 when this is equal to the byte length of the text being
456 converted, we can skip the actual conversion process except for
457 the eol format. */
458 ptrdiff_t head_ascii;
460 /* How many bytes/chars at the source are detected as valid utf-8
461 sequence. Set by detect_coding_utf_8. */
462 ptrdiff_t detected_utf8_bytes, detected_utf8_chars;
464 /* The following members are set by encoding/decoding routine. */
465 ptrdiff_t produced, produced_char, consumed, consumed_char;
467 ptrdiff_t src_pos, src_pos_byte, src_chars, src_bytes;
468 Lisp_Object src_object;
469 const unsigned char *source;
471 ptrdiff_t dst_pos, dst_pos_byte, dst_bytes;
472 Lisp_Object dst_object;
473 unsigned char *destination;
475 /* If an element is non-negative, it is a character code.
477 If it is in the range -128..-1, it is a 8-bit character code
478 minus 256.
480 If it is less than -128, it specifies the start of an annotation
481 chunk. The length of the chunk is -128 minus the value of the
482 element. The following elements are OFFSET, ANNOTATION-TYPE, and
483 a sequence of actual data for the annotation. OFFSET is a
484 character position offset from dst_pos or src_pos,
485 ANNOTATION-TYPE specifies the meaning of the annotation and how to
486 handle the following data.. */
487 int *charbuf;
488 int charbuf_size, charbuf_used;
490 unsigned char carryover[64];
491 int carryover_bytes;
493 int default_char;
495 bool (*detector) (struct coding_system *, struct coding_detection_info *);
496 void (*decoder) (struct coding_system *);
497 bool (*encoder) (struct coding_system *);
/* Meanings of bits in the member `common_flags' of the structure
   coding_system.  The lowest 8 bits are reserved for various kind of
   annotations (currently two of them are used).

   NOTE(review): CODING_ANNOTATE_CHARSET_MASK is 0x0003, which is not
   a separate bit but the union of the COMPOSITION and DIRECTION
   bits.  The value is kept as is here -- confirm against its users
   in coding.c before changing it.  */
#define CODING_ANNOTATION_MASK			0x00FF
#define CODING_ANNOTATE_COMPOSITION_MASK	0x0001
#define CODING_ANNOTATE_DIRECTION_MASK		0x0002
#define CODING_ANNOTATE_CHARSET_MASK		0x0003
#define CODING_FOR_UNIBYTE_MASK			0x0100
#define CODING_REQUIRE_FLUSHING_MASK		0x0200
#define CODING_REQUIRE_DECODING_MASK		0x0400
#define CODING_REQUIRE_ENCODING_MASK		0x0800
#define CODING_REQUIRE_DETECTION_MASK		0x1000
#define CODING_RESET_AT_BOL_MASK		0x2000

/* All predicates below take CODING, a pointer to a struct
   coding_system, and yield a nonzero (not necessarily 1) value when
   the condition holds.  */

/* Return nonzero if the coding context CODING requires annotation
   handling.  */
#define CODING_REQUIRE_ANNOTATION(coding) \
  ((coding)->common_flags & CODING_ANNOTATION_MASK)

/* Return nonzero if the coding context CODING prefers decoding into
   unibyte.  */
#define CODING_FOR_UNIBYTE(coding) \
  ((coding)->common_flags & CODING_FOR_UNIBYTE_MASK)

/* Return nonzero if the coding context CODING requires specific code to be
   attached at the tail of converted text.  */
#define CODING_REQUIRE_FLUSHING(coding) \
  ((coding)->common_flags & CODING_REQUIRE_FLUSHING_MASK)

/* Return nonzero if the coding context CODING requires code conversion on
   decoding.  */
#define CODING_REQUIRE_DECODING(coding)	\
  ((coding)->dst_multibyte		\
   || ((coding)->common_flags & CODING_REQUIRE_DECODING_MASK))


/* Return nonzero if the coding context CODING requires code conversion on
   encoding.
   The non-multibyte part of the condition is to support encoding of
   unibyte strings/buffers generated by string-as-unibyte or
   (set-buffer-multibyte nil) from multibyte strings/buffers.  */
#define CODING_REQUIRE_ENCODING(coding)					\
  ((coding)->src_multibyte						\
   || ((coding)->common_flags & CODING_REQUIRE_ENCODING_MASK)		\
   || ((coding)->mode & CODING_MODE_SELECTIVE_DISPLAY))


/* Return nonzero if the coding context CODING requires some kind of code
   detection.  */
#define CODING_REQUIRE_DETECTION(coding) \
  ((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK)

/* Return nonzero if the coding context CODING requires code conversion on
   decoding or some kind of code detection.  */
#define CODING_MAY_REQUIRE_DECODING(coding)	\
  (CODING_REQUIRE_DECODING (coding)		\
   || CODING_REQUIRE_DETECTION (coding))
/* Macros to decode or encode a character of JISX0208 in SJIS.  Each
   macro takes CODE, a modifiable lvalue holding a two-byte code as
   (first byte << 8) | second byte, and converts it in place.  Inside
   the macros, S1 and S2 are the first and second bytes of the SJIS
   form and J1 and J2 are the first and second bytes of the JIS form.
   CODE is evaluated several times, so it must have no side
   effects.  */

#define SJIS_TO_JIS(code)				\
  do {							\
    int s1, s2, j1, j2;					\
							\
    s1 = (code) >> 8, s2 = (code) & 0xFF;		\
							\
    if (s2 >= 0x9F)					\
      (j1 = s1 * 2 - (s1 >= 0xE0 ? 0x160 : 0xE0),	\
       j2 = s2 - 0x7E);					\
    else						\
      (j1 = s1 * 2 - ((s1 >= 0xE0) ? 0x161 : 0xE1),	\
       j2 = s2 - ((s2 >= 0x7F) ? 0x20 : 0x1F));		\
    (code) = (j1 << 8) | j2;				\
  } while (false)
/* Like SJIS_TO_JIS, but with a different first-byte mapping: first
   bytes 0xF0..0xF3 are mapped individually and higher ones by
   formula, as spelled out below.  (Presumably this is the second
   plane used by the extended SJIS code -- confirm against the callers
   in coding.c.)  CODE is converted in place and evaluated several
   times.  */
#define SJIS_TO_JIS2(code)				\
  do {							\
    int s1, s2, j1, j2;					\
							\
    s1 = (code) >> 8, s2 = (code) & 0xFF;		\
							\
    if (s2 >= 0x9F)					\
      {							\
	j1 = (s1 == 0xF0 ? 0x28				\
	      : s1 == 0xF1 ? 0x24			\
	      : s1 == 0xF2 ? 0x2C			\
	      : s1 == 0xF3 ? 0x2E			\
	      : 0x6E + (s1 - 0xF4) * 2);		\
	j2 = s2 - 0x7E;					\
      }							\
    else						\
      {							\
	j1 = (s1 <= 0xF2 ? 0x21 + (s1 - 0xF0) * 2	\
	      : s1 <= 0xF4 ? 0x2D + (s1 - 0xF3) * 2	\
	      : 0x6F + (s1 - 0xF5) * 2);		\
	j2 = s2 - ((s2 >= 0x7F ? 0x20 : 0x1F));		\
      }							\
    (code) = (j1 << 8) | j2;				\
  } while (false)
/* Inverse of SJIS_TO_JIS: convert CODE, a two-byte JIS code held as
   (first byte << 8) | second byte, to its SJIS form in place.  CODE
   must be a modifiable lvalue and is evaluated several times.  */
#define JIS_TO_SJIS(code)				\
  do {							\
    int s1, s2, j1, j2;					\
							\
    j1 = (code) >> 8, j2 = (code) & 0xFF;		\
    if (j1 & 1)						\
      (s1 = j1 / 2 + ((j1 < 0x5F) ? 0x71 : 0xB1),	\
       s2 = j2 + ((j2 >= 0x60) ? 0x20 : 0x1F));		\
    else						\
      (s1 = j1 / 2 + ((j1 < 0x5F) ? 0x70 : 0xB0),	\
       s2 = j2 + 0x7E);					\
    (code) = (s1 << 8) | s2;				\
  } while (false)
/* Inverse of SJIS_TO_JIS2: convert CODE, held as
   (first byte << 8) | second byte, back to the SJIS form with first
   bytes 0xF0 and up.  CODE must be a modifiable lvalue and is
   evaluated several times.  */
#define JIS_TO_SJIS2(code)				\
  do {							\
    int s1, s2, j1, j2;					\
							\
    j1 = (code) >> 8, j2 = (code) & 0xFF;		\
    if (j1 & 1)						\
      {							\
	s1 = (j1 <= 0x25 ? 0xF0 + (j1 - 0x21) / 2	\
	      : j1 <= 0x2F ? 0xF3 + (j1 - 0x2D) / 2	\
	      : 0xF5 + (j1 - 0x6F) / 2);		\
	s2 = j2 + ((j2 >= 0x60) ? 0x20 : 0x1F);		\
      }							\
    else						\
      {							\
	s1 = (j1 == 0x28 ? 0xF0				\
	      : j1 == 0x24 ? 0xF1			\
	      : j1 == 0x2C ? 0xF2			\
	      : j1 == 0x2E ? 0xF3			\
	      : 0xF4 + (j1 - 0x6E) / 2);		\
	s2 = j2 + 0x7E;					\
      }							\
    (code) = (s1 << 8) | s2;				\
  } while (false)
/* Encode the file name NAME using the specified coding system
   for file names, if any.  */
#define ENCODE_FILE(NAME)  encode_file_name (NAME)

/* Decode the file name NAME using the specified coding system
   for file names, if any.  */
#define DECODE_FILE(NAME)  decode_file_name (NAME)

/* Encode the string STR using the specified coding system
   for system functions, if any.  STR is returned unchanged when
   Vlocale_coding_system is nil.  */
#define ENCODE_SYSTEM(str)						   \
  (! NILP (Vlocale_coding_system)					   \
   ? code_convert_string_norecord ((str), Vlocale_coding_system, true)	   \
   : (str))

/* Decode the string STR using the specified coding system
   for system functions, if any.  STR is returned unchanged when
   Vlocale_coding_system is nil.  */
#define DECODE_SYSTEM(str)						   \
  (! NILP (Vlocale_coding_system)					   \
   ? code_convert_string_norecord ((str), Vlocale_coding_system, false)   \
   : (str))

/* Note that this encodes utf-8, not utf-8-emacs, so it's not a no-op.  */
#define ENCODE_UTF_8(str) code_convert_string_norecord ((str), Qutf_8, true)
/* Return true if VAL is a high surrogate, i.e. in 0xD800..0xDBFF.
   VAL must be a 16-bit code unit.  */

#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* Return true if VAL is a low surrogate, i.e. in 0xDC00..0xDFFF.
   VAL must be a 16-bit code unit.  */

#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)
679 /* Return the Unicode code point for the given UTF-16 surrogates. */
681 INLINE int
682 surrogates_to_codepoint (int low, int high)
684 eassert (0 <= low && low <= 0xFFFF);
685 eassert (0 <= high && high <= 0xFFFF);
686 eassert (UTF_16_LOW_SURROGATE_P (low));
687 eassert (UTF_16_HIGH_SURROGATE_P (high));
688 return 0x10000 + (low - 0xDC00) + ((high - 0xD800) * 0x400);
691 /* Extern declarations. */
692 extern Lisp_Object code_conversion_save (bool, bool);
693 extern bool encode_coding_utf_8 (struct coding_system *);
694 extern void setup_coding_system (Lisp_Object, struct coding_system *);
695 extern Lisp_Object coding_charset_list (struct coding_system *);
696 extern Lisp_Object coding_system_charset_list (Lisp_Object);
697 extern Lisp_Object code_convert_string (Lisp_Object, Lisp_Object,
698 Lisp_Object, bool, bool, bool);
699 extern Lisp_Object code_convert_string_norecord (Lisp_Object, Lisp_Object,
700 bool);
701 extern Lisp_Object encode_file_name (Lisp_Object);
702 extern Lisp_Object decode_file_name (Lisp_Object);
703 extern Lisp_Object raw_text_coding_system (Lisp_Object);
704 extern bool raw_text_coding_system_p (struct coding_system *);
705 extern Lisp_Object coding_inherit_eol_type (Lisp_Object, Lisp_Object);
706 extern Lisp_Object complement_process_encoding_system (Lisp_Object);
708 extern void decode_coding_gap (struct coding_system *,
709 ptrdiff_t, ptrdiff_t);
710 extern void decode_coding_object (struct coding_system *,
711 Lisp_Object, ptrdiff_t, ptrdiff_t,
712 ptrdiff_t, ptrdiff_t, Lisp_Object);
713 extern void encode_coding_object (struct coding_system *,
714 Lisp_Object, ptrdiff_t, ptrdiff_t,
715 ptrdiff_t, ptrdiff_t, Lisp_Object);
#if defined (WINDOWSNT) || defined (CYGWIN)

/* These functions use Lisp string objects to store the UTF-16LE
   strings that modern versions of Windows expect.  These strings are
   not particularly useful to Lisp, and all Lisp strings should be
   native Emacs multibyte.  */

/* Access the wide-character string stored in a Lisp string object.  */
#define WCSDATA(x) ((wchar_t *) SDATA (x))

/* Convert the multi-byte string in STR to UTF-16LE encoded unibyte
   string, and store it in *BUF.  BUF may safely point to STR on entry.  */
extern wchar_t *to_unicode (Lisp_Object str, Lisp_Object *buf);

/* Convert STR, a UTF-16LE encoded string embedded in a unibyte string
   object, to a multi-byte Emacs string and return it.  This function
   calls code_convert_string_norecord internally and has all its
   failure modes.  STR itself is not modified.  */
extern Lisp_Object from_unicode (Lisp_Object str);

/* Convert WSTR to an Emacs string.  */
extern Lisp_Object from_unicode_buffer (const wchar_t *wstr);

#endif /* WINDOWSNT || CYGWIN */
/* Macros for backward compatibility.  */

/* Encode STRING with CODING.  A multibyte STRING is passed to
   encode_coding_object and the macro yields CODING's dst_object; a
   unibyte STRING is yielded unchanged.  NOCOPY is accepted but not
   used by the expansion.  STRING and CODING are evaluated more than
   once.  */
#define encode_coding_string(coding, string, nocopy)			\
  (STRING_MULTIBYTE(string) ?						\
    (encode_coding_object (coding, string, 0, 0, SCHARS (string),	\
			   SBYTES (string), Qt),			\
     (coding)->dst_object) : (string))


/* Decode BYTES bytes at the C pointer SRC with CODING into
   DST_OBJECT.  The source fields of CODING are set first, then
   decode_coding_object is called with Qnil as the source object.
   BYTES is used for both the character and the byte count.  */
#define decode_coding_c_string(coding, src, bytes, dst_object)		\
  do {									\
    (coding)->source = (src);						\
    (coding)->src_chars = (coding)->src_bytes = (bytes);		\
    decode_coding_object ((coding), Qnil, 0, 0, (bytes), (bytes),	\
			  (dst_object));				\
  } while (false)
760 extern Lisp_Object preferred_coding_system (void);
763 #ifdef emacs
765 /* Coding system to be used to encode text for terminal display when
766 terminal coding system is nil. */
767 extern struct coding_system safe_terminal_coding;
769 #endif
771 extern char emacs_mule_bytes[256];
773 INLINE_HEADER_END
775 #endif /* EMACS_CODING_H */