1 /**********************************************************************
6 created at: Thu May 24 17:23:27 JST 2007
8 Copyright (C) 2007 Yukihiro Matsumoto
10 **********************************************************************/
12 #include "ruby/internal/config.h"
18 #include "internal/enc.h"
19 #include "internal/encoding.h"
20 #include "internal/error.h"
21 #include "internal/inits.h"
22 #include "internal/load.h"
23 #include "internal/object.h"
24 #include "internal/string.h"
25 #include "internal/vm.h"
27 #include "ruby/encoding.h"
28 #include "ruby/util.h"
29 #include "ruby_assert.h"
35 #define ENC_ASSERT(expr) RUBY_ASSERT_WHEN(ENC_DEBUG, expr)
36 #define MUST_STRING(str) (ENC_ASSERT(RB_TYPE_P(str, T_STRING)), str)
38 #undef rb_ascii8bit_encindex
39 #undef rb_utf8_encindex
40 #undef rb_usascii_encindex
42 typedef OnigEncodingType rb_raw_encoding
;
44 #if defined __GNUC__ && __GNUC__ >= 4
45 #pragma GCC visibility push(default)
46 int rb_enc_register(const char *name
, rb_encoding
*encoding
);
47 void rb_enc_set_base(const char *name
, const char *orig
);
48 int rb_enc_set_dummy(int index
);
49 void rb_encdb_declare(const char *name
);
50 int rb_encdb_replicate(const char *name
, const char *orig
);
51 int rb_encdb_dummy(const char *name
);
52 int rb_encdb_alias(const char *alias
, const char *orig
);
53 #pragma GCC visibility pop
56 static ID id_encoding
;
59 #define ENCODING_LIST_CAPA 256
60 static VALUE rb_encoding_list
;
62 struct rb_encoding_entry
{
68 static struct enc_table
{
69 struct rb_encoding_entry list
[ENCODING_LIST_CAPA
];
75 enc_names_free_i(st_data_t name
, st_data_t idx
, st_data_t args
)
77 ruby_xfree((void *)name
);
82 rb_free_global_enc_table(void)
84 for (size_t i
= 0; i
< ENCODING_LIST_CAPA
; i
++) {
85 xfree((void *)global_enc_table
.list
[i
].enc
);
88 st_foreach(global_enc_table
.names
, enc_names_free_i
, (st_data_t
)0);
89 st_free_table(global_enc_table
.names
);
92 static rb_encoding
*global_enc_ascii
,
96 #define GLOBAL_ENC_TABLE_ENTER(enc_table) struct enc_table *enc_table = &global_enc_table; RB_VM_LOCK_ENTER()
97 #define GLOBAL_ENC_TABLE_LEAVE() RB_VM_LOCK_LEAVE()
98 #define GLOBAL_ENC_TABLE_EVAL(enc_table, expr) do { \
99 GLOBAL_ENC_TABLE_ENTER(enc_table); \
103 GLOBAL_ENC_TABLE_LEAVE(); \
107 #define ENC_DUMMY_FLAG (1<<24)
108 #define ENC_INDEX_MASK (~(~0U<<24))
110 #define ENC_TO_ENCINDEX(enc) (int)((enc)->ruby_encoding_index & ENC_INDEX_MASK)
111 #define ENC_DUMMY_P(enc) ((enc)->ruby_encoding_index & ENC_DUMMY_FLAG)
112 #define ENC_SET_DUMMY(enc) ((enc)->ruby_encoding_index |= ENC_DUMMY_FLAG)
114 #define ENCODING_COUNT ENCINDEX_BUILTIN_MAX
115 #define UNSPECIFIED_ENCODING INT_MAX
117 #define ENCODING_NAMELEN_MAX 63
118 #define valid_encoding_name_p(name) ((name) && strlen(name) <= ENCODING_NAMELEN_MAX)
120 static const rb_data_type_t encoding_data_type
= {
123 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
| RUBY_TYPED_WB_PROTECTED
126 #define is_data_encoding(obj) (RTYPEDDATA_P(obj) && RTYPEDDATA_TYPE(obj) == &encoding_data_type)
127 #define is_obj_encoding(obj) (RB_TYPE_P((obj), T_DATA) && is_data_encoding(obj))
130 rb_data_is_encoding(VALUE obj
)
132 return is_data_encoding(obj
);
136 enc_new(rb_encoding
*encoding
)
138 VALUE enc
= TypedData_Wrap_Struct(rb_cEncoding
, &encoding_data_type
, (void *)encoding
);
140 FL_SET_RAW(enc
, RUBY_FL_SHAREABLE
);
145 enc_list_update(int index
, rb_raw_encoding
*encoding
)
147 RUBY_ASSERT(index
< ENCODING_LIST_CAPA
);
149 VALUE list
= rb_encoding_list
;
150 if (list
&& NIL_P(rb_ary_entry(list
, index
))) {
151 /* initialize encoding data */
152 rb_ary_store(list
, index
, enc_new(encoding
));
157 enc_list_lookup(int idx
)
159 VALUE list
, enc
= Qnil
;
161 if (idx
< ENCODING_LIST_CAPA
) {
162 list
= rb_encoding_list
;
164 enc
= rb_ary_entry(list
, idx
);
168 rb_bug("rb_enc_from_encoding_index(%d): not created yet", idx
);
176 rb_enc_from_encoding_index(int idx
)
178 return enc_list_lookup(idx
);
182 rb_enc_from_encoding(rb_encoding
*encoding
)
185 if (!encoding
) return Qnil
;
186 idx
= ENC_TO_ENCINDEX(encoding
);
187 return rb_enc_from_encoding_index(idx
);
191 rb_enc_to_index(rb_encoding
*enc
)
193 return enc
? ENC_TO_ENCINDEX(enc
) : 0;
197 rb_enc_dummy_p(rb_encoding
*enc
)
199 return ENC_DUMMY_P(enc
) != 0;
203 check_encoding(rb_encoding
*enc
)
205 int index
= rb_enc_to_index(enc
);
206 if (rb_enc_from_index(index
) != enc
)
208 if (rb_enc_autoload_p(enc
)) {
209 index
= rb_enc_autoload(enc
);
215 enc_check_encoding(VALUE obj
)
217 if (!is_obj_encoding(obj
)) {
220 return check_encoding(RDATA(obj
)->data
);
223 NORETURN(static void not_encoding(VALUE enc
));
225 not_encoding(VALUE enc
)
227 rb_raise(rb_eTypeError
, "wrong argument type %"PRIsVALUE
" (expected Encoding)",
232 must_encoding(VALUE enc
)
234 int index
= enc_check_encoding(enc
);
238 return DATA_PTR(enc
);
242 must_encindex(int index
)
244 rb_encoding
*enc
= rb_enc_from_index(index
);
246 rb_raise(rb_eEncodingError
, "encoding index out of bound: %d",
249 if (ENC_TO_ENCINDEX(enc
) != (int)(index
& ENC_INDEX_MASK
)) {
250 rb_raise(rb_eEncodingError
, "wrong encoding index %d for %s (expected %d)",
251 index
, rb_enc_name(enc
), ENC_TO_ENCINDEX(enc
));
253 if (rb_enc_autoload_p(enc
) && rb_enc_autoload(enc
) == -1) {
254 rb_loaderror("failed to load encoding (%s)",
261 rb_to_encoding_index(VALUE enc
)
266 idx
= enc_check_encoding(enc
);
270 else if (NIL_P(enc
= rb_check_string_type(enc
))) {
273 if (!rb_enc_asciicompat(rb_enc_get(enc
))) {
276 if (!(name
= rb_str_to_cstr(enc
))) {
279 return rb_enc_find_index(name
);
283 name_for_encoding(volatile VALUE
*enc
)
285 VALUE name
= StringValue(*enc
);
288 if (!rb_enc_asciicompat(rb_enc_get(name
))) {
289 rb_raise(rb_eArgError
, "invalid encoding name (non ASCII)");
291 if (!(n
= rb_str_to_cstr(name
))) {
292 rb_raise(rb_eArgError
, "invalid encoding name (NUL byte)");
297 /* Returns encoding index or UNSPECIFIED_ENCODING */
299 str_find_encindex(VALUE enc
)
301 int idx
= rb_enc_find_index(name_for_encoding(&enc
));
307 str_to_encindex(VALUE enc
)
309 int idx
= str_find_encindex(enc
);
311 rb_raise(rb_eArgError
, "unknown encoding name - %"PRIsVALUE
, enc
);
317 str_to_encoding(VALUE enc
)
319 return rb_enc_from_index(str_to_encindex(enc
));
323 rb_to_encoding(VALUE enc
)
325 if (enc_check_encoding(enc
) >= 0) return RDATA(enc
)->data
;
326 return str_to_encoding(enc
);
330 rb_find_encoding(VALUE enc
)
333 if (enc_check_encoding(enc
) >= 0) return RDATA(enc
)->data
;
334 idx
= str_find_encindex(enc
);
335 if (idx
< 0) return NULL
;
336 return rb_enc_from_index(idx
);
340 enc_table_expand(struct enc_table
*enc_table
, int newsize
)
342 if (newsize
> ENCODING_LIST_CAPA
) {
343 rb_raise(rb_eEncodingError
, "too many encoding (> %d)", ENCODING_LIST_CAPA
);
349 enc_register_at(struct enc_table
*enc_table
, int index
, const char *name
, rb_encoding
*base_encoding
)
351 struct rb_encoding_entry
*ent
= &enc_table
->list
[index
];
352 rb_raw_encoding
*encoding
;
354 if (!valid_encoding_name_p(name
)) return -1;
356 ent
->name
= name
= strdup(name
);
358 else if (STRCASECMP(name
, ent
->name
)) {
361 encoding
= (rb_raw_encoding
*)ent
->enc
;
363 encoding
= xmalloc(sizeof(rb_encoding
));
366 *encoding
= *base_encoding
;
369 memset(encoding
, 0, sizeof(*ent
->enc
));
371 encoding
->name
= name
;
372 encoding
->ruby_encoding_index
= index
;
374 st_insert(enc_table
->names
, (st_data_t
)name
, (st_data_t
)index
);
376 enc_list_update(index
, encoding
);
381 enc_register(struct enc_table
*enc_table
, const char *name
, rb_encoding
*encoding
)
383 int index
= enc_table
->count
;
385 enc_table
->count
= enc_table_expand(enc_table
, index
+ 1);
386 return enc_register_at(enc_table
, index
, name
, encoding
);
389 static void set_encoding_const(const char *, rb_encoding
*);
390 static int enc_registered(struct enc_table
*enc_table
, const char *name
);
393 enc_from_index(struct enc_table
*enc_table
, int index
)
395 if (UNLIKELY(index
< 0 || enc_table
->count
<= (index
&= ENC_INDEX_MASK
))) {
398 return enc_table
->list
[index
].enc
;
402 rb_enc_from_index(int index
)
404 return enc_from_index(&global_enc_table
, index
);
408 rb_enc_register(const char *name
, rb_encoding
*encoding
)
412 GLOBAL_ENC_TABLE_ENTER(enc_table
);
414 index
= enc_registered(enc_table
, name
);
417 rb_encoding
*oldenc
= enc_from_index(enc_table
, index
);
418 if (STRCASECMP(name
, rb_enc_name(oldenc
))) {
419 index
= enc_register(enc_table
, name
, encoding
);
421 else if (rb_enc_autoload_p(oldenc
) || !ENC_DUMMY_P(oldenc
)) {
422 enc_register_at(enc_table
, index
, name
, encoding
);
425 rb_raise(rb_eArgError
, "encoding %s is already registered", name
);
429 index
= enc_register(enc_table
, name
, encoding
);
430 set_encoding_const(name
, rb_enc_from_index(index
));
433 GLOBAL_ENC_TABLE_LEAVE();
438 enc_registered(struct enc_table
*enc_table
, const char *name
)
442 if (!name
) return -1;
443 if (!enc_table
->names
) return -1;
444 if (st_lookup(enc_table
->names
, (st_data_t
)name
, &idx
)) {
451 rb_encdb_declare(const char *name
)
453 GLOBAL_ENC_TABLE_ENTER(enc_table
);
455 int idx
= enc_registered(enc_table
, name
);
457 idx
= enc_register(enc_table
, name
, 0);
459 set_encoding_const(name
, rb_enc_from_index(idx
));
461 GLOBAL_ENC_TABLE_LEAVE();
465 enc_check_addable(struct enc_table
*enc_table
, const char *name
)
467 if (enc_registered(enc_table
, name
) >= 0) {
468 rb_raise(rb_eArgError
, "encoding %s is already registered", name
);
470 else if (!valid_encoding_name_p(name
)) {
471 rb_raise(rb_eArgError
, "invalid encoding name: %s", name
);
476 set_base_encoding(struct enc_table
*enc_table
, int index
, rb_encoding
*base
)
478 rb_encoding
*enc
= enc_table
->list
[index
].enc
;
481 enc_table
->list
[index
].base
= base
;
482 if (ENC_DUMMY_P(base
)) ENC_SET_DUMMY((rb_raw_encoding
*)enc
);
487 * Set base encoding for encodings which are not replicas
488 * but not in their own files.
491 rb_enc_set_base(const char *name
, const char *orig
)
493 GLOBAL_ENC_TABLE_ENTER(enc_table
);
495 int idx
= enc_registered(enc_table
, name
);
496 int origidx
= enc_registered(enc_table
, orig
);
497 set_base_encoding(enc_table
, idx
, rb_enc_from_index(origidx
));
499 GLOBAL_ENC_TABLE_LEAVE();
503 * Set encoding dummy.
506 rb_enc_set_dummy(int index
)
508 rb_encoding
*enc
= global_enc_table
.list
[index
].enc
;
509 ENC_SET_DUMMY((rb_raw_encoding
*)enc
);
514 enc_replicate(struct enc_table
*enc_table
, const char *name
, rb_encoding
*encoding
)
518 enc_check_addable(enc_table
, name
);
519 idx
= enc_register(enc_table
, name
, encoding
);
520 if (idx
< 0) rb_raise(rb_eArgError
, "invalid encoding name: %s", name
);
521 set_base_encoding(enc_table
, idx
, encoding
);
522 set_encoding_const(name
, rb_enc_from_index(idx
));
527 enc_replicate_with_index(struct enc_table
*enc_table
, const char *name
, rb_encoding
*origenc
, int idx
)
530 idx
= enc_register(enc_table
, name
, origenc
);
533 idx
= enc_register_at(enc_table
, idx
, name
, origenc
);
536 set_base_encoding(enc_table
, idx
, origenc
);
537 set_encoding_const(name
, rb_enc_from_index(idx
));
540 rb_raise(rb_eArgError
, "failed to replicate encoding");
546 rb_encdb_replicate(const char *name
, const char *orig
)
550 GLOBAL_ENC_TABLE_ENTER(enc_table
);
552 int origidx
= enc_registered(enc_table
, orig
);
553 int idx
= enc_registered(enc_table
, name
);
556 origidx
= enc_register(enc_table
, orig
, 0);
558 r
= enc_replicate_with_index(enc_table
, name
, rb_enc_from_index(origidx
), idx
);
560 GLOBAL_ENC_TABLE_LEAVE();
566 rb_define_dummy_encoding(const char *name
)
570 GLOBAL_ENC_TABLE_ENTER(enc_table
);
572 index
= enc_replicate(enc_table
, name
, rb_ascii8bit_encoding());
573 rb_encoding
*enc
= enc_table
->list
[index
].enc
;
574 ENC_SET_DUMMY((rb_raw_encoding
*)enc
);
576 GLOBAL_ENC_TABLE_LEAVE();
582 rb_encdb_dummy(const char *name
)
586 GLOBAL_ENC_TABLE_ENTER(enc_table
);
588 index
= enc_replicate_with_index(enc_table
, name
,
589 rb_ascii8bit_encoding(),
590 enc_registered(enc_table
, name
));
591 rb_encoding
*enc
= enc_table
->list
[index
].enc
;
592 ENC_SET_DUMMY((rb_raw_encoding
*)enc
);
594 GLOBAL_ENC_TABLE_LEAVE();
601 * enc.dummy? -> true or false
603 * Returns true for dummy encodings.
604 * A dummy encoding is an encoding for which character handling is not properly
606 * It is used for stateful encodings.
608 * Encoding::ISO_2022_JP.dummy? #=> true
609 * Encoding::UTF_8.dummy? #=> false
613 enc_dummy_p(VALUE enc
)
615 return RBOOL(ENC_DUMMY_P(must_encoding(enc
)));
620 * enc.ascii_compatible? -> true or false
622 * Returns whether ASCII-compatible or not.
624 * Encoding::UTF_8.ascii_compatible? #=> true
625 * Encoding::UTF_16BE.ascii_compatible? #=> false
629 enc_ascii_compatible_p(VALUE enc
)
631 return RBOOL(rb_enc_asciicompat(must_encoding(enc
)));
635 * Returns non-zero when the encoding is Unicode series other than UTF-7 else 0.
638 rb_enc_unicode_p(rb_encoding
*enc
)
640 return ONIGENC_IS_UNICODE(enc
);
644 enc_dup_name(st_data_t name
)
646 return (st_data_t
)strdup((const char *)name
);
650 * Returns copied alias name when the key is added for st_table,
654 enc_alias_internal(struct enc_table
*enc_table
, const char *alias
, int idx
)
656 return st_insert2(enc_table
->names
, (st_data_t
)alias
, (st_data_t
)idx
,
661 enc_alias(struct enc_table
*enc_table
, const char *alias
, int idx
)
663 if (!valid_encoding_name_p(alias
)) return -1;
664 if (!enc_alias_internal(enc_table
, alias
, idx
))
665 set_encoding_const(alias
, enc_from_index(enc_table
, idx
));
670 rb_enc_alias(const char *alias
, const char *orig
)
674 GLOBAL_ENC_TABLE_ENTER(enc_table
);
676 enc_check_addable(enc_table
, alias
);
677 if ((idx
= rb_enc_find_index(orig
)) < 0) {
681 r
= enc_alias(enc_table
, alias
, idx
);
684 GLOBAL_ENC_TABLE_LEAVE();
690 rb_encdb_alias(const char *alias
, const char *orig
)
694 GLOBAL_ENC_TABLE_ENTER(enc_table
);
696 int idx
= enc_registered(enc_table
, orig
);
699 idx
= enc_register(enc_table
, orig
, 0);
701 r
= enc_alias(enc_table
, alias
, idx
);
703 GLOBAL_ENC_TABLE_LEAVE();
709 rb_enc_init(struct enc_table
*enc_table
)
711 enc_table_expand(enc_table
, ENCODING_COUNT
+ 1);
712 if (!enc_table
->names
) {
713 enc_table
->names
= st_init_strcasetable_with_size(ENCODING_LIST_CAPA
);
715 #define OnigEncodingASCII_8BIT OnigEncodingASCII
716 #define ENC_REGISTER(enc) enc_register_at(enc_table, ENCINDEX_##enc, rb_enc_name(&OnigEncoding##enc), &OnigEncoding##enc)
717 ENC_REGISTER(ASCII_8BIT
);
719 ENC_REGISTER(US_ASCII
);
720 global_enc_ascii
= enc_table
->list
[ENCINDEX_ASCII_8BIT
].enc
;
721 global_enc_utf_8
= enc_table
->list
[ENCINDEX_UTF_8
].enc
;
722 global_enc_us_ascii
= enc_table
->list
[ENCINDEX_US_ASCII
].enc
;
724 #undef OnigEncodingASCII_8BIT
725 #define ENCDB_REGISTER(name, enc) enc_register_at(enc_table, ENCINDEX_##enc, name, NULL)
726 ENCDB_REGISTER("UTF-16BE", UTF_16BE
);
727 ENCDB_REGISTER("UTF-16LE", UTF_16LE
);
728 ENCDB_REGISTER("UTF-32BE", UTF_32BE
);
729 ENCDB_REGISTER("UTF-32LE", UTF_32LE
);
730 ENCDB_REGISTER("UTF-16", UTF_16
);
731 ENCDB_REGISTER("UTF-32", UTF_32
);
732 ENCDB_REGISTER("UTF8-MAC", UTF8_MAC
);
734 ENCDB_REGISTER("EUC-JP", EUC_JP
);
735 ENCDB_REGISTER("Windows-31J", Windows_31J
);
736 #undef ENCDB_REGISTER
737 enc_table
->count
= ENCINDEX_BUILTIN_MAX
;
741 rb_enc_get_from_index(int index
)
743 return must_encindex(index
);
746 int rb_require_internal_silent(VALUE fname
);
749 load_encoding(const char *name
)
751 VALUE enclib
= rb_sprintf("enc/%s.so", name
);
752 VALUE debug
= ruby_debug
;
754 char *s
= RSTRING_PTR(enclib
) + 4, *e
= RSTRING_END(enclib
) - 3;
759 if (!ISALNUM(*s
)) *s
= '_';
760 else if (ISUPPER(*s
)) *s
= (char)TOLOWER(*s
);
763 enclib
= rb_fstring(enclib
);
765 errinfo
= rb_errinfo();
766 loaded
= rb_require_internal_silent(enclib
);
768 rb_set_errinfo(errinfo
);
770 GLOBAL_ENC_TABLE_ENTER(enc_table
);
772 if (loaded
< 0 || 1 < loaded
) {
775 else if ((idx
= enc_registered(enc_table
, name
)) < 0) {
778 else if (rb_enc_autoload_p(enc_table
->list
[idx
].enc
)) {
782 GLOBAL_ENC_TABLE_LEAVE();
788 enc_autoload_body(struct enc_table
*enc_table
, rb_encoding
*enc
)
790 rb_encoding
*base
= enc_table
->list
[ENC_TO_ENCINDEX(enc
)].base
;
795 if (i
>= enc_table
->count
) return -1;
796 } while (enc_table
->list
[i
].enc
!= base
&& (++i
, 1));
797 if (rb_enc_autoload_p(base
)) {
798 if (rb_enc_autoload(base
) < 0) return -1;
800 i
= enc
->ruby_encoding_index
;
801 enc_register_at(enc_table
, i
& ENC_INDEX_MASK
, rb_enc_name(enc
), base
);
802 ((rb_raw_encoding
*)enc
)->ruby_encoding_index
= i
;
812 rb_enc_autoload(rb_encoding
*enc
)
815 GLOBAL_ENC_TABLE_EVAL(enc_table
, i
= enc_autoload_body(enc_table
, enc
));
817 i
= load_encoding(rb_enc_name(enc
));
822 /* Return encoding index or UNSPECIFIED_ENCODING from encoding name */
824 rb_enc_find_index(const char *name
)
826 int i
= enc_registered(&global_enc_table
, name
);
830 i
= load_encoding(name
);
832 else if (!(enc
= rb_enc_from_index(i
))) {
833 if (i
!= UNSPECIFIED_ENCODING
) {
834 rb_raise(rb_eArgError
, "encoding %s is not registered", name
);
837 else if (rb_enc_autoload_p(enc
)) {
838 if (rb_enc_autoload(enc
) < 0) {
839 rb_warn("failed to load encoding (%s); use ASCII-8BIT instead",
848 rb_enc_find_index2(const char *name
, long len
)
850 char buf
[ENCODING_NAMELEN_MAX
+1];
852 if (len
> ENCODING_NAMELEN_MAX
) return -1;
853 memcpy(buf
, name
, len
);
855 return rb_enc_find_index(buf
);
859 rb_enc_find(const char *name
)
861 int idx
= rb_enc_find_index(name
);
862 if (idx
< 0) idx
= 0;
863 return rb_enc_from_index(idx
);
867 enc_capable(VALUE obj
)
869 if (SPECIAL_CONST_P(obj
)) return SYMBOL_P(obj
);
870 switch (BUILTIN_TYPE(obj
)) {
877 if (is_data_encoding(obj
)) return TRUE
;
884 rb_enc_capable(VALUE obj
)
886 return enc_capable(obj
);
892 CONST_ID(id_encoding
, "encoding");
897 enc_get_index_str(VALUE str
)
899 int i
= ENCODING_GET_INLINED(str
);
900 if (i
== ENCODING_INLINE_MAX
) {
904 iv
= rb_ivar_get(str
, rb_id_encoding());
908 * Tentatively, assume ASCII-8BIT, if encoding index instance
909 * variable is not found. This can happen when freeing after
910 * all instance variables are removed in `obj_free`.
912 iv
= rb_attr_get(str
, rb_id_encoding());
913 i
= NIL_P(iv
) ? ENCINDEX_ASCII_8BIT
: NUM2INT(iv
);
920 rb_enc_get_index(VALUE obj
)
925 if (SPECIAL_CONST_P(obj
)) {
926 if (!SYMBOL_P(obj
)) return -1;
927 obj
= rb_sym2str(obj
);
929 switch (BUILTIN_TYPE(obj
)) {
933 i
= enc_get_index_str(obj
);
936 tmp
= rb_funcallv(obj
, rb_intern("internal_encoding"), 0, 0);
938 tmp
= rb_funcallv(obj
, rb_intern("external_encoding"), 0, 0);
940 if (is_obj_encoding(tmp
)) {
941 i
= enc_check_encoding(tmp
);
945 if (is_data_encoding(obj
)) {
946 i
= enc_check_encoding(obj
);
956 enc_set_index(VALUE obj
, int idx
)
958 if (!enc_capable(obj
)) {
959 rb_raise(rb_eArgError
, "cannot set encoding on non-encoding capable object");
962 if (idx
< ENCODING_INLINE_MAX
) {
963 ENCODING_SET_INLINED(obj
, idx
);
966 ENCODING_SET_INLINED(obj
, ENCODING_INLINE_MAX
);
967 rb_ivar_set(obj
, rb_id_encoding(), INT2NUM(idx
));
971 rb_enc_set_index(VALUE obj
, int idx
)
973 rb_check_frozen(obj
);
975 enc_set_index(obj
, idx
);
979 rb_enc_associate_index(VALUE obj
, int idx
)
982 int oldidx
, oldtermlen
, termlen
;
984 /* enc_check_capable(obj);*/
985 rb_check_frozen(obj
);
986 oldidx
= rb_enc_get_index(obj
);
989 if (SPECIAL_CONST_P(obj
)) {
990 rb_raise(rb_eArgError
, "cannot set encoding");
992 enc
= must_encindex(idx
);
993 if (!ENC_CODERANGE_ASCIIONLY(obj
) ||
994 !rb_enc_asciicompat(enc
)) {
995 ENC_CODERANGE_CLEAR(obj
);
997 termlen
= rb_enc_mbminlen(enc
);
998 oldtermlen
= rb_enc_mbminlen(rb_enc_from_index(oldidx
));
999 if (oldtermlen
!= termlen
&& RB_TYPE_P(obj
, T_STRING
)) {
1000 rb_str_change_terminator_length(obj
, oldtermlen
, termlen
);
1002 enc_set_index(obj
, idx
);
1007 rb_enc_associate(VALUE obj
, rb_encoding
*enc
)
1009 return rb_enc_associate_index(obj
, rb_enc_to_index(enc
));
1013 rb_enc_get(VALUE obj
)
1015 return rb_enc_from_index(rb_enc_get_index(obj
));
1019 rb_enc_inspect_name(rb_encoding
*enc
)
1021 if (enc
== global_enc_ascii
) {
1022 return "BINARY (ASCII-8BIT)";
1028 rb_encoding_check(rb_encoding
* enc
, VALUE str1
, VALUE str2
)
1031 rb_raise(rb_eEncCompatError
, "incompatible character encodings: %s and %s",
1032 rb_enc_inspect_name(rb_enc_get(str1
)),
1033 rb_enc_inspect_name(rb_enc_get(str2
)));
1037 static rb_encoding
* enc_compatible_str(VALUE str1
, VALUE str2
);
1040 rb_enc_check_str(VALUE str1
, VALUE str2
)
1042 rb_encoding
*enc
= enc_compatible_str(MUST_STRING(str1
), MUST_STRING(str2
));
1043 return rb_encoding_check(enc
, str1
, str2
);
1047 rb_enc_check(VALUE str1
, VALUE str2
)
1049 rb_encoding
*enc
= rb_enc_compatible(str1
, str2
);
1050 return rb_encoding_check(enc
, str1
, str2
);
1054 enc_compatible_latter(VALUE str1
, VALUE str2
, int idx1
, int idx2
)
1057 rb_encoding
*enc1
= rb_enc_from_index(idx1
);
1058 rb_encoding
*enc2
= rb_enc_from_index(idx2
);
1060 isstr2
= RB_TYPE_P(str2
, T_STRING
);
1061 if (isstr2
&& RSTRING_LEN(str2
) == 0)
1063 isstr1
= RB_TYPE_P(str1
, T_STRING
);
1064 if (isstr1
&& isstr2
&& RSTRING_LEN(str1
) == 0)
1065 return (rb_enc_asciicompat(enc1
) && rb_enc_str_asciionly_p(str2
)) ? enc1
: enc2
;
1066 if (!rb_enc_asciicompat(enc1
) || !rb_enc_asciicompat(enc2
)) {
1070 /* objects whose encoding is the same of contents */
1071 if (!isstr2
&& idx2
== ENCINDEX_US_ASCII
)
1073 if (!isstr1
&& idx1
== ENCINDEX_US_ASCII
)
1090 cr1
= rb_enc_str_coderange(str1
);
1092 cr2
= rb_enc_str_coderange(str2
);
1094 /* may need to handle ENC_CODERANGE_BROKEN */
1095 if (cr1
== ENC_CODERANGE_7BIT
) return enc2
;
1096 if (cr2
== ENC_CODERANGE_7BIT
) return enc1
;
1098 if (cr2
== ENC_CODERANGE_7BIT
) {
1102 if (cr1
== ENC_CODERANGE_7BIT
)
1109 enc_compatible_str(VALUE str1
, VALUE str2
)
1111 int idx1
= enc_get_index_str(str1
);
1112 int idx2
= enc_get_index_str(str2
);
1114 if (idx1
< 0 || idx2
< 0)
1118 return rb_enc_from_index(idx1
);
1121 return enc_compatible_latter(str1
, str2
, idx1
, idx2
);
1126 rb_enc_compatible(VALUE str1
, VALUE str2
)
1128 int idx1
= rb_enc_get_index(str1
);
1129 int idx2
= rb_enc_get_index(str2
);
1131 if (idx1
< 0 || idx2
< 0)
1135 return rb_enc_from_index(idx1
);
1138 return enc_compatible_latter(str1
, str2
, idx1
, idx2
);
1142 rb_enc_copy(VALUE obj1
, VALUE obj2
)
1144 rb_enc_associate_index(obj1
, rb_enc_get_index(obj2
));
1150 * obj.encoding -> encoding
1152 * Returns the Encoding object that represents the encoding of obj.
1156 rb_obj_encoding(VALUE obj
)
1158 int idx
= rb_enc_get_index(obj
);
1160 rb_raise(rb_eTypeError
, "unknown encoding");
1162 return rb_enc_from_encoding_index(idx
& ENC_INDEX_MASK
);
1166 rb_enc_fast_mbclen(const char *p
, const char *e
, rb_encoding
*enc
)
1168 return ONIGENC_MBC_ENC_LEN(enc
, (UChar
*)p
, (UChar
*)e
);
1172 rb_enc_mbclen(const char *p
, const char *e
, rb_encoding
*enc
)
1174 int n
= ONIGENC_PRECISE_MBC_ENC_LEN(enc
, (UChar
*)p
, (UChar
*)e
);
1175 if (MBCLEN_CHARFOUND_P(n
) && MBCLEN_CHARFOUND_LEN(n
) <= e
-p
)
1176 return MBCLEN_CHARFOUND_LEN(n
);
1178 int min
= rb_enc_mbminlen(enc
);
1179 return min
<= e
-p
? min
: (int)(e
-p
);
1184 rb_enc_precise_mbclen(const char *p
, const char *e
, rb_encoding
*enc
)
1188 return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(1);
1189 n
= ONIGENC_PRECISE_MBC_ENC_LEN(enc
, (UChar
*)p
, (UChar
*)e
);
1191 return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n
-(int)(e
-p
));
1196 rb_enc_ascget(const char *p
, const char *e
, int *len
, rb_encoding
*enc
)
1202 if (rb_enc_asciicompat(enc
)) {
1203 c
= (unsigned char)*p
;
1209 l
= rb_enc_precise_mbclen(p
, e
, enc
);
1210 if (!MBCLEN_CHARFOUND_P(l
))
1212 c
= rb_enc_mbc_to_codepoint(p
, e
, enc
);
1213 if (!rb_enc_isascii(c
, enc
))
1220 rb_enc_codepoint_len(const char *p
, const char *e
, int *len_p
, rb_encoding
*enc
)
1224 rb_raise(rb_eArgError
, "empty string");
1225 r
= rb_enc_precise_mbclen(p
, e
, enc
);
1226 if (!MBCLEN_CHARFOUND_P(r
)) {
1227 rb_raise(rb_eArgError
, "invalid byte sequence in %s", rb_enc_name(enc
));
1229 if (len_p
) *len_p
= MBCLEN_CHARFOUND_LEN(r
);
1230 return rb_enc_mbc_to_codepoint(p
, e
, enc
);
1234 rb_enc_codelen(int c
, rb_encoding
*enc
)
1236 int n
= ONIGENC_CODE_TO_MBCLEN(enc
,c
);
1238 rb_raise(rb_eArgError
, "invalid codepoint 0x%x in %s", c
, rb_enc_name(enc
));
1244 rb_enc_toupper(int c
, rb_encoding
*enc
)
1246 return (ONIGENC_IS_ASCII_CODE(c
)?ONIGENC_ASCII_CODE_TO_UPPER_CASE(c
):(c
));
1250 rb_enc_tolower(int c
, rb_encoding
*enc
)
1252 return (ONIGENC_IS_ASCII_CODE(c
)?ONIGENC_ASCII_CODE_TO_LOWER_CASE(c
):(c
));
1257 * enc.inspect -> string
1259 * Returns a string which represents the encoding for programmers.
1261 * Encoding::UTF_8.inspect #=> "#<Encoding:UTF-8>"
1262 * Encoding::ISO_2022_JP.inspect #=> "#<Encoding:ISO-2022-JP (dummy)>"
1265 enc_inspect(VALUE self
)
1269 if (!is_data_encoding(self
)) {
1272 if (!(enc
= DATA_PTR(self
)) || rb_enc_from_index(rb_enc_to_index(enc
)) != enc
) {
1273 rb_raise(rb_eTypeError
, "broken Encoding");
1276 return rb_enc_sprintf(rb_usascii_encoding(),
1277 "#<%"PRIsVALUE
":%s%s%s>", rb_obj_class(self
),
1278 rb_enc_inspect_name(enc
),
1279 (ENC_DUMMY_P(enc
) ? " (dummy)" : ""),
1280 rb_enc_autoload_p(enc
) ? " (autoload)" : "");
1285 * enc.name -> string
1286 * enc.to_s -> string
1288 * Returns the name of the encoding.
1290 * Encoding::UTF_8.name #=> "UTF-8"
1293 enc_name(VALUE self
)
1295 return rb_fstring_cstr(rb_enc_name((rb_encoding
*)DATA_PTR(self
)));
1299 enc_names_i(st_data_t name
, st_data_t idx
, st_data_t args
)
1301 VALUE
*arg
= (VALUE
*)args
;
1303 if ((int)idx
== (int)arg
[0]) {
1304 VALUE str
= rb_fstring_cstr((char *)name
);
1305 rb_ary_push(arg
[1], str
);
1312 * enc.names -> array
1314 * Returns the list of name and aliases of the encoding.
1316 * Encoding::WINDOWS_31J.names #=> ["Windows-31J", "CP932", "csWindows31J", "SJIS", "PCK"]
1319 enc_names(VALUE self
)
1323 args
[0] = (VALUE
)rb_to_encoding_index(self
);
1324 args
[1] = rb_ary_new2(0);
1325 st_foreach(global_enc_table
.names
, enc_names_i
, (st_data_t
)args
);
1331 * Encoding.list -> [enc1, enc2, ...]
1333 * Returns the list of loaded encodings.
1336 * #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
1337 * #<Encoding:ISO-2022-JP (dummy)>]
1339 * Encoding.find("US-ASCII")
1340 * #=> #<Encoding:US-ASCII>
1343 * #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
1344 * #<Encoding:US-ASCII>, #<Encoding:ISO-2022-JP (dummy)>]
1348 enc_list(VALUE klass
)
1350 VALUE ary
= rb_ary_new2(0);
1351 rb_ary_replace(ary
, rb_encoding_list
);
1357 * Encoding.find(string) -> enc
1359 * Search the encoding with specified <i>name</i>.
1360 * <i>name</i> should be a string.
1362 * Encoding.find("US-ASCII") #=> #<Encoding:US-ASCII>
1364 * Names which this method accept are encoding names and aliases
1365 * including following special aliases
1367 * "external":: default external encoding
1368 * "internal":: default internal encoding
1369 * "locale":: locale encoding
1370 * "filesystem":: filesystem encoding
1372 * An ArgumentError is raised when no encoding with <i>name</i>.
1373 * Only <code>Encoding.find("internal")</code> however returns nil
1374 * when no encoding named "internal", in other words, when Ruby has no
1375 * default internal encoding.
1378 enc_find(VALUE klass
, VALUE enc
)
1381 if (is_obj_encoding(enc
))
1383 idx
= str_to_encindex(enc
);
1384 if (idx
== UNSPECIFIED_ENCODING
) return Qnil
;
1385 return rb_enc_from_encoding_index(idx
);
1390 * Encoding.compatible?(obj1, obj2) -> enc or nil
1392 * Checks the compatibility of two objects.
1394 * If the objects are both strings they are compatible when they are
1395 * concatenatable. The encoding of the concatenated string will be returned
1396 * if they are compatible, nil if they are not.
1398 * Encoding.compatible?("\xa1".force_encoding("iso-8859-1"), "b")
1399 * #=> #<Encoding:ISO-8859-1>
1401 * Encoding.compatible?(
1402 * "\xa1".force_encoding("iso-8859-1"),
1403 * "\xa1\xa1".force_encoding("euc-jp"))
1406 * If the objects are non-strings their encodings are compatible when they
1407 * have an encoding and:
1408 * * Either encoding is US-ASCII compatible
1409 * * One of the encodings is a 7-bit encoding
1413 enc_compatible_p(VALUE klass
, VALUE str1
, VALUE str2
)
1417 if (!enc_capable(str1
)) return Qnil
;
1418 if (!enc_capable(str2
)) return Qnil
;
1419 enc
= rb_enc_compatible(str1
, str2
);
1420 if (!enc
) return Qnil
;
1421 return rb_enc_from_encoding(enc
);
1424 NORETURN(static VALUE
enc_s_alloc(VALUE klass
));
1427 enc_s_alloc(VALUE klass
)
1429 rb_undefined_alloc(klass
);
1430 UNREACHABLE_RETURN(Qnil
);
1435 enc_dump(int argc
, VALUE
*argv
, VALUE self
)
1437 rb_check_arity(argc
, 0, 1);
1438 return enc_name(self
);
1443 enc_load(VALUE klass
, VALUE str
)
1450 enc_m_loader(VALUE klass
, VALUE str
)
1452 return enc_find(klass
, str
);
1456 rb_ascii8bit_encoding(void)
1458 return global_enc_ascii
;
1462 rb_ascii8bit_encindex(void)
1464 return ENCINDEX_ASCII_8BIT
;
1468 rb_utf8_encoding(void)
1470 return global_enc_utf_8
;
1474 rb_utf8_encindex(void)
1476 return ENCINDEX_UTF_8
;
1480 rb_usascii_encoding(void)
1482 return global_enc_us_ascii
;
1486 rb_usascii_encindex(void)
1488 return ENCINDEX_US_ASCII
;
1491 int rb_locale_charmap_index(void);
1494 rb_locale_encindex(void)
1496 int idx
= rb_locale_charmap_index();
1498 if (idx
< 0) idx
= ENCINDEX_UTF_8
;
1500 if (enc_registered(&global_enc_table
, "locale") < 0) {
1502 void Init_w32_codepage(void);
1503 Init_w32_codepage();
1505 GLOBAL_ENC_TABLE_ENTER(enc_table
);
1507 enc_alias_internal(enc_table
, "locale", idx
);
1509 GLOBAL_ENC_TABLE_LEAVE();
1516 rb_locale_encoding(void)
1518 return rb_enc_from_index(rb_locale_encindex());
1522 rb_filesystem_encindex(void)
1524 int idx
= enc_registered(&global_enc_table
, "filesystem");
1525 if (idx
< 0) idx
= ENCINDEX_ASCII_8BIT
;
1530 rb_filesystem_encoding(void)
1532 return rb_enc_from_index(rb_filesystem_encindex());
1535 struct default_encoding
{
1536 int index
; /* -2 => not yet set, -1 => nil */
1540 static struct default_encoding default_external
= {0};
1543 enc_set_default_encoding(struct default_encoding
*def
, VALUE encoding
, const char *name
)
1545 int overridden
= FALSE
;
1547 if (def
->index
!= -2)
1551 GLOBAL_ENC_TABLE_ENTER(enc_table
);
1553 if (NIL_P(encoding
)) {
1556 char *name_dup
= strdup(name
);
1558 st_data_t existing_name
= (st_data_t
)name_dup
;
1559 if (st_delete(enc_table
->names
, &existing_name
, NULL
)) {
1560 xfree((void *)existing_name
);
1563 st_insert(enc_table
->names
, (st_data_t
)name_dup
,
1564 (st_data_t
)UNSPECIFIED_ENCODING
);
1567 def
->index
= rb_enc_to_index(rb_to_encoding(encoding
));
1569 enc_alias_internal(enc_table
, name
, def
->index
);
1572 if (def
== &default_external
) {
1573 enc_alias_internal(enc_table
, "filesystem", Init_enc_set_filesystem_encoding());
1576 GLOBAL_ENC_TABLE_LEAVE();
1582 rb_default_external_encoding(void)
1584 if (default_external
.enc
) return default_external
.enc
;
1586 if (default_external
.index
>= 0) {
1587 default_external
.enc
= rb_enc_from_index(default_external
.index
);
1588 return default_external
.enc
;
1591 return rb_locale_encoding();
1596 rb_enc_default_external(void)
1598 return rb_enc_from_encoding(rb_default_external_encoding());
1603 * Encoding.default_external -> enc
1605 * Returns default external encoding.
1607 * The default external encoding is used by default for strings created from
1608 * the following locations:
1611 * * File data read from disk
1614 * * Zlib::GzipReader
1615 * * Zlib::GzipWriter
1619 * While strings created from these locations will have this encoding, the
1620 * encoding may not be valid. Be sure to check String#valid_encoding?.
1622 * File data written to disk will be transcoded to the default external
1623 * encoding when written, if default_internal is not nil.
1625 * The default external encoding is initialized by the -E option.
1626 * If -E isn't set, it is initialized to UTF-8 on Windows and the locale on
1627 * other operating systems.
/* Implementation of the Encoding.default_external singleton method (it is
 * registered under that name further down this file).  `klass` is not used;
 * the value comes from rb_enc_default_external(). */
1630 get_default_external(VALUE klass
)
1632 return rb_enc_default_external();
/*
 * Sets the default external encoding.
 *
 * Raises ArgumentError when `encoding` is nil; otherwise hands the value to
 * enc_set_default_encoding() together with &default_external.
 * NOTE(review): the final argument of that call is cut off in this excerpt.
 */
1636 rb_enc_set_default_external(VALUE encoding
)
/* nil is rejected here before the slot is touched. */
1638 if (NIL_P(encoding
)) {
1639 rb_raise(rb_eArgError
, "default external can not be nil");
1641 enc_set_default_encoding(&default_external
, encoding
,
1647 * Encoding.default_external = enc
1649 * Sets default external encoding. You should not set
1650 * Encoding::default_external in ruby code as strings created before changing
1651 * the value may have a different encoding from strings created after the value
1652 * was changed. Instead, you should use <tt>ruby -E</tt> to invoke ruby with
1653 * the correct default_external.
1655 * See Encoding::default_external for information on how the default external
/* Implementation of the Encoding.default_external= singleton method (it is
 * registered under that name further down this file).  Issues an
 * rb_warning() and then delegates to rb_enc_set_default_external();
 * `klass` is not used. */
1659 set_default_external(VALUE klass
, VALUE encoding
)
1661 rb_warning("setting Encoding.default_external");
1662 rb_enc_set_default_external(encoding
);
/* File-scope slot holding the default internal encoding; brace-initialized
 * with a single -2.  NOTE(review): presumably this puts the "not yet set"
 * marker into the index member -- confirm against the struct declaration
 * above this excerpt. */
1666 static struct default_encoding default_internal
= {-2};
/*
 * Returns the default internal encoding, or NULL when there is none.
 *
 * When the cached pointer is empty and default_internal.index is
 * non-negative, the encoding is looked up by index and cached first.
 */
1669 rb_default_internal_encoding(void)
/* Resolve lazily: only when nothing is cached and the index is usable. */
1671 if (!default_internal
.enc
&& default_internal
.index
>= 0) {
1672 default_internal
.enc
= rb_enc_from_index(default_internal
.index
);
1674 return default_internal
.enc
; /* can be NULL */
/* Returns rb_default_internal_encoding() converted with
 * rb_enc_from_encoding(). */
1678 rb_enc_default_internal(void)
1680 /* Note: These functions cope with default_internal not being set */
1681 return rb_enc_from_encoding(rb_default_internal_encoding());
1686 * Encoding.default_internal -> enc
1688 * Returns default internal encoding. Strings will be transcoded to the
1689 * default internal encoding in the following places if the default internal
1690 * encoding is not nil:
1693 * * Etc.sysconfdir and Etc.systmpdir
1694 * * File data read from disk
1695 * * File names from Dir
1697 * * String#inspect and Regexp#inspect
1698 * * Strings returned from Readline
1699 * * Strings returned from SDBM
1702 * * Values in ARGV including $PROGRAM_NAME
1704 * Additionally String#encode and String#encode! use the default internal
1705 * encoding if no encoding is given.
1707 * The script encoding (__ENCODING__), not default_internal, is used as the
1708 * encoding of created strings.
1710 * Encoding::default_internal is initialized with -E option or nil otherwise.
/* Implementation of the Encoding.default_internal singleton method (it is
 * registered under that name further down this file).  `klass` is not used;
 * the value comes from rb_enc_default_internal(). */
1713 get_default_internal(VALUE klass
)
1715 return rb_enc_default_internal();
/*
 * Sets the default internal encoding by handing `encoding` to
 * enc_set_default_encoding() together with &default_internal.  No nil
 * check appears in the visible lines.
 * NOTE(review): the final argument of the call is cut off in this excerpt.
 */
1719 rb_enc_set_default_internal(VALUE encoding
)
1721 enc_set_default_encoding(&default_internal
, encoding
,
1727 * Encoding.default_internal = enc or nil
1729 * Sets default internal encoding or removes default internal encoding when
1730 * passed nil. You should not set Encoding::default_internal in ruby code as
1731 * strings created before changing the value may have a different encoding
1732 * from strings created after the change. Instead you should use
1733 * <tt>ruby -E</tt> to invoke ruby with the correct default_internal.
1735 * See Encoding::default_internal for information on how the default internal
/* Implementation of the Encoding.default_internal= singleton method (it is
 * registered under that name further down this file).  Issues an
 * rb_warning() and then delegates to rb_enc_set_default_internal();
 * `klass` is not used. */
1739 set_default_internal(VALUE klass
, VALUE encoding
)
1741 rb_warning("setting Encoding.default_internal");
1742 rb_enc_set_default_internal(encoding
);
/*
 * Defines constants under Encoding (rb_cEncoding) that refer to `enc`,
 * using spellings derived from `name`.
 *
 *   name - encoding name used as the basis for the constant name(s).
 *   enc  - encoding the constants refer to; wrapped once with
 *          rb_enc_from_encoding() and reused for every definition.
 *
 * NOTE(review): several control-flow lines of this function (braces, loop
 * headers, some assignments) are missing from this excerpt, so the nesting
 * of the statements below cannot be read off directly; the comments cover
 * only what each visible statement does.
 */
1747 set_encoding_const(const char *name
, rb_encoding
*enc
)
1749 VALUE encoding
= rb_enc_from_encoding(enc
);
1750 char *s
= (char *)name
;
/* Flags collected while scanning `name`. */
1751 int haslower
= 0, hasupper
= 0, valid
= 0;
/* A name that starts with a digit yields no constant at all. */
1753 if (ISDIGIT(*s
)) return;
/* Walk the remaining characters for as long as they are alphanumeric or
 * '_', noting whether any lowercase letter appears. */
1756 while (*++s
&& (ISALNUM(*s
) || *s
== '_')) {
1757 if (ISLOWER(*s
)) haslower
= 1;
/* Over-long names are dropped; otherwise define the constant with the name
 * exactly as given. */
1761 if (s
- name
> ENCODING_NAMELEN_MAX
) return;
1763 rb_define_const(rb_cEncoding
, name
, encoding
);
/* The name needs rewriting (not valid as-is, or contains lowercase). */
1765 if (!valid
|| haslower
) {
/* Length scanned so far; checked against the limit again. */
1766 size_t len
= s
- name
;
1767 if (len
> ENCODING_NAMELEN_MAX
) return;
/* Keep scanning until both a lowercase and an uppercase letter have been
 * seen, or the string ends. */
1768 if (!haslower
|| !hasupper
) {
1770 if (ISLOWER(*s
)) haslower
= 1;
1771 if (ISUPPER(*s
)) hasupper
= 1;
1772 } while (*++s
&& (!haslower
|| !hasupper
));
/* Copy `len` chars of the name into stack memory (ALLOCA_N) so it can be
 * edited in place.  NOTE(review): the post-increment presumably makes room
 * for the terminating NUL -- confirm. */
1776 if (len
++ > ENCODING_NAMELEN_MAX
) return;
1777 MEMCPY(s
= ALLOCA_N(char, len
), name
, char, len
);
/* Uppercase a lowercase character and turn a non-alphanumeric character
 * into '_' (the enclosing loop header is not visible here). */
1780 if (ISLOWER(*s
)) *s
= ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s
);
1782 if (!ISALNUM(*s
)) *s
= '_';
1785 rb_define_const(rb_cEncoding
, name
, encoding
);
/* Uppercase every lowercase character of the name, then define the constant
 * under that spelling. */
1789 for (s
= (char *)name
; *s
; ++s
) {
1790 if (ISLOWER(*s
)) *s
= ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s
);
1792 rb_define_const(rb_cEncoding
, name
, encoding
);
/*
 * st_foreach callback used by rb_enc_name_list().
 *
 *   name - table key, read as a C string.
 *   idx  - table value; not used in the visible lines.
 *   arg  - the Ruby array to append to, passed as st_data_t.
 *
 * Appends rb_fstring_cstr(name) to the array.  The return statement is not
 * visible in this excerpt.
 */
1798 rb_enc_name_list_i(st_data_t name
, st_data_t idx
, st_data_t arg
)
1800 VALUE ary
= (VALUE
)arg
;
1801 VALUE str
= rb_fstring_cstr((char *)name
);
1802 rb_ary_push(ary
, str
);
1808 * Encoding.name_list -> ["enc1", "enc2", ...]
1810 * Returns the list of available encoding names.
1812 * Encoding.name_list
1813 * #=> ["US-ASCII", "ASCII-8BIT", "UTF-8",
1814 * "ISO-8859-1", "Shift_JIS", "EUC-JP",
1816 * "BINARY", "CP932", "eucJP"]
/*
 * Implementation of Encoding.name_list (registered under that name further
 * down this file).
 *
 * Creates an array with capacity for every entry of the global name table
 * and fills it by running rb_enc_name_list_i over the table.  `klass` is
 * not used.  The return statement is not visible in this excerpt.
 */
1821 rb_enc_name_list(VALUE klass
)
1823 VALUE ary
= rb_ary_new2(global_enc_table
.names
->num_entries
);
1824 st_foreach(global_enc_table
.names
, rb_enc_name_list_i
, (st_data_t
)ary
);
/*
 * st_foreach callback used by rb_enc_aliases().
 *
 *   name - table key, read as a C string (a registered encoding name).
 *   orig - table value, used as an encoding index.
 *   arg  - pointer to two VALUEs: [0] the result hash, [1] an array used
 *          to keep one name string per encoding index.
 *
 * Stores name => encoding-name in the hash.
 * NOTE(review): some lines (a guard around the lookup, the statement that
 * follows the name comparison, the final return) are not visible here.
 */
1829 rb_enc_aliases_enc_i(st_data_t name
, st_data_t orig
, st_data_t arg
)
1831 VALUE
*p
= (VALUE
*)arg
;
1832 VALUE aliases
= p
[0], ary
= p
[1];
1833 int idx
= (int)orig
;
/* Start from whatever string is already stored for this index. */
1834 VALUE key
, str
= rb_ary_entry(ary
, idx
);
1837 rb_encoding
*enc
= rb_enc_from_index(idx
);
/* Index without an encoding: nothing to record, keep iterating. */
1839 if (!enc
) return ST_CONTINUE
;
/* Case-insensitive check for `name` being the encoding's own name. */
1840 if (STRCASECMP((char*)name
, rb_enc_name(enc
)) == 0) {
/* Create the encoding-name string and remember it at this index. */
1843 str
= rb_fstring_cstr(rb_enc_name(enc
));
1844 rb_ary_store(ary
, idx
, str
);
/* Record the mapping name => encoding name. */
1846 key
= rb_fstring_cstr((char *)name
);
1847 rb_hash_aset(aliases
, key
, str
);
1853 * Encoding.aliases -> {"alias1" => "orig1", "alias2" => "orig2", ...}
1855 * Returns a hash that maps each available encoding alias to its original encoding name.
1858 * #=> {"BINARY"=>"ASCII-8BIT", "ASCII"=>"US-ASCII", "ANSI_X3.4-1968"=>"US-ASCII",
1859 * "SJIS"=>"Windows-31J", "eucJP"=>"EUC-JP", "CP932"=>"Windows-31J"}
/*
 * Implementation of Encoding.aliases (registered under that name further
 * down this file).
 *
 * Prepares a two-element work area -- [0] a new hash, [1] a new array --
 * and passes it to rb_enc_aliases_enc_i for every entry of the global name
 * table.  `klass` is not used.  The declaration of `aliases` and the return
 * statement are not visible in this excerpt.
 */
1864 rb_enc_aliases(VALUE klass
)
1867 aliases
[0] = rb_hash_new();
1868 aliases
[1] = rb_ary_new();
1870 st_foreach(global_enc_table
.names
, rb_enc_aliases_enc_i
, (st_data_t
)aliases
);
1876 * An \Encoding instance represents a character encoding usable in Ruby.
1877 * It is defined as a constant under the \Encoding namespace.
1878 * It has a name and, optionally, aliases:
1880 * Encoding::US_ASCII.name # => "US-ASCII"
1881 * Encoding::US_ASCII.names # => ["US-ASCII", "ASCII", "ANSI_X3.4-1968", "646"]
1883 * A Ruby method that accepts an encoding as an argument will accept:
1885 * - An \Encoding object.
1886 * - The name of an encoding.
1887 * - An alias for an encoding name.
1889 * These are equivalent:
1891 * 'foo'.encode(Encoding::US_ASCII) # Encoding object.
1892 * 'foo'.encode('US-ASCII') # Encoding name.
1893 * 'foo'.encode('ASCII') # Encoding alias.
1895 * For a full discussion of encodings and their uses,
1896 * see {the Encodings document}[rdoc-ref:encodings.rdoc].
1898 * Encoding::ASCII_8BIT is a special-purpose encoding that is usually used for
1899 * a string of bytes, not a string of characters.
1900 * But as the name indicates, its characters in the ASCII range
1901 * are considered as ASCII characters.
1902 * This is useful when you use other ASCII-compatible encodings.
/*
 * Sets up the Encoding class: creates it, registers its instance and
 * singleton methods, and builds the array of Encoding objects for the
 * encodings present in the global table.
 * NOTE(review): the header of the enclosing function and the declarations
 * of `list` and `i` are not visible in this excerpt.
 */
1912 rb_cEncoding
= rb_define_class("Encoding", rb_cObject
);
/* Allocation goes through enc_s_alloc, and `new` is undefined on the
 * class's own class. */
1913 rb_define_alloc_func(rb_cEncoding
, enc_s_alloc
);
1914 rb_undef_method(CLASS_OF(rb_cEncoding
), "new");
/* Instance methods; "to_s" and "name" share enc_name. */
1915 rb_define_method(rb_cEncoding
, "to_s", enc_name
, 0);
1916 rb_define_method(rb_cEncoding
, "inspect", enc_inspect
, 0);
1917 rb_define_method(rb_cEncoding
, "name", enc_name
, 0);
1918 rb_define_method(rb_cEncoding
, "names", enc_names
, 0);
1919 rb_define_method(rb_cEncoding
, "dummy?", enc_dummy_p
, 0);
1920 rb_define_method(rb_cEncoding
, "ascii_compatible?", enc_ascii_compatible_p
, 0);
/* Singleton (class-level) query methods. */
1921 rb_define_singleton_method(rb_cEncoding
, "list", enc_list
, 0);
1922 rb_define_singleton_method(rb_cEncoding
, "name_list", rb_enc_name_list
, 0);
1923 rb_define_singleton_method(rb_cEncoding
, "aliases", rb_enc_aliases
, 0);
1924 rb_define_singleton_method(rb_cEncoding
, "find", enc_find
, 1);
1925 rb_define_singleton_method(rb_cEncoding
, "compatible?", enc_compatible_p
, 2);
/* _dump / _load pair. */
1927 rb_define_method(rb_cEncoding
, "_dump", enc_dump
, -1);
1928 rb_define_singleton_method(rb_cEncoding
, "_load", enc_load
, 1);
/* Accessors for the default external / internal encodings. */
1930 rb_define_singleton_method(rb_cEncoding
, "default_external", get_default_external
, 0);
1931 rb_define_singleton_method(rb_cEncoding
, "default_external=", set_default_external
, 1);
1932 rb_define_singleton_method(rb_cEncoding
, "default_internal", get_default_internal
, 0);
1933 rb_define_singleton_method(rb_cEncoding
, "default_internal=", set_default_internal
, 1);
1934 rb_define_singleton_method(rb_cEncoding
, "locale_charmap", rb_locale_charmap
, 0); /* in localeinit.c */
/* Uses the global table directly here rather than through the
 * GLOBAL_ENC_TABLE_ENTER macro. */
1936 struct enc_table
*enc_table
= &global_enc_table
;
/* The array is also kept in the file-scope rb_encoding_list, has its class
 * cleared, and is registered as a VM global object. */
1938 list
= rb_encoding_list
= rb_ary_new2(ENCODING_LIST_CAPA
);
1939 RBASIC_CLEAR_CLASS(list
);
1940 rb_vm_register_global_object(list
);
/* One Encoding object per table entry, pushed in index order. */
1942 for (i
= 0; i
< enc_table
->count
; ++i
) {
1943 rb_ary_push(list
, enc_new(enc_table
->list
[i
].enc
));
1946 rb_marshal_define_compat(rb_cEncoding
, Qnil
, 0, enc_m_loader
);
/* Initializes the global encoding table by passing it to rb_enc_init(). */
1950 Init_encodings(void)
1952 rb_enc_init(&global_enc_table
);
1955 /* locale insensitive ctype functions */
/*
 * Runs `func` over every entry of the global encoding name table via
 * st_foreach.
 *
 *   func - callback receiving (name, idx, arg) for each entry.
 *   arg  - opaque value handed through to `func` unchanged.
 */
1958 rb_enc_foreach_name(int (*func
)(st_data_t name
, st_data_t idx
, st_data_t arg
), st_data_t arg
)
1960 st_foreach(global_enc_table
.names
, func
, arg
);