1 #ifndef _ROMANIZATION_HXX
2 #define _ROMANIZATION_HXX
8 /*! \file romanization.hxx
9 * \todo refactor everything
10 * \note http://www.unicode.org/charts/unihangridindex.html
11 * \note <b>is_cjk*()</b> functions: https://en.wikipedia.org/wiki/CJK_Unified_Ideographs#CJK_Unified_Ideographs
19 * Class responsible for converting romaji to hiragana/katakana.
20 * Uses wapuro-style romanization (currently mostly Hepburn).
21 * \todo kunrei-shiki and nihon-shiki
29 //! One element in romanization table.
31 vector
<string
> romaji
;
//! Tests whether <i>ucs</i> lies in U+4E00..U+9FFF (both ends inclusive),
//! the main CJK Unified Ideographs block.
inline static bool is_cjk_base ( unsigned int ucs )
{
    const unsigned int first = 0x4e00;
    const unsigned int last  = 0x9fff;
    return first <= ucs && ucs <= last;
}
//! Tests whether <i>ucs</i> lies in U+3400..U+4DBF (both ends inclusive),
//! the CJK Unified Ideographs Extension A block.
inline static bool is_cjk_a ( unsigned int ucs )
{
    const unsigned int first = 0x3400;
    const unsigned int last  = 0x4dbf;
    return first <= ucs && ucs <= last;
}
//! Tests whether <i>ucs</i> lies in U+20000..U+2A6DF (both ends inclusive),
//! the CJK Unified Ideographs Extension B block.
inline static bool is_cjk_b ( unsigned int ucs )
{
    const unsigned int first = 0x20000;
    const unsigned int last  = 0x2a6df;
    return first <= ucs && ucs <= last;
}
//! Tests whether <i>ucs</i> lies in U+2A700..U+2B73F (both ends inclusive),
//! the CJK Unified Ideographs Extension C block.
inline static bool is_cjk_c ( unsigned int ucs )
{
    const unsigned int first = 0x2a700;
    const unsigned int last  = 0x2b73f;
    return first <= ucs && ucs <= last;
}
//! Tests whether <i>ucs</i> lies in U+2B740..U+2B81F (both ends inclusive),
//! the CJK Unified Ideographs Extension D block.
inline static bool is_cjk_d ( unsigned int ucs )
{
    const unsigned int first = 0x2b740;
    const unsigned int last  = 0x2b81f;
    return first <= ucs && ucs <= last;
}
// range U+1B000 - U+1B0FF is both hiragana and katakana
//! Tests whether <i>ucs</i> lies in U+3040..U+309F (both ends inclusive),
//! the Hiragana block.
inline static bool is_hiragana ( unsigned int ucs )
{
    const unsigned int first = 0x3040;
    const unsigned int last  = 0x309f;
    return first <= ucs && ucs <= last;
}
//! Tests whether <i>ucs</i> is a katakana code point.
//!
//! Accepted ranges (all inclusive):
//!  - U+30A0..U+30FF  Katakana
//!  - U+31F0..U+31FF  Katakana Phonetic Extensions
//!  - U+FF65..U+FF9F  halfwidth katakana (part of Halfwidth and Fullwidth Forms)
//!
//! The previous single range U+30A0..U+FFEF also matched every CJK ideograph,
//! Hangul syllable and fullwidth Latin letter, so kanji were reported as kana.
inline static bool is_katakana ( unsigned int ucs )
{
    const bool in_katakana_block    = ucs >= 0x30a0 && ucs <= 0x30ff;
    const bool in_phonetic_ext      = ucs >= 0x31f0 && ucs <= 0x31ff;
    const bool in_halfwidth_katakana = ucs >= 0xff65 && ucs <= 0xff9f;
    return in_katakana_block || in_phonetic_ext || in_halfwidth_katakana;
}
43 inline static bool is_kana (unsigned int ucs
) {return is_hiragana(ucs
) || is_katakana(ucs
);}
44 inline static bool is_cjk (unsigned int ucs
)
45 {return is_cjk_base(ucs
) || is_cjk_a(ucs
) || is_cjk_b(ucs
) || is_cjk_c(ucs
) || is_cjk_d(ucs
);}
48 string
romaji_to_kana ( const string
&rmj
, bool to_hiragana
= true );
49 inline string
romaji_to_kana ( const char *rmj
, bool to_hiragana
= true )
50 { return romaji_to_kana(string(rmj
),to_hiragana
); };
52 inline string
romaji_to_hiragana ( const char *s
)
53 { return romaji_to_kana(string(s
),true); };
54 inline string
romaji_to_hiragana ( const string
&s
)
55 { return romaji_to_kana(s
,true); };
57 inline string
romaji_to_katakana ( const char *s
)
58 { return romaji_to_kana(string(s
),false); };
59 inline string
romaji_to_katakana ( const string
&s
)
60 { return romaji_to_kana(s
,false); };
62 inline string
hiragana_to_katakana ( const string
&s
);
64 string
kana_to_romaji ( const string
&k
);
66 //! Returns true if at least one letter in <i>s</i> is a kanji.
67 static inline bool contains_kanji ( const char *s
)
69 for( unsigned int code
: utils::utf8_to_ints(s
) )
75 //! Returns true if all letters in <i>s</i> are kanji.
76 static inline bool is_kanji ( const char *s
)
78 for ( unsigned int code
: utils::utf8_to_ints(s
) )
84 static inline bool is_kanji ( int code
) { return is_cjk(code
);};
87 vector
<RmnElement
> table_
;
88 size_t current_romaji_
;
96 #endif // _ROMANIZATION_HXX