OPT added to MAkefile; KanjiView::Cell constructor: fg,bg changed to int (Fl_Color...
[aoi.git] / src / romanization.hxx
blob74890ca86a6168e891bd2af0603cd69251fef130
1 #ifndef _ROMANIZATION_HXX
2 #define _ROMANIZATION_HXX
4 #include <string>
5 #include <vector>
6 #include "utils.hxx"
/*! \file romanization.hxx
 * \todo refactor everything
 * \note http://www.unicode.org/charts/unihangridindex.html
 * \note <b>is_cjk*()</b> functions: https://en.wikipedia.org/wiki/CJK_Unified_Ideographs#CJK_Unified_Ideographs
 */
15 using std::string;
16 using std::vector;
/*!
 * Class responsible for converting romaji to hiragana/katakana.
 * Uses wapuro-style romanization (currently mostly Hepburn).
 * \todo kunrei-shiki and nihon-shiki
 */
23 class Romanization
25 public:
26 Romanization();
27 ~Romanization(){};
//! One element in romanization table.
struct RmnElement {
    std::vector<std::string> romaji;    //!< romaji strings stored for this entry
    std::string              hiragana;  //!< hiragana string stored for this entry
    std::string              katakana;  //!< katakana string stored for this entry
};
//! True if \a ucs is in CJK Unified Ideographs (U+4E00..U+9FFF).
static inline bool is_cjk_base ( unsigned int ucs ) { return ucs >= 0x4e00 && ucs <= 0x9fff; }
//! True if \a ucs is in CJK Unified Ideographs Extension A (U+3400..U+4DBF).
static inline bool is_cjk_a ( unsigned int ucs ) { return ucs >= 0x3400 && ucs <= 0x4dbf; }
//! True if \a ucs is in CJK Unified Ideographs Extension B (U+20000..U+2A6DF).
static inline bool is_cjk_b ( unsigned int ucs ) { return ucs >= 0x20000 && ucs <= 0x2a6df; }
//! True if \a ucs is in CJK Unified Ideographs Extension C (U+2A700..U+2B73F).
static inline bool is_cjk_c ( unsigned int ucs ) { return ucs >= 0x2a700 && ucs <= 0x2b73f; }
//! True if \a ucs is in CJK Unified Ideographs Extension D (U+2B740..U+2B81F).
static inline bool is_cjk_d ( unsigned int ucs ) { return ucs >= 0x2b740 && ucs <= 0x2b81f; }

// range U+1B000 - U+1B0FF is both hiragana and katakana
// (it is not matched by either predicate below)

//! True if \a ucs is in the Hiragana block (U+3040..U+309F).
static inline bool is_hiragana ( unsigned int ucs ) { return ucs >= 0x3040 && ucs <= 0x309f; }

/*!
 * True if \a ucs is a katakana code point: the Katakana block
 * (U+30A0..U+30FF), Katakana Phonetic Extensions (U+31F0..U+31FF) or the
 * halfwidth katakana forms (U+FF65..U+FF9F).
 *
 * The upper bound used to be 0xffef, which made every code point from
 * U+30A0 to U+FFEF (including all base kanji, Extension A and Hangul)
 * count as katakana and therefore as kana.
 */
static inline bool is_katakana ( unsigned int ucs )
{
    return ( ucs >= 0x30a0 && ucs <= 0x30ff )
        || ( ucs >= 0x31f0 && ucs <= 0x31ff )
        || ( ucs >= 0xff65 && ucs <= 0xff9f );
}

//! True if \a ucs is hiragana or katakana.
static inline bool is_kana ( unsigned int ucs ) { return is_hiragana(ucs) || is_katakana(ucs); }

//! True if \a ucs falls into any of the CJK ranges tested above (base, A, B, C, D).
static inline bool is_cjk ( unsigned int ucs )
{
    return is_cjk_base(ucs) || is_cjk_a(ucs) || is_cjk_b(ucs) || is_cjk_c(ucs) || is_cjk_d(ucs);
}
/*!
 * Converts the romaji text \a rmj to kana. Only declared here; no definition
 * is visible in this header.
 * \param rmj          romaji input
 * \param to_hiragana  output selector; romaji_to_hiragana() passes true and
 *                     romaji_to_katakana() passes false
 */
std::string romaji_to_kana ( const std::string &rmj, bool to_hiragana = true );

//! C-string overload of romaji_to_kana(). \a rmj is copied into a std::string, so it must not be null.
inline std::string romaji_to_kana ( const char *rmj, bool to_hiragana = true )
{
    return romaji_to_kana(std::string(rmj), to_hiragana);
}

//! Forwards to romaji_to_kana() with to_hiragana = true. \a s must not be null.
inline std::string romaji_to_hiragana ( const char *s )
{
    return romaji_to_kana(std::string(s), true);
}

//! Forwards to romaji_to_kana() with to_hiragana = true.
inline std::string romaji_to_hiragana ( const std::string &s )
{
    return romaji_to_kana(s, true);
}

//! Forwards to romaji_to_kana() with to_hiragana = false. \a s must not be null.
inline std::string romaji_to_katakana ( const char *s )
{
    return romaji_to_kana(std::string(s), false);
}

//! Forwards to romaji_to_kana() with to_hiragana = false.
inline std::string romaji_to_katakana ( const std::string &s )
{
    return romaji_to_kana(s, false);
}
/*!
 * Presumably converts the hiragana in \a s to katakana (judging by the name;
 * the definition is not in this header - confirm against the implementation).
 *
 * Declared without `inline`: an inline function must be defined in every
 * translation unit that uses it, and this header provides no definition, so
 * callers in other translation units could not link against it.
 */
std::string hiragana_to_katakana ( const std::string &s );

//! Presumably converts the kana text \a k to romaji; only declared here, definition not visible in this header.
std::string kana_to_romaji ( const std::string &k );
66 //! Returns true if at least one letter in <i>s</i> is a kanji.
67 static inline bool contains_kanji ( const char *s )
69 for( unsigned int code: utils::utf8_to_ints(s) )
70 if ( is_cjk(code) )
71 return true;
72 return false;
75 //! Returns true if all letters in <i>s</i> are kanji.
76 static inline bool is_kanji ( const char *s )
78 for ( unsigned int code: utils::utf8_to_ints(s) )
79 if ( !is_cjk(code) )
80 return false;
81 return true;
84 static inline bool is_kanji ( int code ) { return is_cjk(code);};
86 private:
87 vector<RmnElement> table_;
88 size_t current_romaji_;
89 string tsu_h_;
90 string tsu_k_;
91 vector<string> tsus_;
96 #endif // _ROMANIZATION_HXX