// $Id: tinygettext.cpp 4776 2007-02-02 16:39:01Z ravualhemio $
//
// Copyright (C) 2006 Ingo Ruhnke <grumbel@gmx.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 #include <sys/types.h>
29 #include "console/console.hpp"
30 #include "findlocale.hpp"
31 #include "tinygettext.hpp"
32 #include "physfs/physfs.hpp"
34 //#define TRANSLATION_DEBUG
35 #define log_warning warning
37 namespace TinyGetText
{
39 /** Convert \a which is in \a from_charset to \a to_charset and return it */
40 std::string
convert(const std::string
& text
,
41 const std::string
& from_charset
,
42 const std::string
& to_charset
)
44 if (from_charset
== to_charset
)
47 iconv_t cd
= iconv_open(to_charset
.c_str(), from_charset
.c_str());
49 size_t in_len
= text
.length();
50 size_t out_len
= text
.length()*3; // FIXME: cross fingers that this is enough
52 char* out_orig
= new char[out_len
];
53 char* in_orig
= new char[in_len
+1];
54 strcpy(in_orig
, text
.c_str());
57 ICONV_CONST
char* in
= in_orig
;
58 size_t out_len_temp
= out_len
; // iconv is counting down the bytes it has
59 // written from this...
61 size_t retval
= iconv(cd
, &in
, &in_len
, &out
, &out_len_temp
);
62 out_len
-= out_len_temp
; // see above
63 if (retval
== (size_t) -1)
65 log_warning
<< strerror(errno
) << std::endl
;
66 log_warning
<< "Error: conversion from " << from_charset
<< " to " << to_charset
<< " went wrong: " << retval
<< std::endl
;
71 std::string
ret(out_orig
, out_len
);
/** Return true if \a lhs ends with \a rhs (an empty \a rhs always matches). */
bool has_suffix(const std::string& lhs, const std::string rhs)
{
  if (lhs.length() < rhs.length())
    return false;

  const std::string::size_type offset = lhs.length() - rhs.length();
  return lhs.compare(offset, rhs.length(), rhs) == 0;
}
/** Return true if \a lhs starts with \a rhs (an empty \a rhs always matches). */
bool has_prefix(const std::string& lhs, const std::string rhs)
{
  if (lhs.length() < rhs.length())
    return false;

  return lhs.compare(0, rhs.length(), rhs) == 0;
}
// Plural-form selectors. Each maps a count n to the index of the msgstr[]
// form to use; the matching gettext "plural=" expressions are quoted next
// to the language definitions that reference these functions.

/** Single form, regardless of n. */
int plural1(int )     { return 0; }

/** Two forms: singular only for n == 1. */
int plural2_1(int n)  { return (n != 1); }

/** Two forms: singular for n <= 1. */
int plural2_2(int n)  { return (n > 1); }

/** Three forms, Latvian rule. */
int plural3_lv(int n) { return (n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2); }

/** Three forms, Irish rule. */
int plural3_ga(int n) { return n==1 ? 0 : n==2 ? 1 : 2; }

/** Three forms, Lithuanian rule. */
int plural3_lt(int n) { return (n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2); }

/** Three forms, shared by Croatian, Czech, Russian and Ukrainian below. */
int plural3_1(int n)  { return (n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2); }

/** Three forms, Slovak rule. */
int plural3_sk(int n) { return (n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2; }

/** Three forms, Polish rule. */
int plural3_pl(int n) { return (n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2); }

/** Slovenian rule; note that it yields four distinct indices (0..3). */
int plural3_sl(int n) { return (n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3); }
104 /** Language Definitions */
106 LanguageDef
lang_hu("hu", "Hungarian", 1, plural1
); // "nplurals=1; plural=0;"
107 LanguageDef
lang_ja("ja", "Japanese", 1, plural1
); // "nplurals=1; plural=0;"
108 LanguageDef
lang_ko("ko", "Korean", 1, plural1
); // "nplurals=1; plural=0;"
109 LanguageDef
lang_tr("tr", "Turkish", 1, plural1
); // "nplurals=1; plural=0;"
110 LanguageDef
lang_da("da", "Danish", 2, plural2_1
); // "nplurals=2; plural=(n != 1);"
111 LanguageDef
lang_nl("nl", "Dutch", 2, plural2_1
); // "nplurals=2; plural=(n != 1);"
112 LanguageDef
lang_en("en", "English", 2, plural2_1
); // "nplurals=2; plural=(n != 1);"
113 LanguageDef
lang_fo("fo", "Faroese", 2, plural2_1
); // "nplurals=2; plural=(n != 1);"
114 LanguageDef
lang_de("de", "German", 2, plural2_1
); // "nplurals=2; plural=(n != 1);"
115 LanguageDef
lang_nb("nb", "Norwegian Bokmal", 2, plural2_1
); // "nplurals=2; plural=(n != 1);"
116 LanguageDef
lang_no("no", "Norwegian", 2, plural2_1
); // "nplurals=2; plural=(n != 1);"
117 LanguageDef
lang_nn("nn", "Norwegian Nynorsk", 2, plural2_1
); // "nplurals=2; plural=(n != 1);"
118 LanguageDef
lang_sv("sv", "Swedish", 2, plural2_1
); // "nplurals=2; plural=(n != 1);"
119 LanguageDef
lang_et("et", "Estonian", 2, plural2_1
); // "nplurals=2; plural=(n != 1);"
120 LanguageDef
lang_fi("fi", "Finnish", 2, plural2_1
); // "nplurals=2; plural=(n != 1);"
121 LanguageDef
lang_el("el", "Greek", 2, plural2_1
); // "nplurals=2; plural=(n != 1);"
122 LanguageDef
lang_he("he", "Hebrew", 2, plural2_1
); // "nplurals=2; plural=(n != 1);"
123 LanguageDef
lang_it("it", "Italian", 2, plural2_1
); // "nplurals=2; plural=(n != 1);"
124 LanguageDef
lang_pt("pt", "Portuguese", 2, plural2_1
); // "nplurals=2; plural=(n != 1);"
125 LanguageDef
lang_es("es", "Spanish", 2, plural2_1
); // "nplurals=2; plural=(n != 1);"
126 LanguageDef
lang_eo("eo", "Esperanto", 2, plural2_1
); // "nplurals=2; plural=(n != 1);"
127 LanguageDef
lang_fr("fr", "French", 2, plural2_2
); // "nplurals=2; plural=(n > 1);"
128 LanguageDef
lang_pt_BR("pt_BR", "Brazilian", 2, plural2_2
); // "nplurals=2; plural=(n > 1);"
129 LanguageDef
lang_lv("lv", "Latvian", 3, plural3_lv
); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2);"
130 LanguageDef
lang_ga("ga", "Irish", 3, plural3_ga
); // "nplurals=3; plural=n==1 ? 0 : n==2 ? 1 : 2;"
131 LanguageDef
lang_lt("lt", "Lithuanian", 3, plural3_lt
); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2);"
132 LanguageDef
lang_hr("hr", "Croatian", 3, plural3_1
); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);"
133 LanguageDef
lang_cs("cs", "Czech", 3, plural3_1
); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);"
134 LanguageDef
lang_ru("ru", "Russian", 3, plural3_1
); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);"
135 LanguageDef
lang_uk("uk", "Ukrainian", 3, plural3_1
); // "nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);"
136 LanguageDef
lang_sk("sk", "Slovak", 3, plural3_sk
); // "nplurals=3; plural=(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2;"
137 LanguageDef
lang_pl("pl", "Polish", 3, plural3_pl
); // "nplurals=3; plural=(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);
138 LanguageDef
lang_sl("sl", "Slovenian", 3, plural3_sl
); // "nplurals=4; plural=(n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3);"
142 get_language_def(const std::string
& name
)
144 if (name
== "hu") return lang_hu
;
145 else if (name
== "ja") return lang_ja
;
146 else if (name
== "ko") return lang_ko
;
147 else if (name
== "tr") return lang_tr
;
148 else if (name
== "da") return lang_da
;
149 else if (name
== "nl") return lang_nl
;
150 else if (name
== "en") return lang_en
;
151 else if (name
== "fo") return lang_fo
;
152 else if (name
== "de") return lang_de
;
153 else if (name
== "nb") return lang_nb
;
154 else if (name
== "no") return lang_no
;
155 else if (name
== "nn") return lang_nn
;
156 else if (name
== "sv") return lang_sv
;
157 else if (name
== "et") return lang_et
;
158 else if (name
== "fi") return lang_fi
;
159 else if (name
== "el") return lang_el
;
160 else if (name
== "he") return lang_he
;
161 else if (name
== "it") return lang_it
;
162 else if (name
== "pt") return lang_pt
;
163 else if (name
== "es") return lang_es
;
164 else if (name
== "eo") return lang_eo
;
165 else if (name
== "fr") return lang_fr
;
166 else if (name
== "pt_BR") return lang_pt_BR
;
167 else if (name
== "lv") return lang_lv
;
168 else if (name
== "ga") return lang_ga
;
169 else if (name
== "lt") return lang_lt
;
170 else if (name
== "hr") return lang_hr
;
171 else if (name
== "cs") return lang_cs
;
172 else if (name
== "ru") return lang_ru
;
173 else if (name
== "uk") return lang_uk
;
174 else if (name
== "sk") return lang_sk
;
175 else if (name
== "pl") return lang_pl
;
176 else if (name
== "sl") return lang_sl
;
180 DictionaryManager::DictionaryManager()
181 : current_dict(&empty_dict
)
183 parseLocaleAliases();
184 // Environment variable DBW_LANG overrides language settings.
185 const char* lang
= getenv( "DBW_LANG" );
187 set_language( lang
);
190 // use findlocale to setup language
192 FL_FindLocale( &locale
, FL_MESSAGES
);
194 if (locale
->country
) {
195 set_language( std::string(locale
->lang
)+"_"+std::string(locale
->country
) );
197 set_language( std::string(locale
->lang
) );
200 FL_FreeLocale( &locale
);
204 DictionaryManager::parseLocaleAliases()
206 // try to parse language alias list
207 std::ifstream
in("/usr/share/locale/locale.alias");
210 while(in
.good() && !in
.eof()) {
211 while(isspace(c
) && !in
.eof())
214 if(c
== '#') { // skip comments
215 while(c
!= '\n' && !in
.eof())
221 while(!isspace(c
) && !in
.eof()) {
225 while(isspace(c
) && !in
.eof())
227 std::string language
;
228 while(!isspace(c
) && !in
.eof()) {
235 set_language_alias(alias
, language
);
240 DictionaryManager::get_dictionary(const std::string
& spec
)
243 //log_debug << "Dictionary for language \"" << spec << "\" requested" << std::endl;
245 std::string lang
= get_language_from_spec(spec
);
247 //log_debug << "...normalized as \"" << lang << "\"" << std::endl;
249 Dictionaries::iterator i
= dictionaries
.find(get_language_from_spec(lang
));
250 if (i
!= dictionaries
.end())
254 else // Dictionary for languages lang isn't loaded, so we load it
256 //log_debug << "get_dictionary: " << lang << std::endl;
257 Dictionary
& dict
= dictionaries
[lang
];
259 dict
.set_language(get_language_def(lang
));
261 dict
.set_charset(charset
);
263 for (SearchPath::iterator p
= search_path
.begin(); p
!= search_path
.end(); ++p
)
265 char** files
= PHYSFS_enumerateFiles(p
->c_str());
268 log_warning
<< "Error: enumerateFiles() failed on " << *p
<< std::endl
;
272 for(const char* const* filename
= files
;
273 *filename
!= 0; filename
++) {
275 // check if filename matches requested language
276 std::string fname
= std::string(*filename
);
277 std::string load_from_file
= "";
278 if(fname
== lang
+ ".po") {
279 load_from_file
= fname
;
281 std::string::size_type s
= lang
.find("_");
282 if(s
!= std::string::npos
) {
283 std::string lang_short
= std::string(lang
, 0, s
);
284 if (fname
== lang_short
+ ".po") {
285 load_from_file
= lang_short
;
290 // if it matched, load dictionary
291 if (load_from_file
!= "") {
292 //log_debug << "Loading dictionary for language \"" << lang << "\" from \"" << filename << "\"" << std::endl;
293 std::string pofile
= *p
+ "/" + *filename
;
295 PhysFS::IStream
in(pofile
);
296 read_po_file(dict
, in
);
297 } catch(std::exception
& e
) {
298 log_warning
<< "Error: Failure file opening: " << pofile
<< std::endl
;
299 log_warning
<< e
.what() << "" << std::endl
;
304 PHYSFS_freeList(files
);
312 std::set
<std::string
>
313 DictionaryManager::get_languages()
315 std::set
<std::string
> languages
;
317 for (SearchPath::iterator p
= search_path
.begin(); p
!= search_path
.end(); ++p
)
319 char** files
= PHYSFS_enumerateFiles(p
->c_str());
322 log_warning
<< "Error: opendir() failed on " << *p
<< std::endl
;
326 for(const char* const* file
= files
; *file
!= 0; file
++) {
327 if(has_suffix(*file
, ".po")) {
328 std::string filename
= *file
;
329 languages
.insert(filename
.substr(0, filename
.length()-3));
332 PHYSFS_freeList(files
);
339 DictionaryManager::set_language(const std::string
& lang
)
341 //log_debug << "set_language \"" << lang << "\"" << std::endl;
342 language
= get_language_from_spec(lang
);
343 //log_debug << "==> \"" << language << "\"" << std::endl;
344 current_dict
= & (get_dictionary(language
));
348 DictionaryManager::get_language() const
354 DictionaryManager::set_charset(const std::string
& charset
)
356 dictionaries
.clear(); // changing charset invalidates cache
357 this->charset
= charset
;
358 set_language(language
);
362 DictionaryManager::set_language_alias(const std::string
& alias
,
363 const std::string
& language
)
365 language_aliases
.insert(std::make_pair(alias
, language
));
369 DictionaryManager::get_language_from_spec(const std::string
& spec
)
371 std::string lang
= spec
;
372 Aliases::iterator i
= language_aliases
.find(lang
);
373 if(i
!= language_aliases
.end()) {
377 std::string::size_type s
= lang
.find(".");
378 if(s
!= std::string::npos
) {
379 lang
= std::string(lang
, 0, s
);
383 if(s
== std::string::npos
) {
384 std::string lang_big
= lang
;
385 std::transform (lang_big
.begin(), lang_big
.end(), lang_big
.begin(), toupper
);
386 lang
+= "_" + lang_big
;
394 DictionaryManager::add_directory(const std::string
& pathname
)
396 dictionaries
.clear(); // adding directories invalidates cache
397 search_path
.push_back(pathname
);
398 set_language(language
);
401 //---------------------------------------------------------------------------
403 Dictionary::Dictionary(const LanguageDef
& language_
, const std::string
& charset_
)
404 : language(language_
), charset(charset_
)
408 Dictionary::Dictionary()
414 Dictionary::get_charset() const
420 Dictionary::set_charset(const std::string
& charset_
)
426 Dictionary::set_language(const LanguageDef
& lang
)
432 Dictionary::translate(const std::string
& msgid
, const std::string
& msgid2
, int num
)
434 PluralEntries::iterator i
= plural_entries
.find(msgid
);
435 std::map
<int, std::string
>& msgstrs
= i
->second
;
437 if (i
!= plural_entries
.end() && !msgstrs
.empty())
439 int g
= language
.plural(num
);
440 std::map
<int, std::string
>::iterator j
= msgstrs
.find(g
);
441 if (j
!= msgstrs
.end())
447 // Return the first translation, in case we can't translate the specific number
448 return msgstrs
.begin()->second
;
453 #ifdef TRANSLATION_DEBUG
454 log_warning
<< "Couldn't translate: " << msgid
<< std::endl
;
455 log_warning
<< "Candidates: " << std::endl
;
456 for (PluralEntries::iterator i
= plural_entries
.begin(); i
!= plural_entries
.end(); ++i
)
457 log_debug
<< "'" << i
->first
<< "'" << std::endl
;
460 if (plural2_1(num
)) // default to english rules
468 Dictionary::translate(const char* msgid
)
470 Entries::iterator i
= entries
.find(msgid
);
471 if (i
!= entries
.end() && !i
->second
.empty())
473 return i
->second
.c_str();
477 #ifdef TRANSLATION_DBEUG
478 log_warning
<< "Couldn't translate: " << msgid
<< std::endl
;
485 Dictionary::translate(const std::string
& msgid
)
487 Entries::iterator i
= entries
.find(msgid
);
488 if (i
!= entries
.end() && !i
->second
.empty())
494 #ifdef TRANSLATION_DBEUG
495 log_warning
<< "Couldn't translate: " << msgid
<< std::endl
;
502 Dictionary::add_translation(const std::string
& msgid
, const std::string
& ,
503 const std::map
<int, std::string
>& msgstrs
)
505 // Do we need msgid2 for anything? its after all supplied to the
506 // translate call, so we just throw it away
507 plural_entries
[msgid
] = msgstrs
;
511 Dictionary::add_translation(const std::string
& msgid
, const std::string
& msgstr
)
513 entries
[msgid
] = msgstr
;
527 std::string from_charset
;
528 std::string to_charset
;
530 std::string current_msgid
;
531 std::string current_msgid_plural
;
532 std::map
<int, std::string
> msgstr_plural
;
536 enum { WANT_MSGID
, WANT_MSGSTR
, WANT_MSGSTR_PLURAL
, WANT_MSGID_PLURAL
} state
;
539 POFileReader(std::istream
& in
, Dictionary
& dict_
)
545 if(c
== (char) 0xef) { // skip UTF-8 intro that some texteditors produce
554 void parse_header(const std::string
& header
)
556 // Seperate the header in lines
557 typedef std::vector
<std::string
> Lines
;
560 std::string::size_type start
= 0;
561 for(std::string::size_type i
= 0; i
< header
.length(); ++i
)
563 if (header
[i
] == '\n')
565 lines
.push_back(header
.substr(start
, i
- start
));
570 for(Lines::iterator i
= lines
.begin(); i
!= lines
.end(); ++i
)
572 if (has_prefix(*i
, "Content-Type: text/plain; charset=")) {
573 from_charset
= i
->substr(strlen("Content-Type: text/plain; charset="));
577 if (from_charset
.empty() || from_charset
== "CHARSET")
579 log_warning
<< "Error: Charset not specified for .po, fallback to ISO-8859-1" << std::endl
;
580 from_charset
= "ISO-8859-1";
583 to_charset
= dict
.get_charset();
584 if (to_charset
.empty())
585 { // No charset requested from the dict, use utf-8
586 to_charset
= "utf-8";
587 dict
.set_charset(from_charset
);
591 void add_token(const Token
& token
)
596 if (token
.keyword
== "msgid")
598 current_msgid
= token
.content
;
599 state
= WANT_MSGID_PLURAL
;
601 else if (token
.keyword
.empty())
603 debug("tinigettext", 1) << "Got EOF, everything looks ok." << std::endl
;
607 log_warning
<< "tinygettext: expected 'msgid' keyword, got " << token
.keyword
<< " at line " << line_num
<< std::endl
;
611 case WANT_MSGID_PLURAL
:
612 if (token
.keyword
== "msgid_plural")
614 current_msgid_plural
= token
.content
;
615 state
= WANT_MSGSTR_PLURAL
;
625 if (token
.keyword
== "msgstr")
627 if (current_msgid
== "")
628 { // .po Header is hidden in the msgid with the empty string
629 parse_header(token
.content
);
633 dict
.add_translation(current_msgid
, convert(token
.content
, from_charset
, to_charset
));
639 log_warning
<< "tinygettext: expected 'msgstr' keyword, got " << token
.keyword
<< " at line " << line_num
<< std::endl
;
643 case WANT_MSGSTR_PLURAL
:
644 if (has_prefix(token
.keyword
, "msgstr["))
647 if (sscanf(token
.keyword
.c_str(), "msgstr[%d]", &num
) != 1)
649 log_warning
<< "Error: Couldn't parse: " << token
.keyword
<< std::endl
;
653 msgstr_plural
[num
] = convert(token
.content
, from_charset
, to_charset
);
658 dict
.add_translation(current_msgid
, current_msgid_plural
, msgstr_plural
);
667 inline int getchar(std::istream
& in
)
675 void tokenize_po(std::istream
& in
)
677 enum State
{ READ_KEYWORD
,
679 READ_CONTENT_IN_STRING
,
682 State state
= READ_KEYWORD
;
686 while((c
= getchar(in
)) != EOF
)
688 //log_debug << "Lexing char: " << char(c) << " " << state << std::endl;
694 state
= SKIP_COMMENT
;
703 } while((c
= getchar(in
)) != EOF
&& !isspace(c
));
706 state
= READ_CONTENT
;
711 while((c
= getchar(in
)) != EOF
)
714 // Found start of content
715 state
= READ_CONTENT_IN_STRING
;
717 } else if (isspace(c
)) {
719 } else { // Read something that may be a keyword
721 state
= READ_KEYWORD
;
728 case READ_CONTENT_IN_STRING
:
733 if (c
== 'n') token
.content
+= '\n';
734 else if (c
== 't') token
.content
+= '\t';
735 else if (c
== 'r') token
.content
+= '\r';
736 else if (c
== '"') token
.content
+= '"';
737 else if (c
== '\\') token
.content
+= '\\';
740 log_warning
<< "Unhandled escape character: " << char(c
) << std::endl
;
745 log_warning
<< "Unterminated string" << std::endl
;
747 } else if (c
== '"') { // Content string is terminated
748 state
= READ_CONTENT
;
756 state
= READ_KEYWORD
;
764 void read_po_file(Dictionary
& dict_
, std::istream
& in
)
766 POFileReader
reader(in
, dict_
);
769 } // namespace TinyGetText