2 Copyright 2013 Karel Matas
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
20 #include "parsers.hxx"
21 #include "sqlite3.hxx"
27 using aoi::ElementKanji
;
28 using aoi::ElementReading
;
29 using aoi::ElementSense
;
33 using utils::to_string
;
// Names of the temporary files that the input dictionaries are decompressed
// into (via utils::gzip_decompress_file) before the parsers read them.
35 const char *TEMP_JMDICT
= "gztemp.jmdict";
36 const char *TEMP_KANJIDIC
= "gztemp.kanjidic";
38 // http://seriss.com/people/erco/fltk/
42 // the parser should escape any occurrence of SEPARATOR_SQL
43 // ( | occurs exactly once)
44 // - ideally before parsing (after the XML has been loaded into memory)
48 * Parses JMDict. File may be GZipped.
49 * \exception utils::ParsingError
50 * \param fname path to JMDict file
// Converts a JMdict file into an SQL script.
// Visible steps: decompress `fname` into TEMP_JMDICT, parse it with
// parsers::JmdictParser, accumulate SQL text in the stream `ss`, and open
// 'script.jmdict.sql' as the output file.
// NOTE(review): several statements of this function are not visible in this
// excerpt (declaration of `ss`, the `try` keyword, braces, the n_entries
// counter, the actual write to `f`) -- confirm against the full file.
52 void parse_jmdict ( const char *fname
)
54 printf("Loading JMdict file '%s'.\n", fname
);
// The whole script is wrapped in a single transaction.
56 ss
<< "BEGIN TRANSACTION;\n";
60 printf("Decompressing file: %s -> %s\n", fname
, TEMP_JMDICT
);
61 utils::gzip_decompress_file( fname
, TEMP_JMDICT
);
// The parser reads the decompressed temporary copy, not `fname` itself.
62 parsers::JmdictParser
jmp(TEMP_JMDICT
);
// Separator handed to every element's sql() call below.
63 const char *SEP
= aoi::SEPARATOR_SQL
;
// Schema section: walks the tables listed under "main" in
// aoi_config::db_tables and emits DROP TABLE IF EXISTS + CREATE TABLE.
66 for ( auto &mi
: aoi_config::db_tables
.at("main") ){
// True for tables that neither start with "d_" nor are named "aoi".
// NOTE(review): the statement guarded by this condition is not visible here
// (presumably a `continue`, so that only "d_*" and "aoi" are recreated) --
// TODO confirm.
67 if ( strncmp( mi
.first
, "d_",2 ) !=0 && strcmp( mi
.first
, "aoi" )!=0 )
69 ss
<< "DROP TABLE IF EXISTS " << mi
.first
<< ";\n"
70 << "CREATE TABLE " << mi
.first
<< " ( ";
// Column list: "<name> <type>" pairs separated by commas; the last column
// gets no trailing comma.
71 for ( size_t i
=0; i
<mi
.second
.size(); i
++ )
72 ss
<< mi
.second
[i
].name
<< " " << mi
.second
[i
].type
73 << ((i
==mi
.second
.size()-1) ? "":",");
// Store the dictionary version in the aoi key/value table and echo it.
// NOTE(review): unlike the entity rows below, the version string is inserted
// without SQLite3::escape().
78 ss
<< "INSERT INTO aoi (key,val) VALUES ('jmdict_version', '"
79 << jmp
.get_version() << "');\n";
80 printf("jmdict_version: %s\n", jmp
.get_version().c_str());
// One d_entities row per entity pair reported by the parser; both fields are
// passed through SQLite3::escape(). `elt` is declared by value, so each pair
// is copied on every iteration.
83 for ( std::pair
<string
,string
> elt
: jmp
.get_entities() ){
84 ss
<< "INSERT INTO d_entities (abbr,desc) VALUES ('" << SQLite3::escape(elt
.first
)
85 << "','" << SQLite3::escape(elt
.second
) << "');\n";
// Entry loop: keep fetching entries until the parser returns one whose did()
// is -1. Each reading, kanji and sense element appends its own SQL through
// sql(did, SEP).
89 DicWord entry
= jmp
.get_entry();
90 while ( entry
.did() != -1 ) {
91 for ( ElementReading
&rele
: entry
.r_ele() )
92 ss
<< rele
.sql(entry
.did(),SEP
);
93 for ( ElementKanji
&kele
: entry
.k_ele() )
94 ss
<< kele
.sql(entry
.did(),SEP
);
95 for ( ElementSense
&sele
: entry
.s_ele() )
96 ss
<< sele
.sql(entry
.did(),SEP
);
99 entry
= jmp
.get_entry();
// NOTE(review): the declaration and increment of n_entries are not visible
// in this excerpt.
101 printf("%d entries processed.\n", n_entries
);
// Parsing errors are reported on stdout. What is done with `msg` afterwards
// is not visible here.
103 catch ( utils::ParsingError
&e
){
104 std::string msg
= "App::parse_jmdict(): ParsingError: ";
106 printf("parse_jmdict(): ParsingError: %s\n", e
.what() );
// Close the transaction opened at the top of the script.
110 ss
<< "END TRANSACTION;\n";
111 printf("Writing file 'script.jmdict.sql'...\n");
// Output file for the generated script (the write itself is not visible in
// this excerpt).
112 std::ofstream
f ("script.jmdict.sql");
121 * Parses kanjidic2. Works in the same way as parse_jmdict().
// Converts a kanjidic2 file into an SQL script.
// Visible steps: decompress `fname` into TEMP_KANJIDIC, parse it with
// parsers::KanjidicParser, accumulate SQL text in `ss`, open
// 'script.kanjidic.sql' as the output file and remove the temporary file.
// NOTE(review): several statements (the `try` keyword, braces, the VALUES
// clause and the final column of the k_kanji insert, the write to `f`) are
// not visible in this excerpt -- confirm against the full file.
124 void parse_kanjidic ( const char *fname
)
126 printf("Loading kanjidic file: %s\n", fname
);
// Separator used when joining multi-valued fields with to_string().
127 const char *SEP
= aoi::SEPARATOR_SQL
;
131 printf("Decompressing file: %s -> %s\n", fname
, TEMP_KANJIDIC
);
132 utils::gzip_decompress_file( fname
, TEMP_KANJIDIC
);
// The parser reads the decompressed temporary copy, not `fname` itself.
133 parsers::KanjidicParser
p(TEMP_KANJIDIC
);
// First entry is fetched up front; the loop below fetches the following ones.
134 auto kanji
= p
.get_entry();
136 std::stringstream ss
;
137 ss
<< "BEGIN TRANSACTION;\n";
// Schema section: walks the tables listed under "main" in
// aoi_config::db_tables and emits DROP TABLE IF EXISTS + CREATE TABLE.
140 for ( auto &mi
: aoi_config::db_tables
.at("main") ){
// True for tables whose name does not start with "k_".
// NOTE(review): the statement guarded by this condition is not visible here
// (presumably a `continue`, so that only "k_*" tables are recreated) --
// TODO confirm.
141 if ( strncmp( mi
.first
, "k_",2 ) !=0 )
143 ss
<< "DROP TABLE IF EXISTS " << mi
.first
<< ";\n"
144 << "CREATE TABLE " << mi
.first
<< " ( ";
// Column list: "<name> <type>" pairs separated by commas; the last column
// gets no trailing comma.
145 for ( size_t i
=0; i
<mi
.second
.size(); i
++ )
146 ss
<< mi
.second
[i
].name
<< " " << mi
.second
[i
].type
147 << ((i
==mi
.second
.size()-1) ? "":",");
// REPLACE (not INSERT) is used for the version row in the aoi table.
// The version string is inserted without SQLite3::escape().
152 ss
<< "REPLACE INTO aoi (key,val) VALUES ('kanjidic_version','"
153 << p
.get_version() << "');\n";
// Entry loop: runs until the parser returns an entry whose kanji() is "".
155 while ( kanji
.kanji() != "" ){
// One k_kanji row per entry. List-valued fields are joined with SEP by
// to_string(); the values from jlpt() to rad_nelson() are emitted unquoted.
// NOTE(review): only `meaning` goes through SQLite3::escape(); kanji,
// readings, nanori and flags are inserted as-is -- verify that they can
// never contain a single quote.
157 ss
<< "INSERT INTO k_kanji "
158 << "(kanji,ucs,onyomi,kunyomi,meaning,nanori,flags,jlpt,grade,freq,strokes,"
159 << "rad_classic,rad_nelson,components)"
161 << kanji
.kanji() << "','"
162 << kanji
.ucs() << "','"
163 << to_string(kanji
.onyomi(),SEP
) << "','"
164 << to_string(kanji
.kunyomi(),SEP
) << "','"
165 << SQLite3::escape(to_string(kanji
.meaning(),SEP
)) << "','"
166 << to_string(kanji
.nanori(),SEP
) << "','"
167 << to_string(kanji
.flags(),SEP
) << "',"
168 << kanji
.jlpt() << ","
169 << kanji
.grade() << ","
170 << kanji
.freq() << ","
171 << kanji
.strokes() << ","
172 << kanji
.rad_classic() << ","
173 << kanji
.rad_nelson() << ","
// One k_skip row per SKIP code of the current kanji.
176 for ( aoi::SKIP
&s
: kanji
.skip() ) {
177 ss
<< "INSERT INTO k_skip (kanji,skip1,skip2,skip3,misclass) VALUES('"
178 << kanji
.kanji() << "'," << s
.s1
<< "," << s
.s2
<< "," << s
.s3
179 << ",'" << s
.misclass
<< "');\n";
181 kanji
= p
.get_entry();
// NOTE(review): the terminator is written as "\n;" here, whereas
// parse_jmdict() writes ";\n" -- the script ends without a final newline.
183 ss
<< "END TRANSACTION\n;";
185 printf("Writing file 'script.kanjidic.sql'...\n");
// Output file for the generated script (the write itself is not visible in
// this excerpt).
186 std::ofstream
f ("script.kanjidic.sql");
// Parsing errors are reported on stdout.
190 catch ( utils::ParsingError
&e
){
191 printf("parse_kanjidic(): ParsingError: %s\n", e
.what());
// Delete the temporary decompressed copy.
194 remove( TEMP_KANJIDIC
);
// Command-line help text, printed to stdout line by line.
// NOTE(review): the header of the enclosing function is not visible in this
// excerpt (presumably a usage/help printer) -- TODO confirm.
201 printf("USAGE: aoi [OPTIONS]\n");
202 printf("OPTIONS:\n");
203 printf(" -geometry W*H+X+Y \n");
204 printf(" -scheme none|GTK+|plastic \n");
205 printf(" -config config string\n");
206 printf(" -parse jmdict|kanjidic parse either jmdict or kanjidic\n");
210 //////////////////////////////////////////////////////////////////////////
// Program entry point.
// Scans argv: "-config" and "-parse" are handled here; every other argument
// is collected into fltk_argv and handed to App::get()->run().
// NOTE(review): several statements (declarations of fltk_argc and ret, the
// `try` keywords, braces, the guard in front of the "Missing parameter"
// message, the return) are not visible in this excerpt -- confirm against
// the full file.
213 int main ( int argc
, char **argv
)
// fltk_argv aliases argv itself, so unparsed arguments are compacted in
// place at the front of the original array.
217 char **fltk_argv
= &argv
[0];
219 // Command-line options
221 for ( int i
=1; i
< argc
; i
++ ){
// -config <string>: either "help" or a ':'-separated list of key=value pairs.
223 if ( !strcmp( argv
[i
], "-config") ){
// NOTE(review): no bounds check is visible before argv[++i]; if "-config" is
// the last argument this reads argv[argc], a null pointer, and constructs a
// string from it -- TODO confirm and guard.
224 string opts
= argv
[++i
];
// "help" prints every config entry as: key [current value] / description.
225 if ( opts
== "help" ){
226 for ( auto mi
: App::get()->get_config_map() )
227 printf("%s [%s]\n %s\n\n",
228 mi
.first
.c_str(), mi
.second
.val
.c_str(), mi
.second
.desc
.c_str() );
// Otherwise split on ':' into items and each item on '=' into key and value.
231 for ( string
&s
: utils::split_string( opts
, ":" ) ){
232 vector
<string
> kv
= utils::split_string(s
, "=");
// NOTE(review): kv[1] is read with no visible size check; an item without
// '=' presumably yields fewer than two elements -- TODO confirm.
233 App::get()->log("Config override: " + kv
[0] + "=" + kv
[1]);
235 App::get()->config_override( kv
[0], kv
[1] ); }
// A std::runtime_error from config_override() is logged as an unknown option.
236 catch ( std::runtime_error
&e
) {
237 App::get()->log_w("Unknown option: " + kv
[0] ); }
239 App::get()->apply_config();
// -parse jmdict|kanjidic: build an SQL script from a dictionary file.
242 else if ( !strcmp( argv
[i
], "-parse" ) ) {
244 printf("-parse: Missing parameter: either kanjidic or jmdict\n");
// The input file is looked up by relative path; the gzipped name is tried
// before the plain one.
248 if ( !strcmp( argv
[i
], "jmdict") ){
249 if ( utils::file_exists("JMdict_e.gz") )
250 parse_jmdict( "JMdict_e.gz" );
251 else if ( utils::file_exists("JMdict_e") )
252 parse_jmdict( "JMdict_e" );
254 printf("File not found: JMdict_e or JMdict_e.gz\n");
258 else if ( !strcmp( argv
[i
], "kanjidic") ){
259 if ( utils::file_exists("kanjidic2.xml.gz") )
260 parse_kanjidic( "kanjidic2.xml.gz" );
261 else if ( utils::file_exists("kanjidic2.xml") )
262 parse_kanjidic( "kanjidic2.xml" );
264 printf("File not found: kanjidic2.xml or kanjidic2.xml.gz\n");
269 printf("-parse: wrong parameter '%s'.\n", argv
[i
] );
270 printf("Possible parameters: kanjidic or jmdict\n");
275 printf("argv[%d] : %s -> FLTK\n", i
, argv
[i
]);
276 // pass unparsed arguments to FLTK
277 fltk_argv
[fltk_argc
++] = argv
[i
];
// Hand over to the application with the remaining arguments.
280 ret
= App::get()->run(fltk_argc
,fltk_argv
);
// Any std::exception is shown to the user with its dynamic type name and
// its what() text.
282 catch ( std::exception
&e
){
283 // This should never happen ...
284 string msg
= std::string("Something went wrong...\n")
285 + std::string(typeid(e
).name())
286 + std::string(": ") + std::string(e
.what());
287 App::get()->alert(msg
);