KDict converted to a singleton.
[jben.git] / kdict.cpp
blob4c867d52d093f8b97f8cb47ed8c0016550151193
1 /*
2 Project: J-Ben
3 Author: Paul Goins
4 Website: http://www.vultaire.net/software/jben/
5 License: GNU General Public License (GPL) version 2
6 (http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt)
8 File: kdict.cpp
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2 of the License, or
13 (at your option) any later version.
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program. If not, see <http://www.gnu.org/licenses/>
24 #include "kdict.h"
25 #include "file_utils.h"
26 #include "jutils.h"
27 #include "wx/tokenzr.h"
28 #include "wx/file.h"
29 #include "global.h"
30 #include <fstream>
31 using namespace std;
33 KDict* KDict::kdictSingleton = NULL;
35 const KDict *KDict::GetKDict() {
36 if(kdictSingleton) return kdictSingleton;
37 kdictSingleton = new KDict;
38 kdictSingleton->LoadKanjidic();
39 kdictSingleton->LoadKradfile();
40 kdictSingleton->LoadRadkfile();
41 return kdictSingleton;
44 void KDict::Destroy() {
45 if(kdictSingleton) {
46 delete kdictSingleton;
47 kdictSingleton = NULL;
51 int KDict::LoadKanjidic(const char *filename) {
52 char *rawData = NULL;
53 unsigned int size;
54 int returnCode=0xDEADBEEF;
56 ifstream ifile(filename, ios::ate); /* "at end" to get our file size */
57 if(ifile) {
58 size = ifile.tellg();
59 ifile.seekg(0);
60 rawData = new char[size+1];
61 rawData[size] = '\0';
62 ifile.read(rawData, size);
63 #ifdef DEBUG
64 if(strlen(rawData)!=size)
65 fprintf(stderr,
66 "WARNING: kanjidic file size: %d, read-in string: %d\n",
67 strlen(rawData),
68 size);
69 #endif
71 /* Create the kanjidic object with our string data. */
72 this->KanjidicParser(rawData);
74 returnCode = KD_SUCCESS;
76 else
77 returnCode = KD_FAILURE;
79 if(rawData) delete[] rawData;
80 return returnCode;
83 int KDict::LoadKradfile(const char *filename) {
84 int returnCode = 0xDEADBEEF;
85 return returnCode;
88 int KDict::LoadRadkfile(const char *filename) {
89 int returnCode = 0xDEADBEEF;
90 return returnCode;
93 /* This could be sped up: copy the first UTF-8 character into a string, then
94 run a conversion on that. Trivial though. */
95 void KDict::KanjidicParser(char *kanjidicRawData) {
96 char *token = strtok(kanjidicRawData, "\n");
97 wxString wxToken;
98 while(token) {
99 if( (strlen(token)>0) && (token[0]!='#') ) {
100 UTF8ToWx(token, wxToken);
101 /* Convert token to proper format */
102 wxToken = ConvertKanjidicEntry(wxToken);
103 /* Add to hash table */
104 if(!kanjidicData.assign(wxToken[0], token)) {
105 #ifdef DEBUG
106 fprintf(stderr,
107 "Error assigning (%lc, %ls) to hash table!\n",
108 wxToken[0], wxToken.c_str());
109 #endif
112 token = strtok(NULL, "\n");
116 KDict::~KDict() {
117 /* Currently: nothing here. */
120 /* This function returns a wxString containing the desired line of the
121 kanjidic hash table. A conversion from string to wxString is included
122 in this call since strings are only used for more compressed internal
123 storage. This is followed by a slight reformatting of the string for
124 better presentation. */
125 wxString KDict::GetKanjidicStr(wxChar c) const {
126 BoostHM<wxChar,string>::iterator it = kanjidicData.find(c);
127 if(it==kanjidicData.end()) return _T("");
128 wxString s;
129 UTF8ToWx(it->second, s);
130 return ConvertKanjidicEntry(s);
134 * Performs transformations on a KANJIDIC string for our internal usage.
135 * Currently, this includes the following:
136 * - Changing あ.いう notation to あ(いう), a la JWPce/JFC.
137 * - Changing -あい notation to 〜あい, also a la JWPce/JFC.
139 wxString KDict::ConvertKanjidicEntry(const wxString& s) {
140 size_t index, lastIndex;
141 wxString temp = s;
143 /* First conversion: あ.いう to あ(いう) */
144 index = temp.find(_T('.'), 0);
145 while(index!=wxString::npos) {
146 /* Proceed if the character preceding the "." is hiragana/katakana. */
147 if(IsFurigana(temp[index-1])) {
148 temp[index] = _T('(');
149 index = temp.find(_T(' '), index+1);
150 if(index==wxString::npos) {
151 temp.append(_T(')'));
152 break;
153 } else
154 temp.insert(index, _T(')'));
156 lastIndex = index;
157 index = temp.find(_T('.'), lastIndex+1);
160 /* Second conversion: - to 〜, when a neighboring character is hiragana/katakana */
161 index = temp.find(_T('-'), 0);
162 while(index!=wxString::npos) {
163 /* Proceed if the character before or after the "-" is hiragana/katakana. */
164 if(IsFurigana(temp[index-1]) || IsFurigana(temp[index+1]))
165 temp[index]=_T('〜');
167 lastIndex = index;
168 index = temp.find(_T('-'), lastIndex+1);
171 /* Return the converted string */
172 return temp;
175 wxString KDict::KanjidicToHtml(const wxString& kanjidicStr) {
176 return KanjidicToHtml(kanjidicStr,
177 jben->prefs->kanjidicOptions,
178 jben->prefs->kanjidicDictionaries);
181 wxString KDict::KanjidicToHtml(const wxString& kanjidicStr,
182 long options, long dictionaries) {
183 /* return wxString(_T("<p>"))
184 .append(s[0])
185 .append(_T("</p>"));*/
187 wxString result;
189 wxString header, onyomi, kunyomi, nanori, radicalReading, english;
190 wxString dictionaryInfo;
191 wxString lowRelevance;
192 wxString unhandled;
193 long grade = -1, frequency = -1, tmode = 0;
194 wxString strokes;
195 wxString koreanRomanization, pinyinRomanization, crossReferences, miscodes;
196 wxString sTemp, token;
197 wxStringTokenizer t(kanjidicStr, _T(' '));
198 wxChar c, c2;
200 /* Special processing for the first 2 entries of the line. */
201 if(t.CountTokens()>1) {
202 /* header = "<h1><font size=\"-6\">" + args[0] + "</font></h1>"; */
203 /*header.append(_T("<p style=\"font-size:32pt\">")) */
204 header.append(_T("<p><font size=\"7\">"))
205 .append(t.GetNextToken())
206 .append(_T("</font></p>"));
207 lowRelevance.append(_T("<li>JIS code: 0x"))
208 .append(t.GetNextToken())
209 .append(_T("</li>"));
212 /* NEW! Temporary code for loading in SODs and SODAs from KanjiCafe! */
213 if(options & (KDO_SOD_STATIC | KDO_SOD_ANIM) != 0) {
214 wxCSConv transcoder(_T("utf-8"));
215 if(transcoder.IsOk()) {
216 string utfStr;
217 /* Get a UTF8-encoded string for the kanji. */
218 WxToUTF8(kanjidicStr[0], utfStr);
219 sTemp.clear();
221 /* Convert to a low-to-high-byte hex string. */
222 for(unsigned int i=0;i<utfStr.length();i++) {
223 sTemp.Append(
224 wxString::Format(_T("%02x"),
225 (unsigned char)utfStr[i]));
228 wxString sod;
229 /* Load static SOD, if present */
230 if((options & KDO_SOD_STATIC) != 0) {
231 wxFileName fn;
232 fn.AppendDir(_T("sods"));
233 fn.AppendDir(_T("sod-utf8-hex"));
234 fn.SetName(sTemp);
235 fn.SetExt(_T("png"));
236 #ifdef DEBUG
237 printf("DEBUG: Checking for existance of file \"%ls\"...\n", fn.GetFullPath().c_str());
238 #endif
239 if(wxFile::Exists(
240 fn.GetFullPath()
241 )) {
242 sod.append(wxString::Format(
243 _T("<img src=\"%s\" />"), fn.GetFullPath().c_str()
247 /* Load animated SOD, if present */
248 if((options & KDO_SOD_ANIM) != 0) {
249 wxFileName fn;
250 fn.AppendDir(_T("sods"));
251 fn.AppendDir(_T("soda-utf8-hex"));
252 fn.SetName(sTemp);
253 fn.SetExt(_T("gif"));
254 #ifdef DEBUG
255 printf("DEBUG: Checking for existance of file \"%ls\"...\n", fn.GetFullPath().c_str());
256 #endif
257 if(wxFile::Exists(
258 fn.GetFullPath()
259 )) {
260 if(sod.length()>0) sod.append(_T("<br />"));
261 sod.append(wxString::Format(
262 _T("<img src=\"%s\" />"), fn.GetFullPath().c_str()
266 /* Append the chart(s) in a paragraph object. */
267 if(sod.length()>0) {
268 header.append(wxString::Format(
269 _T("<p>%s<br /><font size=\"1\">(Kanji stroke order graphics used under license from KanjiCafe.com.)</font></p>"), sod.c_str()
272 } else {
273 fprintf(stderr, "[%s:%d]: Bad transcoder selected!\n", __FILE__, __LINE__);
276 /* END OF EXPERIMENTAL NEW CODE */
278 while(t.HasMoreTokens()) {
279 token = t.GetNextToken();
280 sTemp = token;
281 c = sTemp[0];
282 /* If a preceding character is detected, strip it */
283 if(c == _T('(') || c == _T('〜')) {
284 sTemp = sTemp.substr(1);
285 c = sTemp[0];
287 if(tmode==0) {
288 if(IsKatakana(c)) {
289 /* Onyomi reading detected */
290 /*if(onyomi.length()>0) onyomi.append(_T(" ")); */
291 if(onyomi.length()>0) onyomi.append(_T("&nbsp; "));
292 onyomi.append(token); /* Copy the original string, including ()'s and 〜's */
293 continue;
295 else if(IsHiragana(c)) {
296 /* Kunyomi reading detected */
297 if(kunyomi.length()>0) kunyomi.append(_T("&nbsp; "));
298 kunyomi.append(token); /* Copy the original string, including ()'s and 〜's */
299 continue;
301 } else if(tmode==1) {
302 if(IsFurigana(c)) {
303 /* Nanori reading detected */
304 if(nanori.length()>0) nanori.append(_T("&nbsp; "));
305 nanori.append(token); /* Copy the original string, including ()'s and 〜's */
306 continue;
308 } else if(tmode==2) {
309 if(IsFurigana(c)) {
310 /* Special radical reading detected */
311 if(radicalReading.length()>0) radicalReading.append(_T("&nbsp; "));
312 radicalReading.append(token);
313 continue;
316 if(c == _T('{')) {
317 /* English meaning detected
318 Special handling is needed to take care of spaces, though.
319 We'll "cheat" and mess with our iterator a bit if a space is detected. */
320 while(t.HasMoreTokens() && sTemp[sTemp.length()-1] != _T('}')) {
321 sTemp.append(_T(" ")).append(t.GetNextToken());
323 if(english.length()>0) english.append(_T(", "));
324 english.append(sTemp.substr(1,sTemp.length()-2)); /* Strip the {} */
326 else {
327 switch(c) {
328 case _T('T'): /* Change "t mode" */
329 /* Note: substr() returns type wxStringBase, which disallows access to wxString::ToLong.
330 So, by making a copy of wxString and performing the conversion in the copy, we get around this.
331 This ugly kludge is repeated twice below for frequency and grade level. */
332 wxString(sTemp.substr(1)).ToLong(&tmode);
333 #ifdef DEBUG
334 if(tmode>2) printf("WARNING: T-mode set to %d.\nT-modes above 2 are not currently documented!", (int)tmode);
335 #endif
336 break;
337 case _T('B'): /* Bushu radical */
338 lowRelevance.append(_T("<li>Bushu radical: ")).append(sTemp.substr(1)).append(_T("</li>"));
339 break;
340 case _T('C'): /* Classical radical */
341 lowRelevance.append(_T("<li>Classical radical: ")).append(sTemp.substr(1)).append(_T("</li>"));
342 break;
343 case _T('F'): /* Frequency */
344 wxString(sTemp.substr(1)).ToLong(&frequency);
345 break;
346 case _T('G'): /* Grade level */
347 wxString(sTemp.substr(1)).ToLong(&grade);
348 break;
349 case _T('S'): /* Stroke count */
350 if(strokes.length()==0) {
351 strokes = sTemp.substr(1);
352 } else if(!strokes.find(_T(' '))!=wxString::npos) {
353 strokes.append(_T(" (Miscounts: "))
354 .append(sTemp.substr(1))
355 .append(_T(")"));
356 } else {
357 strokes = strokes.substr(0, strokes.length()-1)
358 .append(_T(", "))
359 .append(sTemp.substr(1))
360 .append(_T(")"));
362 break;
363 case _T('U'): /* Unicode value */
364 lowRelevance.append(_T("<li>Unicode: 0x")).append(sTemp.substr(1)).append(_T("</li>"));
365 break;
366 /* From here, it's all dictionary codes */
367 case _T('H'):
368 if((dictionaries & KDD_NJECD)!=0)
369 dictionaryInfo.append(_T("<li>New Japanese-English Character Dictionary (Halpern): "))
370 .append(sTemp.substr(1)).append(_T("</li>"));
371 break;
372 case _T('N'):
373 if((dictionaries & KDD_MRJECD)!=0)
374 dictionaryInfo.append(_T("<li>Modern Reader's Japanese-English Character Dictionary (Nelson): "))
375 .append(sTemp.substr(1)).append(_T("</li>"));
376 break;
377 case _T('V'):
378 if((dictionaries & KDD_NNJECD)!=0)
379 dictionaryInfo.append(_T("<li>The New Nelson's Japanese-English Character Dictionary: "))
380 .append(sTemp.substr(1)).append(_T("</li>"));
381 break;
382 case _T('P'):
383 /* SKIP codes. */
384 /* This is a thorny issue. If we want to include a stock KANJIDIC, then we */
385 /* need to add encryption to the file and prevent copy/pasting of that data. */
386 /* I'll comply later on, but for now I'll use a stripped KANJIDIC. */
387 #ifdef USE_SKIP
388 if((dictionaries & KDD_SKIP)!=0)
389 dictionaryInfo.append(_T("<li>SKIP code: "))
390 .append(sTemp.substr(1)).append(_T("</li>"));
391 #endif
392 break;
393 case _T('I'): /* Spahn/Hadamitzky dictionaries */
394 if(sTemp[1]==_T('N')) {
395 if((dictionaries & KDD_KK)!=0) {
396 dictionaryInfo.append(_T("<li>Kanji & Kana (Spahn, Hadamitzky): "))
397 .append(sTemp.substr(2)).append(_T("</li>"));
399 } else {
400 if((dictionaries & KDD_KD)!=0) {
401 dictionaryInfo.append(_T("<li>Kanji Dictionary (Spahn, Hadamitzky): "))
402 .append(sTemp.substr(1)).append(_T("</li>"));
405 break;
406 case _T('Q'):
407 if((dictionaries & KDD_FC)!=0) {
408 dictionaryInfo.append(_T("<li>Four Corner code: "))
409 .append(sTemp.substr(1)).append(_T("</li>"));
411 break;
412 case _T('M'):
413 c2 = sTemp[1];
414 if(c2==_T('N')) {
415 if((dictionaries & KDD_MOROI)!=0) {
416 dictionaryInfo.append(_T("<li>Morohashi Daikanwajiten Index: "))
417 .append(sTemp.substr(2)).append(_T("</li>"));
419 } else if(c2==_T('P')) {
420 if((dictionaries & KDD_MOROVP)!=0) {
421 dictionaryInfo.append(_T("<li>Morohashi Daikanwajiten Volume/Page: "))
422 .append(sTemp.substr(2)).append(_T("</li>"));
425 break;
426 case _T('E'):
427 if((dictionaries & KDD_GRJC)!=0) {
428 dictionaryInfo.append(_T("<li>A Guide to Remembering Japanese Characters (Henshal): "))
429 .append(sTemp.substr(1)).append(_T("</li>"));
431 break;
432 case _T('K'):
433 if((dictionaries & KDD_GKD)!=0) {
434 dictionaryInfo.append(_T("<li>Gakken Kanji Dictionary (\"A New Dictionary of Kanji Usage\"): "))
435 .append(sTemp.substr(1)).append(_T("</li>"));
437 break;
438 case _T('L'):
439 if((dictionaries & KDD_RTK)!=0) {
440 dictionaryInfo.append(_T("<li>Remembering the Kanji (Heisig): "))
441 .append(sTemp.substr(1)).append(_T("</li>"));
443 break;
444 case _T('O'):
445 if((dictionaries & KDD_JN)!=0) {
446 dictionaryInfo.append(_T("<li>Japanese Names (O'Neill): "))
447 .append(sTemp.substr(1)).append(_T("</li>"));
449 break;
450 case _T('D'):
451 c2 = sTemp[1];
452 switch(c2) {
453 case _T('B'):
454 if((dictionaries & KDD_JBP)!=0) {
455 dictionaryInfo.append(_T("<li>Japanese for Busy People (AJLT): "))
456 .append(sTemp.substr(2)).append(_T("</li>"));
458 break;
459 case _T('C'):
460 if((dictionaries & KDD_KWJLP)!=0) {
461 dictionaryInfo.append(_T("<li>The Kanji Way to Japanese Language Power (Crowley): "))
462 .append(sTemp.substr(2)).append(_T("</li>"));
464 break;
465 case _T('F'):
466 if((dictionaries & KDD_JKF)!=0) {
467 dictionaryInfo.append(_T("<li>Japanese Kanji Flashcards (White Rabbit Press): "))
468 .append(sTemp.substr(2)).append(_T("</li>"));
470 break;
471 case _T('G'):
472 if((dictionaries & KDD_KCKG)!=0) {
473 dictionaryInfo.append(_T("<li>Kodansha Compact Kanji Guide: "))
474 .append(sTemp.substr(2)).append(_T("</li>"));
476 break;
477 case _T('H'):
478 if((dictionaries & KDD_GTRWJH)!=0) {
479 dictionaryInfo.append(_T("<li>A Guide To Reading and Writing Japanese (Hensall): "))
480 .append(sTemp.substr(2)).append(_T("</li>"));
482 break;
483 case _T('J'):
484 if((dictionaries & KDD_KIC)!=0) {
485 dictionaryInfo.append(_T("<li>Kanji in Context (Nishiguchi and Kono): "))
486 .append(sTemp.substr(2)).append(_T("</li>"));
488 break;
489 case _T('K'):
490 if((dictionaries & KDD_KLD)!=0) {
491 dictionaryInfo.append(_T("<li>Kanji Learner's Dictionary (Halpern): "))
492 .append(sTemp.substr(2)).append(_T("</li>"));
494 break;
495 case _T('O'):
496 if((dictionaries & KDD_EK)!=0) {
497 dictionaryInfo.append(_T("<li>Essential Kanji (O'Neill): "))
498 .append(sTemp.substr(2)).append(_T("</li>"));
500 break;
501 case _T('R'):
502 if((dictionaries & KDD_DR)!=0) {
503 dictionaryInfo.append(_T("<li>2001 Kanji (De Roo): "))
504 .append(sTemp.substr(2)).append(_T("</li>"));
506 break;
507 case _T('S'):
508 if((dictionaries & KDD_GTRWJS)!=0) {
509 dictionaryInfo.append(_T("<li>A Guide to Reading and Writing Japanese (Sakade): "))
510 .append(sTemp.substr(2)).append(_T("</li>"));
512 break;
513 case _T('T'):
514 if((dictionaries & KDD_TKC)!=0) {
515 dictionaryInfo.append(_T("<li>Tuttle Kanji Cards (Kask): "))
516 .append(sTemp.substr(2)).append(_T("</li>"));
518 break;
519 default:
520 if(unhandled.length()>0) unhandled.append(_T(" "));
521 unhandled.append(sTemp);
522 break;
524 break;
525 /* Crossreferences and miscodes */
526 case _T('X'):
527 if(crossReferences.length()>0) crossReferences.append(_T(", "));
528 crossReferences.append(sTemp.substr(1));
529 break;
530 case _T('Z'):
531 if(miscodes.length()>0) miscodes.append(_T(", "));
532 miscodes.append(sTemp.substr(1));
533 break;
534 /* Korean/Pinyin (Chinese) romanization */
535 case _T('W'):
536 if(koreanRomanization.length()>0) koreanRomanization.append(_T(", "));
537 koreanRomanization.append(sTemp.substr(1));
538 break;
539 case _T('Y'):
540 if(pinyinRomanization.length()>0) pinyinRomanization.append(_T(", "));
541 pinyinRomanization.append(sTemp.substr(1));
542 break;
543 default:
544 if(unhandled.length()>0) unhandled.append(_T(" "));
545 unhandled.append(sTemp);
546 break;
549 } /* while(t.HasMoreTokens()) */
551 if(header.length() > 0) result.append(header);
552 #ifdef DEBUG
553 printf("DEBUG: header=[%ls]\n", header.c_str());
554 #endif
555 result.append(_T("<ul>"));
556 if((options & KDO_READINGS) != 0) {
557 if(onyomi.length() > 0) result.append(_T("<li>Onyomi Readings: ")).append(onyomi).append(_T("</li>"));
558 if(kunyomi.length() > 0) result.append(_T("<li>Kunyomi Readings: ")).append(kunyomi).append(_T("</li>"));
559 if(nanori.length() > 0) result.append(_T("<li>Nanori Readings: ")).append(nanori).append(_T("</li>"));
560 if(radicalReading.length() > 0) result.append(_T("<li>Special Radical Reading: ")).append(radicalReading).append(_T("</li>"));
562 if((options & KDO_MEANINGS) != 0) {
563 if(english.length() > 0) result.append(_T("<li>English Meanings: ")).append(english).append(_T("</li>"));
565 if((options & KDO_HIGHIMPORTANCE) != 0) {
566 if(strokes.length() > 0)
567 result.append(_T("<li>Stroke count: ")).append(strokes).append(_T("</li>"));
568 else
569 result.append(_T("<li>Stroke count: not specified in KANJIDIC"));
570 result.append(_T("<li>Grade Level: "));
571 if(grade<=6 && grade >= 1) { /* Jouyou (Grade #) */
572 result.append(_T("Jouyou (Grade "))
573 .append(wxString::Format(_T("%d"), (int)grade))
574 .append(_T(")"));
575 } else if(grade==8) { /* Jouyou (General usage) */
576 result.append(_T("Jouyou (General usage)"));
577 } else if(grade==9) { /* Jinmeiyou (Characters for names) */
578 result.append(_T("Jinmeiyou (Characters for names)"));
579 } else if(grade==-1) { /* No flag specified in kanjidic string */
580 result.append(_T("Unspecified"));
581 } else {
582 result.append(_T("Unhandled grade level (Grade "))
583 .append(wxString::Format(_T("%d"), (int)grade))
584 .append(_T(")"));
586 result.append(_T("</li>"));
587 if(frequency!=-1)
588 result.append(_T("<li>Frequency Ranking: "))
589 .append(wxString::Format(_T("%d"), (int)frequency))
590 .append(_T("</li>"));
591 else result.append(_T("<li>Frequency Ranking: Unspecified</li>"));
593 if((options & KDO_DICTIONARIES) != 0) {
594 if(dictionaryInfo.length()>0) result.append(_T("<li>Dictionary Codes:<ul>")).append(dictionaryInfo).append(_T("</ul></li>"));
596 if((options & KDO_VOCABCROSSREF) != 0) {
597 vector<wxString> *vList = &(jben->vocabList->GetVocabList());
598 wxChar thisKanji = kanjidicStr[0];
599 vector<wxString> crossRefList;
600 vector<wxString>::iterator vIt;
601 for(vIt=vList->begin(); vIt!=vList->end(); vIt++) {
602 if(vIt->find(thisKanji)!=wxString::npos) {
603 crossRefList.push_back(*vIt);
606 if(crossRefList.size()>0) {
607 result.append(_T("<li>This kanji is used by words in your study list:<br><font size=\"7\">"));
608 vIt = crossRefList.begin();
609 result.append(*vIt);
610 for(++vIt; vIt!=crossRefList.end(); vIt++) {
611 result.append(_T("&nbsp; ")).append(*vIt);
613 result.append(_T("</font></li>"));
616 if((options & KDO_LOWIMPORTANCE) != 0) {
617 if(koreanRomanization.length()>0) lowRelevance.append(_T("<li>Korean romanization: ")).append(koreanRomanization).append(_T("</li>"));
618 if(pinyinRomanization.length()>0) lowRelevance.append(_T("<li>Pinyin romanization: ")).append(pinyinRomanization).append(_T("</li>"));
619 if(crossReferences.length()>0) lowRelevance.append(_T("<li>Cross reference codes: ")).append(crossReferences).append(_T("</li>"));
620 if(miscodes.length()>0) lowRelevance.append(_T("<li>Miscodes: ")).append(miscodes).append(_T("</li>"));
621 if(lowRelevance.length()>0) result.append(_T("<li>Extra Information:<ul>")).append(lowRelevance).append(_T("</ul></li>"));
623 if((options & KDO_UNHANDLED) != 0) {
624 if(unhandled.length()>0) result.append(_T("<li>Unhandled: ")).append(unhandled).append(_T("</li>"));
626 result.append(_T("</ul>"));
628 return result;
631 int KDict::GetIntField(wxChar kanji, const wxString& marker) const {
632 wxString markerStr, kanjiEntry, temp;
633 size_t index=0;
634 long value=-1;
635 int markerLen;
637 markerStr.Printf(_T(" %s"), marker.c_str());
638 markerLen=markerStr.length();
640 kanjiEntry = GetKanjidicStr(kanji);
641 if(kanjiEntry.length()>0) {
642 index = kanjiEntry.find(markerStr);
643 if(index!=wxString::npos) {
644 temp = kanjiEntry.substr(
645 index+markerLen,
646 kanjiEntry.find(_T(" "), index+1) - index - (markerLen-1));
647 temp.ToLong(&value);
651 return (int)value;
654 const BoostHM<wxChar,string>* KDict::GetHashTable() const {
655 return &kanjidicData;
/* Selector passed to GetKanjidicReading() to choose which part of a
   KANJIDIC entry is extracted. */
enum {
	KDR_Onyomi  = 1, /* on readings */
	KDR_Kunyomi = 2, /* kun readings */
	KDR_English = 3  /* English meanings */
};
664 wxString KDict::GetOnyomiStr(wxChar c) const {
665 return GetKanjidicReading(c, KDR_Onyomi);
668 wxString KDict::GetKunyomiStr(wxChar c) const {
669 return GetKanjidicReading(c, KDR_Kunyomi);
672 wxString KDict::GetEnglishStr(wxChar c) const {
673 return GetKanjidicReading(c, KDR_English);
676 wxString KDict::GetKanjidicReading(wxChar c, int readingType) const {
677 wxString result;
678 wxString kanjidicStr = GetKanjidicStr(c);
680 long tmode = 0;
681 wxString sTemp, token;
682 wxStringTokenizer t(kanjidicStr, _T(' '));
684 /* The first two tokens are guaranteed not to be what we're looking for. Skip them. */
685 if(t.CountTokens()>1) {
686 t.GetNextToken();
687 t.GetNextToken();
689 while(t.HasMoreTokens()) {
690 token = t.GetNextToken();
691 sTemp = token;
692 c = sTemp[0];
693 /* If a preceding character is detected, strip it */
694 if(c == _T('(') || c == _T('〜')) {
695 sTemp = sTemp.substr(1);
696 c = sTemp[0];
698 if(tmode==0) {
699 if(IsKatakana(c) && readingType==KDR_Onyomi) {
700 /* Onyomi reading detected */
701 if(result.length()>0) result.append(_T(" "));
702 result.append(token); /* Copy the original string, including ()'s and 〜's */
703 continue;
705 else if(IsHiragana(c) && readingType==KDR_Kunyomi) {
706 /* Kunyomi reading detected */
707 if(result.length()>0) result.append(_T(" "));
708 result.append(token); /* Copy the original string, including ()'s and 〜's */
709 continue;
712 if(c == _T('{') && readingType==KDR_English) {
713 /* English meaning detected
714 Special handling is needed to take care of spaces, though.
715 We'll "cheat" and mess with our iterator a bit if a space is detected. */
716 while(t.HasMoreTokens() && sTemp[sTemp.length()-1] != _T('}')) {
717 sTemp.append(_T(" ")).append(t.GetNextToken());
719 if(result.length()>0) result.append(_T(", "));
720 result.append(sTemp.substr(1,sTemp.length()-2)); /* Strip the {} */
722 else if(c==_T('T')) wxString(sTemp.substr(1)).ToLong(&tmode);
725 return result;