Added .gitignore info for Windows build files.
[jben.git] / kanjidic.cpp
blob3ce518319cb5c0ceeafe0896d68872471f98cb4a
/*
Project: J-Ben
Author:  Paul Goins
Website: http://www.vultaire.net/software/jben/
License: GNU General Public License (GPL) version 2
         (http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt)

File: kanjidic.cpp

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>
*/
24 #include "kanjidic.h"
25 #include "file_utils.h"
26 #include "jutils.h"
27 #include "wx/tokenzr.h"
28 #include "wx/file.h"
29 #include "global.h"
30 #include <fstream>
31 using namespace std;
33 KanjiDic *KanjiDic::LoadKanjiDic(const char *filename, int& returnCode) {
34 KanjiDic *k=NULL;
35 char *rawData = NULL;
36 unsigned int size;
38 ifstream ifile(filename, ios::ate); /* "at end" to get our file size */
39 if(ifile) {
40 size = ifile.tellg();
41 ifile.seekg(0);
42 rawData = new char[size+1];
43 rawData[size] = '\0';
44 ifile.read(rawData, size);
45 #ifdef DEBUG
46 if(strlen(rawData)!=size)
47 fprintf(stderr,
48 "WARNING: kanjidic file size: %d, read-in string: %d\n",
49 strlen(rawData),
50 size);
51 #endif
53 /* Create the kanjidic object with our string data. */
54 k = new KanjiDic(rawData);
56 returnCode = KD_SUCCESS;
58 else
59 returnCode = KD_FAILURE;
61 if(rawData) delete[] rawData;
62 return k;
65 /* This could be sped up: copy the first UTF-8 character into a string, then
66 run a conversion on that. Trivial though. */
67 KanjiDic::KanjiDic(char *kanjidicRawData) {
68 char *token = strtok(kanjidicRawData, "\n");
69 wxString wxToken;
70 while(token) {
71 if( (strlen(token)>0) && (token[0]!='#') ) {
72 UTF8ToWx(token, wxToken);
73 /* Convert token to proper format */
74 wxToken = ConvertKanjidicEntry(wxToken);
75 /* Add to hash table */
76 if(!kanjiHash.assign(wxToken[0], token)) {
77 #ifdef DEBUG
78 fprintf(stderr,
79 "Error assigning (%lc, %ls) to hash table!\n",
80 wxToken[0], wxToken.c_str());
81 #endif
84 token = strtok(NULL, "\n");
88 KanjiDic::~KanjiDic() {
89 /* Currently: nothing here. */
92 /* This function returns a wxString containing the desired line of the
93 kanjidic hash table. A conversion from string to wxString is included
94 in this call since strings are only used for more compressed internal
95 storage. This is followed by a slight reformatting of the string for
96 better presentation. */
97 wxString KanjiDic::GetKanjidicStr(wxChar c) {
98 BoostHM<wxChar,string>::iterator it = kanjiHash.find(c);
99 if(it==kanjiHash.end()) return _T("");
100 wxString s;
101 UTF8ToWx(it->second, s);
102 return ConvertKanjidicEntry(s);
106 * Performs transformations on a KANJIDIC string for our internal usage.
107 * Currently, this includes the following:
108 * - Changing あ.いう notation to あ(いう), a la JWPce/JFC.
109 * - Changing -あい notation to 〜あい, also a la JWPce/JFC.
111 wxString KanjiDic::ConvertKanjidicEntry(const wxString& s) {
112 size_t index, lastIndex;
113 wxString temp = s;
115 /* First conversion: あ.いう to あ(いう) */
116 index = temp.find(_T('.'), 0);
117 while(index!=wxString::npos) {
118 /* Proceed if the character preceding the "." is hiragana/katakana. */
119 if(IsFurigana(temp[index-1])) {
120 temp[index] = _T('(');
121 index = temp.find(_T(' '), index+1);
122 if(index==wxString::npos) {
123 temp.append(_T(')'));
124 break;
125 } else
126 temp.insert(index, _T(')'));
128 lastIndex = index;
129 index = temp.find(_T('.'), lastIndex+1);
132 /* Second conversion: - to 〜, when a neighboring character is hiragana/katakana */
133 index = temp.find(_T('-'), 0);
134 while(index!=wxString::npos) {
135 /* Proceed if the character before or after the "-" is hiragana/katakana. */
136 if(IsFurigana(temp[index-1]) || IsFurigana(temp[index+1]))
137 temp[index]=_T('〜');
139 lastIndex = index;
140 index = temp.find(_T('-'), lastIndex+1);
143 /* Return the converted string */
144 return temp;
147 wxString KanjiDic::KanjidicToHtml(const wxString& kanjidicStr) {
148 return KanjidicToHtml(kanjidicStr, prefs->kanjidicOptions, prefs->kanjidicDictionaries);
151 wxString KanjiDic::KanjidicToHtml(const wxString& kanjidicStr, long options, long dictionaries) {
152 /* return wxString(_T("<p>"))
153 .append(s[0])
154 .append(_T("</p>"));*/
156 wxString result;
158 wxString header, onyomi, kunyomi, nanori, radicalReading, english;
159 wxString dictionaryInfo;
160 wxString lowRelevance;
161 wxString unhandled;
162 long grade = -1, frequency = -1, tmode = 0;
163 wxString strokes;
164 wxString koreanRomanization, pinyinRomanization, crossReferences, miscodes;
165 wxString sTemp, token;
166 wxStringTokenizer t(kanjidicStr, _T(' '));
167 wxChar c, c2;
169 /* Special processing for the first 2 entries of the line. */
170 if(t.CountTokens()>1) {
171 /* header = "<h1><font size=\"-6\">" + args[0] + "</font></h1>"; */
172 /*header.append(_T("<p style=\"font-size:32pt\">")) */
173 header.append(_T("<p><font size=\"7\">"))
174 .append(t.GetNextToken())
175 .append(_T("</font></p>"));
176 lowRelevance.append(_T("<li>JIS code: 0x"))
177 .append(t.GetNextToken())
178 .append(_T("</li>"));
181 /* NEW! Temporary code for loading in SODs and SODAs from KanjiCafe! */
182 if(options & (KDO_SOD_STATIC | KDO_SOD_ANIM) != 0) {
183 wxCSConv transcoder(_T("utf-8"));
184 if(transcoder.IsOk()) {
185 string utfStr;
186 /* Get a UTF8-encoded string for the kanji. */
187 WxToUTF8(kanjidicStr[0], utfStr);
188 sTemp.clear();
190 /* Convert to a low-to-high-byte hex string. */
191 for(unsigned int i=0;i<utfStr.length();i++) {
192 sTemp.Append(
193 wxString::Format(_T("%02x"),
194 (unsigned char)utfStr[i]));
197 wxString sod;
198 /* Load static SOD, if present */
199 if((options & KDO_SOD_STATIC) != 0) {
200 wxFileName fn;
201 fn.AppendDir(_T("sods"));
202 fn.AppendDir(_T("sod-utf8-hex"));
203 fn.SetName(sTemp);
204 fn.SetExt(_T("png"));
205 #ifdef DEBUG
206 printf("DEBUG: Checking for existance of file \"%ls\"...\n", fn.GetFullPath().c_str());
207 #endif
208 if(wxFile::Exists(
209 fn.GetFullPath()
210 )) {
211 sod.append(wxString::Format(
212 _T("<img src=\"%s\" />"), fn.GetFullPath().c_str()
216 /* Load animated SOD, if present */
217 if((options & KDO_SOD_ANIM) != 0) {
218 wxFileName fn;
219 fn.AppendDir(_T("sods"));
220 fn.AppendDir(_T("soda-utf8-hex"));
221 fn.SetName(sTemp);
222 fn.SetExt(_T("gif"));
223 #ifdef DEBUG
224 printf("DEBUG: Checking for existance of file \"%ls\"...\n", fn.GetFullPath().c_str());
225 #endif
226 if(wxFile::Exists(
227 fn.GetFullPath()
228 )) {
229 if(sod.length()>0) sod.append(_T("<br />"));
230 sod.append(wxString::Format(
231 _T("<img src=\"%s\" />"), fn.GetFullPath().c_str()
235 /* Append the chart(s) in a paragraph object. */
236 if(sod.length()>0) {
237 header.append(wxString::Format(
238 _T("<p>%s<br /><font size=\"1\">(Kanji stroke order graphics used under license from KanjiCafe.com.)</font></p>"), sod.c_str()
241 } else {
242 fprintf(stderr, "[%s:%d]: Bad transcoder selected!\n", __FILE__, __LINE__);
245 /* END OF EXPERIMENTAL NEW CODE */
247 while(t.HasMoreTokens()) {
248 token = t.GetNextToken();
249 sTemp = token;
250 c = sTemp[0];
251 /* If a preceding character is detected, strip it */
252 if(c == _T('(') || c == _T('〜')) {
253 sTemp = sTemp.substr(1);
254 c = sTemp[0];
256 if(tmode==0) {
257 if(IsKatakana(c)) {
258 /* Onyomi reading detected */
259 /*if(onyomi.length()>0) onyomi.append(_T(" ")); */
260 if(onyomi.length()>0) onyomi.append(_T("&nbsp; "));
261 onyomi.append(token); /* Copy the original string, including ()'s and 〜's */
262 continue;
264 else if(IsHiragana(c)) {
265 /* Kunyomi reading detected */
266 if(kunyomi.length()>0) kunyomi.append(_T("&nbsp; "));
267 kunyomi.append(token); /* Copy the original string, including ()'s and 〜's */
268 continue;
270 } else if(tmode==1) {
271 if(IsFurigana(c)) {
272 /* Nanori reading detected */
273 if(nanori.length()>0) nanori.append(_T("&nbsp; "));
274 nanori.append(token); /* Copy the original string, including ()'s and 〜's */
275 continue;
277 } else if(tmode==2) {
278 if(IsFurigana(c)) {
279 /* Special radical reading detected */
280 if(radicalReading.length()>0) radicalReading.append(_T("&nbsp; "));
281 radicalReading.append(token);
282 continue;
285 if(c == _T('{')) {
286 /* English meaning detected
287 Special handling is needed to take care of spaces, though.
288 We'll "cheat" and mess with our iterator a bit if a space is detected. */
289 while(t.HasMoreTokens() && sTemp[sTemp.length()-1] != _T('}')) {
290 sTemp.append(_T(" ")).append(t.GetNextToken());
292 if(english.length()>0) english.append(_T(", "));
293 english.append(sTemp.substr(1,sTemp.length()-2)); /* Strip the {} */
295 else {
296 switch(c) {
297 case _T('T'): /* Change "t mode" */
298 /* Note: substr() returns type wxStringBase, which disallows access to wxString::ToLong.
299 So, by making a copy of wxString and performing the conversion in the copy, we get around this.
300 This ugly kludge is repeated twice below for frequency and grade level. */
301 wxString(sTemp.substr(1)).ToLong(&tmode);
302 #ifdef DEBUG
303 if(tmode>2) printf("WARNING: T-mode set to %d.\nT-modes above 2 are not currently documented!", (int)tmode);
304 #endif
305 break;
306 case _T('B'): /* Bushu radical */
307 lowRelevance.append(_T("<li>Bushu radical: ")).append(sTemp.substr(1)).append(_T("</li>"));
308 break;
309 case _T('C'): /* Classical radical */
310 lowRelevance.append(_T("<li>Classical radical: ")).append(sTemp.substr(1)).append(_T("</li>"));
311 break;
312 case _T('F'): /* Frequency */
313 wxString(sTemp.substr(1)).ToLong(&frequency);
314 break;
315 case _T('G'): /* Grade level */
316 wxString(sTemp.substr(1)).ToLong(&grade);
317 break;
318 case _T('S'): /* Stroke count */
319 if(strokes.length()==0) {
320 strokes = sTemp.substr(1);
321 } else if(!strokes.find(_T(' '))!=wxString::npos) {
322 strokes.append(_T(" (Miscounts: "))
323 .append(sTemp.substr(1))
324 .append(_T(")"));
325 } else {
326 strokes = strokes.substr(0, strokes.length()-1)
327 .append(_T(", "))
328 .append(sTemp.substr(1))
329 .append(_T(")"));
331 break;
332 case _T('U'): /* Unicode value */
333 lowRelevance.append(_T("<li>Unicode: 0x")).append(sTemp.substr(1)).append(_T("</li>"));
334 break;
335 /* From here, it's all dictionary codes */
336 case _T('H'):
337 if((dictionaries & KDD_NJECD)!=0)
338 dictionaryInfo.append(_T("<li>New Japanese-English Character Dictionary (Halpern): "))
339 .append(sTemp.substr(1)).append(_T("</li>"));
340 break;
341 case _T('N'):
342 if((dictionaries & KDD_MRJECD)!=0)
343 dictionaryInfo.append(_T("<li>Modern Reader's Japanese-English Character Dictionary (Nelson): "))
344 .append(sTemp.substr(1)).append(_T("</li>"));
345 break;
346 case _T('V'):
347 if((dictionaries & KDD_NNJECD)!=0)
348 dictionaryInfo.append(_T("<li>The New Nelson's Japanese-English Character Dictionary: "))
349 .append(sTemp.substr(1)).append(_T("</li>"));
350 break;
351 case _T('P'):
352 /* SKIP codes. */
353 /* This is a thorny issue. If we want to include a stock KANJIDIC, then we */
354 /* need to add encryption to the file and prevent copy/pasting of that data. */
355 /* I'll comply later on, but for now I'll use a stripped KANJIDIC. */
356 #ifdef USE_SKIP
357 if((dictionaries & KDD_SKIP)!=0)
358 dictionaryInfo.append(_T("<li>SKIP code: "))
359 .append(sTemp.substr(1)).append(_T("</li>"));
360 #endif
361 break;
362 case _T('I'): /* Spahn/Hadamitzky dictionaries */
363 if(sTemp[1]==_T('N')) {
364 if((dictionaries & KDD_KK)!=0) {
365 dictionaryInfo.append(_T("<li>Kanji & Kana (Spahn, Hadamitzky): "))
366 .append(sTemp.substr(2)).append(_T("</li>"));
368 } else {
369 if((dictionaries & KDD_KD)!=0) {
370 dictionaryInfo.append(_T("<li>Kanji Dictionary (Spahn, Hadamitzky): "))
371 .append(sTemp.substr(1)).append(_T("</li>"));
374 break;
375 case _T('Q'):
376 if((dictionaries & KDD_FC)!=0) {
377 dictionaryInfo.append(_T("<li>Four Corner code: "))
378 .append(sTemp.substr(1)).append(_T("</li>"));
380 break;
381 case _T('M'):
382 c2 = sTemp[1];
383 if(c2==_T('N')) {
384 if((dictionaries & KDD_MOROI)!=0) {
385 dictionaryInfo.append(_T("<li>Morohashi Daikanwajiten Index: "))
386 .append(sTemp.substr(2)).append(_T("</li>"));
388 } else if(c2==_T('P')) {
389 if((dictionaries & KDD_MOROVP)!=0) {
390 dictionaryInfo.append(_T("<li>Morohashi Daikanwajiten Volume/Page: "))
391 .append(sTemp.substr(2)).append(_T("</li>"));
394 break;
395 case _T('E'):
396 if((dictionaries & KDD_GRJC)!=0) {
397 dictionaryInfo.append(_T("<li>A Guide to Remembering Japanese Characters (Henshal): "))
398 .append(sTemp.substr(1)).append(_T("</li>"));
400 break;
401 case _T('K'):
402 if((dictionaries & KDD_GKD)!=0) {
403 dictionaryInfo.append(_T("<li>Gakken Kanji Dictionary (\"A New Dictionary of Kanji Usage\"): "))
404 .append(sTemp.substr(1)).append(_T("</li>"));
406 break;
407 case _T('L'):
408 if((dictionaries & KDD_RTK)!=0) {
409 dictionaryInfo.append(_T("<li>Remembering the Kanji (Heisig): "))
410 .append(sTemp.substr(1)).append(_T("</li>"));
412 break;
413 case _T('O'):
414 if((dictionaries & KDD_JN)!=0) {
415 dictionaryInfo.append(_T("<li>Japanese Names (O'Neill): "))
416 .append(sTemp.substr(1)).append(_T("</li>"));
418 break;
419 case _T('D'):
420 c2 = sTemp[1];
421 switch(c2) {
422 case _T('B'):
423 if((dictionaries & KDD_JBP)!=0) {
424 dictionaryInfo.append(_T("<li>Japanese for Busy People (AJLT): "))
425 .append(sTemp.substr(2)).append(_T("</li>"));
427 break;
428 case _T('C'):
429 if((dictionaries & KDD_KWJLP)!=0) {
430 dictionaryInfo.append(_T("<li>The Kanji Way to Japanese Language Power (Crowley): "))
431 .append(sTemp.substr(2)).append(_T("</li>"));
433 break;
434 case _T('F'):
435 if((dictionaries & KDD_JKF)!=0) {
436 dictionaryInfo.append(_T("<li>Japanese Kanji Flashcards (White Rabbit Press): "))
437 .append(sTemp.substr(2)).append(_T("</li>"));
439 break;
440 case _T('G'):
441 if((dictionaries & KDD_KCKG)!=0) {
442 dictionaryInfo.append(_T("<li>Kodansha Compact Kanji Guide: "))
443 .append(sTemp.substr(2)).append(_T("</li>"));
445 break;
446 case _T('H'):
447 if((dictionaries & KDD_GTRWJH)!=0) {
448 dictionaryInfo.append(_T("<li>A Guide To Reading and Writing Japanese (Hensall): "))
449 .append(sTemp.substr(2)).append(_T("</li>"));
451 break;
452 case _T('J'):
453 if((dictionaries & KDD_KIC)!=0) {
454 dictionaryInfo.append(_T("<li>Kanji in Context (Nishiguchi and Kono): "))
455 .append(sTemp.substr(2)).append(_T("</li>"));
457 break;
458 case _T('K'):
459 if((dictionaries & KDD_KLD)!=0) {
460 dictionaryInfo.append(_T("<li>Kanji Learner's Dictionary (Halpern): "))
461 .append(sTemp.substr(2)).append(_T("</li>"));
463 break;
464 case _T('O'):
465 if((dictionaries & KDD_EK)!=0) {
466 dictionaryInfo.append(_T("<li>Essential Kanji (O'Neill): "))
467 .append(sTemp.substr(2)).append(_T("</li>"));
469 break;
470 case _T('R'):
471 if((dictionaries & KDD_DR)!=0) {
472 dictionaryInfo.append(_T("<li>2001 Kanji (De Roo): "))
473 .append(sTemp.substr(2)).append(_T("</li>"));
475 break;
476 case _T('S'):
477 if((dictionaries & KDD_GTRWJS)!=0) {
478 dictionaryInfo.append(_T("<li>A Guide to Reading and Writing Japanese (Sakade): "))
479 .append(sTemp.substr(2)).append(_T("</li>"));
481 break;
482 case _T('T'):
483 if((dictionaries & KDD_TKC)!=0) {
484 dictionaryInfo.append(_T("<li>Tuttle Kanji Cards (Kask): "))
485 .append(sTemp.substr(2)).append(_T("</li>"));
487 break;
488 default:
489 if(unhandled.length()>0) unhandled.append(_T(" "));
490 unhandled.append(sTemp);
491 break;
493 break;
494 /* Crossreferences and miscodes */
495 case _T('X'):
496 if(crossReferences.length()>0) crossReferences.append(_T(", "));
497 crossReferences.append(sTemp.substr(1));
498 break;
499 case _T('Z'):
500 if(miscodes.length()>0) miscodes.append(_T(", "));
501 miscodes.append(sTemp.substr(1));
502 break;
503 /* Korean/Pinyin (Chinese) romanization */
504 case _T('W'):
505 if(koreanRomanization.length()>0) koreanRomanization.append(_T(", "));
506 koreanRomanization.append(sTemp.substr(1));
507 break;
508 case _T('Y'):
509 if(pinyinRomanization.length()>0) pinyinRomanization.append(_T(", "));
510 pinyinRomanization.append(sTemp.substr(1));
511 break;
512 default:
513 if(unhandled.length()>0) unhandled.append(_T(" "));
514 unhandled.append(sTemp);
515 break;
518 } /* while(t.HasMoreTokens()) */
520 if(header.length() > 0) result.append(header);
521 #ifdef DEBUG
522 printf("DEBUG: header=[%ls]\n", header.c_str());
523 #endif
524 result.append(_T("<ul>"));
525 if((options & KDO_READINGS) != 0) {
526 if(onyomi.length() > 0) result.append(_T("<li>Onyomi Readings: ")).append(onyomi).append(_T("</li>"));
527 if(kunyomi.length() > 0) result.append(_T("<li>Kunyomi Readings: ")).append(kunyomi).append(_T("</li>"));
528 if(nanori.length() > 0) result.append(_T("<li>Nanori Readings: ")).append(nanori).append(_T("</li>"));
529 if(radicalReading.length() > 0) result.append(_T("<li>Special Radical Reading: ")).append(radicalReading).append(_T("</li>"));
531 if((options & KDO_MEANINGS) != 0) {
532 if(english.length() > 0) result.append(_T("<li>English Meanings: ")).append(english).append(_T("</li>"));
534 if((options & KDO_HIGHIMPORTANCE) != 0) {
535 if(strokes.length() > 0)
536 result.append(_T("<li>Stroke count: ")).append(strokes).append(_T("</li>"));
537 else
538 result.append(_T("<li>Stroke count: not specified in KANJIDIC"));
539 result.append(_T("<li>Grade Level: "));
540 if(grade<=6 && grade >= 1) { /* Jouyou (Grade #) */
541 result.append(_T("Jouyou (Grade "))
542 .append(wxString::Format(_T("%d"), (int)grade))
543 .append(_T(")"));
544 } else if(grade==8) { /* Jouyou (General usage) */
545 result.append(_T("Jouyou (General usage)"));
546 } else if(grade==9) { /* Jinmeiyou (Characters for names) */
547 result.append(_T("Jinmeiyou (Characters for names)"));
548 } else if(grade==-1) { /* No flag specified in kanjidic string */
549 result.append(_T("Unspecified"));
550 } else {
551 result.append(_T("Unhandled grade level (Grade "))
552 .append(wxString::Format(_T("%d"), (int)grade))
553 .append(_T(")"));
555 result.append(_T("</li>"));
556 if(frequency!=-1)
557 result.append(_T("<li>Frequency Ranking: "))
558 .append(wxString::Format(_T("%d"), (int)frequency))
559 .append(_T("</li>"));
560 else result.append(_T("<li>Frequency Ranking: Unspecified</li>"));
562 if((options & KDO_DICTIONARIES) != 0) {
563 if(dictionaryInfo.length()>0) result.append(_T("<li>Dictionary Codes:<ul>")).append(dictionaryInfo).append(_T("</ul></li>"));
565 if((options & KDO_VOCABCROSSREF) != 0) {
566 vector<wxString> *vList = &(jben->vocabList->GetVocabList());
567 wxChar thisKanji = kanjidicStr[0];
568 vector<wxString> crossRefList;
569 vector<wxString>::iterator vIt;
570 for(vIt=vList->begin(); vIt!=vList->end(); vIt++) {
571 if(vIt->find(thisKanji)!=wxString::npos) {
572 crossRefList.push_back(*vIt);
575 if(crossRefList.size()>0) {
576 result.append(_T("<li>This kanji is used by words in your study list:<br><font size=\"7\">"));
577 vIt = crossRefList.begin();
578 result.append(*vIt);
579 for(++vIt; vIt!=crossRefList.end(); vIt++) {
580 result.append(_T("&nbsp; ")).append(*vIt);
582 result.append(_T("</font></li>"));
585 if((options & KDO_LOWIMPORTANCE) != 0) {
586 if(koreanRomanization.length()>0) lowRelevance.append(_T("<li>Korean romanization: ")).append(koreanRomanization).append(_T("</li>"));
587 if(pinyinRomanization.length()>0) lowRelevance.append(_T("<li>Pinyin romanization: ")).append(pinyinRomanization).append(_T("</li>"));
588 if(crossReferences.length()>0) lowRelevance.append(_T("<li>Cross reference codes: ")).append(crossReferences).append(_T("</li>"));
589 if(miscodes.length()>0) lowRelevance.append(_T("<li>Miscodes: ")).append(miscodes).append(_T("</li>"));
590 if(lowRelevance.length()>0) result.append(_T("<li>Extra Information:<ul>")).append(lowRelevance).append(_T("</ul></li>"));
592 if((options & KDO_UNHANDLED) != 0) {
593 if(unhandled.length()>0) result.append(_T("<li>Unhandled: ")).append(unhandled).append(_T("</li>"));
595 result.append(_T("</ul>"));
597 return result;
600 int KanjiDic::GetIntField(wxChar kanji, const wxString& marker) {
601 wxString markerStr, kanjiEntry, temp;
602 size_t index=0;
603 long value=-1;
604 int markerLen;
606 markerStr.Printf(_T(" %s"), marker.c_str());
607 markerLen=markerStr.length();
609 kanjiEntry = GetKanjidicStr(kanji);
610 if(kanjiEntry.length()>0) {
611 index = kanjiEntry.find(markerStr);
612 if(index!=wxString::npos) {
613 temp = kanjiEntry.substr(
614 index+markerLen,
615 kanjiEntry.find(_T(" "), index+1) - index - (markerLen-1));
616 temp.ToLong(&value);
620 return (int)value;
623 const BoostHM<wxChar,string> *KanjiDic::GetHashTable() {
624 return &kanjiHash;
/* Selects which part of a KANJIDIC entry GetKanjidicReading() extracts. */
enum {
	KDR_Onyomi  = 1, /* Readings written in katakana */
	KDR_Kunyomi = 2, /* Readings written in hiragana */
	KDR_English = 3  /* English meanings (the {...} fields) */
};
633 wxString KanjiDic::GetOnyomiStr(wxChar c) {
634 return GetKanjidicReading(c, KDR_Onyomi);
637 wxString KanjiDic::GetKunyomiStr(wxChar c) {
638 return GetKanjidicReading(c, KDR_Kunyomi);
641 wxString KanjiDic::GetEnglishStr(wxChar c) {
642 return GetKanjidicReading(c, KDR_English);
645 wxString KanjiDic::GetKanjidicReading(wxChar c, int readingType) {
646 wxString result;
647 wxString kanjidicStr = GetKanjidicStr(c);
649 long tmode = 0;
650 wxString sTemp, token;
651 wxStringTokenizer t(kanjidicStr, _T(' '));
653 /* The first two tokens are guaranteed not to be what we're looking for. Skip them. */
654 if(t.CountTokens()>1) {
655 t.GetNextToken();
656 t.GetNextToken();
658 while(t.HasMoreTokens()) {
659 token = t.GetNextToken();
660 sTemp = token;
661 c = sTemp[0];
662 /* If a preceding character is detected, strip it */
663 if(c == _T('(') || c == _T('〜')) {
664 sTemp = sTemp.substr(1);
665 c = sTemp[0];
667 if(tmode==0) {
668 if(IsKatakana(c) && readingType==KDR_Onyomi) {
669 /* Onyomi reading detected */
670 if(result.length()>0) result.append(_T(" "));
671 result.append(token); /* Copy the original string, including ()'s and 〜's */
672 continue;
674 else if(IsHiragana(c) && readingType==KDR_Kunyomi) {
675 /* Kunyomi reading detected */
676 if(result.length()>0) result.append(_T(" "));
677 result.append(token); /* Copy the original string, including ()'s and 〜's */
678 continue;
681 if(c == _T('{') && readingType==KDR_English) {
682 /* English meaning detected
683 Special handling is needed to take care of spaces, though.
684 We'll "cheat" and mess with our iterator a bit if a space is detected. */
685 while(t.HasMoreTokens() && sTemp[sTemp.length()-1] != _T('}')) {
686 sTemp.append(_T(" ")).append(t.GetNextToken());
688 if(result.length()>0) result.append(_T(", "));
689 result.append(sTemp.substr(1,sTemp.length()-2)); /* Strip the {} */
691 else if(c==_T('T')) wxString(sTemp.substr(1)).ToLong(&tmode);
694 return result;