Const conversions for edict and kanjidic objects. Removal of obsolete dictionary...
[jben.git] / kanjidic.cpp
blob2a42ed442bbb3781df1bd5707a48163096b2cf28
1 /*
2 Project: J-Ben
3 Author: Paul Goins
4 Website: http://www.vultaire.net/software/jben/
5 License: GNU General Public License (GPL) version 2
6 (http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt)
8 File: kanjidic.cpp
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2 of the License, or
13 (at your option) any later version.
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program. If not, see <http://www.gnu.org/licenses/>
24 #include "kanjidic.h"
25 #include "file_utils.h"
26 #include "jutils.h"
27 #include "wx/tokenzr.h"
28 #include "wx/file.h"
29 #include "global.h"
30 #include <fstream>
31 using namespace std;
33 Kanjidic *Kanjidic::LoadKanjidic(const char *filename, int& returnCode) {
34 Kanjidic *k=NULL;
35 char *rawData = NULL;
36 unsigned int size;
38 ifstream ifile(filename, ios::ate); /* "at end" to get our file size */
39 if(ifile) {
40 size = ifile.tellg();
41 ifile.seekg(0);
42 rawData = new char[size+1];
43 rawData[size] = '\0';
44 ifile.read(rawData, size);
45 #ifdef DEBUG
46 if(strlen(rawData)!=size)
47 fprintf(stderr,
48 "WARNING: kanjidic file size: %d, read-in string: %d\n",
49 strlen(rawData),
50 size);
51 #endif
53 /* Create the kanjidic object with our string data. */
54 k = new Kanjidic(rawData);
56 returnCode = KD_SUCCESS;
58 else
59 returnCode = KD_FAILURE;
61 if(rawData) delete[] rawData;
62 return k;
65 /* This could be sped up: copy the first UTF-8 character into a string, then
66 run a conversion on that. Trivial though. */
67 Kanjidic::Kanjidic(char *kanjidicRawData) {
68 char *token = strtok(kanjidicRawData, "\n");
69 wxString wxToken;
70 while(token) {
71 if( (strlen(token)>0) && (token[0]!='#') ) {
72 UTF8ToWx(token, wxToken);
73 /* Convert token to proper format */
74 wxToken = ConvertKanjidicEntry(wxToken);
75 /* Add to hash table */
76 if(!kanjiHash.assign(wxToken[0], token)) {
77 #ifdef DEBUG
78 fprintf(stderr,
79 "Error assigning (%lc, %ls) to hash table!\n",
80 wxToken[0], wxToken.c_str());
81 #endif
84 token = strtok(NULL, "\n");
88 Kanjidic::~Kanjidic() {
89 /* Currently: nothing here. */
92 /* This function returns a wxString containing the desired line of the
93 kanjidic hash table. A conversion from string to wxString is included
94 in this call since strings are only used for more compressed internal
95 storage. This is followed by a slight reformatting of the string for
96 better presentation. */
97 wxString Kanjidic::GetKanjidicStr(wxChar c) const {
98 BoostHM<wxChar,string>::iterator it = kanjiHash.find(c);
99 if(it==kanjiHash.end()) return _T("");
100 wxString s;
101 UTF8ToWx(it->second, s);
102 return ConvertKanjidicEntry(s);
106 * Performs transformations on a KANJIDIC string for our internal usage.
107 * Currently, this includes the following:
108 * - Changing あ.いう notation to あ(いう), a la JWPce/JFC.
109 * - Changing -あい notation to 〜あい, also a la JWPce/JFC.
111 wxString Kanjidic::ConvertKanjidicEntry(const wxString& s) {
112 size_t index, lastIndex;
113 wxString temp = s;
115 /* First conversion: あ.いう to あ(いう) */
116 index = temp.find(_T('.'), 0);
117 while(index!=wxString::npos) {
118 /* Proceed if the character preceding the "." is hiragana/katakana. */
119 if(IsFurigana(temp[index-1])) {
120 temp[index] = _T('(');
121 index = temp.find(_T(' '), index+1);
122 if(index==wxString::npos) {
123 temp.append(_T(')'));
124 break;
125 } else
126 temp.insert(index, _T(')'));
128 lastIndex = index;
129 index = temp.find(_T('.'), lastIndex+1);
132 /* Second conversion: - to 〜, when a neighboring character is hiragana/katakana */
133 index = temp.find(_T('-'), 0);
134 while(index!=wxString::npos) {
135 /* Proceed if the character before or after the "-" is hiragana/katakana. */
136 if(IsFurigana(temp[index-1]) || IsFurigana(temp[index+1]))
137 temp[index]=_T('〜');
139 lastIndex = index;
140 index = temp.find(_T('-'), lastIndex+1);
143 /* Return the converted string */
144 return temp;
147 wxString Kanjidic::KanjidicToHtml(const wxString& kanjidicStr) {
148 return KanjidicToHtml(kanjidicStr,
149 jben->prefs->kanjidicOptions,
150 jben->prefs->kanjidicDictionaries);
153 wxString Kanjidic::KanjidicToHtml(const wxString& kanjidicStr,
154 long options, long dictionaries) {
155 /* return wxString(_T("<p>"))
156 .append(s[0])
157 .append(_T("</p>"));*/
159 wxString result;
161 wxString header, onyomi, kunyomi, nanori, radicalReading, english;
162 wxString dictionaryInfo;
163 wxString lowRelevance;
164 wxString unhandled;
165 long grade = -1, frequency = -1, tmode = 0;
166 wxString strokes;
167 wxString koreanRomanization, pinyinRomanization, crossReferences, miscodes;
168 wxString sTemp, token;
169 wxStringTokenizer t(kanjidicStr, _T(' '));
170 wxChar c, c2;
172 /* Special processing for the first 2 entries of the line. */
173 if(t.CountTokens()>1) {
174 /* header = "<h1><font size=\"-6\">" + args[0] + "</font></h1>"; */
175 /*header.append(_T("<p style=\"font-size:32pt\">")) */
176 header.append(_T("<p><font size=\"7\">"))
177 .append(t.GetNextToken())
178 .append(_T("</font></p>"));
179 lowRelevance.append(_T("<li>JIS code: 0x"))
180 .append(t.GetNextToken())
181 .append(_T("</li>"));
184 /* NEW! Temporary code for loading in SODs and SODAs from KanjiCafe! */
185 if(options & (KDO_SOD_STATIC | KDO_SOD_ANIM) != 0) {
186 wxCSConv transcoder(_T("utf-8"));
187 if(transcoder.IsOk()) {
188 string utfStr;
189 /* Get a UTF8-encoded string for the kanji. */
190 WxToUTF8(kanjidicStr[0], utfStr);
191 sTemp.clear();
193 /* Convert to a low-to-high-byte hex string. */
194 for(unsigned int i=0;i<utfStr.length();i++) {
195 sTemp.Append(
196 wxString::Format(_T("%02x"),
197 (unsigned char)utfStr[i]));
200 wxString sod;
201 /* Load static SOD, if present */
202 if((options & KDO_SOD_STATIC) != 0) {
203 wxFileName fn;
204 fn.AppendDir(_T("sods"));
205 fn.AppendDir(_T("sod-utf8-hex"));
206 fn.SetName(sTemp);
207 fn.SetExt(_T("png"));
208 #ifdef DEBUG
209 printf("DEBUG: Checking for existance of file \"%ls\"...\n", fn.GetFullPath().c_str());
210 #endif
211 if(wxFile::Exists(
212 fn.GetFullPath()
213 )) {
214 sod.append(wxString::Format(
215 _T("<img src=\"%s\" />"), fn.GetFullPath().c_str()
219 /* Load animated SOD, if present */
220 if((options & KDO_SOD_ANIM) != 0) {
221 wxFileName fn;
222 fn.AppendDir(_T("sods"));
223 fn.AppendDir(_T("soda-utf8-hex"));
224 fn.SetName(sTemp);
225 fn.SetExt(_T("gif"));
226 #ifdef DEBUG
227 printf("DEBUG: Checking for existance of file \"%ls\"...\n", fn.GetFullPath().c_str());
228 #endif
229 if(wxFile::Exists(
230 fn.GetFullPath()
231 )) {
232 if(sod.length()>0) sod.append(_T("<br />"));
233 sod.append(wxString::Format(
234 _T("<img src=\"%s\" />"), fn.GetFullPath().c_str()
238 /* Append the chart(s) in a paragraph object. */
239 if(sod.length()>0) {
240 header.append(wxString::Format(
241 _T("<p>%s<br /><font size=\"1\">(Kanji stroke order graphics used under license from KanjiCafe.com.)</font></p>"), sod.c_str()
244 } else {
245 fprintf(stderr, "[%s:%d]: Bad transcoder selected!\n", __FILE__, __LINE__);
248 /* END OF EXPERIMENTAL NEW CODE */
250 while(t.HasMoreTokens()) {
251 token = t.GetNextToken();
252 sTemp = token;
253 c = sTemp[0];
254 /* If a preceding character is detected, strip it */
255 if(c == _T('(') || c == _T('〜')) {
256 sTemp = sTemp.substr(1);
257 c = sTemp[0];
259 if(tmode==0) {
260 if(IsKatakana(c)) {
261 /* Onyomi reading detected */
262 /*if(onyomi.length()>0) onyomi.append(_T(" ")); */
263 if(onyomi.length()>0) onyomi.append(_T("&nbsp; "));
264 onyomi.append(token); /* Copy the original string, including ()'s and 〜's */
265 continue;
267 else if(IsHiragana(c)) {
268 /* Kunyomi reading detected */
269 if(kunyomi.length()>0) kunyomi.append(_T("&nbsp; "));
270 kunyomi.append(token); /* Copy the original string, including ()'s and 〜's */
271 continue;
273 } else if(tmode==1) {
274 if(IsFurigana(c)) {
275 /* Nanori reading detected */
276 if(nanori.length()>0) nanori.append(_T("&nbsp; "));
277 nanori.append(token); /* Copy the original string, including ()'s and 〜's */
278 continue;
280 } else if(tmode==2) {
281 if(IsFurigana(c)) {
282 /* Special radical reading detected */
283 if(radicalReading.length()>0) radicalReading.append(_T("&nbsp; "));
284 radicalReading.append(token);
285 continue;
288 if(c == _T('{')) {
289 /* English meaning detected
290 Special handling is needed to take care of spaces, though.
291 We'll "cheat" and mess with our iterator a bit if a space is detected. */
292 while(t.HasMoreTokens() && sTemp[sTemp.length()-1] != _T('}')) {
293 sTemp.append(_T(" ")).append(t.GetNextToken());
295 if(english.length()>0) english.append(_T(", "));
296 english.append(sTemp.substr(1,sTemp.length()-2)); /* Strip the {} */
298 else {
299 switch(c) {
300 case _T('T'): /* Change "t mode" */
301 /* Note: substr() returns type wxStringBase, which disallows access to wxString::ToLong.
302 So, by making a copy of wxString and performing the conversion in the copy, we get around this.
303 This ugly kludge is repeated twice below for frequency and grade level. */
304 wxString(sTemp.substr(1)).ToLong(&tmode);
305 #ifdef DEBUG
306 if(tmode>2) printf("WARNING: T-mode set to %d.\nT-modes above 2 are not currently documented!", (int)tmode);
307 #endif
308 break;
309 case _T('B'): /* Bushu radical */
310 lowRelevance.append(_T("<li>Bushu radical: ")).append(sTemp.substr(1)).append(_T("</li>"));
311 break;
312 case _T('C'): /* Classical radical */
313 lowRelevance.append(_T("<li>Classical radical: ")).append(sTemp.substr(1)).append(_T("</li>"));
314 break;
315 case _T('F'): /* Frequency */
316 wxString(sTemp.substr(1)).ToLong(&frequency);
317 break;
318 case _T('G'): /* Grade level */
319 wxString(sTemp.substr(1)).ToLong(&grade);
320 break;
321 case _T('S'): /* Stroke count */
322 if(strokes.length()==0) {
323 strokes = sTemp.substr(1);
324 } else if(!strokes.find(_T(' '))!=wxString::npos) {
325 strokes.append(_T(" (Miscounts: "))
326 .append(sTemp.substr(1))
327 .append(_T(")"));
328 } else {
329 strokes = strokes.substr(0, strokes.length()-1)
330 .append(_T(", "))
331 .append(sTemp.substr(1))
332 .append(_T(")"));
334 break;
335 case _T('U'): /* Unicode value */
336 lowRelevance.append(_T("<li>Unicode: 0x")).append(sTemp.substr(1)).append(_T("</li>"));
337 break;
338 /* From here, it's all dictionary codes */
339 case _T('H'):
340 if((dictionaries & KDD_NJECD)!=0)
341 dictionaryInfo.append(_T("<li>New Japanese-English Character Dictionary (Halpern): "))
342 .append(sTemp.substr(1)).append(_T("</li>"));
343 break;
344 case _T('N'):
345 if((dictionaries & KDD_MRJECD)!=0)
346 dictionaryInfo.append(_T("<li>Modern Reader's Japanese-English Character Dictionary (Nelson): "))
347 .append(sTemp.substr(1)).append(_T("</li>"));
348 break;
349 case _T('V'):
350 if((dictionaries & KDD_NNJECD)!=0)
351 dictionaryInfo.append(_T("<li>The New Nelson's Japanese-English Character Dictionary: "))
352 .append(sTemp.substr(1)).append(_T("</li>"));
353 break;
354 case _T('P'):
355 /* SKIP codes. */
356 /* This is a thorny issue. If we want to include a stock KANJIDIC, then we */
357 /* need to add encryption to the file and prevent copy/pasting of that data. */
358 /* I'll comply later on, but for now I'll use a stripped KANJIDIC. */
359 #ifdef USE_SKIP
360 if((dictionaries & KDD_SKIP)!=0)
361 dictionaryInfo.append(_T("<li>SKIP code: "))
362 .append(sTemp.substr(1)).append(_T("</li>"));
363 #endif
364 break;
365 case _T('I'): /* Spahn/Hadamitzky dictionaries */
366 if(sTemp[1]==_T('N')) {
367 if((dictionaries & KDD_KK)!=0) {
368 dictionaryInfo.append(_T("<li>Kanji & Kana (Spahn, Hadamitzky): "))
369 .append(sTemp.substr(2)).append(_T("</li>"));
371 } else {
372 if((dictionaries & KDD_KD)!=0) {
373 dictionaryInfo.append(_T("<li>Kanji Dictionary (Spahn, Hadamitzky): "))
374 .append(sTemp.substr(1)).append(_T("</li>"));
377 break;
378 case _T('Q'):
379 if((dictionaries & KDD_FC)!=0) {
380 dictionaryInfo.append(_T("<li>Four Corner code: "))
381 .append(sTemp.substr(1)).append(_T("</li>"));
383 break;
384 case _T('M'):
385 c2 = sTemp[1];
386 if(c2==_T('N')) {
387 if((dictionaries & KDD_MOROI)!=0) {
388 dictionaryInfo.append(_T("<li>Morohashi Daikanwajiten Index: "))
389 .append(sTemp.substr(2)).append(_T("</li>"));
391 } else if(c2==_T('P')) {
392 if((dictionaries & KDD_MOROVP)!=0) {
393 dictionaryInfo.append(_T("<li>Morohashi Daikanwajiten Volume/Page: "))
394 .append(sTemp.substr(2)).append(_T("</li>"));
397 break;
398 case _T('E'):
399 if((dictionaries & KDD_GRJC)!=0) {
400 dictionaryInfo.append(_T("<li>A Guide to Remembering Japanese Characters (Henshal): "))
401 .append(sTemp.substr(1)).append(_T("</li>"));
403 break;
404 case _T('K'):
405 if((dictionaries & KDD_GKD)!=0) {
406 dictionaryInfo.append(_T("<li>Gakken Kanji Dictionary (\"A New Dictionary of Kanji Usage\"): "))
407 .append(sTemp.substr(1)).append(_T("</li>"));
409 break;
410 case _T('L'):
411 if((dictionaries & KDD_RTK)!=0) {
412 dictionaryInfo.append(_T("<li>Remembering the Kanji (Heisig): "))
413 .append(sTemp.substr(1)).append(_T("</li>"));
415 break;
416 case _T('O'):
417 if((dictionaries & KDD_JN)!=0) {
418 dictionaryInfo.append(_T("<li>Japanese Names (O'Neill): "))
419 .append(sTemp.substr(1)).append(_T("</li>"));
421 break;
422 case _T('D'):
423 c2 = sTemp[1];
424 switch(c2) {
425 case _T('B'):
426 if((dictionaries & KDD_JBP)!=0) {
427 dictionaryInfo.append(_T("<li>Japanese for Busy People (AJLT): "))
428 .append(sTemp.substr(2)).append(_T("</li>"));
430 break;
431 case _T('C'):
432 if((dictionaries & KDD_KWJLP)!=0) {
433 dictionaryInfo.append(_T("<li>The Kanji Way to Japanese Language Power (Crowley): "))
434 .append(sTemp.substr(2)).append(_T("</li>"));
436 break;
437 case _T('F'):
438 if((dictionaries & KDD_JKF)!=0) {
439 dictionaryInfo.append(_T("<li>Japanese Kanji Flashcards (White Rabbit Press): "))
440 .append(sTemp.substr(2)).append(_T("</li>"));
442 break;
443 case _T('G'):
444 if((dictionaries & KDD_KCKG)!=0) {
445 dictionaryInfo.append(_T("<li>Kodansha Compact Kanji Guide: "))
446 .append(sTemp.substr(2)).append(_T("</li>"));
448 break;
449 case _T('H'):
450 if((dictionaries & KDD_GTRWJH)!=0) {
451 dictionaryInfo.append(_T("<li>A Guide To Reading and Writing Japanese (Hensall): "))
452 .append(sTemp.substr(2)).append(_T("</li>"));
454 break;
455 case _T('J'):
456 if((dictionaries & KDD_KIC)!=0) {
457 dictionaryInfo.append(_T("<li>Kanji in Context (Nishiguchi and Kono): "))
458 .append(sTemp.substr(2)).append(_T("</li>"));
460 break;
461 case _T('K'):
462 if((dictionaries & KDD_KLD)!=0) {
463 dictionaryInfo.append(_T("<li>Kanji Learner's Dictionary (Halpern): "))
464 .append(sTemp.substr(2)).append(_T("</li>"));
466 break;
467 case _T('O'):
468 if((dictionaries & KDD_EK)!=0) {
469 dictionaryInfo.append(_T("<li>Essential Kanji (O'Neill): "))
470 .append(sTemp.substr(2)).append(_T("</li>"));
472 break;
473 case _T('R'):
474 if((dictionaries & KDD_DR)!=0) {
475 dictionaryInfo.append(_T("<li>2001 Kanji (De Roo): "))
476 .append(sTemp.substr(2)).append(_T("</li>"));
478 break;
479 case _T('S'):
480 if((dictionaries & KDD_GTRWJS)!=0) {
481 dictionaryInfo.append(_T("<li>A Guide to Reading and Writing Japanese (Sakade): "))
482 .append(sTemp.substr(2)).append(_T("</li>"));
484 break;
485 case _T('T'):
486 if((dictionaries & KDD_TKC)!=0) {
487 dictionaryInfo.append(_T("<li>Tuttle Kanji Cards (Kask): "))
488 .append(sTemp.substr(2)).append(_T("</li>"));
490 break;
491 default:
492 if(unhandled.length()>0) unhandled.append(_T(" "));
493 unhandled.append(sTemp);
494 break;
496 break;
497 /* Crossreferences and miscodes */
498 case _T('X'):
499 if(crossReferences.length()>0) crossReferences.append(_T(", "));
500 crossReferences.append(sTemp.substr(1));
501 break;
502 case _T('Z'):
503 if(miscodes.length()>0) miscodes.append(_T(", "));
504 miscodes.append(sTemp.substr(1));
505 break;
506 /* Korean/Pinyin (Chinese) romanization */
507 case _T('W'):
508 if(koreanRomanization.length()>0) koreanRomanization.append(_T(", "));
509 koreanRomanization.append(sTemp.substr(1));
510 break;
511 case _T('Y'):
512 if(pinyinRomanization.length()>0) pinyinRomanization.append(_T(", "));
513 pinyinRomanization.append(sTemp.substr(1));
514 break;
515 default:
516 if(unhandled.length()>0) unhandled.append(_T(" "));
517 unhandled.append(sTemp);
518 break;
521 } /* while(t.HasMoreTokens()) */
523 if(header.length() > 0) result.append(header);
524 #ifdef DEBUG
525 printf("DEBUG: header=[%ls]\n", header.c_str());
526 #endif
527 result.append(_T("<ul>"));
528 if((options & KDO_READINGS) != 0) {
529 if(onyomi.length() > 0) result.append(_T("<li>Onyomi Readings: ")).append(onyomi).append(_T("</li>"));
530 if(kunyomi.length() > 0) result.append(_T("<li>Kunyomi Readings: ")).append(kunyomi).append(_T("</li>"));
531 if(nanori.length() > 0) result.append(_T("<li>Nanori Readings: ")).append(nanori).append(_T("</li>"));
532 if(radicalReading.length() > 0) result.append(_T("<li>Special Radical Reading: ")).append(radicalReading).append(_T("</li>"));
534 if((options & KDO_MEANINGS) != 0) {
535 if(english.length() > 0) result.append(_T("<li>English Meanings: ")).append(english).append(_T("</li>"));
537 if((options & KDO_HIGHIMPORTANCE) != 0) {
538 if(strokes.length() > 0)
539 result.append(_T("<li>Stroke count: ")).append(strokes).append(_T("</li>"));
540 else
541 result.append(_T("<li>Stroke count: not specified in KANJIDIC"));
542 result.append(_T("<li>Grade Level: "));
543 if(grade<=6 && grade >= 1) { /* Jouyou (Grade #) */
544 result.append(_T("Jouyou (Grade "))
545 .append(wxString::Format(_T("%d"), (int)grade))
546 .append(_T(")"));
547 } else if(grade==8) { /* Jouyou (General usage) */
548 result.append(_T("Jouyou (General usage)"));
549 } else if(grade==9) { /* Jinmeiyou (Characters for names) */
550 result.append(_T("Jinmeiyou (Characters for names)"));
551 } else if(grade==-1) { /* No flag specified in kanjidic string */
552 result.append(_T("Unspecified"));
553 } else {
554 result.append(_T("Unhandled grade level (Grade "))
555 .append(wxString::Format(_T("%d"), (int)grade))
556 .append(_T(")"));
558 result.append(_T("</li>"));
559 if(frequency!=-1)
560 result.append(_T("<li>Frequency Ranking: "))
561 .append(wxString::Format(_T("%d"), (int)frequency))
562 .append(_T("</li>"));
563 else result.append(_T("<li>Frequency Ranking: Unspecified</li>"));
565 if((options & KDO_DICTIONARIES) != 0) {
566 if(dictionaryInfo.length()>0) result.append(_T("<li>Dictionary Codes:<ul>")).append(dictionaryInfo).append(_T("</ul></li>"));
568 if((options & KDO_VOCABCROSSREF) != 0) {
569 vector<wxString> *vList = &(jben->vocabList->GetVocabList());
570 wxChar thisKanji = kanjidicStr[0];
571 vector<wxString> crossRefList;
572 vector<wxString>::iterator vIt;
573 for(vIt=vList->begin(); vIt!=vList->end(); vIt++) {
574 if(vIt->find(thisKanji)!=wxString::npos) {
575 crossRefList.push_back(*vIt);
578 if(crossRefList.size()>0) {
579 result.append(_T("<li>This kanji is used by words in your study list:<br><font size=\"7\">"));
580 vIt = crossRefList.begin();
581 result.append(*vIt);
582 for(++vIt; vIt!=crossRefList.end(); vIt++) {
583 result.append(_T("&nbsp; ")).append(*vIt);
585 result.append(_T("</font></li>"));
588 if((options & KDO_LOWIMPORTANCE) != 0) {
589 if(koreanRomanization.length()>0) lowRelevance.append(_T("<li>Korean romanization: ")).append(koreanRomanization).append(_T("</li>"));
590 if(pinyinRomanization.length()>0) lowRelevance.append(_T("<li>Pinyin romanization: ")).append(pinyinRomanization).append(_T("</li>"));
591 if(crossReferences.length()>0) lowRelevance.append(_T("<li>Cross reference codes: ")).append(crossReferences).append(_T("</li>"));
592 if(miscodes.length()>0) lowRelevance.append(_T("<li>Miscodes: ")).append(miscodes).append(_T("</li>"));
593 if(lowRelevance.length()>0) result.append(_T("<li>Extra Information:<ul>")).append(lowRelevance).append(_T("</ul></li>"));
595 if((options & KDO_UNHANDLED) != 0) {
596 if(unhandled.length()>0) result.append(_T("<li>Unhandled: ")).append(unhandled).append(_T("</li>"));
598 result.append(_T("</ul>"));
600 return result;
603 int Kanjidic::GetIntField(wxChar kanji, const wxString& marker) const {
604 wxString markerStr, kanjiEntry, temp;
605 size_t index=0;
606 long value=-1;
607 int markerLen;
609 markerStr.Printf(_T(" %s"), marker.c_str());
610 markerLen=markerStr.length();
612 kanjiEntry = GetKanjidicStr(kanji);
613 if(kanjiEntry.length()>0) {
614 index = kanjiEntry.find(markerStr);
615 if(index!=wxString::npos) {
616 temp = kanjiEntry.substr(
617 index+markerLen,
618 kanjiEntry.find(_T(" "), index+1) - index - (markerLen-1));
619 temp.ToLong(&value);
623 return (int)value;
626 const BoostHM<wxChar,string>* const Kanjidic::GetHashTable() const {
627 return &kanjiHash;
/* Selectors passed as readingType to Kanjidic::GetKanjidicReading(). */
enum {
	KDR_Onyomi  = 1,
	KDR_Kunyomi = 2,
	KDR_English = 3
};
636 wxString Kanjidic::GetOnyomiStr(wxChar c) const {
637 return GetKanjidicReading(c, KDR_Onyomi);
640 wxString Kanjidic::GetKunyomiStr(wxChar c) const {
641 return GetKanjidicReading(c, KDR_Kunyomi);
644 wxString Kanjidic::GetEnglishStr(wxChar c) const {
645 return GetKanjidicReading(c, KDR_English);
648 wxString Kanjidic::GetKanjidicReading(wxChar c, int readingType) const {
649 wxString result;
650 wxString kanjidicStr = GetKanjidicStr(c);
652 long tmode = 0;
653 wxString sTemp, token;
654 wxStringTokenizer t(kanjidicStr, _T(' '));
656 /* The first two tokens are guaranteed not to be what we're looking for. Skip them. */
657 if(t.CountTokens()>1) {
658 t.GetNextToken();
659 t.GetNextToken();
661 while(t.HasMoreTokens()) {
662 token = t.GetNextToken();
663 sTemp = token;
664 c = sTemp[0];
665 /* If a preceding character is detected, strip it */
666 if(c == _T('(') || c == _T('〜')) {
667 sTemp = sTemp.substr(1);
668 c = sTemp[0];
670 if(tmode==0) {
671 if(IsKatakana(c) && readingType==KDR_Onyomi) {
672 /* Onyomi reading detected */
673 if(result.length()>0) result.append(_T(" "));
674 result.append(token); /* Copy the original string, including ()'s and 〜's */
675 continue;
677 else if(IsHiragana(c) && readingType==KDR_Kunyomi) {
678 /* Kunyomi reading detected */
679 if(result.length()>0) result.append(_T(" "));
680 result.append(token); /* Copy the original string, including ()'s and 〜's */
681 continue;
684 if(c == _T('{') && readingType==KDR_English) {
685 /* English meaning detected
686 Special handling is needed to take care of spaces, though.
687 We'll "cheat" and mess with our iterator a bit if a space is detected. */
688 while(t.HasMoreTokens() && sTemp[sTemp.length()-1] != _T('}')) {
689 sTemp.append(_T(" ")).append(t.GetNextToken());
691 if(result.length()>0) result.append(_T(", "));
692 result.append(sTemp.substr(1,sTemp.length()-2)); /* Strip the {} */
694 else if(c==_T('T')) wxString(sTemp.substr(1)).ToLong(&tmode);
697 return result;