Renamed kpengine to jben_kpengine and made its data dir relocatable.
[jben.git] / kdict.cpp
blob368eec4afbad0ec862b8fea14f21d3034bea466b
1 /*
2 Project: J-Ben
3 Author: Paul Goins
4 Website: http://www.vultaire.net/software/jben/
5 License: GNU General Public License (GPL) version 2
6 (http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt)
File: kdict.cpp
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2 of the License, or
13 (at your option) any later version.
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program. If not, see <http://www.gnu.org/licenses/>
24 #include "jben.h"
25 #include "kdict.h"
26 #include "file_utils.h"
27 #include "jutils.h"
28 #include "encoding_convert.h"
29 #include "string_utils.h"
30 #include "errorlog.h"
31 #include <sstream>
32 #include <iomanip>
33 #include <fstream>
34 #include <list>
35 using namespace std;
37 KDict* KDict::kdictSingleton = NULL;
39 const KDict *KDict::Get() {
40 if(!kdictSingleton)
41 kdictSingleton = new KDict;
42 return kdictSingleton;
45 KDict::KDict() {
46 Preferences *p = Preferences::Get();
47 LoadKanjidic(p->GetSetting("kdict_kanjidic").c_str());
48 LoadKradfile(p->GetSetting("kdict_kradfile").c_str());
49 LoadRadkfile(p->GetSetting("kdict_radkfile").c_str());
52 void KDict::Destroy() {
53 if(kdictSingleton) {
54 delete kdictSingleton;
55 kdictSingleton = NULL;
59 int KDict::LoadKanjidic(const char *filename) {
60 char *rawData = NULL;
61 unsigned int size;
62 int returnCode=KD_FAILURE;
64 ifstream ifile(filename, ios::ate); /* "at end" to get our file size */
65 if(ifile) {
66 size = ifile.tellg();
67 ifile.seekg(0);
68 rawData = new char[size+1];
69 rawData[size] = '\0';
70 ifile.read(rawData, size);
71 if(strlen(rawData)!=size) {
72 ostringstream os;
73 os << "kanjidic file size: "
74 << strlen(rawData)
75 << ", read-in string: "
76 << size << "\n";
77 el.Push(EL_Warning, os.str());
80 /* Create the kanjidic object with our string data. */
81 this->KanjidicParser(rawData);
83 returnCode = KD_SUCCESS;
85 else
86 returnCode = KD_FAILURE;
88 if(rawData) delete[] rawData;
89 return returnCode;
92 int KDict::LoadKradfile(const char *filename) {
93 int returnCode = KD_FAILURE;
94 stringbuf sb;
95 ifstream f(filename, ios::in|ios::binary);
96 if(f.is_open()) {
97 f >> &sb;
98 f.close();
100 list<wstring> data =
101 StrTokenize<wchar_t>(utfconv_mw(sb.str()), L"\n");
102 while(data.size()>0) {
103 wstring token = data.front();
104 data.pop_front();
105 if(token.length()>0 && token[0]!=L'#') {
106 /* KRADFILE-specific stuff here */
107 /* Get rid of the spaces in the string */
108 token = TextReplace<wchar_t>(token, L" ", L"");
109 /* Now we can easily pull in the data */
110 if(!kradData.assign(token[0], token.substr(2))) {
111 ostringstream os;
112 os << "KRADFILE: Error assigning ("
113 << utfconv_wm(token.substr(0,1))
114 << ", "
115 << utfconv_wm(token.substr(2))
116 << ") to hash table!\n";
117 el.Push(EL_Error, os.str());
122 returnCode = KD_SUCCESS;
124 return returnCode;
127 int KDict::LoadRadkfile(const char *filename) {
128 int returnCode = KD_FAILURE;
129 stringbuf sb;
130 ifstream f(filename, ios::in|ios::binary);
131 if(f.is_open()) {
132 f >> &sb;
133 f.close();
135 /* RADKFILE entries all start with $.
136 Split on $, and discard the first entry since it is the explanation
137 preceding the first entry. */
138 list<wstring> data =
139 StrTokenize<wchar_t>(utfconv_mw(sb.str()), L"$");
140 data.pop_front();
142 while(data.size()>0) {
143 wstring entry = data.front();
144 data.pop_front();
145 if(entry.length()>0 && entry[0]!=L'#') {
146 /* RADKFILE-specific stuff here */
147 list<wstring> entryData =
148 StrTokenize<wchar_t>(entry, L"\n", false, 2);
149 if(entryData.size()!=2) {
150 cerr << "Error: entryData.size() == "
151 << entryData.size() << "!!" << endl;
152 } else {
153 wchar_t key;
154 int strokeCount;
155 wstring value;
156 /* entryData.front() contains our key.
157 It's a space delimited string,
158 first token is our kanji, second is the stroke count.
159 A third token may be present, but is irrelevant. */
160 list<wstring> keyData =
161 StrTokenize<wchar_t>(entryData.front(), L" ");
162 wistringstream wiss;
163 wiss.str(keyData.front());
164 wiss >> key;
165 keyData.pop_front();
166 wiss.str(keyData.front());
167 wiss >> strokeCount;
169 /* entryData.back() contains the characters our key
170 maps to. */
171 /* Get rid of the spaces in the string */
172 value = entryData.back();
173 value = TextReplace<wchar_t>(value, L"\n", L"");
174 value = TextReplace<wchar_t>(value, L" ", L"");
176 if(!radkData.assign(key, value)) {
177 ostringstream os;
178 os << "RADKFILE: Error assigning ("
179 << utfconv_wm(wstring().append(1,key))
180 << ", "
181 << utfconv_wm(value)
182 << ") to hash table!\n";
183 el.Push(EL_Error, os.str());
185 if(!radkDataStrokes.assign(key, strokeCount)) {
186 ostringstream os;
187 os << "RADKFILE: Error assigning ("
188 << utfconv_wm(wstring().append(1,key))
189 << ", " << strokeCount << ") to hash table!\n";
190 el.Push(EL_Error, os.str());
196 returnCode = KD_SUCCESS;
198 return returnCode;
201 /* This could be sped up: copy the first UTF-8 character into a string, then
202 run a conversion on that. Trivial though. */
203 void KDict::KanjidicParser(char *kanjidicRawData) {
204 char *token = strtok(kanjidicRawData, "\n");
205 wstring wToken;
206 while(token) {
207 if( (strlen(token)>0) && (token[0]!='#') ) {
208 wToken = utfconv_mw(token);
209 /* Convert token to proper format */
210 wToken = ConvertKanjidicEntry(wToken);
211 /* Add to hash table */
212 if(!kanjidicData.assign(wToken[0], token)) {
213 ostringstream os;
214 string temp = utfconv_wm(wToken);
215 os << "Error assigning (" << temp[0]
216 << ", " << temp << ") to hash table!\n";
217 el.Push(EL_Error, os.str());
220 token = strtok(NULL, "\n");
224 KDict::~KDict() {
225 /* Currently: nothing here. */
228 /* This function returns a wstring containing the desired line of the
229 kanjidic hash table. A conversion from string to wstring is included
230 in this call since standardstrings are only used for more compressed
231 internal storage. This is followed by a slight reformatting of the
232 string for better presentation. */
233 wstring KDict::GetKanjidicStr(wchar_t c) const {
234 BoostHM<wchar_t,string>::iterator it = kanjidicData.find(c);
235 if(it==kanjidicData.end()) return L"";
236 wstring s;
237 s = utfconv_mw(it->second);
238 return ConvertKanjidicEntry(s);
242 * Performs transformations on a KANJIDIC string for our internal usage.
243 * Currently, this includes the following:
244 * - Changing あ.いう notation to あ(いう), a la JWPce/JFC.
245 * - Changing -あい notation to 〜あい, also a la JWPce/JFC.
247 wstring KDict::ConvertKanjidicEntry(const wstring& s) {
248 size_t index, lastIndex;
249 wstring temp = s;
251 /* First conversion: あ.いう to あ(いう) */
252 index = temp.find(L'.', 0);
253 while(index!=wstring::npos) {
254 /* Proceed if the character preceding the "." is hiragana/katakana. */
255 if(IsFurigana(temp[index-1])) {
256 temp[index] = L'(';
257 index = temp.find(L' ', index+1);
258 if(index==wstring::npos) {
259 temp.append(1, L')');
260 break;
261 } else
262 temp.insert(index, 1, L')');
264 lastIndex = index;
265 index = temp.find(L'.', lastIndex+1);
268 /* Second conversion: - to 〜, when a neighboring character is hiragana/katakana */
269 index = temp.find(L'-', 0);
270 while(index!=wstring::npos) {
271 /* Proceed if the character before or after the "-" is hiragana/katakana. */
272 if(IsFurigana(temp[index-1]) || IsFurigana(temp[index+1]))
273 temp[index]=L'〜';
275 lastIndex = index;
276 index = temp.find(L'-', lastIndex+1);
279 /* Return the converted string */
280 return temp;
283 wstring KDict::KanjidicToHtml(const wstring& kanjidicStr) {
284 Preferences *prefs = Preferences::Get();
285 return KanjidicToHtml(kanjidicStr,
286 prefs->kanjidicOptions,
287 prefs->kanjidicDictionaries);
290 wstring KDict::KanjidicToHtml(const wstring& kanjidicStr,
291 long options, long dictionaries) {
292 /* return wstring(L"<p>")
293 .append(s[0])
294 .append(L"</p>");*/
296 wostringstream result;
297 wostringstream header;
298 wstring onyomi, kunyomi, nanori, radicalReading, english;
299 wstring dictionaryInfo;
300 wstring lowRelevance;
301 wstring unhandled;
302 long grade = -1, frequency = -1, tmode = 0;
303 wstring strokes;
304 wstring koreanRomanization, pinyinRomanization, crossReferences, miscodes;
305 wstring sTemp, token;
306 list<wstring> t = StrTokenize<wchar_t>(kanjidicStr, L" ");
307 wchar_t c, c2;
309 /* Special processing for the first 2 entries of the line. */
310 if(t.size()>1) {
311 /* header = "<h1><font size=\"-6\">" + args[0] + "</font></h1>"; */
312 /*header.append(L"<p style=\"font-size:32pt\">") */
313 header << L"<p><font size=\"7\">" << t.front() << L"</font></p>";
314 t.pop_front();
315 lowRelevance.append(L"<li>JIS code: 0x")
316 .append(t.front())
317 .append(L"</li>");
318 t.pop_front();
321 /* NEW! Temporary code for loading in SODs and SODAs from KanjiCafe! */
322 if(options & (KDO_SOD_STATIC | KDO_SOD_ANIM) != 0) {
323 string utfStr;
324 /* Get a UTF8-encoded string for the kanji. */
325 utfStr = utfconv_wm(kanjidicStr.substr(0,1));
327 /* Convert to a low-to-high-byte hex string. */
328 ostringstream ss;
329 for(unsigned int i=0;i<utfStr.length();i++) {
330 ss << hex << setw(2) << setfill('0')
331 << (unsigned int)((unsigned char)utfStr[i]);
334 wstringstream sod;
335 /* Load static SOD, if present */
336 if((options & KDO_SOD_STATIC) != 0) {
337 ostringstream fn;
338 fn << "sods" << DIRSEP
339 << "sod-utf8-hex" << DIRSEP
340 << ss.str() << ".png";
341 #ifdef DEBUG
342 printf("DEBUG: Checking for existance of file \"%s\"...\n", fn.str().c_str());
343 #endif
344 ifstream f(fn.str().c_str());
345 if(f.is_open()) {
346 f.close();
347 if(sod.str().length()>0) sod << L"<br />";
348 sod << L"<img src=\"" << utfconv_mw(fn.str()) << L"\" />";
351 /* Load animated SOD, if present */
352 if((options & KDO_SOD_ANIM) != 0) {
353 ostringstream fn;
354 fn << "sods" << DIRSEP
355 << "soda-utf8-hex" << DIRSEP
356 << ss.str() << ".gif";
357 #ifdef DEBUG
358 printf("DEBUG: Checking for existance of file \"%s\"...\n", fn.str().c_str());
359 #endif
360 ifstream f(fn.str().c_str());
361 if(f.is_open()) {
362 f.close();
363 if(sod.str().length()>0) sod << L"<br />";
364 sod << L"<img src=\"" << utfconv_mw(fn.str()) << L"\" />";
367 /* Append the chart(s) in a paragraph object. */
368 if(sod.str().length()>0) {
369 header << L"<p>" << sod.str() <<
370 L"<br /><font size=\"1\">(Kanji stroke order graphics used under license from KanjiCafe.com.)</font></p>";
374 while(t.size()>0) {
375 token = t.front();
376 t.pop_front();
377 sTemp = token;
378 c = sTemp[0];
379 /* If a preceding character is detected, strip it */
380 if(c == L'(' || c == L'〜') {
381 sTemp = sTemp.substr(1);
382 c = sTemp[0];
384 if(tmode==0) {
385 if(IsKatakana(c)) {
386 /* Onyomi reading detected */
387 /*if(onyomi.length()>0) onyomi.append(L" "); */
388 if(onyomi.length()>0) onyomi.append(L"&nbsp; ");
389 onyomi.append(token); /* Copy the original string, including ()'s and 〜's */
390 continue;
392 else if(IsHiragana(c)) {
393 /* Kunyomi reading detected */
394 if(kunyomi.length()>0) kunyomi.append(L"&nbsp; ");
395 kunyomi.append(token); /* Copy the original string, including ()'s and 〜's */
396 continue;
398 } else if(tmode==1) {
399 if(IsFurigana(c)) {
400 /* Nanori reading detected */
401 if(nanori.length()>0) nanori.append(L"&nbsp; ");
402 nanori.append(token); /* Copy the original string, including ()'s and 〜's */
403 continue;
405 } else if(tmode==2) {
406 if(IsFurigana(c)) {
407 /* Special radical reading detected */
408 if(radicalReading.length()>0) radicalReading.append(L"&nbsp; ");
409 radicalReading.append(token);
410 continue;
413 if(c == L'{') {
414 /* English meaning detected
415 Special handling is needed to take care of spaces, though.
416 We'll "cheat" and mess with our iterator a bit if a space is detected. */
417 while(t.size()>0 && sTemp[sTemp.length()-1] != L'}') {
418 sTemp.append(L" ").append(t.front());
419 t.pop_front();
421 if(english.length()>0) english.append(L", ");
422 english.append(sTemp.substr(1,sTemp.length()-2)); /* Strip the {} */
424 else {
425 switch(c) {
426 case L'T': /* Change "t mode" */
427 /*wstring(sTemp.substr(1)).ToLong(&tmode);*/
428 wistringstream(sTemp.substr(1)) >> tmode;
429 #ifdef DEBUG
430 if(tmode>2) printf("WARNING: T-mode set to %d.\nT-modes above 2 are not currently documented!", (int)tmode);
431 #endif
432 break;
433 case L'B': /* Bushu radical */
434 lowRelevance.append(L"<li>Bushu radical: ").append(sTemp.substr(1)).append(L"</li>");
435 break;
436 case L'C': /* Classical radical */
437 lowRelevance.append(L"<li>Classical radical: ").append(sTemp.substr(1)).append(L"</li>");
438 break;
439 case L'F': /* Frequency */
440 /*wstring(sTemp.substr(1)).ToLong(&frequency);*/
441 wistringstream(sTemp.substr(1)) >> frequency;
442 break;
443 case L'G': /* Grade level */
444 /*wstring(sTemp.substr(1)).ToLong(&grade);*/
445 wistringstream(sTemp.substr(1)) >> grade;
446 break;
447 case L'S': /* Stroke count */
448 if(strokes.length()==0) {
449 strokes = sTemp.substr(1);
450 } else if(!strokes.find(L' ')!=wstring::npos) {
451 strokes.append(L" (Miscounts: ")
452 .append(sTemp.substr(1))
453 .append(L")");
454 } else {
455 strokes = strokes.substr(0, strokes.length()-1)
456 .append(L", ")
457 .append(sTemp.substr(1))
458 .append(L")");
460 break;
461 case L'U': /* Unicode value */
462 lowRelevance.append(L"<li>Unicode: 0x").append(sTemp.substr(1)).append(L"</li>");
463 break;
464 /* From here, it's all dictionary codes */
465 case L'H':
466 if((dictionaries & KDD_NJECD)!=0)
467 dictionaryInfo.append(L"<li>New Japanese-English Character Dictionary (Halpern): ")
468 .append(sTemp.substr(1)).append(L"</li>");
469 break;
470 case L'N':
471 if((dictionaries & KDD_MRJECD)!=0)
472 dictionaryInfo.append(L"<li>Modern Reader's Japanese-English Character Dictionary (Nelson): ")
473 .append(sTemp.substr(1)).append(L"</li>");
474 break;
475 case L'V':
476 if((dictionaries & KDD_NNJECD)!=0)
477 dictionaryInfo.append(L"<li>The New Nelson's Japanese-English Character Dictionary: ")
478 .append(sTemp.substr(1)).append(L"</li>");
479 break;
480 case L'P':
481 /* SKIP codes. */
482 /* Thanks to changes in permissible SKIP code usage (change to
483 Creative Commons licensing in January 2008), we can now use
484 this without problems. */
485 if((dictionaries & KDD_SKIP)!=0)
486 dictionaryInfo.append(L"<li>SKIP code: ")
487 .append(sTemp.substr(1)).append(L"</li>");
488 break;
489 case L'I': /* Spahn/Hadamitzky dictionaries */
490 if(sTemp[1]==L'N') {
491 if((dictionaries & KDD_KK)!=0) {
492 dictionaryInfo.append(L"<li>Kanji & Kana (Spahn, Hadamitzky): ")
493 .append(sTemp.substr(2)).append(L"</li>");
495 } else {
496 if((dictionaries & KDD_KD)!=0) {
497 dictionaryInfo.append(L"<li>Kanji Dictionary (Spahn, Hadamitzky): ")
498 .append(sTemp.substr(1)).append(L"</li>");
501 break;
502 case L'Q':
503 if((dictionaries & KDD_FC)!=0) {
504 dictionaryInfo.append(L"<li>Four Corner code: ")
505 .append(sTemp.substr(1)).append(L"</li>");
507 break;
508 case L'M':
509 c2 = sTemp[1];
510 if(c2==L'N') {
511 if((dictionaries & KDD_MOROI)!=0) {
512 dictionaryInfo.append(L"<li>Morohashi Daikanwajiten Index: ")
513 .append(sTemp.substr(2)).append(L"</li>");
515 } else if(c2==L'P') {
516 if((dictionaries & KDD_MOROVP)!=0) {
517 dictionaryInfo.append(L"<li>Morohashi Daikanwajiten Volume/Page: ")
518 .append(sTemp.substr(2)).append(L"</li>");
521 break;
522 case L'E':
523 if((dictionaries & KDD_GRJC)!=0) {
524 dictionaryInfo.append(L"<li>A Guide to Remembering Japanese Characters (Henshal): ")
525 .append(sTemp.substr(1)).append(L"</li>");
527 break;
528 case L'K':
529 if((dictionaries & KDD_GKD)!=0) {
530 dictionaryInfo.append(L"<li>Gakken Kanji Dictionary (\"A New Dictionary of Kanji Usage\"): ")
531 .append(sTemp.substr(1)).append(L"</li>");
533 break;
534 case L'L':
535 if((dictionaries & KDD_RTK)!=0) {
536 dictionaryInfo.append(L"<li>Remembering the Kanji (Heisig): ")
537 .append(sTemp.substr(1)).append(L"</li>");
539 break;
540 case L'O':
541 if((dictionaries & KDD_JN)!=0) {
542 dictionaryInfo.append(L"<li>Japanese Names (O'Neill): ")
543 .append(sTemp.substr(1)).append(L"</li>");
545 break;
546 case L'D':
547 c2 = sTemp[1];
548 switch(c2) {
549 case L'B':
550 if((dictionaries & KDD_JBP)!=0) {
551 dictionaryInfo.append(L"<li>Japanese for Busy People (AJLT): ")
552 .append(sTemp.substr(2)).append(L"</li>");
554 break;
555 case L'C':
556 if((dictionaries & KDD_KWJLP)!=0) {
557 dictionaryInfo.append(L"<li>The Kanji Way to Japanese Language Power (Crowley): ")
558 .append(sTemp.substr(2)).append(L"</li>");
560 break;
561 case L'F':
562 if((dictionaries & KDD_JKF)!=0) {
563 dictionaryInfo.append(L"<li>Japanese Kanji Flashcards (White Rabbit Press): ")
564 .append(sTemp.substr(2)).append(L"</li>");
566 break;
567 case L'G':
568 if((dictionaries & KDD_KCKG)!=0) {
569 dictionaryInfo.append(L"<li>Kodansha Compact Kanji Guide: ")
570 .append(sTemp.substr(2)).append(L"</li>");
572 break;
573 case L'H':
574 if((dictionaries & KDD_GTRWJH)!=0) {
575 dictionaryInfo.append(L"<li>A Guide To Reading and Writing Japanese (Hensall): ")
576 .append(sTemp.substr(2)).append(L"</li>");
578 break;
579 case L'J':
580 if((dictionaries & KDD_KIC)!=0) {
581 dictionaryInfo.append(L"<li>Kanji in Context (Nishiguchi and Kono): ")
582 .append(sTemp.substr(2)).append(L"</li>");
584 break;
585 case L'K':
586 if((dictionaries & KDD_KLD)!=0) {
587 dictionaryInfo.append(L"<li>Kanji Learner's Dictionary (Halpern): ")
588 .append(sTemp.substr(2)).append(L"</li>");
590 break;
591 case L'O':
592 if((dictionaries & KDD_EK)!=0) {
593 dictionaryInfo.append(L"<li>Essential Kanji (O'Neill): ")
594 .append(sTemp.substr(2)).append(L"</li>");
596 break;
597 case L'R':
598 if((dictionaries & KDD_DR)!=0) {
599 dictionaryInfo.append(L"<li>2001 Kanji (De Roo): ")
600 .append(sTemp.substr(2)).append(L"</li>");
602 break;
603 case L'S':
604 if((dictionaries & KDD_GTRWJS)!=0) {
605 dictionaryInfo.append(L"<li>A Guide to Reading and Writing Japanese (Sakade): ")
606 .append(sTemp.substr(2)).append(L"</li>");
608 break;
609 case L'T':
610 if((dictionaries & KDD_TKC)!=0) {
611 dictionaryInfo.append(L"<li>Tuttle Kanji Cards (Kask): ")
612 .append(sTemp.substr(2)).append(L"</li>");
614 break;
615 default:
616 if(unhandled.length()>0) unhandled.append(L" ");
617 unhandled.append(sTemp);
618 break;
620 break;
621 /* Crossreferences and miscodes */
622 case L'X':
623 if(crossReferences.length()>0) crossReferences.append(L", ");
624 crossReferences.append(sTemp.substr(1));
625 break;
626 case L'Z':
627 if(miscodes.length()>0) miscodes.append(L", ");
628 miscodes.append(sTemp.substr(1));
629 break;
630 /* Korean/Pinyin (Chinese) romanization */
631 case L'W':
632 if(koreanRomanization.length()>0) koreanRomanization.append(L", ");
633 koreanRomanization.append(sTemp.substr(1));
634 break;
635 case L'Y':
636 if(pinyinRomanization.length()>0) pinyinRomanization.append(L", ");
637 pinyinRomanization.append(sTemp.substr(1));
638 break;
639 default:
640 if(unhandled.length()>0) unhandled.append(L" ");
641 unhandled.append(sTemp);
642 break;
645 } /* while(t.HasMoreTokens()) */
647 if(header.str().length() > 0) result << header.str();
648 #ifdef DEBUG
649 printf("DEBUG: header=[%ls]\n", header.str().c_str());
650 #endif
651 result << L"<ul>";
652 if((options & KDO_READINGS) != 0) {
653 if(onyomi.length() > 0)
654 result << L"<li>Onyomi Readings: " << onyomi << L"</li>";
655 if(kunyomi.length() > 0)
656 result << L"<li>Kunyomi Readings: " << kunyomi << L"</li>";
657 if(nanori.length() > 0)
658 result << L"<li>Nanori Readings: " << nanori << L"</li>";
659 if(radicalReading.length() > 0)
660 result << L"<li>Special Radical Reading: " << radicalReading <<
661 L"</li>";
663 if((options & KDO_MEANINGS) != 0) {
664 if(english.length() > 0)
665 result << L"<li>English Meanings: " << english << L"</li>";
667 if((options & KDO_HIGHIMPORTANCE) != 0) {
668 if(strokes.length() > 0)
669 result << L"<li>Stroke count: " << strokes << L"</li>";
670 else
671 result << L"<li>Stroke count: not specified in KANJIDIC</li>";
672 result << L"<li>Grade Level: ";
673 if(grade<=6 && grade >= 1) { /* Jouyou (Grade #) */
674 result << L"Jouyou (Grade " << grade << L")";
675 } else if(grade==8) { /* Jouyou (General usage) */
676 result << L"Jouyou (General usage)";
677 } else if(grade==9) { /* Jinmeiyou (Characters for names) */
678 result << L"Jinmeiyou (Characters for names)";
679 } else if(grade==-1) { /* No flag specified in kanjidic string */
680 result << L"Unspecified";
681 } else {
682 result << L"Unhandled grade level (Grade " << grade << L")";
684 result << L"</li>";
685 if(frequency!=-1)
686 result << L"<li>Frequency Ranking: " << frequency << L"</li>";
687 else result << L"<li>Frequency Ranking: Unspecified</li>";
689 if((options & KDO_DICTIONARIES) != 0) {
690 if(dictionaryInfo.length()>0)
691 result << L"<li>Dictionary Codes:<ul>" << dictionaryInfo
692 << L"</ul></li>";
694 if((options & KDO_VOCABCROSSREF) != 0) {
695 vector<wstring> *vList = &(jben->vocabList->GetVocabList());
696 wchar_t thisKanji = kanjidicStr[0];
697 vector<wstring> crossRefList;
698 vector<wstring>::iterator vIt;
699 for(vIt=vList->begin(); vIt!=vList->end(); vIt++) {
700 if(vIt->find(thisKanji)!=wstring::npos) {
701 crossRefList.push_back(*vIt);
704 if(crossRefList.size()>0) {
705 result << L"<li>This kanji is used by words in your study list:<br><font size=\"7\">";
706 vIt = crossRefList.begin();
707 result << *vIt;
708 for(++vIt; vIt!=crossRefList.end(); vIt++) {
709 result << L"&nbsp; " << *vIt;
711 result << L"</font></li>";
714 if((options & KDO_LOWIMPORTANCE) != 0) {
715 if(koreanRomanization.length()>0) lowRelevance.append(L"<li>Korean romanization: ").append(koreanRomanization).append(L"</li>");
716 if(pinyinRomanization.length()>0) lowRelevance.append(L"<li>Pinyin romanization: ").append(pinyinRomanization).append(L"</li>");
717 if(crossReferences.length()>0) lowRelevance.append(L"<li>Cross reference codes: ").append(crossReferences).append(L"</li>");
718 if(miscodes.length()>0) lowRelevance.append(L"<li>Miscodes: ").append(miscodes).append(L"</li>");
719 if(lowRelevance.length()>0)
720 result << L"<li>Extra Information:<ul>" << lowRelevance
721 << L"</ul></li>";
723 if((options & KDO_UNHANDLED) != 0) {
724 if(unhandled.length()>0)
725 result << L"<li>Unhandled: " << unhandled << L"</li>";
727 result << L"</ul>";
729 return result.str();
732 int KDict::GetIntField(wchar_t kanji, const wstring& marker) const {
733 wstring markerStr, kanjiEntry, temp;
734 size_t index=0;
735 long value=-1;
736 int markerLen;
738 markerStr.append(L" ").append(marker);
739 markerLen=markerStr.length();
741 kanjiEntry = GetKanjidicStr(kanji);
742 if(kanjiEntry.length()>0) {
743 index = kanjiEntry.find(markerStr);
744 if(index!=wstring::npos) {
745 temp = kanjiEntry.substr(
746 index+markerLen,
747 kanjiEntry.find(L" ", index+1) - index - (markerLen-1));
748 /*temp.ToLong(&value);*/
749 wistringstream(temp) >> value;
753 return (int)value;
756 const BoostHM<wchar_t,string>* KDict::GetHashTable() const {
757 return &kanjidicData;
/* Selectors passed to KDict::GetKanjidicReading() to choose which part of
   an entry to return. */
enum {
	KDR_Onyomi  = 1,
	KDR_Kunyomi = 2,
	KDR_English = 3
};
766 wstring KDict::GetOnyomiStr(wchar_t c) const {
767 return GetKanjidicReading(c, KDR_Onyomi);
770 wstring KDict::GetKunyomiStr(wchar_t c) const {
771 return GetKanjidicReading(c, KDR_Kunyomi);
774 wstring KDict::GetEnglishStr(wchar_t c) const {
775 return GetKanjidicReading(c, KDR_English);
778 wstring KDict::GetKanjidicReading(wchar_t c, int readingType) const {
779 wostringstream result;
780 wstring kanjidicStr = GetKanjidicStr(c);
782 long tmode = 0;
783 wstring sTemp, token;
784 list<wstring> t = StrTokenize<wchar_t>(kanjidicStr, L" ");
786 /* The first two tokens are guaranteed not to be what we're looking for. Skip them. */
787 if(t.size()>1) {
788 t.pop_front();
789 t.pop_front();
791 while(t.size()>0) {
792 token = t.front();
793 t.pop_front();
794 sTemp = token;
795 c = sTemp[0];
796 /* If a preceding character is detected, strip it */
797 if(c == L'(' || c == L'〜') {
798 sTemp = sTemp.substr(1);
799 c = sTemp[0];
801 if(tmode==0) {
802 if(IsKatakana(c) && readingType==KDR_Onyomi) {
803 /* Onyomi reading detected */
804 if(result.str().length()>0) result << L" ";
805 result << token; /* Copy the original string,
806 including ()'s and 〜's */
807 continue;
809 else if(IsHiragana(c) && readingType==KDR_Kunyomi) {
810 /* Kunyomi reading detected */
811 if(result.str().length()>0) result << L" ";
812 result << token; /* Copy the original string,
813 including ()'s and 〜's */
814 continue;
817 if(c == L'{' && readingType==KDR_English) {
818 /* English meaning detected
819 Special handling is needed to take care of spaces, though.
820 We'll "cheat" and mess with our iterator a bit if a space is detected. */
821 while(t.size()>0 && sTemp[sTemp.length()-1] != L'}') {
822 sTemp.append(L" ").append(t.front());
823 t.pop_front();
825 if(result.str().length()>0) result << L", ";
826 result << sTemp.substr(1,sTemp.length()-2); /* Strip the {} */
828 else if(c==L'T') {
829 /*wstring(sTemp.substr(1)).ToLong(&tmode);*/
830 wistringstream(sTemp.substr(1)) >> tmode;
834 return result.str();
837 bool KDict::MainDataLoaded() const {
838 if(kanjidicData.size()>0) return true;
839 return false;