4 Website: http://www.vultaire.net/software/jben/
5 License: GNU General Public License (GPL) version 2
6 (http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt)
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2 of the License, or
13 (at your option) any later version.
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program. If not, see <http://www.gnu.org/licenses/>
26 #include "file_utils.h"
28 #include "encoding_convert.h"
29 #include "string_utils.h"
37 KDict
* KDict::kdictSingleton
= NULL
;
39 const KDict
*KDict::Get() {
41 kdictSingleton
= new KDict
;
42 return kdictSingleton
;
46 Preferences
*p
= Preferences::Get();
47 LoadKanjidic(p
->GetSetting("kdict_kanjidic").c_str());
48 LoadKradfile(p
->GetSetting("kdict_kradfile").c_str());
49 LoadRadkfile(p
->GetSetting("kdict_radkfile").c_str());
52 void KDict::Destroy() {
54 delete kdictSingleton
;
55 kdictSingleton
= NULL
;
59 int KDict::LoadKanjidic(const char *filename
) {
62 int returnCode
=KD_FAILURE
;
64 ifstream
ifile(filename
, ios::ate
); /* "at end" to get our file size */
68 rawData
= new char[size
+1];
70 ifile
.read(rawData
, size
);
71 if(strlen(rawData
)!=size
) {
73 os
<< "kanjidic file size: "
75 << ", read-in string: "
77 el
.Push(EL_Warning
, os
.str());
80 /* Create the kanjidic object with our string data. */
81 this->KanjidicParser(rawData
);
83 returnCode
= KD_SUCCESS
;
86 returnCode
= KD_FAILURE
;
88 if(rawData
) delete[] rawData
;
92 int KDict::LoadKradfile(const char *filename
) {
93 int returnCode
= KD_FAILURE
;
95 ifstream
f(filename
, ios::in
|ios::binary
);
101 StrTokenize
<wchar_t>(utfconv_mw(sb
.str()), L
"\n");
102 while(data
.size()>0) {
103 wstring token
= data
.front();
105 if(token
.length()>0 && token
[0]!=L
'#') {
106 /* KRADFILE-specific stuff here */
107 /* Get rid of the spaces in the string */
108 token
= TextReplace
<wchar_t>(token
, L
" ", L
"");
109 /* Now we can easily pull in the data */
110 if(!kradData
.assign(token
[0], token
.substr(2))) {
112 os
<< "KRADFILE: Error assigning ("
113 << utfconv_wm(token
.substr(0,1))
115 << utfconv_wm(token
.substr(2))
116 << ") to hash table!\n";
117 el
.Push(EL_Error
, os
.str());
122 returnCode
= KD_SUCCESS
;
127 int KDict::LoadRadkfile(const char *filename
) {
128 int returnCode
= KD_FAILURE
;
130 ifstream
f(filename
, ios::in
|ios::binary
);
135 /* RADKFILE entries all start with $.
136 Split on $, and discard the first entry since it is the explanation
137 preceding the first entry. */
139 StrTokenize
<wchar_t>(utfconv_mw(sb
.str()), L
"$");
142 while(data
.size()>0) {
143 wstring entry
= data
.front();
145 if(entry
.length()>0 && entry
[0]!=L
'#') {
146 /* RADKFILE-specific stuff here */
147 list
<wstring
> entryData
=
148 StrTokenize
<wchar_t>(entry
, L
"\n", false, 2);
149 if(entryData
.size()!=2) {
150 cerr
<< "Error: entryData.size() == "
151 << entryData
.size() << "!!" << endl
;
156 /* entryData.front() contains our key.
157 It's a space delimited string,
158 first token is our kanji, second is the stroke count.
159 A third token may be present, but is irrelevant. */
160 list
<wstring
> keyData
=
161 StrTokenize
<wchar_t>(entryData
.front(), L
" ");
163 wiss
.str(keyData
.front());
166 wiss
.str(keyData
.front());
169 /* entryData.back() contains the characters our key
171 /* Get rid of the spaces in the string */
172 value
= entryData
.back();
173 value
= TextReplace
<wchar_t>(value
, L
"\n", L
"");
174 value
= TextReplace
<wchar_t>(value
, L
" ", L
"");
176 if(!radkData
.assign(key
, value
)) {
178 os
<< "RADKFILE: Error assigning ("
179 << utfconv_wm(wstring().append(1,key
))
182 << ") to hash table!\n";
183 el
.Push(EL_Error
, os
.str());
185 if(!radkDataStrokes
.assign(key
, strokeCount
)) {
187 os
<< "RADKFILE: Error assigning ("
188 << utfconv_wm(wstring().append(1,key
))
189 << ", " << strokeCount
<< ") to hash table!\n";
190 el
.Push(EL_Error
, os
.str());
196 returnCode
= KD_SUCCESS
;
201 /* This could be sped up: copy the first UTF-8 character into a string, then
202 run a conversion on that. Trivial though. */
203 void KDict::KanjidicParser(char *kanjidicRawData
) {
204 char *token
= strtok(kanjidicRawData
, "\n");
207 if( (strlen(token
)>0) && (token
[0]!='#') ) {
208 wToken
= utfconv_mw(token
);
209 /* Convert token to proper format */
210 wToken
= ConvertKanjidicEntry(wToken
);
211 /* Add to hash table */
212 if(!kanjidicData
.assign(wToken
[0], token
)) {
214 string temp
= utfconv_wm(wToken
);
215 os
<< "Error assigning (" << temp
[0]
216 << ", " << temp
<< ") to hash table!\n";
217 el
.Push(EL_Error
, os
.str());
220 token
= strtok(NULL
, "\n");
225 /* Currently: nothing here. */
/* This function returns a wstring containing the desired line of the
   kanjidic hash table.  A conversion from string to wstring is included
   in this call since standard strings are only used for more compressed
   internal storage.  This is followed by a slight reformatting of the
   string for better presentation. */
233 wstring
KDict::GetKanjidicStr(wchar_t c
) const {
234 BoostHM
<wchar_t,string
>::iterator it
= kanjidicData
.find(c
);
235 if(it
==kanjidicData
.end()) return L
"";
237 s
= utfconv_mw(it
->second
);
238 return ConvertKanjidicEntry(s
);
242 * Performs transformations on a KANJIDIC string for our internal usage.
243 * Currently, this includes the following:
244 * - Changing あ.いう notation to あ(いう), a la JWPce/JFC.
245 * - Changing -あい notation to 〜あい, also a la JWPce/JFC.
247 wstring
KDict::ConvertKanjidicEntry(const wstring
& s
) {
248 size_t index
, lastIndex
;
251 /* First conversion: あ.いう to あ(いう) */
252 index
= temp
.find(L
'.', 0);
253 while(index
!=wstring::npos
) {
254 /* Proceed if the character preceding the "." is hiragana/katakana. */
255 if(IsFurigana(temp
[index
-1])) {
257 index
= temp
.find(L
' ', index
+1);
258 if(index
==wstring::npos
) {
259 temp
.append(1, L
')');
262 temp
.insert(index
, 1, L
')');
265 index
= temp
.find(L
'.', lastIndex
+1);
268 /* Second conversion: - to 〜, when a neighboring character is hiragana/katakana */
269 index
= temp
.find(L
'-', 0);
270 while(index
!=wstring::npos
) {
271 /* Proceed if the character before or after the "-" is hiragana/katakana. */
272 if(IsFurigana(temp
[index
-1]) || IsFurigana(temp
[index
+1]))
276 index
= temp
.find(L
'-', lastIndex
+1);
279 /* Return the converted string */
283 wstring
KDict::KanjidicToHtml(const wstring
& kanjidicStr
) {
284 Preferences
*prefs
= Preferences::Get();
285 return KanjidicToHtml(kanjidicStr
,
286 prefs
->kanjidicOptions
,
287 prefs
->kanjidicDictionaries
);
290 wstring
KDict::KanjidicToHtml(const wstring
& kanjidicStr
,
291 long options
, long dictionaries
) {
292 /* return wstring(L"<p>")
296 wostringstream result
;
297 wostringstream header
;
298 wstring onyomi
, kunyomi
, nanori
, radicalReading
, english
;
299 wstring dictionaryInfo
;
300 wstring lowRelevance
;
302 long grade
= -1, frequency
= -1, tmode
= 0;
304 wstring koreanRomanization
, pinyinRomanization
, crossReferences
, miscodes
;
305 wstring sTemp
, token
;
306 list
<wstring
> t
= StrTokenize
<wchar_t>(kanjidicStr
, L
" ");
309 /* Special processing for the first 2 entries of the line. */
311 /* header = "<h1><font size=\"-6\">" + args[0] + "</font></h1>"; */
312 /*header.append(L"<p style=\"font-size:32pt\">") */
313 header
<< L
"<p><font size=\"7\">" << t
.front() << L
"</font></p>";
315 lowRelevance
.append(L
"<li>JIS code: 0x")
321 /* NEW! Temporary code for loading in SODs and SODAs from KanjiCafe! */
322 if(options
& (KDO_SOD_STATIC
| KDO_SOD_ANIM
) != 0) {
324 /* Get a UTF8-encoded string for the kanji. */
325 utfStr
= utfconv_wm(kanjidicStr
.substr(0,1));
327 /* Convert to a low-to-high-byte hex string. */
329 for(unsigned int i
=0;i
<utfStr
.length();i
++) {
330 ss
<< hex
<< setw(2) << setfill('0')
331 << (unsigned int)((unsigned char)utfStr
[i
]);
335 /* Load static SOD, if present */
336 if((options
& KDO_SOD_STATIC
) != 0) {
338 fn
<< "sods" << DIRSEP
339 << "sod-utf8-hex" << DIRSEP
340 << ss
.str() << ".png";
342 printf("DEBUG: Checking for existance of file \"%s\"...\n", fn
.str().c_str());
344 ifstream
f(fn
.str().c_str());
347 if(sod
.str().length()>0) sod
<< L
"<br />";
348 sod
<< L
"<img src=\"" << utfconv_mw(fn
.str()) << L
"\" />";
351 /* Load animated SOD, if present */
352 if((options
& KDO_SOD_ANIM
) != 0) {
354 fn
<< "sods" << DIRSEP
355 << "soda-utf8-hex" << DIRSEP
356 << ss
.str() << ".gif";
358 printf("DEBUG: Checking for existance of file \"%s\"...\n", fn
.str().c_str());
360 ifstream
f(fn
.str().c_str());
363 if(sod
.str().length()>0) sod
<< L
"<br />";
364 sod
<< L
"<img src=\"" << utfconv_mw(fn
.str()) << L
"\" />";
367 /* Append the chart(s) in a paragraph object. */
368 if(sod
.str().length()>0) {
369 header
<< L
"<p>" << sod
.str() <<
370 L
"<br /><font size=\"1\">(Kanji stroke order graphics used under license from KanjiCafe.com.)</font></p>";
379 /* If a preceding character is detected, strip it */
380 if(c
== L
'(' || c
== L
'〜') {
381 sTemp
= sTemp
.substr(1);
386 /* Onyomi reading detected */
387 /*if(onyomi.length()>0) onyomi.append(L" "); */
388 if(onyomi
.length()>0) onyomi
.append(L
" ");
389 onyomi
.append(token
); /* Copy the original string, including ()'s and 〜's */
392 else if(IsHiragana(c
)) {
393 /* Kunyomi reading detected */
394 if(kunyomi
.length()>0) kunyomi
.append(L
" ");
395 kunyomi
.append(token
); /* Copy the original string, including ()'s and 〜's */
398 } else if(tmode
==1) {
400 /* Nanori reading detected */
401 if(nanori
.length()>0) nanori
.append(L
" ");
402 nanori
.append(token
); /* Copy the original string, including ()'s and 〜's */
405 } else if(tmode
==2) {
407 /* Special radical reading detected */
408 if(radicalReading
.length()>0) radicalReading
.append(L
" ");
409 radicalReading
.append(token
);
414 /* English meaning detected
415 Special handling is needed to take care of spaces, though.
416 We'll "cheat" and mess with our iterator a bit if a space is detected. */
417 while(t
.size()>0 && sTemp
[sTemp
.length()-1] != L
'}') {
418 sTemp
.append(L
" ").append(t
.front());
421 if(english
.length()>0) english
.append(L
", ");
422 english
.append(sTemp
.substr(1,sTemp
.length()-2)); /* Strip the {} */
426 case L
'T': /* Change "t mode" */
427 /*wstring(sTemp.substr(1)).ToLong(&tmode);*/
428 wistringstream(sTemp
.substr(1)) >> tmode
;
430 if(tmode
>2) printf("WARNING: T-mode set to %d.\nT-modes above 2 are not currently documented!", (int)tmode
);
433 case L
'B': /* Bushu radical */
434 lowRelevance
.append(L
"<li>Bushu radical: ").append(sTemp
.substr(1)).append(L
"</li>");
436 case L
'C': /* Classical radical */
437 lowRelevance
.append(L
"<li>Classical radical: ").append(sTemp
.substr(1)).append(L
"</li>");
439 case L
'F': /* Frequency */
440 /*wstring(sTemp.substr(1)).ToLong(&frequency);*/
441 wistringstream(sTemp
.substr(1)) >> frequency
;
443 case L
'G': /* Grade level */
444 /*wstring(sTemp.substr(1)).ToLong(&grade);*/
445 wistringstream(sTemp
.substr(1)) >> grade
;
447 case L
'S': /* Stroke count */
448 if(strokes
.length()==0) {
449 strokes
= sTemp
.substr(1);
450 } else if(!strokes
.find(L
' ')!=wstring::npos
) {
451 strokes
.append(L
" (Miscounts: ")
452 .append(sTemp
.substr(1))
455 strokes
= strokes
.substr(0, strokes
.length()-1)
457 .append(sTemp
.substr(1))
461 case L
'U': /* Unicode value */
462 lowRelevance
.append(L
"<li>Unicode: 0x").append(sTemp
.substr(1)).append(L
"</li>");
464 /* From here, it's all dictionary codes */
466 if((dictionaries
& KDD_NJECD
)!=0)
467 dictionaryInfo
.append(L
"<li>New Japanese-English Character Dictionary (Halpern): ")
468 .append(sTemp
.substr(1)).append(L
"</li>");
471 if((dictionaries
& KDD_MRJECD
)!=0)
472 dictionaryInfo
.append(L
"<li>Modern Reader's Japanese-English Character Dictionary (Nelson): ")
473 .append(sTemp
.substr(1)).append(L
"</li>");
476 if((dictionaries
& KDD_NNJECD
)!=0)
477 dictionaryInfo
.append(L
"<li>The New Nelson's Japanese-English Character Dictionary: ")
478 .append(sTemp
.substr(1)).append(L
"</li>");
482 /* Thanks to changes in permissible SKIP code usage (change to
483 Creative Commons licensing in January 2008), we can now use
484 this without problems. */
485 if((dictionaries
& KDD_SKIP
)!=0)
486 dictionaryInfo
.append(L
"<li>SKIP code: ")
487 .append(sTemp
.substr(1)).append(L
"</li>");
489 case L
'I': /* Spahn/Hadamitzky dictionaries */
491 if((dictionaries
& KDD_KK
)!=0) {
492 dictionaryInfo
.append(L
"<li>Kanji & Kana (Spahn, Hadamitzky): ")
493 .append(sTemp
.substr(2)).append(L
"</li>");
496 if((dictionaries
& KDD_KD
)!=0) {
497 dictionaryInfo
.append(L
"<li>Kanji Dictionary (Spahn, Hadamitzky): ")
498 .append(sTemp
.substr(1)).append(L
"</li>");
503 if((dictionaries
& KDD_FC
)!=0) {
504 dictionaryInfo
.append(L
"<li>Four Corner code: ")
505 .append(sTemp
.substr(1)).append(L
"</li>");
511 if((dictionaries
& KDD_MOROI
)!=0) {
512 dictionaryInfo
.append(L
"<li>Morohashi Daikanwajiten Index: ")
513 .append(sTemp
.substr(2)).append(L
"</li>");
515 } else if(c2
==L
'P') {
516 if((dictionaries
& KDD_MOROVP
)!=0) {
517 dictionaryInfo
.append(L
"<li>Morohashi Daikanwajiten Volume/Page: ")
518 .append(sTemp
.substr(2)).append(L
"</li>");
523 if((dictionaries
& KDD_GRJC
)!=0) {
524 dictionaryInfo
.append(L
"<li>A Guide to Remembering Japanese Characters (Henshal): ")
525 .append(sTemp
.substr(1)).append(L
"</li>");
529 if((dictionaries
& KDD_GKD
)!=0) {
530 dictionaryInfo
.append(L
"<li>Gakken Kanji Dictionary (\"A New Dictionary of Kanji Usage\"): ")
531 .append(sTemp
.substr(1)).append(L
"</li>");
535 if((dictionaries
& KDD_RTK
)!=0) {
536 dictionaryInfo
.append(L
"<li>Remembering the Kanji (Heisig): ")
537 .append(sTemp
.substr(1)).append(L
"</li>");
541 if((dictionaries
& KDD_JN
)!=0) {
542 dictionaryInfo
.append(L
"<li>Japanese Names (O'Neill): ")
543 .append(sTemp
.substr(1)).append(L
"</li>");
550 if((dictionaries
& KDD_JBP
)!=0) {
551 dictionaryInfo
.append(L
"<li>Japanese for Busy People (AJLT): ")
552 .append(sTemp
.substr(2)).append(L
"</li>");
556 if((dictionaries
& KDD_KWJLP
)!=0) {
557 dictionaryInfo
.append(L
"<li>The Kanji Way to Japanese Language Power (Crowley): ")
558 .append(sTemp
.substr(2)).append(L
"</li>");
562 if((dictionaries
& KDD_JKF
)!=0) {
563 dictionaryInfo
.append(L
"<li>Japanese Kanji Flashcards (White Rabbit Press): ")
564 .append(sTemp
.substr(2)).append(L
"</li>");
568 if((dictionaries
& KDD_KCKG
)!=0) {
569 dictionaryInfo
.append(L
"<li>Kodansha Compact Kanji Guide: ")
570 .append(sTemp
.substr(2)).append(L
"</li>");
574 if((dictionaries
& KDD_GTRWJH
)!=0) {
575 dictionaryInfo
.append(L
"<li>A Guide To Reading and Writing Japanese (Hensall): ")
576 .append(sTemp
.substr(2)).append(L
"</li>");
580 if((dictionaries
& KDD_KIC
)!=0) {
581 dictionaryInfo
.append(L
"<li>Kanji in Context (Nishiguchi and Kono): ")
582 .append(sTemp
.substr(2)).append(L
"</li>");
586 if((dictionaries
& KDD_KLD
)!=0) {
587 dictionaryInfo
.append(L
"<li>Kanji Learner's Dictionary (Halpern): ")
588 .append(sTemp
.substr(2)).append(L
"</li>");
592 if((dictionaries
& KDD_EK
)!=0) {
593 dictionaryInfo
.append(L
"<li>Essential Kanji (O'Neill): ")
594 .append(sTemp
.substr(2)).append(L
"</li>");
598 if((dictionaries
& KDD_DR
)!=0) {
599 dictionaryInfo
.append(L
"<li>2001 Kanji (De Roo): ")
600 .append(sTemp
.substr(2)).append(L
"</li>");
604 if((dictionaries
& KDD_GTRWJS
)!=0) {
605 dictionaryInfo
.append(L
"<li>A Guide to Reading and Writing Japanese (Sakade): ")
606 .append(sTemp
.substr(2)).append(L
"</li>");
610 if((dictionaries
& KDD_TKC
)!=0) {
611 dictionaryInfo
.append(L
"<li>Tuttle Kanji Cards (Kask): ")
612 .append(sTemp
.substr(2)).append(L
"</li>");
616 if(unhandled
.length()>0) unhandled
.append(L
" ");
617 unhandled
.append(sTemp
);
621 /* Crossreferences and miscodes */
623 if(crossReferences
.length()>0) crossReferences
.append(L
", ");
624 crossReferences
.append(sTemp
.substr(1));
627 if(miscodes
.length()>0) miscodes
.append(L
", ");
628 miscodes
.append(sTemp
.substr(1));
630 /* Korean/Pinyin (Chinese) romanization */
632 if(koreanRomanization
.length()>0) koreanRomanization
.append(L
", ");
633 koreanRomanization
.append(sTemp
.substr(1));
636 if(pinyinRomanization
.length()>0) pinyinRomanization
.append(L
", ");
637 pinyinRomanization
.append(sTemp
.substr(1));
640 if(unhandled
.length()>0) unhandled
.append(L
" ");
641 unhandled
.append(sTemp
);
645 } /* while(t.HasMoreTokens()) */
647 if(header
.str().length() > 0) result
<< header
.str();
649 printf("DEBUG: header=[%ls]\n", header
.str().c_str());
652 if((options
& KDO_READINGS
) != 0) {
653 if(onyomi
.length() > 0)
654 result
<< L
"<li>Onyomi Readings: " << onyomi
<< L
"</li>";
655 if(kunyomi
.length() > 0)
656 result
<< L
"<li>Kunyomi Readings: " << kunyomi
<< L
"</li>";
657 if(nanori
.length() > 0)
658 result
<< L
"<li>Nanori Readings: " << nanori
<< L
"</li>";
659 if(radicalReading
.length() > 0)
660 result
<< L
"<li>Special Radical Reading: " << radicalReading
<<
663 if((options
& KDO_MEANINGS
) != 0) {
664 if(english
.length() > 0)
665 result
<< L
"<li>English Meanings: " << english
<< L
"</li>";
667 if((options
& KDO_HIGHIMPORTANCE
) != 0) {
668 if(strokes
.length() > 0)
669 result
<< L
"<li>Stroke count: " << strokes
<< L
"</li>";
671 result
<< L
"<li>Stroke count: not specified in KANJIDIC</li>";
672 result
<< L
"<li>Grade Level: ";
673 if(grade
<=6 && grade
>= 1) { /* Jouyou (Grade #) */
674 result
<< L
"Jouyou (Grade " << grade
<< L
")";
675 } else if(grade
==8) { /* Jouyou (General usage) */
676 result
<< L
"Jouyou (General usage)";
677 } else if(grade
==9) { /* Jinmeiyou (Characters for names) */
678 result
<< L
"Jinmeiyou (Characters for names)";
679 } else if(grade
==-1) { /* No flag specified in kanjidic string */
680 result
<< L
"Unspecified";
682 result
<< L
"Unhandled grade level (Grade " << grade
<< L
")";
686 result
<< L
"<li>Frequency Ranking: " << frequency
<< L
"</li>";
687 else result
<< L
"<li>Frequency Ranking: Unspecified</li>";
689 if((options
& KDO_DICTIONARIES
) != 0) {
690 if(dictionaryInfo
.length()>0)
691 result
<< L
"<li>Dictionary Codes:<ul>" << dictionaryInfo
694 if((options
& KDO_VOCABCROSSREF
) != 0) {
695 vector
<wstring
> *vList
= &(jben
->vocabList
->GetVocabList());
696 wchar_t thisKanji
= kanjidicStr
[0];
697 vector
<wstring
> crossRefList
;
698 vector
<wstring
>::iterator vIt
;
699 for(vIt
=vList
->begin(); vIt
!=vList
->end(); vIt
++) {
700 if(vIt
->find(thisKanji
)!=wstring::npos
) {
701 crossRefList
.push_back(*vIt
);
704 if(crossRefList
.size()>0) {
705 result
<< L
"<li>This kanji is used by words in your study list:<br><font size=\"7\">";
706 vIt
= crossRefList
.begin();
708 for(++vIt
; vIt
!=crossRefList
.end(); vIt
++) {
709 result
<< L
" " << *vIt
;
711 result
<< L
"</font></li>";
714 if((options
& KDO_LOWIMPORTANCE
) != 0) {
715 if(koreanRomanization
.length()>0) lowRelevance
.append(L
"<li>Korean romanization: ").append(koreanRomanization
).append(L
"</li>");
716 if(pinyinRomanization
.length()>0) lowRelevance
.append(L
"<li>Pinyin romanization: ").append(pinyinRomanization
).append(L
"</li>");
717 if(crossReferences
.length()>0) lowRelevance
.append(L
"<li>Cross reference codes: ").append(crossReferences
).append(L
"</li>");
718 if(miscodes
.length()>0) lowRelevance
.append(L
"<li>Miscodes: ").append(miscodes
).append(L
"</li>");
719 if(lowRelevance
.length()>0)
720 result
<< L
"<li>Extra Information:<ul>" << lowRelevance
723 if((options
& KDO_UNHANDLED
) != 0) {
724 if(unhandled
.length()>0)
725 result
<< L
"<li>Unhandled: " << unhandled
<< L
"</li>";
732 int KDict::GetIntField(wchar_t kanji
, const wstring
& marker
) const {
733 wstring markerStr
, kanjiEntry
, temp
;
738 markerStr
.append(L
" ").append(marker
);
739 markerLen
=markerStr
.length();
741 kanjiEntry
= GetKanjidicStr(kanji
);
742 if(kanjiEntry
.length()>0) {
743 index
= kanjiEntry
.find(markerStr
);
744 if(index
!=wstring::npos
) {
745 temp
= kanjiEntry
.substr(
747 kanjiEntry
.find(L
" ", index
+1) - index
- (markerLen
-1));
748 /*temp.ToLong(&value);*/
749 wistringstream(temp
) >> value
;
756 const BoostHM
<wchar_t,string
>* KDict::GetHashTable() const {
757 return &kanjidicData
;
766 wstring
KDict::GetOnyomiStr(wchar_t c
) const {
767 return GetKanjidicReading(c
, KDR_Onyomi
);
770 wstring
KDict::GetKunyomiStr(wchar_t c
) const {
771 return GetKanjidicReading(c
, KDR_Kunyomi
);
774 wstring
KDict::GetEnglishStr(wchar_t c
) const {
775 return GetKanjidicReading(c
, KDR_English
);
778 wstring
KDict::GetKanjidicReading(wchar_t c
, int readingType
) const {
779 wostringstream result
;
780 wstring kanjidicStr
= GetKanjidicStr(c
);
783 wstring sTemp
, token
;
784 list
<wstring
> t
= StrTokenize
<wchar_t>(kanjidicStr
, L
" ");
786 /* The first two tokens are guaranteed not to be what we're looking for. Skip them. */
796 /* If a preceding character is detected, strip it */
797 if(c
== L
'(' || c
== L
'〜') {
798 sTemp
= sTemp
.substr(1);
802 if(IsKatakana(c
) && readingType
==KDR_Onyomi
) {
803 /* Onyomi reading detected */
804 if(result
.str().length()>0) result
<< L
" ";
805 result
<< token
; /* Copy the original string,
806 including ()'s and 〜's */
809 else if(IsHiragana(c
) && readingType
==KDR_Kunyomi
) {
810 /* Kunyomi reading detected */
811 if(result
.str().length()>0) result
<< L
" ";
812 result
<< token
; /* Copy the original string,
813 including ()'s and 〜's */
817 if(c
== L
'{' && readingType
==KDR_English
) {
818 /* English meaning detected
819 Special handling is needed to take care of spaces, though.
820 We'll "cheat" and mess with our iterator a bit if a space is detected. */
821 while(t
.size()>0 && sTemp
[sTemp
.length()-1] != L
'}') {
822 sTemp
.append(L
" ").append(t
.front());
825 if(result
.str().length()>0) result
<< L
", ";
826 result
<< sTemp
.substr(1,sTemp
.length()-2); /* Strip the {} */
829 /*wstring(sTemp.substr(1)).ToLong(&tmode);*/
830 wistringstream(sTemp
.substr(1)) >> tmode
;
837 bool KDict::MainDataLoaded() const {
838 if(kanjidicData
.size()>0) return true;