kdict.cpp

   1 /*
   2 Project: J-Ben
   3 Author:  Paul Goins
   4 Website: http://www.vultaire.net/software/jben/
   5 License: GNU General Public License (GPL) version 2
   6          (http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt)
   7
    8 File: kdict.cpp
   9
  10 This program is free software; you can redistribute it and/or modify
  11 it under the terms of the GNU General Public License as published by
  12 the Free Software Foundation; either version 2 of the License, or
  13 (at your option) any later version.
  14
  15 This program is distributed in the hope that it will be useful,
  16 but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18 GNU General Public License for more details.
  19
  20 You should have received a copy of the GNU General Public License
  21 along with this program.  If not, see <http://www.gnu.org/licenses/>
  22 */
  23
  24 #include "kdict.h"
  25 #include "file_utils.h"
  26 #include "jutils.h"
  27 #include "wx/tokenzr.h"
  28 #include "wx/file.h"
  29 #include "global.h"
  30 #include <fstream>
  31 using namespace std;
  32
  33 KDict* KDict::kdictSingleton = NULL;
  34
  35 const KDict *KDict::GetKDict() {
  36         if(kdictSingleton) return kdictSingleton;
  37         kdictSingleton = new KDict;
  38         kdictSingleton->LoadKanjidic();
  39         kdictSingleton->LoadKradfile();
  40         kdictSingleton->LoadRadkfile();
  41         return kdictSingleton;
  42 }
  43
  44 void KDict::Destroy() {
  45         if(kdictSingleton) {
  46                 delete kdictSingleton;
  47                 kdictSingleton = NULL;
  48         }
  49 }
  50
  51 int KDict::LoadKanjidic(const char *filename) {
  52         char *rawData = NULL;
  53         unsigned int size;
  54         int returnCode=0xDEADBEEF;
  55
  56         ifstream ifile(filename, ios::ate); /* "at end" to get our file size */
  57         if(ifile) {
  58                 size = ifile.tellg();
  59                 ifile.seekg(0);
  60                 rawData = new char[size+1];
  61                 rawData[size] = '\0';
  62                 ifile.read(rawData, size);
  63 #ifdef DEBUG
  64                 if(strlen(rawData)!=size)
  65                         fprintf(stderr,
  66                           "WARNING: kanjidic file size: %d, read-in string: %d\n",
  67                           strlen(rawData),
  68                           size);
  69 #endif
  70
  71                 /* Create the kanjidic object with our string data. */
  72                 this->KanjidicParser(rawData);
  73
  74                 returnCode = KD_SUCCESS;
  75         }
  76         else
  77                 returnCode = KD_FAILURE;
  78
  79         if(rawData) delete[] rawData;
  80         return returnCode;
  81 }
  82
  83 int KDict::LoadKradfile(const char *filename) {
  84         int returnCode = 0xDEADBEEF;
  85         return returnCode;
  86 }
  87
  88 int KDict::LoadRadkfile(const char *filename) {
  89         int returnCode = 0xDEADBEEF;
  90         return returnCode;
  91 }
  92
  93 /* This could be sped up: copy the first UTF-8 character into a string, then
  94    run a conversion on that.  Trivial though. */
  95 void KDict::KanjidicParser(char *kanjidicRawData) {
  96         char *token = strtok(kanjidicRawData, "\n");
  97         wxString wxToken;
  98         while(token) {
  99                 if( (strlen(token)>0) && (token[0]!='#') ) {
 100                         UTF8ToWx(token, wxToken);
 101                         /* Convert token to proper format */
 102                         wxToken = ConvertKanjidicEntry(wxToken);
 103                         /* Add to hash table */
 104                         if(!kanjidicData.assign(wxToken[0], token)) {
 105 #ifdef DEBUG
 106                                 fprintf(stderr,
 107                                         "Error assigning (%lc, %ls) to hash table!\n",
 108                                         wxToken[0], wxToken.c_str());
 109 #endif
 110                         }
 111                 }
 112                 token = strtok(NULL, "\n");
 113         }
 114 }
 115
 116 KDict::~KDict() {
 117         /* Currently: nothing here. */
 118 }
 119
 120 /* This function returns a wxString containing the desired line of the
 121    kanjidic hash table.  A conversion from string to wxString is included
 122    in this call since strings are only used for more compressed internal
 123    storage.  This is followed by a slight reformatting of the string for
 124    better presentation. */
 125 wxString KDict::GetKanjidicStr(wxChar c) const {
 126         BoostHM<wxChar,string>::iterator it = kanjidicData.find(c);
 127         if(it==kanjidicData.end()) return _T("");
 128         wxString s;
 129         UTF8ToWx(it->second, s);
 130         return ConvertKanjidicEntry(s);
 131 }
 132
 133 /*
 134  * Performs transformations on a KANJIDIC string for our internal usage.
 135  * Currently, this includes the following:
 136  * - Changing あ.いう notation to あ(いう), a la JWPce/JFC.
 137  * - Changing -あい notation to 〜あい, also a la JWPce/JFC.
 138  */
 139 wxString KDict::ConvertKanjidicEntry(const wxString& s) {
 140         size_t index, lastIndex;
 141         wxString temp = s;
 142
 143         /* First conversion: あ.いう to あ(いう) */
 144         index = temp.find(_T('.'), 0);
 145         while(index!=wxString::npos) {
 146                 /* Proceed if the character preceding the "." is hiragana/katakana. */
 147                 if(IsFurigana(temp[index-1])) {
 148                         temp[index] = _T('(');
 149                         index = temp.find(_T(' '), index+1);
 150                         if(index==wxString::npos) {
 151                                 temp.append(_T(')'));
 152                                 break;
 153                         } else
 154                                 temp.insert(index, _T(')'));
 155                 }
 156                 lastIndex = index;
 157                 index = temp.find(_T('.'), lastIndex+1);
 158         }
 159
 160         /* Second conversion: - to 〜, when a neighboring character is hiragana/katakana */
 161         index = temp.find(_T('-'), 0);
 162         while(index!=wxString::npos) {
 163                 /* Proceed if the character before or after the "-" is hiragana/katakana. */
 164                 if(IsFurigana(temp[index-1]) || IsFurigana(temp[index+1]))
 165                         temp[index]=_T('〜');
 166
 167                 lastIndex = index;
 168                 index = temp.find(_T('-'), lastIndex+1);
 169         }
 170
 171         /* Return the converted string */
 172         return temp;
 173 }
 174
 175 wxString KDict::KanjidicToHtml(const wxString& kanjidicStr) {
 176         return KanjidicToHtml(kanjidicStr,
 177                                                   jben->prefs->kanjidicOptions,
 178                                                   jben->prefs->kanjidicDictionaries);
 179 }
 180
 181 wxString KDict::KanjidicToHtml(const wxString& kanjidicStr,
 182                                                                                  long options, long dictionaries) {
 183 /*      return wxString(_T("<p>"))
 184                 .append(s[0])
 185                 .append(_T("</p>"));*/
 186
 187         wxString result;
 188
 189         wxString header, onyomi, kunyomi, nanori, radicalReading, english;
 190         wxString dictionaryInfo;
 191         wxString lowRelevance;
 192         wxString unhandled;
 193         long grade = -1, frequency = -1, tmode = 0;
 194         wxString strokes;
 195         wxString koreanRomanization, pinyinRomanization, crossReferences, miscodes;
 196         wxString sTemp, token;
 197         wxStringTokenizer t(kanjidicStr, _T(' '));
 198         wxChar c, c2;
 199
 200         /* Special processing for the first 2 entries of the line. */
 201         if(t.CountTokens()>1) {
 202                 /* header = "<h1><font size=\"-6\">" + args[0] + "</font></h1>"; */
 203                 /*header.append(_T("<p style=\"font-size:32pt\">")) */
 204                 header.append(_T("<p><font size=\"7\">"))
 205                         .append(t.GetNextToken())
 206                         .append(_T("</font></p>"));
 207                 lowRelevance.append(_T("<li>JIS code: 0x"))
 208                         .append(t.GetNextToken())
 209                         .append(_T("</li>"));
 210         }
 211
 212         /* NEW!  Temporary code for loading in SODs and SODAs from KanjiCafe! */
 213         if(options & (KDO_SOD_STATIC | KDO_SOD_ANIM) != 0) {
 214                 wxCSConv transcoder(_T("utf-8"));
 215                 if(transcoder.IsOk()) {
 216                         string utfStr;
 217                         /* Get a UTF8-encoded string for the kanji. */
 218                         WxToUTF8(kanjidicStr[0], utfStr);
 219                         sTemp.clear();
 220
 221                         /* Convert to a low-to-high-byte hex string. */
 222                         for(unsigned int i=0;i<utfStr.length();i++) {
 223                                 sTemp.Append(
 224                                         wxString::Format(_T("%02x"),
 225                                                                          (unsigned char)utfStr[i]));
 226                         }
 227
 228                         wxString sod;
 229                         /* Load static SOD, if present */
 230                         if((options & KDO_SOD_STATIC) != 0) {
 231                                 wxFileName fn;
 232                                 fn.AppendDir(_T("sods"));
 233                                 fn.AppendDir(_T("sod-utf8-hex"));
 234                                 fn.SetName(sTemp);
 235                                 fn.SetExt(_T("png"));
 236 #ifdef DEBUG
 237                                 printf("DEBUG: Checking for existance of file \"%ls\"...\n", fn.GetFullPath().c_str());
 238 #endif
 239                                 if(wxFile::Exists(
 240                                         fn.GetFullPath()
 241                                 )) {
 242                                         sod.append(wxString::Format(
 243                                                 _T("<img src=\"%s\" />"), fn.GetFullPath().c_str()
 244                                         ));
 245                                 }
 246                         }
 247                         /* Load animated SOD, if present */
 248                         if((options & KDO_SOD_ANIM) != 0) {
 249                                 wxFileName fn;
 250                                 fn.AppendDir(_T("sods"));
 251                                 fn.AppendDir(_T("soda-utf8-hex"));
 252                                 fn.SetName(sTemp);
 253                                 fn.SetExt(_T("gif"));
 254 #ifdef DEBUG
 255                                 printf("DEBUG: Checking for existance of file \"%ls\"...\n", fn.GetFullPath().c_str());
 256 #endif
 257                                 if(wxFile::Exists(
 258                                         fn.GetFullPath()
 259                                 )) {
 260                                         if(sod.length()>0) sod.append(_T("<br />"));
 261                                         sod.append(wxString::Format(
 262                                                 _T("<img src=\"%s\" />"), fn.GetFullPath().c_str()
 263                                         ));
 264                                 }
 265                         }
 266                         /* Append the chart(s) in a paragraph object. */
 267                         if(sod.length()>0) {
 268                                 header.append(wxString::Format(
 269                                         _T("<p>%s<br /><font size=\"1\">(Kanji stroke order graphics used under license from KanjiCafe.com.)</font></p>"), sod.c_str()
 270                                 ));
 271                         }
 272                 } else {
 273                         fprintf(stderr, "[%s:%d]: Bad transcoder selected!\n", __FILE__, __LINE__);
 274                 }
 275         }
 276         /* END OF EXPERIMENTAL NEW CODE */
 277
 278         while(t.HasMoreTokens()) {
 279                 token = t.GetNextToken();
 280                 sTemp = token;
 281                 c = sTemp[0];
 282                 /* If a preceding character is detected, strip it */
 283                 if(c == _T('(') || c == _T('〜')) {
 284                         sTemp = sTemp.substr(1);
 285                         c = sTemp[0];
 286                 }
 287                 if(tmode==0) {
 288                         if(IsKatakana(c)) {
 289                                 /* Onyomi reading detected */
 290                                 /*if(onyomi.length()>0) onyomi.append(_T("　")); */
 291                                 if(onyomi.length()>0) onyomi.append(_T("&nbsp; "));
 292                                 onyomi.append(token);   /* Copy the original string, including ()'s and 〜's */
 293                                 continue;
 294                         }
 295                         else if(IsHiragana(c)) {
 296                                 /* Kunyomi reading detected */
 297                                 if(kunyomi.length()>0) kunyomi.append(_T("&nbsp; "));
 298                                 kunyomi.append(token);  /* Copy the original string, including ()'s and 〜's */
 299                                 continue;
 300                         }
 301                 } else if(tmode==1) {
 302                         if(IsFurigana(c)) {
 303                                 /* Nanori reading detected */
 304                                 if(nanori.length()>0) nanori.append(_T("&nbsp; "));
 305                                 nanori.append(token);   /* Copy the original string, including ()'s and 〜's */
 306                                 continue;
 307                         }
 308                 } else if(tmode==2) {
 309                         if(IsFurigana(c)) {
 310                                 /* Special radical reading detected */
 311                                 if(radicalReading.length()>0) radicalReading.append(_T("&nbsp; "));
 312                                 radicalReading.append(token);
 313                                 continue;
 314                         }
 315                 }
 316                 if(c == _T('{')) {
 317                         /* English meaning detected
 318                            Special handling is needed to take care of spaces, though.
 319                            We'll "cheat" and mess with our iterator a bit if a space is detected. */
 320                         while(t.HasMoreTokens() && sTemp[sTemp.length()-1] != _T('}')) {
 321                                 sTemp.append(_T(" ")).append(t.GetNextToken());
 322                         }
 323                         if(english.length()>0) english.append(_T(", "));
 324                         english.append(sTemp.substr(1,sTemp.length()-2));  /* Strip the {} */
 325                 }
 326                 else {
 327                         switch(c) {
 328                         case _T('T'):  /* Change "t mode" */
 329                                 /* Note: substr() returns type wxStringBase, which disallows access to wxString::ToLong.
 330                                    So, by making a copy of wxString and performing the conversion in the copy, we get around this.
 331                                    This ugly kludge is repeated twice below for frequency and grade level. */
 332                                 wxString(sTemp.substr(1)).ToLong(&tmode);
 333 #ifdef DEBUG
 334                                 if(tmode>2) printf("WARNING: T-mode set to %d.\nT-modes above 2 are not currently documented!", (int)tmode);
 335 #endif
 336                                 break;
 337                         case _T('B'):  /* Bushu radical */
 338                                 lowRelevance.append(_T("<li>Bushu radical: ")).append(sTemp.substr(1)).append(_T("</li>"));
 339                                 break;
 340                         case _T('C'):  /* Classical radical */
 341                                 lowRelevance.append(_T("<li>Classical radical: ")).append(sTemp.substr(1)).append(_T("</li>"));
 342                                 break;
 343                         case _T('F'):  /* Frequency */
 344                                 wxString(sTemp.substr(1)).ToLong(&frequency);
 345                                 break;
 346                         case _T('G'):  /* Grade level */
 347                                 wxString(sTemp.substr(1)).ToLong(&grade);
 348                                 break;
 349                         case _T('S'):  /* Stroke count */
 350                                 if(strokes.length()==0) {
 351                                         strokes = sTemp.substr(1);
 352                                 } else if(!strokes.find(_T(' '))!=wxString::npos) {
 353                                         strokes.append(_T(" (Miscounts: "))
 354                                                 .append(sTemp.substr(1))
 355                                                 .append(_T(")"));
 356                                 } else {
 357                                         strokes = strokes.substr(0, strokes.length()-1)
 358                                                 .append(_T(", "))
 359                                                 .append(sTemp.substr(1))
 360                                                 .append(_T(")"));
 361                                 }
 362                                 break;
 363                         case _T('U'):  /* Unicode value */
 364                                 lowRelevance.append(_T("<li>Unicode: 0x")).append(sTemp.substr(1)).append(_T("</li>"));
 365                                 break;
 366                         /* From here, it's all dictionary codes */
 367                         case _T('H'):
 368                                 if((dictionaries & KDD_NJECD)!=0)
 369                                         dictionaryInfo.append(_T("<li>New Japanese-English Character Dictionary (Halpern): "))
 370                                                 .append(sTemp.substr(1)).append(_T("</li>"));
 371                                 break;
 372                         case _T('N'):
 373                                 if((dictionaries & KDD_MRJECD)!=0)
 374                                         dictionaryInfo.append(_T("<li>Modern Reader's Japanese-English Character Dictionary (Nelson): "))
 375                                                 .append(sTemp.substr(1)).append(_T("</li>"));
 376                                 break;
 377                         case _T('V'):
 378                                 if((dictionaries & KDD_NNJECD)!=0)
 379                                         dictionaryInfo.append(_T("<li>The New Nelson's Japanese-English Character Dictionary: "))
 380                                                 .append(sTemp.substr(1)).append(_T("</li>"));
 381                                 break;
 382                         case _T('P'):
 383                                 /* SKIP codes. */
 384                                 /* This is a thorny issue.  If we want to include a stock KANJIDIC, then we */
 385                                 /* need to add encryption to the file and prevent copy/pasting of that data. */
 386                                 /* I'll comply later on, but for now I'll use a stripped KANJIDIC. */
 387 #ifdef USE_SKIP
 388                                 if((dictionaries & KDD_SKIP)!=0)
 389                                         dictionaryInfo.append(_T("<li>SKIP code: "))
 390                                                 .append(sTemp.substr(1)).append(_T("</li>"));
 391 #endif
 392                                 break;
 393                         case _T('I'):  /* Spahn/Hadamitzky dictionaries */
 394                                 if(sTemp[1]==_T('N')) {
 395                                         if((dictionaries & KDD_KK)!=0) {
 396                                                 dictionaryInfo.append(_T("<li>Kanji & Kana (Spahn, Hadamitzky): "))
 397                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 398                                         }
 399                                 } else {
 400                                         if((dictionaries & KDD_KD)!=0) {
 401                                                 dictionaryInfo.append(_T("<li>Kanji Dictionary (Spahn, Hadamitzky): "))
 402                                                         .append(sTemp.substr(1)).append(_T("</li>"));
 403                                         }
 404                                 }
 405                                 break;
 406                         case _T('Q'):
 407                                 if((dictionaries & KDD_FC)!=0) {
 408                                         dictionaryInfo.append(_T("<li>Four Corner code: "))
 409                                                 .append(sTemp.substr(1)).append(_T("</li>"));
 410                                 }
 411                                 break;
 412                         case _T('M'):
 413                                 c2 = sTemp[1];
 414                                 if(c2==_T('N')) {
 415                                         if((dictionaries & KDD_MOROI)!=0) {
 416                                                 dictionaryInfo.append(_T("<li>Morohashi Daikanwajiten Index: "))
 417                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 418                                         }
 419                                 } else if(c2==_T('P')) {
 420                                         if((dictionaries & KDD_MOROVP)!=0) {
 421                                                 dictionaryInfo.append(_T("<li>Morohashi Daikanwajiten Volume/Page: "))
 422                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 423                                         }
 424                                 }
 425                                 break;
 426                         case _T('E'):
 427                                 if((dictionaries & KDD_GRJC)!=0) {
 428                                         dictionaryInfo.append(_T("<li>A Guide to Remembering Japanese Characters (Henshal): "))
 429                                                 .append(sTemp.substr(1)).append(_T("</li>"));
 430                                 }
 431                                 break;
 432                         case _T('K'):
 433                                 if((dictionaries & KDD_GKD)!=0) {
 434                                         dictionaryInfo.append(_T("<li>Gakken Kanji Dictionary (\"A New Dictionary of Kanji Usage\"): "))
 435                                                 .append(sTemp.substr(1)).append(_T("</li>"));
 436                                 }
 437                                 break;
 438                         case _T('L'):
 439                                 if((dictionaries & KDD_RTK)!=0) {
 440                                         dictionaryInfo.append(_T("<li>Remembering the Kanji (Heisig): "))
 441                                                 .append(sTemp.substr(1)).append(_T("</li>"));
 442                                 }
 443                                 break;
 444                         case _T('O'):
 445                                 if((dictionaries & KDD_JN)!=0) {
 446                                         dictionaryInfo.append(_T("<li>Japanese Names (O'Neill): "))
 447                                                 .append(sTemp.substr(1)).append(_T("</li>"));
 448                                 }
 449                                 break;
 450                         case _T('D'):
 451                                 c2 = sTemp[1];
 452                                 switch(c2) {
 453                                 case _T('B'):
 454                                         if((dictionaries & KDD_JBP)!=0) {
 455                                                 dictionaryInfo.append(_T("<li>Japanese for Busy People (AJLT): "))
 456                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 457                                         }
 458                                         break;
 459                                 case _T('C'):
 460                                         if((dictionaries & KDD_KWJLP)!=0) {
 461                                                 dictionaryInfo.append(_T("<li>The Kanji Way to Japanese Language Power (Crowley): "))
 462                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 463                                         }
 464                                         break;
 465                                 case _T('F'):
 466                                         if((dictionaries & KDD_JKF)!=0) {
 467                                                 dictionaryInfo.append(_T("<li>Japanese Kanji Flashcards (White Rabbit Press): "))
 468                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 469                                         }
 470                                         break;
 471                                 case _T('G'):
 472                                         if((dictionaries & KDD_KCKG)!=0) {
 473                                                 dictionaryInfo.append(_T("<li>Kodansha Compact Kanji Guide: "))
 474                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 475                                         }
 476                                         break;
 477                                 case _T('H'):
 478                                         if((dictionaries & KDD_GTRWJH)!=0) {
 479                                                 dictionaryInfo.append(_T("<li>A Guide To Reading and Writing Japanese (Hensall): "))
 480                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 481                                         }
 482                                         break;
 483                                 case _T('J'):
 484                                         if((dictionaries & KDD_KIC)!=0) {
 485                                                 dictionaryInfo.append(_T("<li>Kanji in Context (Nishiguchi and Kono): "))
 486                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 487                                         }
 488                                         break;
 489                                 case _T('K'):
 490                                         if((dictionaries & KDD_KLD)!=0) {
 491                                                 dictionaryInfo.append(_T("<li>Kanji Learner's Dictionary (Halpern): "))
 492                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 493                                         }
 494                                         break;
 495                                 case _T('O'):
 496                                         if((dictionaries & KDD_EK)!=0) {
 497                                                 dictionaryInfo.append(_T("<li>Essential Kanji (O'Neill): "))
 498                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 499                                         }
 500                                         break;
 501                                 case _T('R'):
 502                                         if((dictionaries & KDD_DR)!=0) {
 503                                                 dictionaryInfo.append(_T("<li>2001 Kanji (De Roo): "))
 504                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 505                                         }
 506                                         break;
 507                                 case _T('S'):
 508                                         if((dictionaries & KDD_GTRWJS)!=0) {
 509                                                 dictionaryInfo.append(_T("<li>A Guide to Reading and Writing Japanese (Sakade): "))
 510                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 511                                         }
 512                                         break;
 513                                 case _T('T'):
 514                                         if((dictionaries & KDD_TKC)!=0) {
 515                                                 dictionaryInfo.append(_T("<li>Tuttle Kanji Cards (Kask): "))
 516                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 517                                         }
 518                                         break;
 519                                 default:
 520                                         if(unhandled.length()>0) unhandled.append(_T(" "));
 521                                         unhandled.append(sTemp);
 522                                         break;
 523                                 }
 524                                 break;
 525                         /* Crossreferences and miscodes */
 526                         case _T('X'):
 527                                 if(crossReferences.length()>0) crossReferences.append(_T(", "));
 528                                 crossReferences.append(sTemp.substr(1));
 529                                 break;
 530                         case _T('Z'):
 531                                 if(miscodes.length()>0) miscodes.append(_T(", "));
 532                                 miscodes.append(sTemp.substr(1));
 533                                 break;
 534                         /* Korean/Pinyin (Chinese) romanization */
 535                         case _T('W'):
 536                                 if(koreanRomanization.length()>0) koreanRomanization.append(_T(", "));
 537                                 koreanRomanization.append(sTemp.substr(1));
 538                                 break;
 539                         case _T('Y'):
 540                                 if(pinyinRomanization.length()>0) pinyinRomanization.append(_T(", "));
 541                                 pinyinRomanization.append(sTemp.substr(1));
 542                                 break;
 543                         default:
 544                                 if(unhandled.length()>0) unhandled.append(_T(" "));
 545                                 unhandled.append(sTemp);
 546                                 break;
 547                         }
 548                 }
 549         } /* while(t.HasMoreTokens()) */
 550
 551         if(header.length() > 0) result.append(header);
 552 #ifdef DEBUG
 553         printf("DEBUG: header=[%ls]\n", header.c_str());
 554 #endif
 555         result.append(_T("<ul>"));
 556         if((options & KDO_READINGS) != 0) {
 557                 if(onyomi.length() > 0) result.append(_T("<li>Onyomi Readings: ")).append(onyomi).append(_T("</li>"));
 558                 if(kunyomi.length() > 0) result.append(_T("<li>Kunyomi Readings: ")).append(kunyomi).append(_T("</li>"));
 559                 if(nanori.length() > 0) result.append(_T("<li>Nanori Readings: ")).append(nanori).append(_T("</li>"));
 560                 if(radicalReading.length() > 0) result.append(_T("<li>Special Radical Reading: ")).append(radicalReading).append(_T("</li>"));
 561         }
 562         if((options & KDO_MEANINGS) != 0) {
 563                 if(english.length() > 0) result.append(_T("<li>English Meanings: ")).append(english).append(_T("</li>"));
 564         }
 565         if((options & KDO_HIGHIMPORTANCE) != 0) {
 566                 if(strokes.length() > 0)
 567                         result.append(_T("<li>Stroke count: ")).append(strokes).append(_T("</li>"));
 568                 else
 569                         result.append(_T("<li>Stroke count: not specified in KANJIDIC"));
 570                 result.append(_T("<li>Grade Level: "));
 571                 if(grade<=6 && grade >= 1) {  /* Jouyou (Grade #) */
 572                         result.append(_T("Jouyou (Grade "))
 573                                 .append(wxString::Format(_T("%d"), (int)grade))
 574                                 .append(_T(")"));
 575                 } else if(grade==8) {  /* Jouyou (General usage) */
 576                         result.append(_T("Jouyou (General usage)"));
 577                 } else if(grade==9) {  /* Jinmeiyou (Characters for names) */
 578                         result.append(_T("Jinmeiyou (Characters for names)"));
 579                 } else if(grade==-1) {  /* No flag specified in kanjidic string */
 580                         result.append(_T("Unspecified"));
 581                 } else {
 582                         result.append(_T("Unhandled grade level (Grade "))
 583                                 .append(wxString::Format(_T("%d"), (int)grade))
 584                                 .append(_T(")"));
 585                 }
 586                 result.append(_T("</li>"));
 587                 if(frequency!=-1)
 588                         result.append(_T("<li>Frequency Ranking: "))
 589                                 .append(wxString::Format(_T("%d"), (int)frequency))
 590                                 .append(_T("</li>"));
 591                 else result.append(_T("<li>Frequency Ranking: Unspecified</li>"));
 592         }
 593         if((options & KDO_DICTIONARIES) != 0) {
 594                 if(dictionaryInfo.length()>0) result.append(_T("<li>Dictionary Codes:<ul>")).append(dictionaryInfo).append(_T("</ul></li>"));
 595         }
 596         if((options & KDO_VOCABCROSSREF) != 0) {
 597                 vector<wxString> *vList = &(jben->vocabList->GetVocabList());
 598                 wxChar thisKanji = kanjidicStr[0];
 599                 vector<wxString> crossRefList;
 600                 vector<wxString>::iterator vIt;
 601                 for(vIt=vList->begin(); vIt!=vList->end(); vIt++) {
 602                         if(vIt->find(thisKanji)!=wxString::npos) {
 603                                 crossRefList.push_back(*vIt);
 604                         }
 605                 }
 606                 if(crossRefList.size()>0) {
 607                         result.append(_T("<li>This kanji is used by words in your study list:<br><font size=\"7\">"));
 608                         vIt = crossRefList.begin();
 609                         result.append(*vIt);
 610                         for(++vIt; vIt!=crossRefList.end(); vIt++) {
 611                                 result.append(_T("&nbsp; ")).append(*vIt);
 612                         }
 613                         result.append(_T("</font></li>"));
 614                 }
 615         }
 616         if((options & KDO_LOWIMPORTANCE) != 0) {
 617                 if(koreanRomanization.length()>0) lowRelevance.append(_T("<li>Korean romanization: ")).append(koreanRomanization).append(_T("</li>"));
 618                 if(pinyinRomanization.length()>0) lowRelevance.append(_T("<li>Pinyin romanization: ")).append(pinyinRomanization).append(_T("</li>"));
 619                 if(crossReferences.length()>0) lowRelevance.append(_T("<li>Cross reference codes: ")).append(crossReferences).append(_T("</li>"));
 620                 if(miscodes.length()>0) lowRelevance.append(_T("<li>Miscodes: ")).append(miscodes).append(_T("</li>"));
 621                 if(lowRelevance.length()>0) result.append(_T("<li>Extra Information:<ul>")).append(lowRelevance).append(_T("</ul></li>"));
 622         }
 623         if((options & KDO_UNHANDLED) != 0) {
 624                 if(unhandled.length()>0) result.append(_T("<li>Unhandled: ")).append(unhandled).append(_T("</li>"));
 625         }
 626         result.append(_T("</ul>"));
 627
 628         return result;
 629 }
 630
 631 int KDict::GetIntField(wxChar kanji, const wxString& marker) const {
 632         wxString markerStr, kanjiEntry, temp;
 633         size_t index=0;
 634         long value=-1;
 635         int markerLen;
 636
 637         markerStr.Printf(_T(" %s"), marker.c_str());
 638         markerLen=markerStr.length();
 639
 640         kanjiEntry = GetKanjidicStr(kanji);
 641         if(kanjiEntry.length()>0) {
 642                 index = kanjiEntry.find(markerStr);
 643                 if(index!=wxString::npos) {
 644                         temp = kanjiEntry.substr(
 645                                 index+markerLen,
 646                                 kanjiEntry.find(_T(" "), index+1) - index - (markerLen-1));
 647                         temp.ToLong(&value);
 648                 }
 649         }
 650
 651         return (int)value;
 652 }
 653
 654 const BoostHM<wxChar,string>* KDict::GetHashTable() const {
 655         return &kanjidicData;
 656 }
 657
 658 enum {
 659         KDR_Onyomi=1,
 660         KDR_Kunyomi,
 661         KDR_English
 662 };
 663
 664 wxString KDict::GetOnyomiStr(wxChar c) const {
 665         return GetKanjidicReading(c, KDR_Onyomi);
 666 }
 667
 668 wxString KDict::GetKunyomiStr(wxChar c) const {
 669         return GetKanjidicReading(c, KDR_Kunyomi);
 670 }
 671
 672 wxString KDict::GetEnglishStr(wxChar c) const {
 673         return GetKanjidicReading(c, KDR_English);
 674 }
 675
 676 wxString KDict::GetKanjidicReading(wxChar c, int readingType) const {
 677         wxString result;
 678         wxString kanjidicStr = GetKanjidicStr(c);
 679
 680         long tmode = 0;
 681         wxString sTemp, token;
 682         wxStringTokenizer t(kanjidicStr, _T(' '));
 683
 684         /* The first two tokens are guaranteed not to be what we're looking for.  Skip them. */
 685         if(t.CountTokens()>1) {
 686                 t.GetNextToken();
 687                 t.GetNextToken();
 688         }
 689         while(t.HasMoreTokens()) {
 690                 token = t.GetNextToken();
 691                 sTemp = token;
 692                 c = sTemp[0];
 693                 /* If a preceding character is detected, strip it */
 694                 if(c == _T('(') || c == _T('〜')) {
 695                         sTemp = sTemp.substr(1);
 696                         c = sTemp[0];
 697                 }
 698                 if(tmode==0) {
 699                         if(IsKatakana(c) && readingType==KDR_Onyomi) {
 700                                 /* Onyomi reading detected */
 701                                 if(result.length()>0) result.append(_T("  "));
 702                                 result.append(token);   /* Copy the original string, including ()'s and 〜's */
 703                                 continue;
 704                         }
 705                         else if(IsHiragana(c) && readingType==KDR_Kunyomi) {
 706                                 /* Kunyomi reading detected */
 707                                 if(result.length()>0) result.append(_T("  "));
 708                                 result.append(token);  /* Copy the original string, including ()'s and 〜's */
 709                                 continue;
 710                         }
 711                 }
 712                 if(c == _T('{') && readingType==KDR_English) {
 713                         /* English meaning detected
 714                            Special handling is needed to take care of spaces, though.
 715                            We'll "cheat" and mess with our iterator a bit if a space is detected. */
 716                         while(t.HasMoreTokens() && sTemp[sTemp.length()-1] != _T('}')) {
 717                                 sTemp.append(_T(" ")).append(t.GetNextToken());
 718                         }
 719                         if(result.length()>0) result.append(_T(", "));
 720                         result.append(sTemp.substr(1,sTemp.length()-2));  /* Strip the {} */
 721                 }
 722                 else if(c==_T('T')) wxString(sTemp.substr(1)).ToLong(&tmode);
 723         }
 724
 725         return result;
 726 }