kanjidic.cpp

   1 /*
   2 Project: J-Ben
   3 Author:  Paul Goins
   4 Website: http://www.vultaire.net/software/jben/
   5 License: GNU General Public License (GPL) version 2
   6          (http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt)
   7
   8 File: kanjidic.cpp
   9
  10 This program is free software; you can redistribute it and/or modify
  11 it under the terms of the GNU General Public License as published by
  12 the Free Software Foundation; either version 2 of the License, or
  13 (at your option) any later version.
  14
  15 This program is distributed in the hope that it will be useful,
  16 but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18 GNU General Public License for more details.
  19
  20 You should have received a copy of the GNU General Public License
  21 along with this program.  If not, see <http://www.gnu.org/licenses/>
  22 */
  23
  24 #include "kanjidic.h"
  25 #include "file_utils.h"
  26 #include "jutils.h"
  27 #include "wx/tokenzr.h"
  28 #include "wx/file.h"
  29 #include "global.h"
  30 #include <fstream>
  31 using namespace std;
  32
  33 Kanjidic *Kanjidic::LoadKanjidic(const char *filename, int& returnCode) {
  34         Kanjidic *k=NULL;
  35         char *rawData = NULL;
  36         unsigned int size;
  37
  38         ifstream ifile(filename, ios::ate); /* "at end" to get our file size */
  39         if(ifile) {
  40                 size = ifile.tellg();
  41                 ifile.seekg(0);
  42                 rawData = new char[size+1];
  43                 rawData[size] = '\0';
  44                 ifile.read(rawData, size);
  45 #ifdef DEBUG
  46                 if(strlen(rawData)!=size)
  47                         fprintf(stderr,
  48                           "WARNING: kanjidic file size: %d, read-in string: %d\n",
  49                           strlen(rawData),
  50                           size);
  51 #endif
  52
  53                 /* Create the kanjidic object with our string data. */
  54                 k = new Kanjidic(rawData);
  55
  56                 returnCode = KD_SUCCESS;
  57         }
  58         else
  59                 returnCode = KD_FAILURE;
  60
  61         if(rawData) delete[] rawData;
  62         return k;
  63 }
  64
  65 /* This could be sped up: copy the first UTF-8 character into a string, then
  66    run a conversion on that.  Trivial though. */
  67 Kanjidic::Kanjidic(char *kanjidicRawData) {
  68         char *token = strtok(kanjidicRawData, "\n");
  69         wxString wxToken;
  70         while(token) {
  71                 if( (strlen(token)>0) && (token[0]!='#') ) {
  72                         UTF8ToWx(token, wxToken);
  73                         /* Convert token to proper format */
  74                         wxToken = ConvertKanjidicEntry(wxToken);
  75                         /* Add to hash table */
  76                         if(!kanjiHash.assign(wxToken[0], token)) {
  77 #ifdef DEBUG
  78                                 fprintf(stderr,
  79                                         "Error assigning (%lc, %ls) to hash table!\n",
  80                                         wxToken[0], wxToken.c_str());
  81 #endif
  82                         }
  83                 }
  84                 token = strtok(NULL, "\n");
  85         }
  86 }
  87
  88 Kanjidic::~Kanjidic() {
  89         /* Currently: nothing here. */
  90 }
  91
  92 /* This function returns a wxString containing the desired line of the
  93    kanjidic hash table.  A conversion from string to wxString is included
  94    in this call since strings are only used for more compressed internal
  95    storage.  This is followed by a slight reformatting of the string for
  96    better presentation. */
  97 wxString Kanjidic::GetKanjidicStr(wxChar c) const {
  98         BoostHM<wxChar,string>::iterator it = kanjiHash.find(c);
  99         if(it==kanjiHash.end()) return _T("");
 100         wxString s;
 101         UTF8ToWx(it->second, s);
 102         return ConvertKanjidicEntry(s);
 103 }
 104
 105 /*
 106  * Performs transformations on a KANJIDIC string for our internal usage.
 107  * Currently, this includes the following:
 108  * - Changing あ.いう notation to あ(いう), a la JWPce/JFC.
 109  * - Changing -あい notation to 〜あい, also a la JWPce/JFC.
 110  */
 111 wxString Kanjidic::ConvertKanjidicEntry(const wxString& s) {
 112         size_t index, lastIndex;
 113         wxString temp = s;
 114
 115         /* First conversion: あ.いう to あ(いう) */
 116         index = temp.find(_T('.'), 0);
 117         while(index!=wxString::npos) {
 118                 /* Proceed if the character preceding the "." is hiragana/katakana. */
 119                 if(IsFurigana(temp[index-1])) {
 120                         temp[index] = _T('(');
 121                         index = temp.find(_T(' '), index+1);
 122                         if(index==wxString::npos) {
 123                                 temp.append(_T(')'));
 124                                 break;
 125                         } else
 126                                 temp.insert(index, _T(')'));
 127                 }
 128                 lastIndex = index;
 129                 index = temp.find(_T('.'), lastIndex+1);
 130         }
 131
 132         /* Second conversion: - to 〜, when a neighboring character is hiragana/katakana */
 133         index = temp.find(_T('-'), 0);
 134         while(index!=wxString::npos) {
 135                 /* Proceed if the character before or after the "-" is hiragana/katakana. */
 136                 if(IsFurigana(temp[index-1]) || IsFurigana(temp[index+1]))
 137                         temp[index]=_T('〜');
 138
 139                 lastIndex = index;
 140                 index = temp.find(_T('-'), lastIndex+1);
 141         }
 142
 143         /* Return the converted string */
 144         return temp;
 145 }
 146
 147 wxString Kanjidic::KanjidicToHtml(const wxString& kanjidicStr) {
 148         return KanjidicToHtml(kanjidicStr,
 149                                                   jben->prefs->kanjidicOptions,
 150                                                   jben->prefs->kanjidicDictionaries);
 151 }
 152
 153 wxString Kanjidic::KanjidicToHtml(const wxString& kanjidicStr,
 154                                                                                  long options, long dictionaries) {
 155 /*      return wxString(_T("<p>"))
 156                 .append(s[0])
 157                 .append(_T("</p>"));*/
 158
 159         wxString result;
 160
 161         wxString header, onyomi, kunyomi, nanori, radicalReading, english;
 162         wxString dictionaryInfo;
 163         wxString lowRelevance;
 164         wxString unhandled;
 165         long grade = -1, frequency = -1, tmode = 0;
 166         wxString strokes;
 167         wxString koreanRomanization, pinyinRomanization, crossReferences, miscodes;
 168         wxString sTemp, token;
 169         wxStringTokenizer t(kanjidicStr, _T(' '));
 170         wxChar c, c2;
 171
 172         /* Special processing for the first 2 entries of the line. */
 173         if(t.CountTokens()>1) {
 174                 /* header = "<h1><font size=\"-6\">" + args[0] + "</font></h1>"; */
 175                 /*header.append(_T("<p style=\"font-size:32pt\">")) */
 176                 header.append(_T("<p><font size=\"7\">"))
 177                         .append(t.GetNextToken())
 178                         .append(_T("</font></p>"));
 179                 lowRelevance.append(_T("<li>JIS code: 0x"))
 180                         .append(t.GetNextToken())
 181                         .append(_T("</li>"));
 182         }
 183
 184         /* NEW!  Temporary code for loading in SODs and SODAs from KanjiCafe! */
 185         if(options & (KDO_SOD_STATIC | KDO_SOD_ANIM) != 0) {
 186                 wxCSConv transcoder(_T("utf-8"));
 187                 if(transcoder.IsOk()) {
 188                         string utfStr;
 189                         /* Get a UTF8-encoded string for the kanji. */
 190                         WxToUTF8(kanjidicStr[0], utfStr);
 191                         sTemp.clear();
 192
 193                         /* Convert to a low-to-high-byte hex string. */
 194                         for(unsigned int i=0;i<utfStr.length();i++) {
 195                                 sTemp.Append(
 196                                         wxString::Format(_T("%02x"),
 197                                                                          (unsigned char)utfStr[i]));
 198                         }
 199
 200                         wxString sod;
 201                         /* Load static SOD, if present */
 202                         if((options & KDO_SOD_STATIC) != 0) {
 203                                 wxFileName fn;
 204                                 fn.AppendDir(_T("sods"));
 205                                 fn.AppendDir(_T("sod-utf8-hex"));
 206                                 fn.SetName(sTemp);
 207                                 fn.SetExt(_T("png"));
 208 #ifdef DEBUG
 209                                 printf("DEBUG: Checking for existance of file \"%ls\"...\n", fn.GetFullPath().c_str());
 210 #endif
 211                                 if(wxFile::Exists(
 212                                         fn.GetFullPath()
 213                                 )) {
 214                                         sod.append(wxString::Format(
 215                                                 _T("<img src=\"%s\" />"), fn.GetFullPath().c_str()
 216                                         ));
 217                                 }
 218                         }
 219                         /* Load animated SOD, if present */
 220                         if((options & KDO_SOD_ANIM) != 0) {
 221                                 wxFileName fn;
 222                                 fn.AppendDir(_T("sods"));
 223                                 fn.AppendDir(_T("soda-utf8-hex"));
 224                                 fn.SetName(sTemp);
 225                                 fn.SetExt(_T("gif"));
 226 #ifdef DEBUG
 227                                 printf("DEBUG: Checking for existance of file \"%ls\"...\n", fn.GetFullPath().c_str());
 228 #endif
 229                                 if(wxFile::Exists(
 230                                         fn.GetFullPath()
 231                                 )) {
 232                                         if(sod.length()>0) sod.append(_T("<br />"));
 233                                         sod.append(wxString::Format(
 234                                                 _T("<img src=\"%s\" />"), fn.GetFullPath().c_str()
 235                                         ));
 236                                 }
 237                         }
 238                         /* Append the chart(s) in a paragraph object. */
 239                         if(sod.length()>0) {
 240                                 header.append(wxString::Format(
 241                                         _T("<p>%s<br /><font size=\"1\">(Kanji stroke order graphics used under license from KanjiCafe.com.)</font></p>"), sod.c_str()
 242                                 ));
 243                         }
 244                 } else {
 245                         fprintf(stderr, "[%s:%d]: Bad transcoder selected!\n", __FILE__, __LINE__);
 246                 }
 247         }
 248         /* END OF EXPERIMENTAL NEW CODE */
 249
 250         while(t.HasMoreTokens()) {
 251                 token = t.GetNextToken();
 252                 sTemp = token;
 253                 c = sTemp[0];
 254                 /* If a preceding character is detected, strip it */
 255                 if(c == _T('(') || c == _T('〜')) {
 256                         sTemp = sTemp.substr(1);
 257                         c = sTemp[0];
 258                 }
 259                 if(tmode==0) {
 260                         if(IsKatakana(c)) {
 261                                 /* Onyomi reading detected */
 262                                 /*if(onyomi.length()>0) onyomi.append(_T("　")); */
 263                                 if(onyomi.length()>0) onyomi.append(_T("&nbsp; "));
 264                                 onyomi.append(token);   /* Copy the original string, including ()'s and 〜's */
 265                                 continue;
 266                         }
 267                         else if(IsHiragana(c)) {
 268                                 /* Kunyomi reading detected */
 269                                 if(kunyomi.length()>0) kunyomi.append(_T("&nbsp; "));
 270                                 kunyomi.append(token);  /* Copy the original string, including ()'s and 〜's */
 271                                 continue;
 272                         }
 273                 } else if(tmode==1) {
 274                         if(IsFurigana(c)) {
 275                                 /* Nanori reading detected */
 276                                 if(nanori.length()>0) nanori.append(_T("&nbsp; "));
 277                                 nanori.append(token);   /* Copy the original string, including ()'s and 〜's */
 278                                 continue;
 279                         }
 280                 } else if(tmode==2) {
 281                         if(IsFurigana(c)) {
 282                                 /* Special radical reading detected */
 283                                 if(radicalReading.length()>0) radicalReading.append(_T("&nbsp; "));
 284                                 radicalReading.append(token);
 285                                 continue;
 286                         }
 287                 }
 288                 if(c == _T('{')) {
 289                         /* English meaning detected
 290                            Special handling is needed to take care of spaces, though.
 291                            We'll "cheat" and mess with our iterator a bit if a space is detected. */
 292                         while(t.HasMoreTokens() && sTemp[sTemp.length()-1] != _T('}')) {
 293                                 sTemp.append(_T(" ")).append(t.GetNextToken());
 294                         }
 295                         if(english.length()>0) english.append(_T(", "));
 296                         english.append(sTemp.substr(1,sTemp.length()-2));  /* Strip the {} */
 297                 }
 298                 else {
 299                         switch(c) {
 300                         case _T('T'):  /* Change "t mode" */
 301                                 /* Note: substr() returns type wxStringBase, which disallows access to wxString::ToLong.
 302                                    So, by making a copy of wxString and performing the conversion in the copy, we get around this.
 303                                    This ugly kludge is repeated twice below for frequency and grade level. */
 304                                 wxString(sTemp.substr(1)).ToLong(&tmode);
 305 #ifdef DEBUG
 306                                 if(tmode>2) printf("WARNING: T-mode set to %d.\nT-modes above 2 are not currently documented!", (int)tmode);
 307 #endif
 308                                 break;
 309                         case _T('B'):  /* Bushu radical */
 310                                 lowRelevance.append(_T("<li>Bushu radical: ")).append(sTemp.substr(1)).append(_T("</li>"));
 311                                 break;
 312                         case _T('C'):  /* Classical radical */
 313                                 lowRelevance.append(_T("<li>Classical radical: ")).append(sTemp.substr(1)).append(_T("</li>"));
 314                                 break;
 315                         case _T('F'):  /* Frequency */
 316                                 wxString(sTemp.substr(1)).ToLong(&frequency);
 317                                 break;
 318                         case _T('G'):  /* Grade level */
 319                                 wxString(sTemp.substr(1)).ToLong(&grade);
 320                                 break;
 321                         case _T('S'):  /* Stroke count */
 322                                 if(strokes.length()==0) {
 323                                         strokes = sTemp.substr(1);
 324                                 } else if(!strokes.find(_T(' '))!=wxString::npos) {
 325                                         strokes.append(_T(" (Miscounts: "))
 326                                                 .append(sTemp.substr(1))
 327                                                 .append(_T(")"));
 328                                 } else {
 329                                         strokes = strokes.substr(0, strokes.length()-1)
 330                                                 .append(_T(", "))
 331                                                 .append(sTemp.substr(1))
 332                                                 .append(_T(")"));
 333                                 }
 334                                 break;
 335                         case _T('U'):  /* Unicode value */
 336                                 lowRelevance.append(_T("<li>Unicode: 0x")).append(sTemp.substr(1)).append(_T("</li>"));
 337                                 break;
 338                         /* From here, it's all dictionary codes */
 339                         case _T('H'):
 340                                 if((dictionaries & KDD_NJECD)!=0)
 341                                         dictionaryInfo.append(_T("<li>New Japanese-English Character Dictionary (Halpern): "))
 342                                                 .append(sTemp.substr(1)).append(_T("</li>"));
 343                                 break;
 344                         case _T('N'):
 345                                 if((dictionaries & KDD_MRJECD)!=0)
 346                                         dictionaryInfo.append(_T("<li>Modern Reader's Japanese-English Character Dictionary (Nelson): "))
 347                                                 .append(sTemp.substr(1)).append(_T("</li>"));
 348                                 break;
 349                         case _T('V'):
 350                                 if((dictionaries & KDD_NNJECD)!=0)
 351                                         dictionaryInfo.append(_T("<li>The New Nelson's Japanese-English Character Dictionary: "))
 352                                                 .append(sTemp.substr(1)).append(_T("</li>"));
 353                                 break;
 354                         case _T('P'):
 355                                 /* SKIP codes. */
 356                                 /* This is a thorny issue.  If we want to include a stock KANJIDIC, then we */
 357                                 /* need to add encryption to the file and prevent copy/pasting of that data. */
 358                                 /* I'll comply later on, but for now I'll use a stripped KANJIDIC. */
 359 #ifdef USE_SKIP
 360                                 if((dictionaries & KDD_SKIP)!=0)
 361                                         dictionaryInfo.append(_T("<li>SKIP code: "))
 362                                                 .append(sTemp.substr(1)).append(_T("</li>"));
 363 #endif
 364                                 break;
 365                         case _T('I'):  /* Spahn/Hadamitzky dictionaries */
 366                                 if(sTemp[1]==_T('N')) {
 367                                         if((dictionaries & KDD_KK)!=0) {
 368                                                 dictionaryInfo.append(_T("<li>Kanji & Kana (Spahn, Hadamitzky): "))
 369                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 370                                         }
 371                                 } else {
 372                                         if((dictionaries & KDD_KD)!=0) {
 373                                                 dictionaryInfo.append(_T("<li>Kanji Dictionary (Spahn, Hadamitzky): "))
 374                                                         .append(sTemp.substr(1)).append(_T("</li>"));
 375                                         }
 376                                 }
 377                                 break;
 378                         case _T('Q'):
 379                                 if((dictionaries & KDD_FC)!=0) {
 380                                         dictionaryInfo.append(_T("<li>Four Corner code: "))
 381                                                 .append(sTemp.substr(1)).append(_T("</li>"));
 382                                 }
 383                                 break;
 384                         case _T('M'):
 385                                 c2 = sTemp[1];
 386                                 if(c2==_T('N')) {
 387                                         if((dictionaries & KDD_MOROI)!=0) {
 388                                                 dictionaryInfo.append(_T("<li>Morohashi Daikanwajiten Index: "))
 389                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 390                                         }
 391                                 } else if(c2==_T('P')) {
 392                                         if((dictionaries & KDD_MOROVP)!=0) {
 393                                                 dictionaryInfo.append(_T("<li>Morohashi Daikanwajiten Volume/Page: "))
 394                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 395                                         }
 396                                 }
 397                                 break;
 398                         case _T('E'):
 399                                 if((dictionaries & KDD_GRJC)!=0) {
 400                                         dictionaryInfo.append(_T("<li>A Guide to Remembering Japanese Characters (Henshal): "))
 401                                                 .append(sTemp.substr(1)).append(_T("</li>"));
 402                                 }
 403                                 break;
 404                         case _T('K'):
 405                                 if((dictionaries & KDD_GKD)!=0) {
 406                                         dictionaryInfo.append(_T("<li>Gakken Kanji Dictionary (\"A New Dictionary of Kanji Usage\"): "))
 407                                                 .append(sTemp.substr(1)).append(_T("</li>"));
 408                                 }
 409                                 break;
 410                         case _T('L'):
 411                                 if((dictionaries & KDD_RTK)!=0) {
 412                                         dictionaryInfo.append(_T("<li>Remembering the Kanji (Heisig): "))
 413                                                 .append(sTemp.substr(1)).append(_T("</li>"));
 414                                 }
 415                                 break;
 416                         case _T('O'):
 417                                 if((dictionaries & KDD_JN)!=0) {
 418                                         dictionaryInfo.append(_T("<li>Japanese Names (O'Neill): "))
 419                                                 .append(sTemp.substr(1)).append(_T("</li>"));
 420                                 }
 421                                 break;
 422                         case _T('D'):
 423                                 c2 = sTemp[1];
 424                                 switch(c2) {
 425                                 case _T('B'):
 426                                         if((dictionaries & KDD_JBP)!=0) {
 427                                                 dictionaryInfo.append(_T("<li>Japanese for Busy People (AJLT): "))
 428                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 429                                         }
 430                                         break;
 431                                 case _T('C'):
 432                                         if((dictionaries & KDD_KWJLP)!=0) {
 433                                                 dictionaryInfo.append(_T("<li>The Kanji Way to Japanese Language Power (Crowley): "))
 434                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 435                                         }
 436                                         break;
 437                                 case _T('F'):
 438                                         if((dictionaries & KDD_JKF)!=0) {
 439                                                 dictionaryInfo.append(_T("<li>Japanese Kanji Flashcards (White Rabbit Press): "))
 440                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 441                                         }
 442                                         break;
 443                                 case _T('G'):
 444                                         if((dictionaries & KDD_KCKG)!=0) {
 445                                                 dictionaryInfo.append(_T("<li>Kodansha Compact Kanji Guide: "))
 446                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 447                                         }
 448                                         break;
 449                                 case _T('H'):
 450                                         if((dictionaries & KDD_GTRWJH)!=0) {
 451                                                 dictionaryInfo.append(_T("<li>A Guide To Reading and Writing Japanese (Hensall): "))
 452                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 453                                         }
 454                                         break;
 455                                 case _T('J'):
 456                                         if((dictionaries & KDD_KIC)!=0) {
 457                                                 dictionaryInfo.append(_T("<li>Kanji in Context (Nishiguchi and Kono): "))
 458                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 459                                         }
 460                                         break;
 461                                 case _T('K'):
 462                                         if((dictionaries & KDD_KLD)!=0) {
 463                                                 dictionaryInfo.append(_T("<li>Kanji Learner's Dictionary (Halpern): "))
 464                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 465                                         }
 466                                         break;
 467                                 case _T('O'):
 468                                         if((dictionaries & KDD_EK)!=0) {
 469                                                 dictionaryInfo.append(_T("<li>Essential Kanji (O'Neill): "))
 470                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 471                                         }
 472                                         break;
 473                                 case _T('R'):
 474                                         if((dictionaries & KDD_DR)!=0) {
 475                                                 dictionaryInfo.append(_T("<li>2001 Kanji (De Roo): "))
 476                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 477                                         }
 478                                         break;
 479                                 case _T('S'):
 480                                         if((dictionaries & KDD_GTRWJS)!=0) {
 481                                                 dictionaryInfo.append(_T("<li>A Guide to Reading and Writing Japanese (Sakade): "))
 482                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 483                                         }
 484                                         break;
 485                                 case _T('T'):
 486                                         if((dictionaries & KDD_TKC)!=0) {
 487                                                 dictionaryInfo.append(_T("<li>Tuttle Kanji Cards (Kask): "))
 488                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 489                                         }
 490                                         break;
 491                                 default:
 492                                         if(unhandled.length()>0) unhandled.append(_T(" "));
 493                                         unhandled.append(sTemp);
 494                                         break;
 495                                 }
 496                                 break;
 497                         /* Crossreferences and miscodes */
 498                         case _T('X'):
 499                                 if(crossReferences.length()>0) crossReferences.append(_T(", "));
 500                                 crossReferences.append(sTemp.substr(1));
 501                                 break;
 502                         case _T('Z'):
 503                                 if(miscodes.length()>0) miscodes.append(_T(", "));
 504                                 miscodes.append(sTemp.substr(1));
 505                                 break;
 506                         /* Korean/Pinyin (Chinese) romanization */
 507                         case _T('W'):
 508                                 if(koreanRomanization.length()>0) koreanRomanization.append(_T(", "));
 509                                 koreanRomanization.append(sTemp.substr(1));
 510                                 break;
 511                         case _T('Y'):
 512                                 if(pinyinRomanization.length()>0) pinyinRomanization.append(_T(", "));
 513                                 pinyinRomanization.append(sTemp.substr(1));
 514                                 break;
 515                         default:
 516                                 if(unhandled.length()>0) unhandled.append(_T(" "));
 517                                 unhandled.append(sTemp);
 518                                 break;
 519                         }
 520                 }
 521         } /* while(t.HasMoreTokens()) */
 522
 523         if(header.length() > 0) result.append(header);
 524 #ifdef DEBUG
 525         printf("DEBUG: header=[%ls]\n", header.c_str());
 526 #endif
 527         result.append(_T("<ul>"));
 528         if((options & KDO_READINGS) != 0) {
 529                 if(onyomi.length() > 0) result.append(_T("<li>Onyomi Readings: ")).append(onyomi).append(_T("</li>"));
 530                 if(kunyomi.length() > 0) result.append(_T("<li>Kunyomi Readings: ")).append(kunyomi).append(_T("</li>"));
 531                 if(nanori.length() > 0) result.append(_T("<li>Nanori Readings: ")).append(nanori).append(_T("</li>"));
 532                 if(radicalReading.length() > 0) result.append(_T("<li>Special Radical Reading: ")).append(radicalReading).append(_T("</li>"));
 533         }
 534         if((options & KDO_MEANINGS) != 0) {
 535                 if(english.length() > 0) result.append(_T("<li>English Meanings: ")).append(english).append(_T("</li>"));
 536         }
 537         if((options & KDO_HIGHIMPORTANCE) != 0) {
 538                 if(strokes.length() > 0)
 539                         result.append(_T("<li>Stroke count: ")).append(strokes).append(_T("</li>"));
 540                 else
 541                         result.append(_T("<li>Stroke count: not specified in KANJIDIC"));
 542                 result.append(_T("<li>Grade Level: "));
 543                 if(grade<=6 && grade >= 1) {  /* Jouyou (Grade #) */
 544                         result.append(_T("Jouyou (Grade "))
 545                                 .append(wxString::Format(_T("%d"), (int)grade))
 546                                 .append(_T(")"));
 547                 } else if(grade==8) {  /* Jouyou (General usage) */
 548                         result.append(_T("Jouyou (General usage)"));
 549                 } else if(grade==9) {  /* Jinmeiyou (Characters for names) */
 550                         result.append(_T("Jinmeiyou (Characters for names)"));
 551                 } else if(grade==-1) {  /* No flag specified in kanjidic string */
 552                         result.append(_T("Unspecified"));
 553                 } else {
 554                         result.append(_T("Unhandled grade level (Grade "))
 555                                 .append(wxString::Format(_T("%d"), (int)grade))
 556                                 .append(_T(")"));
 557                 }
 558                 result.append(_T("</li>"));
 559                 if(frequency!=-1)
 560                         result.append(_T("<li>Frequency Ranking: "))
 561                                 .append(wxString::Format(_T("%d"), (int)frequency))
 562                                 .append(_T("</li>"));
 563                 else result.append(_T("<li>Frequency Ranking: Unspecified</li>"));
 564         }
 565         if((options & KDO_DICTIONARIES) != 0) {
 566                 if(dictionaryInfo.length()>0) result.append(_T("<li>Dictionary Codes:<ul>")).append(dictionaryInfo).append(_T("</ul></li>"));
 567         }
 568         if((options & KDO_VOCABCROSSREF) != 0) {
 569                 vector<wxString> *vList = &(jben->vocabList->GetVocabList());
 570                 wxChar thisKanji = kanjidicStr[0];
 571                 vector<wxString> crossRefList;
 572                 vector<wxString>::iterator vIt;
 573                 for(vIt=vList->begin(); vIt!=vList->end(); vIt++) {
 574                         if(vIt->find(thisKanji)!=wxString::npos) {
 575                                 crossRefList.push_back(*vIt);
 576                         }
 577                 }
 578                 if(crossRefList.size()>0) {
 579                         result.append(_T("<li>This kanji is used by words in your study list:<br><font size=\"7\">"));
 580                         vIt = crossRefList.begin();
 581                         result.append(*vIt);
 582                         for(++vIt; vIt!=crossRefList.end(); vIt++) {
 583                                 result.append(_T("&nbsp; ")).append(*vIt);
 584                         }
 585                         result.append(_T("</font></li>"));
 586                 }
 587         }
 588         if((options & KDO_LOWIMPORTANCE) != 0) {
 589                 if(koreanRomanization.length()>0) lowRelevance.append(_T("<li>Korean romanization: ")).append(koreanRomanization).append(_T("</li>"));
 590                 if(pinyinRomanization.length()>0) lowRelevance.append(_T("<li>Pinyin romanization: ")).append(pinyinRomanization).append(_T("</li>"));
 591                 if(crossReferences.length()>0) lowRelevance.append(_T("<li>Cross reference codes: ")).append(crossReferences).append(_T("</li>"));
 592                 if(miscodes.length()>0) lowRelevance.append(_T("<li>Miscodes: ")).append(miscodes).append(_T("</li>"));
 593                 if(lowRelevance.length()>0) result.append(_T("<li>Extra Information:<ul>")).append(lowRelevance).append(_T("</ul></li>"));
 594         }
 595         if((options & KDO_UNHANDLED) != 0) {
 596                 if(unhandled.length()>0) result.append(_T("<li>Unhandled: ")).append(unhandled).append(_T("</li>"));
 597         }
 598         result.append(_T("</ul>"));
 599
 600         return result;
 601 }
 602
 603 int Kanjidic::GetIntField(wxChar kanji, const wxString& marker) const {
 604         wxString markerStr, kanjiEntry, temp;
 605         size_t index=0;
 606         long value=-1;
 607         int markerLen;
 608
 609         markerStr.Printf(_T(" %s"), marker.c_str());
 610         markerLen=markerStr.length();
 611
 612         kanjiEntry = GetKanjidicStr(kanji);
 613         if(kanjiEntry.length()>0) {
 614                 index = kanjiEntry.find(markerStr);
 615                 if(index!=wxString::npos) {
 616                         temp = kanjiEntry.substr(
 617                                 index+markerLen,
 618                                 kanjiEntry.find(_T(" "), index+1) - index - (markerLen-1));
 619                         temp.ToLong(&value);
 620                 }
 621         }
 622
 623         return (int)value;
 624 }
 625
 626 const BoostHM<wxChar,string>* const Kanjidic::GetHashTable() const {
 627         return &kanjiHash;
 628 }
 629
 630 enum {
 631         KDR_Onyomi=1,
 632         KDR_Kunyomi,
 633         KDR_English
 634 };
 635
 636 wxString Kanjidic::GetOnyomiStr(wxChar c) const {
 637         return GetKanjidicReading(c, KDR_Onyomi);
 638 }
 639
 640 wxString Kanjidic::GetKunyomiStr(wxChar c) const {
 641         return GetKanjidicReading(c, KDR_Kunyomi);
 642 }
 643
 644 wxString Kanjidic::GetEnglishStr(wxChar c) const {
 645         return GetKanjidicReading(c, KDR_English);
 646 }
 647
 648 wxString Kanjidic::GetKanjidicReading(wxChar c, int readingType) const {
 649         wxString result;
 650         wxString kanjidicStr = GetKanjidicStr(c);
 651
 652         long tmode = 0;
 653         wxString sTemp, token;
 654         wxStringTokenizer t(kanjidicStr, _T(' '));
 655
 656         /* The first two tokens are guaranteed not to be what we're looking for.  Skip them. */
 657         if(t.CountTokens()>1) {
 658                 t.GetNextToken();
 659                 t.GetNextToken();
 660         }
 661         while(t.HasMoreTokens()) {
 662                 token = t.GetNextToken();
 663                 sTemp = token;
 664                 c = sTemp[0];
 665                 /* If a preceding character is detected, strip it */
 666                 if(c == _T('(') || c == _T('〜')) {
 667                         sTemp = sTemp.substr(1);
 668                         c = sTemp[0];
 669                 }
 670                 if(tmode==0) {
 671                         if(IsKatakana(c) && readingType==KDR_Onyomi) {
 672                                 /* Onyomi reading detected */
 673                                 if(result.length()>0) result.append(_T("  "));
 674                                 result.append(token);   /* Copy the original string, including ()'s and 〜's */
 675                                 continue;
 676                         }
 677                         else if(IsHiragana(c) && readingType==KDR_Kunyomi) {
 678                                 /* Kunyomi reading detected */
 679                                 if(result.length()>0) result.append(_T("  "));
 680                                 result.append(token);  /* Copy the original string, including ()'s and 〜's */
 681                                 continue;
 682                         }
 683                 }
 684                 if(c == _T('{') && readingType==KDR_English) {
 685                         /* English meaning detected
 686                            Special handling is needed to take care of spaces, though.
 687                            We'll "cheat" and mess with our iterator a bit if a space is detected. */
 688                         while(t.HasMoreTokens() && sTemp[sTemp.length()-1] != _T('}')) {
 689                                 sTemp.append(_T(" ")).append(t.GetNextToken());
 690                         }
 691                         if(result.length()>0) result.append(_T(", "));
 692                         result.append(sTemp.substr(1,sTemp.length()-2));  /* Strip the {} */
 693                 }
 694                 else if(c==_T('T')) wxString(sTemp.substr(1)).ToLong(&tmode);
 695         }
 696
 697         return result;
 698 }