kanjidic.cpp

   1 /*
   2 Project: J-Ben
   3 Author:  Paul Goins
   4 Website: http://www.vultaire.net/software/jben/
   5 License: GNU General Public License (GPL) version 2
   6          (http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt)
   7
   8 File: kanjidic.cpp
   9
  10 This program is free software; you can redistribute it and/or modify
  11 it under the terms of the GNU General Public License as published by
  12 the Free Software Foundation; either version 2 of the License, or
  13 (at your option) any later version.
  14
  15 This program is distributed in the hope that it will be useful,
  16 but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18 GNU General Public License for more details.
  19
  20 You should have received a copy of the GNU General Public License
  21 along with this program.  If not, see <http://www.gnu.org/licenses/>
  22 */
  23
  24 #include "kanjidic.h"
  25 #include "file_utils.h"
  26 #include "jutils.h"
  27 #include "wx/tokenzr.h"
  28 #include "wx/file.h"
  29 #include "global.h"
  30 #include <fstream>
  31 using namespace std;
  32
  33 KanjiDic *KanjiDic::LoadKanjiDic(const char *filename, int& returnCode) {
  34         KanjiDic *k=NULL;
  35         char *rawData = NULL;
  36         unsigned int size;
  37
  38         ifstream ifile(filename, ios::ate); /* "at end" to get our file size */
  39         if(ifile) {
  40                 size = ifile.tellg();
  41                 ifile.seekg(0);
  42                 rawData = new char[size+1];
  43                 rawData[size] = '\0';
  44                 ifile.read(rawData, size);
  45 #ifdef DEBUG
  46                 if(strlen(rawData)!=size)
  47                         fprintf(stderr,
  48                           "WARNING: kanjidic file size: %d, read-in string: %d\n",
  49                           strlen(rawData),
  50                           size);
  51 #endif
  52
  53                 /* Create the kanjidic object with our string data. */
  54                 k = new KanjiDic(rawData);
  55
  56                 returnCode = KD_SUCCESS;
  57         }
  58         else
  59                 returnCode = KD_FAILURE;
  60
  61         if(rawData) delete[] rawData;
  62         return k;
  63 }
  64
  65 /* This could be sped up: copy the first UTF-8 character into a string, then
  66    run a conversion on that.  Trivial though. */
  67 KanjiDic::KanjiDic(char *kanjidicRawData) {
  68         char *token = strtok(kanjidicRawData, "\n");
  69         wxString wxToken;
  70         while(token) {
  71                 if( (strlen(token)>0) && (token[0]!='#') ) {
  72                         UTF8ToWx(token, wxToken);
  73                         /* Convert token to proper format */
  74                         wxToken = ConvertKanjidicEntry(wxToken);
  75                         /* Add to hash table */
  76                         if(!kanjiHash.assign(wxToken[0], token)) {
  77 #ifdef DEBUG
  78                                 fprintf(stderr,
  79                                         "Error assigning (%lc, %ls) to hash table!\n",
  80                                         wxToken[0], wxToken.c_str());
  81 #endif
  82                         }
  83                 }
  84                 token = strtok(NULL, "\n");
  85         }
  86 }
  87
  88 KanjiDic::~KanjiDic() {
  89         /* Currently: nothing here. */
  90 }
  91
  92 /* This function returns a wxString containing the desired line of the
  93    kanjidic hash table.  A conversion from string to wxString is included
  94    in this call since strings are only used for more compressed internal
  95    storage.  This is followed by a slight reformatting of the string for
  96    better presentation. */
  97 wxString KanjiDic::GetKanjidicStr(wxChar c) {
  98         BoostHM<wxChar,string>::iterator it = kanjiHash.find(c);
  99         if(it==kanjiHash.end()) return _T("");
 100         wxString s;
 101         UTF8ToWx(it->second, s);
 102         return ConvertKanjidicEntry(s);
 103 }
 104
 105 /*
 106  * Performs transformations on a KANJIDIC string for our internal usage.
 107  * Currently, this includes the following:
 108  * - Changing あ.いう notation to あ(いう), a la JWPce/JFC.
 109  * - Changing -あい notation to 〜あい, also a la JWPce/JFC.
 110  */
 111 wxString KanjiDic::ConvertKanjidicEntry(const wxString& s) {
 112         size_t index, lastIndex;
 113         wxString temp = s;
 114
 115         /* First conversion: あ.いう to あ(いう) */
 116         index = temp.find(_T('.'), 0);
 117         while(index!=wxString::npos) {
 118                 /* Proceed if the character preceding the "." is hiragana/katakana. */
 119                 if(IsFurigana(temp[index-1])) {
 120                         temp[index] = _T('(');
 121                         index = temp.find(_T(' '), index+1);
 122                         if(index==wxString::npos) {
 123                                 temp.append(_T(')'));
 124                                 break;
 125                         } else
 126                                 temp.insert(index, _T(')'));
 127                 }
 128                 lastIndex = index;
 129                 index = temp.find(_T('.'), lastIndex+1);
 130         }
 131
 132         /* Second conversion: - to 〜, when a neighboring character is hiragana/katakana */
 133         index = temp.find(_T('-'), 0);
 134         while(index!=wxString::npos) {
 135                 /* Proceed if the character before or after the "-" is hiragana/katakana. */
 136                 if(IsFurigana(temp[index-1]) || IsFurigana(temp[index+1]))
 137                         temp[index]=_T('〜');
 138
 139                 lastIndex = index;
 140                 index = temp.find(_T('-'), lastIndex+1);
 141         }
 142
 143         /* Return the converted string */
 144         return temp;
 145 }
 146
 147 wxString KanjiDic::KanjidicToHtml(const wxString& kanjidicStr) {
 148         return KanjidicToHtml(kanjidicStr, prefs->kanjidicOptions, prefs->kanjidicDictionaries);
 149 }
 150
 151 wxString KanjiDic::KanjidicToHtml(const wxString& kanjidicStr, long options, long dictionaries) {
 152 /*      return wxString(_T("<p>"))
 153                 .append(s[0])
 154                 .append(_T("</p>"));*/
 155
 156         wxString result;
 157
 158         wxString header, onyomi, kunyomi, nanori, radicalReading, english;
 159         wxString dictionaryInfo;
 160         wxString lowRelevance;
 161         wxString unhandled;
 162         long grade = -1, frequency = -1, tmode = 0;
 163         wxString strokes;
 164         wxString koreanRomanization, pinyinRomanization, crossReferences, miscodes;
 165         wxString sTemp, token;
 166         wxStringTokenizer t(kanjidicStr, _T(' '));
 167         wxChar c, c2;
 168
 169         /* Special processing for the first 2 entries of the line. */
 170         if(t.CountTokens()>1) {
 171                 /* header = "<h1><font size=\"-6\">" + args[0] + "</font></h1>"; */
 172                 /*header.append(_T("<p style=\"font-size:32pt\">")) */
 173                 header.append(_T("<p><font size=\"7\">"))
 174                         .append(t.GetNextToken())
 175                         .append(_T("</font></p>"));
 176                 lowRelevance.append(_T("<li>JIS code: 0x"))
 177                         .append(t.GetNextToken())
 178                         .append(_T("</li>"));
 179         }
 180
 181         /* NEW!  Temporary code for loading in SODs and SODAs from KanjiCafe! */
 182         if(options & (KDO_SOD_STATIC | KDO_SOD_ANIM) != 0) {
 183                 wxCSConv transcoder(_T("utf-8"));
 184                 if(transcoder.IsOk()) {
 185                         string utfStr;
 186                         /* Get a UTF8-encoded string for the kanji. */
 187                         WxToUTF8(kanjidicStr[0], utfStr);
 188                         sTemp.clear();
 189
 190                         /* Convert to a low-to-high-byte hex string. */
 191                         for(unsigned int i=0;i<utfStr.length();i++) {
 192                                 sTemp.Append(
 193                                         wxString::Format(_T("%02x"),
 194                                                                          (unsigned char)utfStr[i]));
 195                         }
 196
 197                         wxString sod;
 198                         /* Load static SOD, if present */
 199                         if((options & KDO_SOD_STATIC) != 0) {
 200                                 wxFileName fn;
 201                                 fn.AppendDir(_T("sods"));
 202                                 fn.AppendDir(_T("sod-utf8-hex"));
 203                                 fn.SetName(sTemp);
 204                                 fn.SetExt(_T("png"));
 205 #ifdef DEBUG
 206                                 printf("DEBUG: Checking for existance of file \"%ls\"...\n", fn.GetFullPath().c_str());
 207 #endif
 208                                 if(wxFile::Exists(
 209                                         fn.GetFullPath()
 210                                 )) {
 211                                         sod.append(wxString::Format(
 212                                                 _T("<img src=\"%s\" />"), fn.GetFullPath().c_str()
 213                                         ));
 214                                 }
 215                         }
 216                         /* Load animated SOD, if present */
 217                         if((options & KDO_SOD_ANIM) != 0) {
 218                                 wxFileName fn;
 219                                 fn.AppendDir(_T("sods"));
 220                                 fn.AppendDir(_T("soda-utf8-hex"));
 221                                 fn.SetName(sTemp);
 222                                 fn.SetExt(_T("gif"));
 223 #ifdef DEBUG
 224                                 printf("DEBUG: Checking for existance of file \"%ls\"...\n", fn.GetFullPath().c_str());
 225 #endif
 226                                 if(wxFile::Exists(
 227                                         fn.GetFullPath()
 228                                 )) {
 229                                         if(sod.length()>0) sod.append(_T("<br />"));
 230                                         sod.append(wxString::Format(
 231                                                 _T("<img src=\"%s\" />"), fn.GetFullPath().c_str()
 232                                         ));
 233                                 }
 234                         }
 235                         /* Append the chart(s) in a paragraph object. */
 236                         if(sod.length()>0) {
 237                                 header.append(wxString::Format(
 238                                         _T("<p>%s<br /><font size=\"1\">(Kanji stroke order graphics used under license from KanjiCafe.com.)</font></p>"), sod.c_str()
 239                                 ));
 240                         }
 241                 } else {
 242                         fprintf(stderr, "[%s:%d]: Bad transcoder selected!\n", __FILE__, __LINE__);
 243                 }
 244         }
 245         /* END OF EXPERIMENTAL NEW CODE */
 246
 247         while(t.HasMoreTokens()) {
 248                 token = t.GetNextToken();
 249                 sTemp = token;
 250                 c = sTemp[0];
 251                 /* If a preceding character is detected, strip it */
 252                 if(c == _T('(') || c == _T('〜')) {
 253                         sTemp = sTemp.substr(1);
 254                         c = sTemp[0];
 255                 }
 256                 if(tmode==0) {
 257                         if(IsKatakana(c)) {
 258                                 /* Onyomi reading detected */
 259                                 /*if(onyomi.length()>0) onyomi.append(_T("　")); */
 260                                 if(onyomi.length()>0) onyomi.append(_T("&nbsp; "));
 261                                 onyomi.append(token);   /* Copy the original string, including ()'s and 〜's */
 262                                 continue;
 263                         }
 264                         else if(IsHiragana(c)) {
 265                                 /* Kunyomi reading detected */
 266                                 if(kunyomi.length()>0) kunyomi.append(_T("&nbsp; "));
 267                                 kunyomi.append(token);  /* Copy the original string, including ()'s and 〜's */
 268                                 continue;
 269                         }
 270                 } else if(tmode==1) {
 271                         if(IsFurigana(c)) {
 272                                 /* Nanori reading detected */
 273                                 if(nanori.length()>0) nanori.append(_T("&nbsp; "));
 274                                 nanori.append(token);   /* Copy the original string, including ()'s and 〜's */
 275                                 continue;
 276                         }
 277                 } else if(tmode==2) {
 278                         if(IsFurigana(c)) {
 279                                 /* Special radical reading detected */
 280                                 if(radicalReading.length()>0) radicalReading.append(_T("&nbsp; "));
 281                                 radicalReading.append(token);
 282                                 continue;
 283                         }
 284                 }
 285                 if(c == _T('{')) {
 286                         /* English meaning detected
 287                            Special handling is needed to take care of spaces, though.
 288                            We'll "cheat" and mess with our iterator a bit if a space is detected. */
 289                         while(t.HasMoreTokens() && sTemp[sTemp.length()-1] != _T('}')) {
 290                                 sTemp.append(_T(" ")).append(t.GetNextToken());
 291                         }
 292                         if(english.length()>0) english.append(_T(", "));
 293                         english.append(sTemp.substr(1,sTemp.length()-2));  /* Strip the {} */
 294                 }
 295                 else {
 296                         switch(c) {
 297                         case _T('T'):  /* Change "t mode" */
 298                                 /* Note: substr() returns type wxStringBase, which disallows access to wxString::ToLong.
 299                                    So, by making a copy of wxString and performing the conversion in the copy, we get around this.
 300                                    This ugly kludge is repeated twice below for frequency and grade level. */
 301                                 wxString(sTemp.substr(1)).ToLong(&tmode);
 302 #ifdef DEBUG
 303                                 if(tmode>2) printf("WARNING: T-mode set to %d.\nT-modes above 2 are not currently documented!", (int)tmode);
 304 #endif
 305                                 break;
 306                         case _T('B'):  /* Bushu radical */
 307                                 lowRelevance.append(_T("<li>Bushu radical: ")).append(sTemp.substr(1)).append(_T("</li>"));
 308                                 break;
 309                         case _T('C'):  /* Classical radical */
 310                                 lowRelevance.append(_T("<li>Classical radical: ")).append(sTemp.substr(1)).append(_T("</li>"));
 311                                 break;
 312                         case _T('F'):  /* Frequency */
 313                                 wxString(sTemp.substr(1)).ToLong(&frequency);
 314                                 break;
 315                         case _T('G'):  /* Grade level */
 316                                 wxString(sTemp.substr(1)).ToLong(&grade);
 317                                 break;
 318                         case _T('S'):  /* Stroke count */
 319                                 if(strokes.length()==0) {
 320                                         strokes = sTemp.substr(1);
 321                                 } else if(!strokes.find(_T(' '))!=wxString::npos) {
 322                                         strokes.append(_T(" (Miscounts: "))
 323                                                 .append(sTemp.substr(1))
 324                                                 .append(_T(")"));
 325                                 } else {
 326                                         strokes = strokes.substr(0, strokes.length()-1)
 327                                                 .append(_T(", "))
 328                                                 .append(sTemp.substr(1))
 329                                                 .append(_T(")"));
 330                                 }
 331                                 break;
 332                         case _T('U'):  /* Unicode value */
 333                                 lowRelevance.append(_T("<li>Unicode: 0x")).append(sTemp.substr(1)).append(_T("</li>"));
 334                                 break;
 335                         /* From here, it's all dictionary codes */
 336                         case _T('H'):
 337                                 if((dictionaries & KDD_NJECD)!=0)
 338                                         dictionaryInfo.append(_T("<li>New Japanese-English Character Dictionary (Halpern): "))
 339                                                 .append(sTemp.substr(1)).append(_T("</li>"));
 340                                 break;
 341                         case _T('N'):
 342                                 if((dictionaries & KDD_MRJECD)!=0)
 343                                         dictionaryInfo.append(_T("<li>Modern Reader's Japanese-English Character Dictionary (Nelson): "))
 344                                                 .append(sTemp.substr(1)).append(_T("</li>"));
 345                                 break;
 346                         case _T('V'):
 347                                 if((dictionaries & KDD_NNJECD)!=0)
 348                                         dictionaryInfo.append(_T("<li>The New Nelson's Japanese-English Character Dictionary: "))
 349                                                 .append(sTemp.substr(1)).append(_T("</li>"));
 350                                 break;
 351                         case _T('P'):
 352                                 /* SKIP codes. */
 353                                 /* This is a thorny issue.  If we want to include a stock KANJIDIC, then we */
 354                                 /* need to add encryption to the file and prevent copy/pasting of that data. */
 355                                 /* I'll comply later on, but for now I'll use a stripped KANJIDIC. */
 356 #ifdef USE_SKIP
 357                                 if((dictionaries & KDD_SKIP)!=0)
 358                                         dictionaryInfo.append(_T("<li>SKIP code: "))
 359                                                 .append(sTemp.substr(1)).append(_T("</li>"));
 360 #endif
 361                                 break;
 362                         case _T('I'):  /* Spahn/Hadamitzky dictionaries */
 363                                 if(sTemp[1]==_T('N')) {
 364                                         if((dictionaries & KDD_KK)!=0) {
 365                                                 dictionaryInfo.append(_T("<li>Kanji & Kana (Spahn, Hadamitzky): "))
 366                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 367                                         }
 368                                 } else {
 369                                         if((dictionaries & KDD_KD)!=0) {
 370                                                 dictionaryInfo.append(_T("<li>Kanji Dictionary (Spahn, Hadamitzky): "))
 371                                                         .append(sTemp.substr(1)).append(_T("</li>"));
 372                                         }
 373                                 }
 374                                 break;
 375                         case _T('Q'):
 376                                 if((dictionaries & KDD_FC)!=0) {
 377                                         dictionaryInfo.append(_T("<li>Four Corner code: "))
 378                                                 .append(sTemp.substr(1)).append(_T("</li>"));
 379                                 }
 380                                 break;
 381                         case _T('M'):
 382                                 c2 = sTemp[1];
 383                                 if(c2==_T('N')) {
 384                                         if((dictionaries & KDD_MOROI)!=0) {
 385                                                 dictionaryInfo.append(_T("<li>Morohashi Daikanwajiten Index: "))
 386                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 387                                         }
 388                                 } else if(c2==_T('P')) {
 389                                         if((dictionaries & KDD_MOROVP)!=0) {
 390                                                 dictionaryInfo.append(_T("<li>Morohashi Daikanwajiten Volume/Page: "))
 391                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 392                                         }
 393                                 }
 394                                 break;
 395                         case _T('E'):
 396                                 if((dictionaries & KDD_GRJC)!=0) {
 397                                         dictionaryInfo.append(_T("<li>A Guide to Remembering Japanese Characters (Henshal): "))
 398                                                 .append(sTemp.substr(1)).append(_T("</li>"));
 399                                 }
 400                                 break;
 401                         case _T('K'):
 402                                 if((dictionaries & KDD_GKD)!=0) {
 403                                         dictionaryInfo.append(_T("<li>Gakken Kanji Dictionary (\"A New Dictionary of Kanji Usage\"): "))
 404                                                 .append(sTemp.substr(1)).append(_T("</li>"));
 405                                 }
 406                                 break;
 407                         case _T('L'):
 408                                 if((dictionaries & KDD_RTK)!=0) {
 409                                         dictionaryInfo.append(_T("<li>Remembering the Kanji (Heisig): "))
 410                                                 .append(sTemp.substr(1)).append(_T("</li>"));
 411                                 }
 412                                 break;
 413                         case _T('O'):
 414                                 if((dictionaries & KDD_JN)!=0) {
 415                                         dictionaryInfo.append(_T("<li>Japanese Names (O'Neill): "))
 416                                                 .append(sTemp.substr(1)).append(_T("</li>"));
 417                                 }
 418                                 break;
 419                         case _T('D'):
 420                                 c2 = sTemp[1];
 421                                 switch(c2) {
 422                                 case _T('B'):
 423                                         if((dictionaries & KDD_JBP)!=0) {
 424                                                 dictionaryInfo.append(_T("<li>Japanese for Busy People (AJLT): "))
 425                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 426                                         }
 427                                         break;
 428                                 case _T('C'):
 429                                         if((dictionaries & KDD_KWJLP)!=0) {
 430                                                 dictionaryInfo.append(_T("<li>The Kanji Way to Japanese Language Power (Crowley): "))
 431                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 432                                         }
 433                                         break;
 434                                 case _T('F'):
 435                                         if((dictionaries & KDD_JKF)!=0) {
 436                                                 dictionaryInfo.append(_T("<li>Japanese Kanji Flashcards (White Rabbit Press): "))
 437                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 438                                         }
 439                                         break;
 440                                 case _T('G'):
 441                                         if((dictionaries & KDD_KCKG)!=0) {
 442                                                 dictionaryInfo.append(_T("<li>Kodansha Compact Kanji Guide: "))
 443                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 444                                         }
 445                                         break;
 446                                 case _T('H'):
 447                                         if((dictionaries & KDD_GTRWJH)!=0) {
 448                                                 dictionaryInfo.append(_T("<li>A Guide To Reading and Writing Japanese (Hensall): "))
 449                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 450                                         }
 451                                         break;
 452                                 case _T('J'):
 453                                         if((dictionaries & KDD_KIC)!=0) {
 454                                                 dictionaryInfo.append(_T("<li>Kanji in Context (Nishiguchi and Kono): "))
 455                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 456                                         }
 457                                         break;
 458                                 case _T('K'):
 459                                         if((dictionaries & KDD_KLD)!=0) {
 460                                                 dictionaryInfo.append(_T("<li>Kanji Learner's Dictionary (Halpern): "))
 461                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 462                                         }
 463                                         break;
 464                                 case _T('O'):
 465                                         if((dictionaries & KDD_EK)!=0) {
 466                                                 dictionaryInfo.append(_T("<li>Essential Kanji (O'Neill): "))
 467                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 468                                         }
 469                                         break;
 470                                 case _T('R'):
 471                                         if((dictionaries & KDD_DR)!=0) {
 472                                                 dictionaryInfo.append(_T("<li>2001 Kanji (De Roo): "))
 473                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 474                                         }
 475                                         break;
 476                                 case _T('S'):
 477                                         if((dictionaries & KDD_GTRWJS)!=0) {
 478                                                 dictionaryInfo.append(_T("<li>A Guide to Reading and Writing Japanese (Sakade): "))
 479                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 480                                         }
 481                                         break;
 482                                 case _T('T'):
 483                                         if((dictionaries & KDD_TKC)!=0) {
 484                                                 dictionaryInfo.append(_T("<li>Tuttle Kanji Cards (Kask): "))
 485                                                         .append(sTemp.substr(2)).append(_T("</li>"));
 486                                         }
 487                                         break;
 488                                 default:
 489                                         if(unhandled.length()>0) unhandled.append(_T(" "));
 490                                         unhandled.append(sTemp);
 491                                         break;
 492                                 }
 493                                 break;
 494                         /* Crossreferences and miscodes */
 495                         case _T('X'):
 496                                 if(crossReferences.length()>0) crossReferences.append(_T(", "));
 497                                 crossReferences.append(sTemp.substr(1));
 498                                 break;
 499                         case _T('Z'):
 500                                 if(miscodes.length()>0) miscodes.append(_T(", "));
 501                                 miscodes.append(sTemp.substr(1));
 502                                 break;
 503                         /* Korean/Pinyin (Chinese) romanization */
 504                         case _T('W'):
 505                                 if(koreanRomanization.length()>0) koreanRomanization.append(_T(", "));
 506                                 koreanRomanization.append(sTemp.substr(1));
 507                                 break;
 508                         case _T('Y'):
 509                                 if(pinyinRomanization.length()>0) pinyinRomanization.append(_T(", "));
 510                                 pinyinRomanization.append(sTemp.substr(1));
 511                                 break;
 512                         default:
 513                                 if(unhandled.length()>0) unhandled.append(_T(" "));
 514                                 unhandled.append(sTemp);
 515                                 break;
 516                         }
 517                 }
 518         } /* while(t.HasMoreTokens()) */
 519
 520         if(header.length() > 0) result.append(header);
 521 #ifdef DEBUG
 522         printf("DEBUG: header=[%ls]\n", header.c_str());
 523 #endif
 524         result.append(_T("<ul>"));
 525         if((options & KDO_READINGS) != 0) {
 526                 if(onyomi.length() > 0) result.append(_T("<li>Onyomi Readings: ")).append(onyomi).append(_T("</li>"));
 527                 if(kunyomi.length() > 0) result.append(_T("<li>Kunyomi Readings: ")).append(kunyomi).append(_T("</li>"));
 528                 if(nanori.length() > 0) result.append(_T("<li>Nanori Readings: ")).append(nanori).append(_T("</li>"));
 529                 if(radicalReading.length() > 0) result.append(_T("<li>Special Radical Reading: ")).append(radicalReading).append(_T("</li>"));
 530         }
 531         if((options & KDO_MEANINGS) != 0) {
 532                 if(english.length() > 0) result.append(_T("<li>English Meanings: ")).append(english).append(_T("</li>"));
 533         }
 534         if((options & KDO_HIGHIMPORTANCE) != 0) {
 535                 if(strokes.length() > 0)
 536                         result.append(_T("<li>Stroke count: ")).append(strokes).append(_T("</li>"));
 537                 else
 538                         result.append(_T("<li>Stroke count: not specified in KANJIDIC"));
 539                 result.append(_T("<li>Grade Level: "));
 540                 if(grade<=6 && grade >= 1) {  /* Jouyou (Grade #) */
 541                         result.append(_T("Jouyou (Grade "))
 542                                 .append(wxString::Format(_T("%d"), (int)grade))
 543                                 .append(_T(")"));
 544                 } else if(grade==8) {  /* Jouyou (General usage) */
 545                         result.append(_T("Jouyou (General usage)"));
 546                 } else if(grade==9) {  /* Jinmeiyou (Characters for names) */
 547                         result.append(_T("Jinmeiyou (Characters for names)"));
 548                 } else if(grade==-1) {  /* No flag specified in kanjidic string */
 549                         result.append(_T("Unspecified"));
 550                 } else {
 551                         result.append(_T("Unhandled grade level (Grade "))
 552                                 .append(wxString::Format(_T("%d"), (int)grade))
 553                                 .append(_T(")"));
 554                 }
 555                 result.append(_T("</li>"));
 556                 if(frequency!=-1)
 557                         result.append(_T("<li>Frequency Ranking: "))
 558                                 .append(wxString::Format(_T("%d"), (int)frequency))
 559                                 .append(_T("</li>"));
 560                 else result.append(_T("<li>Frequency Ranking: Unspecified</li>"));
 561         }
 562         if((options & KDO_DICTIONARIES) != 0) {
 563                 if(dictionaryInfo.length()>0) result.append(_T("<li>Dictionary Codes:<ul>")).append(dictionaryInfo).append(_T("</ul></li>"));
 564         }
 565         if((options & KDO_VOCABCROSSREF) != 0) {
 566                 vector<wxString> *vList = &(jben->vocabList->GetVocabList());
 567                 wxChar thisKanji = kanjidicStr[0];
 568                 vector<wxString> crossRefList;
 569                 vector<wxString>::iterator vIt;
 570                 for(vIt=vList->begin(); vIt!=vList->end(); vIt++) {
 571                         if(vIt->find(thisKanji)!=wxString::npos) {
 572                                 crossRefList.push_back(*vIt);
 573                         }
 574                 }
 575                 if(crossRefList.size()>0) {
 576                         result.append(_T("<li>This kanji is used by words in your study list:<br><font size=\"7\">"));
 577                         vIt = crossRefList.begin();
 578                         result.append(*vIt);
 579                         for(++vIt; vIt!=crossRefList.end(); vIt++) {
 580                                 result.append(_T("&nbsp; ")).append(*vIt);
 581                         }
 582                         result.append(_T("</font></li>"));
 583                 }
 584         }
 585         if((options & KDO_LOWIMPORTANCE) != 0) {
 586                 if(koreanRomanization.length()>0) lowRelevance.append(_T("<li>Korean romanization: ")).append(koreanRomanization).append(_T("</li>"));
 587                 if(pinyinRomanization.length()>0) lowRelevance.append(_T("<li>Pinyin romanization: ")).append(pinyinRomanization).append(_T("</li>"));
 588                 if(crossReferences.length()>0) lowRelevance.append(_T("<li>Cross reference codes: ")).append(crossReferences).append(_T("</li>"));
 589                 if(miscodes.length()>0) lowRelevance.append(_T("<li>Miscodes: ")).append(miscodes).append(_T("</li>"));
 590                 if(lowRelevance.length()>0) result.append(_T("<li>Extra Information:<ul>")).append(lowRelevance).append(_T("</ul></li>"));
 591         }
 592         if((options & KDO_UNHANDLED) != 0) {
 593                 if(unhandled.length()>0) result.append(_T("<li>Unhandled: ")).append(unhandled).append(_T("</li>"));
 594         }
 595         result.append(_T("</ul>"));
 596
 597         return result;
 598 }
 599
 600 int KanjiDic::GetIntField(wxChar kanji, const wxString& marker) {
 601         wxString markerStr, kanjiEntry, temp;
 602         size_t index=0;
 603         long value=-1;
 604         int markerLen;
 605
 606         markerStr.Printf(_T(" %s"), marker.c_str());
 607         markerLen=markerStr.length();
 608
 609         kanjiEntry = GetKanjidicStr(kanji);
 610         if(kanjiEntry.length()>0) {
 611                 index = kanjiEntry.find(markerStr);
 612                 if(index!=wxString::npos) {
 613                         temp = kanjiEntry.substr(
 614                                 index+markerLen,
 615                                 kanjiEntry.find(_T(" "), index+1) - index - (markerLen-1));
 616                         temp.ToLong(&value);
 617                 }
 618         }
 619
 620         return (int)value;
 621 }
 622
 623 const BoostHM<wxChar,string> *KanjiDic::GetHashTable() {
 624         return &kanjiHash;
 625 }
 626
 627 enum {
 628         KDR_Onyomi=1,
 629         KDR_Kunyomi,
 630         KDR_English
 631 };
 632
 633 wxString KanjiDic::GetOnyomiStr(wxChar c) {
 634         return GetKanjidicReading(c, KDR_Onyomi);
 635 }
 636
 637 wxString KanjiDic::GetKunyomiStr(wxChar c) {
 638         return GetKanjidicReading(c, KDR_Kunyomi);
 639 }
 640
 641 wxString KanjiDic::GetEnglishStr(wxChar c) {
 642         return GetKanjidicReading(c, KDR_English);
 643 }
 644
 645 wxString KanjiDic::GetKanjidicReading(wxChar c, int readingType) {
 646         wxString result;
 647         wxString kanjidicStr = GetKanjidicStr(c);
 648
 649         long tmode = 0;
 650         wxString sTemp, token;
 651         wxStringTokenizer t(kanjidicStr, _T(' '));
 652
 653         /* The first two tokens are guaranteed not to be what we're looking for.  Skip them. */
 654         if(t.CountTokens()>1) {
 655                 t.GetNextToken();
 656                 t.GetNextToken();
 657         }
 658         while(t.HasMoreTokens()) {
 659                 token = t.GetNextToken();
 660                 sTemp = token;
 661                 c = sTemp[0];
 662                 /* If a preceding character is detected, strip it */
 663                 if(c == _T('(') || c == _T('〜')) {
 664                         sTemp = sTemp.substr(1);
 665                         c = sTemp[0];
 666                 }
 667                 if(tmode==0) {
 668                         if(IsKatakana(c) && readingType==KDR_Onyomi) {
 669                                 /* Onyomi reading detected */
 670                                 if(result.length()>0) result.append(_T("  "));
 671                                 result.append(token);   /* Copy the original string, including ()'s and 〜's */
 672                                 continue;
 673                         }
 674                         else if(IsHiragana(c) && readingType==KDR_Kunyomi) {
 675                                 /* Kunyomi reading detected */
 676                                 if(result.length()>0) result.append(_T("  "));
 677                                 result.append(token);  /* Copy the original string, including ()'s and 〜's */
 678                                 continue;
 679                         }
 680                 }
 681                 if(c == _T('{') && readingType==KDR_English) {
 682                         /* English meaning detected
 683                            Special handling is needed to take care of spaces, though.
 684                            We'll "cheat" and mess with our iterator a bit if a space is detected. */
 685                         while(t.HasMoreTokens() && sTemp[sTemp.length()-1] != _T('}')) {
 686                                 sTemp.append(_T(" ")).append(t.GetNextToken());
 687                         }
 688                         if(result.length()>0) result.append(_T(", "));
 689                         result.append(sTemp.substr(1,sTemp.length()-2));  /* Strip the {} */
 690                 }
 691                 else if(c==_T('T')) wxString(sTemp.substr(1)).ToLong(&tmode);
 692         }
 693
 694         return result;
 695 }