kdict.cpp

   1 /*
   2 Project: J-Ben
   3 Author:  Paul Goins
   4 Website: http://www.vultaire.net/software/jben/
   5 License: GNU General Public License (GPL) version 2
   6          (http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt)
   7
   8 File: kdict.cpp
   9
  10 This program is free software; you can redistribute it and/or modify
  11 it under the terms of the GNU General Public License as published by
  12 the Free Software Foundation; either version 2 of the License, or
  13 (at your option) any later version.
  14
  15 This program is distributed in the hope that it will be useful,
  16 but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18 GNU General Public License for more details.
  19
  20 You should have received a copy of the GNU General Public License
  21 along with this program.  If not, see <http://www.gnu.org/licenses/>
  22 */
  23
  24 #include "kdict.h"
  25 #include "preferences.h"
  26 #include "encoding_convert.h"
  27 #include "string_utils.h"
  28 #include "file_utils.h"
  29 #include "jutils.h"
  30 #include "errorlog.h"
  31 #include <libxml/xmlreader.h>
  32 #include <iomanip>
  33 #include <fstream>
  34 #include <sstream>
  35 #include <list>
  36 using namespace std;
  37
  38 #ifdef __WXMSW__
  39 #       define FALLBACK_DICTDIR "dicts\\"
  40 #else
  41 #       define FALLBACK_DICTDIR "dicts/"
  42 #endif
  43
  44 KDict* KDict::kdictSingleton = NULL;
  45
  46 KInfo::KInfo() {
  47         radical = radicalNelson = (unsigned char) 0;
  48         grade = strokeCount = freq = 0;
  49 }
  50
  51 const KDict* KDict::Get() {
  52         if(!kdictSingleton)
  53                 kdictSingleton = new KDict;
  54         return kdictSingleton;
  55 }
  56
  57 KDict::KDict() {
  58         Preferences* p = Preferences::Get();
  59         int result;
  60         /* Load KANJIDIC2, if present. */
  61         result = LoadKanjidic2(p->GetSetting("kdict_kanjidic2").c_str());
  62         if(result!=KD_SUCCESS)
  63                 result = LoadKanjidic2(FALLBACK_DICTDIR "kanjidic2.xml");
  64
  65         /* If KANJIDIC2 is not present, load KANJIDIC and/or KANJD212 */
  66         if(result!=KD_SUCCESS) {
  67                 result = LoadKanjidic(p->GetSetting("kdict_kanjidic").c_str());
  68                 if(result!=KD_SUCCESS) LoadKanjidic(FALLBACK_DICTDIR "kanjidic");
  69                 result =
  70                         LoadKanjidic(p->GetSetting("kdict_kanjd212").c_str(), "jis212");
  71                 if(result!=KD_SUCCESS)
  72                         LoadKanjidic(FALLBACK_DICTDIR "kanjd212", "jis212");
  73         }
  74
  75         /* Load supplemental dictionary files */
  76         result = LoadKradfile(p->GetSetting("kdict_kradfile").c_str());
  77         if(result!=KD_SUCCESS) LoadKradfile(FALLBACK_DICTDIR "kradfile");
  78         result = LoadRadkfile(p->GetSetting("kdict_radkfile").c_str());
  79         if(result!=KD_SUCCESS) LoadRadkfile(FALLBACK_DICTDIR "radkfile");
  80 }
  81
  82 void KDict::Destroy() {
  83         if(kdictSingleton) {
  84                 delete kdictSingleton;
  85                 kdictSingleton = NULL;
  86         }
  87 }
  88
  89 int KDict::LoadKanjidic(const char* filename, const char* jisStd) {
  90         char* rawData = NULL;
  91         unsigned int size;
  92         int returnCode=KD_FAILURE;
  93
  94         ifstream ifile(filename, ios::ate); /* "at end" to get our file size */
  95         if(ifile) {
  96                 size = ifile.tellg();
  97                 ifile.seekg(0);
  98                 rawData = new char[size+1];
  99                 rawData[size] = '\0';
 100                 ifile.read(rawData, size);
 101                 if(strlen(rawData)!=size) {
 102                         ostringstream oss;
 103                         oss << ERR_PREF
 104                                 << "kanjidic file size: "
 105                                 << strlen(rawData)
 106                                 << ", read-in string: "
 107                                 << size;
 108                         el.Push(EL_Warning, oss.str());
 109                 }
 110
 111                 /* Create the kanjidic object with our string data. */
 112                 this->KanjidicParser(rawData, jisStd);
 113
 114                 returnCode = KD_SUCCESS;
 115                 el.Push(EL_Silent, string("Kanji dictionary file \"")
 116                                 .append(filename).append("\" loaded successfully."));
 117         }
 118         else
 119                 returnCode = KD_FAILURE;
 120
 121         if(rawData) delete[] rawData;
 122         return returnCode;
 123 }
 124
 125 int KDict::LoadKanjidic2(const char* filename) {
 126         int returnCode = KD_FAILURE;
 127         xmlTextReaderPtr reader;
 128         xmlChar* ptr;
 129         int ret;
 130
 131         /* Vars for navigating through the data */
 132         string element, d1element;
 133         map<string, string> attributes;
 134         map<string, string>::iterator mssi;
 135         int nodeType;
 136         bool isAttribute=false;
 137         /* Var for storing values of the entries */
 138         string sValue;
 139         /* GP vars */
 140         string temp;
 141
 142         reader = xmlNewTextReaderFilename(filename);
 143         KInfo* k=NULL;
 144         if(reader) {
 145                 ret = xmlTextReaderRead(reader);
 146                 while(ret==1) {
 147                         /* Act based on node type */
 148                         nodeType = xmlTextReaderNodeType(reader);
 149                         switch(nodeType) {
 150                         case XML_READER_TYPE_ELEMENT:
 151                                 element = (char*)xmlTextReaderName(reader);
 152                                 if(xmlTextReaderDepth(reader)==1) d1element=element;
 153                                 if(element=="character") {
 154                                         /* Opening of character entry - create new data object */
 155                                         if(k) delete k;
 156                                         k = new KInfo;
 157                                 }
 158                                 attributes.clear();
 159                                 break;
 160                         case XML_READER_TYPE_END_ELEMENT:
 161                                 element = (char*)xmlTextReaderName(reader);
 162                                 if(element=="character") {
 163                                         wchar_t wc = utfconv_mw(k->literal)[0];
 164                                         /* End of character entry: append to data list */
 165                                         if(!kdictData.assign(wc, *k)) {
 166                                                 ostringstream oss;
 167                                                 oss << ERR_PREF
 168                                                         << "Error assigning kanjidic2 entry to hash table!";
 169                                                 el.Push(EL_Error, oss.str());
 170                                         }
 171                                         delete k;
 172                                         k = NULL;
 173                                 }
 174                                 attributes.clear();
 175                                 break;
 176                         case XML_READER_TYPE_ATTRIBUTE:
 177                                 temp = (char*)xmlTextReaderName(reader);
 178                                 ptr = xmlTextReaderValue(reader);
 179                                 attributes[temp] = (char*)ptr;
 180                                 xmlFree(ptr);
 181                                 break;
 182                         case XML_READER_TYPE_TEXT:
 183                                 ptr = xmlTextReaderValue(reader);
 184                                 sValue = (char*)ptr;
 185                                 xmlFree(ptr);
 186
 187                                 if(d1element=="header") {
 188                                         if(element=="file_version") {
 189                                                 if(sValue!="4") {
 190                                                         ostringstream oss;
 191                                                         oss << ERR_PREF
 192                                                                 << "Warning: the KANJIDIC2 reader only"
 193                                                                 " supports KANJIDIC2 version 4!";
 194                                                         el.Push(EL_Warning, oss.str());
 195                                                 }
 196                                         }
 197                                 }
 198                                 if(d1element=="character") {
 199                                         if(!k) {
 200                                                 ostringstream oss;
 201                                                 oss << ERR_PREF << "k is NULL!";
 202                                                 el.Push(EL_Error, oss.str());
 203                                         } else if(element=="literal") {
 204                                                 k->literal = sValue;
 205                                         } else if(element=="cp_value") {
 206                                                 k->codepoint[attributes["cp_type"]] = sValue;
 207                                         } else if(element=="rad_value") {
 208                                                 temp = attributes["rad_type"];
 209                                                 if(temp == "classical")
 210                                                         k->radical
 211                                                                 = (unsigned char)atoi(sValue.c_str());
 212                                                 else if(temp == "nelson_c")
 213                                                         k->radicalNelson
 214                                                                 = (unsigned char)atoi(sValue.c_str());
 215                                                 else {
 216                                                         ostringstream oss;
 217                                                         oss << ERR_PREF
 218                                                                 << "Unhandled radical: "
 219                                                                 << "type=" << temp
 220                                                                 << ", value=[" << sValue
 221                                                                 << "]!";
 222                                                         el.Push(EL_Error, oss.str());
 223                                                 }
 224                                         } else if(element=="grade") {
 225                                                 k->grade = (unsigned char)atoi(sValue.c_str());
 226                                         } else if(element=="stroke_count") {
 227                                                 k->strokeCount = (unsigned char)atoi(sValue.c_str());
 228                                         } else if(element=="variant") {
 229                                                 k->variant[attributes["var_type"]] = sValue;
 230                                         } else if(element=="freq") {
 231                                                 k->freq = atoi(sValue.c_str());
 232                                         } else if(element=="rad_name") {
 233                                                 k->radicalName = sValue;
 234                                         } else if(element=="dic_ref") {
 235                                                 k->dictCode[attributes["dr_type"]] = sValue;
 236                                                 if(attributes["dr_type"]=="moro"
 237                                                    && attributes["m_vol"].length()>0) {
 238                                                         temp = "V";
 239                                                         temp.append(attributes["m_vol"]);
 240                                                         temp.append(1, 'P');
 241                                                         temp.append(attributes["m_page"]);
 242                                                         k->dictCode["moro"].append(temp);
 243                                                 }
 244                                         } else if(element=="q_code") {
 245                                                 if(attributes["qc_type"]=="skip"
 246                                                    && attributes["skip_misclass"].length()>0) {
 247                                                         k->skipMisclass.push_back(
 248                                                                 pair<string,string>(
 249                                                                         attributes["skip_misclass"],
 250                                                                         sValue));
 251                                                 } else {
 252                                                         k->queryCode[attributes["qc_type"]] = sValue;
 253                                                 }
 254                                         } else if(element=="reading") {
 255                                                 temp = attributes["r_type"];
 256                                                 if(temp=="pinyin") {
 257                                                         k->pinyin.push_back(sValue);
 258                                                 } else if(temp=="korean_r") {
 259                                                         k->korean_r.push_back(sValue);
 260                                                 } else if(temp=="korean_h") {
 261                                                         k->korean_h.push_back(sValue);
 262                                                 } else if(temp=="ja_on") {
 263                                                         /* Need to handle r_status and on_type! */
 264                                                         /* Need to convert xx.x to xx(x) notation. */
 265                                                         k->onyomi.push_back(sValue);
 266                                                 } else if(temp=="ja_kun") {
 267                                                         /* Need to handle r_status! */
 268                                                         /* Need to convert xx.x to xx(x) notation. */
 269                                                         k->kunyomi.push_back(sValue);
 270                                                 } else {
 271                                                         ostringstream oss;
 272                                                         oss << ERR_PREF << "Invalid r_type: " << temp;
 273                                                         el.Push(EL_Error, oss.str());
 274                                                 }
 275                                                 /* This section is "to-do" */
 276                                         } else if(element=="meaning") {
 277                                                 temp = attributes["m_lang"];
 278                                                 if(temp.length()==0) temp = "en";
 279                                                 k->meaning[temp].push_back(sValue);;
 280                                         } else if(element=="nanori") {
 281                                                 k->nanori.push_back(sValue);
 282                                         } else {
 283                                                 ostringstream oss;
 284                                                 oss << ERR_PREF << "UNHANDLED element: " << element;
 285                                                 el.Push(EL_Error, oss.str());
 286                                         }
 287                                 }
 288                                 /* default parsing */
 289                                 else {
 290                                         /*cout << "DEBUG: Depth 1 element is " << d1element
 291                                           << ", element is " << element
 292                                           << ", value is " << sValue << endl;*/
 293                                 }
 294                                 break;
 295                         default:
 296                                 /* do nothing */
 297                                 break;
 298                         }
 299
 300                         /* If element has attributes, go to the next attribute if present.
 301                            Otherwise, go to the next element. */
 302                         if(!isAttribute) ret = xmlTextReaderHasAttributes(reader);
 303                         if(isAttribute || ret==1) {
 304                                 ret = xmlTextReaderMoveToNextAttribute(reader);
 305                         }
 306                         /* ret==-1 is an error */
 307                         if(ret==-1) {
 308                                 ostringstream oss;
 309                                 oss << ERR_PREF
 310                                         << "xmlTextReaderMoveToNextAttribute returned an error!";
 311                                 el.Push(EL_Error, oss.str());
 312                         }
 313                         /* If ret==1, an attribute was loaded.
 314                            If not, go to the next element. */
 315                         if(ret==1) {
 316                                 isAttribute=true;
 317                         } else {
 318                                 isAttribute=false;
 319                                 ret = xmlTextReaderRead(reader);
 320                         }
 321                 }
 322                 xmlFreeTextReader(reader);
 323                 if(ret!=0) {
 324                         ostringstream oss;
 325                         oss << ERR_PREF
 326                                 << "Parsing error occurred in " << filename << ".";
 327                         el.Push(EL_Error, oss.str());
 328                 }
 329
 330                 returnCode = KD_SUCCESS;
 331                 el.Push(EL_Silent, string("Kanji dictionary file \"")
 332                                 .append(filename).append("\" loaded successfully."));
 333         } else return returnCode;
 334
 335         if(k) {
 336                 ostringstream oss;
 337                 oss << ERR_PREF << ": k is not NULL!  This shouldn't happen!";
 338                 el.Push(EL_Error, oss.str());
 339                 delete k;
 340                 k = NULL;
 341         }
 342
 343         return returnCode;
 344 }
 345
 346 int KDict::LoadKradfile(const char* filename) {
 347         int returnCode = KD_FAILURE;
 348         stringbuf sb;
 349         ifstream f(filename, ios::in|ios::binary);
 350         if(f.is_open()) {
 351                 f >> &sb;
 352                 f.close();
 353
 354                 list<wstring> data =
 355                         StrTokenize<wchar_t>(utfconv_mw(sb.str()), L"\n");
 356                 while(data.size()>0) {
 357                         wstring token = data.front();
 358                         data.pop_front();
 359                         if(token.length()>0 && token[0]!=L'#') {
 360                                 /* KRADFILE-specific stuff here */
 361                                 /* Get rid of the spaces in the string */
 362                                 token = TextReplace<wchar_t>(token, L" ", L"");
 363                                 /* Now we can easily pull in the data */
 364                                 if(!kradData.assign(token[0], token.substr(2))) {
 365                                         ostringstream oss;
 366                                         oss << ERR_PREF << "KRADFILE: Error assigning ("
 367                                                 << utfconv_wm(token.substr(0,1)) << ", "
 368                                                 << utfconv_wm(token.substr(2)) << ") to hash table!";
 369                                         el.Push(EL_Error, oss.str());
 370                                 }
 371                         }
 372                 }
 373
 374                 returnCode = KD_SUCCESS;
 375                 el.Push(EL_Silent, string("Kanji dictionary file \"")
 376                                 .append(filename).append("\" loaded successfully."));
 377         }
 378         return returnCode;
 379 }
 380
 381 int KDict::LoadRadkfile(const char* filename) {
 382         int returnCode = KD_FAILURE;
 383         stringbuf sb;
 384         ifstream f(filename, ios::in|ios::binary);
 385         if(f.is_open()) {
 386                 f >> &sb;
 387                 f.close();
 388
 389                 /* RADKFILE entries all start with $.
 390                    Split on $, and discard the first entry since it is the explanation
 391                    preceding the first entry. */
 392                 list<wstring> data =
 393                         StrTokenize<wchar_t>(utfconv_mw(sb.str()), L"$");
 394                 data.pop_front();
 395
 396                 while(data.size()>0) {
 397                         wstring entry = data.front();
 398                         data.pop_front();
 399                         if(entry.length()>0 && entry[0]!=L'#') {
 400                                 /* RADKFILE-specific stuff here */
 401                                 list<wstring> entryData =
 402                                         StrTokenize<wchar_t>(entry, L"\n", false, 2);
 403                                 if(entryData.size()!=2) {
 404                                         ostringstream oss;
 405                                         oss << ERR_PREF
 406                                                 << "Error: entryData.size() == " << entryData.size()
 407                                                 << " for entry \"" << utfconv_wm(entry) << "!!";
 408                                         el.Push(EL_Error, oss.str());
 409                                 } else {
 410                                         wchar_t key;
 411                                         int strokeCount;
 412                                         wstring value;
 413                                         /* entryData.front() contains our key.
 414                                            It's a space delimited string,
 415                                            first token is our kanji, second is the stroke count.
 416                                            A third token may be present, but is irrelevant. */
 417                                         list<wstring> keyData =
 418                                                 StrTokenize<wchar_t>(entryData.front(), L" ");
 419                                         wistringstream wiss;
 420                                         wiss.str(keyData.front());
 421                                         wiss >> key;
 422                                         keyData.pop_front();
 423                                         wiss.str(keyData.front());
 424                                         wiss >> strokeCount;
 425
 426                                         /* entryData.back() contains the characters our key
 427                                            maps to. */
 428                                         /* Get rid of the spaces in the string */
 429                                         value = entryData.back();
 430                                         value = TextReplace<wchar_t>(value, L"\n", L"");
 431                                         value = TextReplace<wchar_t>(value, L" ", L"");
 432
 433                                         if(!radkData.assign(key, value)) {
 434                                                 ostringstream oss;
 435                                                 oss << ERR_PREF << "RADKFILE: Error assigning ("
 436                                                         << utfconv_wm(wstring().append(1,key)) << ", "
 437                                                         << utfconv_wm(value) << ") to hash table!";
 438                                                 el.Push(EL_Error, oss.str());
 439                                         }
 440                                         if(!radkDataStrokes.assign(key, strokeCount)) {
 441                                                 ostringstream oss;
 442                                                 oss << ERR_PREF << "RADKFILE: Error assigning ("
 443                                                         << utfconv_wm(wstring().append(1,key))
 444                                                         << ", " << strokeCount << ") to hash table!";
 445                                                 el.Push(EL_Error, oss.str());
 446                                         }
 447                                 }
 448                         }
 449                 }
 450
 451                 returnCode = KD_SUCCESS;
 452                 el.Push(EL_Silent, string("Kanji dictionary file \"")
 453                                 .append(filename).append("\" loaded successfully."));
 454         }
 455         return returnCode;
 456 }
 457
 458 string JisHexToKuten(const string& jisHex) {
 459         int i;
 460         stringstream ss(jisHex);
 461         ss >> hex >> i;
 462         ss.clear();
 463         ss << (((i & 0xFF00) >> 8) - 0x20)
 464            << '-' << ((i & 0xFF) - 0x20);
 465         return ss.str();
 466 }
 467
 468 /* This function converts from KANJIDIC-style entries to internally used
 469    KInfo objects (which are structurally based off the newer KANJIDIC2). */
 470 void KDict::KanjidicToKInfo(const string& kanjidicEntry,
 471                                                         KInfo& k, const char* jisStd) {
 472         list<string> tl = StrTokenize<char>(kanjidicEntry, " ");
 473         if(tl.size()<2) return; /* KANJIDIC entries must AT LEAST have the char
 474                                                            and the JIS hex code. */
 475         int tmode = 0;
 476         string sTemp;
 477         wstring wsTemp;
 478         wchar_t cKanaTest;
 479
 480         /* First 2 fields are always the same: process them here */
 481         k.literal = tl.front(); tl.pop_front();
 482         /* JIS code needs to be converted to ku-ten
 483            format to coincide with KANJIDIC2. */
 484         k.codepoint[jisStd] = JisHexToKuten(tl.front()); tl.pop_front();
 485
 486         /* Now, just loop through the remaining entries in the list. */
 487         string* ps;
 488         while(tl.size()>0) {
 489                 ps = &(tl.front());
 490                 switch ((*ps)[0]) {
 491                 case 'T':  /* Change "t mode" */
 492                         tmode = atoi(ps->substr(1).c_str());
 493                         break;
 494                 case 'B':  /* Nelson-reclassified radical */
 495                         k.radicalNelson = (unsigned char)atoi(ps->substr(1).c_str());
 496                         break;
 497                 case 'C':  /* Classical radical (KangXi Zidian) */
 498                         k.radical = (unsigned char)atoi(ps->substr(1).c_str());
 499                         break;
 500                 case 'F':  /* Frequency */
 501                         k.freq = atoi(ps->substr(1).c_str());
 502                         break;
 503                 case 'G':  /* Grade level */
 504                         k.grade = atoi(ps->substr(1).c_str());
 505                         break;
 506                 case 'S':  /* Stroke count */
 507                         if(k.strokeCount==0)
 508                                 k.strokeCount = atoi(ps->substr(1).c_str());
 509                         else
 510                                 k.misstrokes.push_back(atoi(ps->substr(1).c_str()));
 511                         break;
 512                 case 'U':  /* Unicode value */
 513                         k.codepoint["ucs"] = ps->substr(1);
 514                         break;
 515                 /* Dictionary codes for most of the following */
 516                 case 'H':
 517                         /* New Japanese-English Character Dictionary (Halpern) */
 518                         k.dictCode["halpern_njecd"] = ps->substr(1);
 519                         break;
 520                 case 'N':
 521                         /* Modern Reader's Japanese-English Character Dictionary (Nelson) */
 522                         k.dictCode["nelson_c"] = ps->substr(1);
 523                         break;
 524                 case 'V':
 525                         /* The New Nelson's Japanese-English Character Dictionary */
 526                         k.dictCode["nelson_n"] = ps->substr(1);
 527                         break;
 528                 case 'P':
 529                         /* SKIP codes. */
 530                         /* Thanks to changes in permissible SKIP code usage (change to
 531                            Creative Commons licensing in January 2008), we can now use
 532                            this without problems. */
 533                         k.queryCode["skip"] = ps->substr(1);
 534                         break;
 535                 case 'I':  /* Spahn/Hadamitzky dictionaries */
 536                         if((*ps)[1]=='N') {
 537                                 /* Kanji & Kana (Spahn, Hadamitzky) */
 538                                 k.dictCode["sh_kk"] = ps->substr(2);
 539                         } else {
 540                                 /* Query Code: Kanji Dictionary (Spahn, Hadamitzky) */
 541                                 k.queryCode["sh_desc"] = ps->substr(1);
 542                         }
 543                         break;
 544                 case 'Q':
 545                         /* Four Corner code */
 546                         k.queryCode["four_corner"] = ps->substr(1);
 547                         break;
 548                 case 'M':
 549                         if((*ps)[1]=='N') {
 550                                 /* Morohashi Daikanwajiten Index */
 551                                 k.dictCode["moro"].insert(0, ps->substr(2));
 552                         } else if((*ps)[1]=='P') {
 553                                 /* Morohashi Daikanwajiten Volume/Page */
 554                                 k.dictCode["moro"]
 555                                         .append(1, '/')
 556                                         .append(ps->substr(2));
 557                         }
 558                         break;
 559                 case 'E':
 560                         /* A Guide to Remembering Japanese Characters (Henshall) */
 561                         k.dictCode["henshall"] = ps->substr(1);
 562                         break;
 563                 case 'K':
 564                         /* Gakken Kanji Dictionary ("A New Dictionary of Kanji Usage") */
 565                         k.dictCode["gakken"] = ps->substr(1);
 566                         break;
 567                 case 'L':
 568                         /* Remembering the Kanji (Heisig) */
 569                         k.dictCode["heisig"] = ps->substr(1);
 570                         break;
 571                 case 'O':
 572                         /* Japanese Names (O'Neill) */
 573                         k.dictCode["oneill_names"] = ps->substr(1);
 574                         break;
 575                 case 'D':
 576                         switch((*ps)[1]) {
 577                         case 'B':
 578                                 /* Japanese for Busy People (AJLT) */
 579                                 k.dictCode["busy_people"] = ps->substr(1);
 580                                 break;
 581                         case 'C':
 582                                 /* The Kanji Way to Japanese Language Power (Crowley) */
 583                                 k.dictCode["crowley"] = ps->substr(1);
 584                                 break;
 585                         case 'F':
 586                                 /* Japanese Kanji Flashcards (White Rabbit Press) */
 587                                 k.dictCode["jf_cards"] = ps->substr(1);
 588                                 break;
 589                         case 'G':
 590                                 /* Kodansha Compact Kanji Guide */
 591                                 k.dictCode["kodansha_compact"] = ps->substr(1);
 592                                 break;
 593                         case 'H':
 594                                 /* A Guide To Reading and Writing Japanese (Henshall) */
 595                                 k.dictCode["henshall3"] = ps->substr(1);
 596                                 break;
 597                         case 'J':
 598                                 /* Kanji in Context (Nishiguchi and Kono) */
 599                                 k.dictCode["kanji_in_context"] = ps->substr(1);
 600                                 break;
 601                         case 'K':
 602                                 /* Kodansha Kanji Learner's Dictionary (Halpern) */
 603                                 k.dictCode["halpern_kkld"] = ps->substr(1);
 604                                 break;
 605                         case 'O':
 606                                 /* Essential Kanji (O'Neill) */
 607                                 k.dictCode["oneill_kk"] = ps->substr(1);
 608                                 break;
 609                         case 'R':
 610                                 /* Query Code: 2001 Kanji (De Roo) */
 611                                 k.queryCode["deroo"] = ps->substr(1);
 612                                 break;
 613                         case 'S':
 614                                 /* A Guide to Reading and Writing Japanese (Sakade) */
 615                                 k.dictCode["sakade"] = ps->substr(1);
 616                                 break;
 617                         case 'T':
 618                                 /* Tuttle Kanji Cards (Kask) */
 619                                 k.dictCode["tutt_cards"] = ps->substr(1);
 620                                 break;
 621                         default:
 622                                 {
 623                                         ostringstream oss;
 624                                         oss << ERR_PREF << "Unhandled: " << *ps;
 625                                         el.Push(EL_Error, oss.str());
 626                                 }
 627                         break;
 628                         }
 629                         break;
 630                         /* Crossreferences and miscodes */
 631                 case 'X':
 632                         switch((*ps)[1]) {
 633                         case 'D':
 634                                 /* De Roo code */
 635                                 k.variant["deroo"]=ps->substr(2);
 636                                 break;
 637                         case 'H':
 638                                 /* NJECD code */
 639                                 k.variant["njecd"]=ps->substr(2);
 640                                 break;
 641                         case 'I':
 642                                 /* S_H code */
 643                                 k.variant["s_h"]=ps->substr(2);
 644                                 break;
 645                         case 'J':
 646                                 /* XJ# = JIS hex code: 0=jis208, 1=jis212, 2=jis213 */
 647                                 switch((*ps)[2]) {
 648                                 case '0':
 649                                         k.variant["jis208"]=JisHexToKuten(ps->substr(3));
 650                                         break;
 651                                 case '1':
 652                                         k.variant["jis212"]=JisHexToKuten(ps->substr(3));
 653                                         break;
 654                                 case '2':
 655                                         k.variant["jis213"]=JisHexToKuten(ps->substr(3));
 656                                         break;
 657                                 }
 658                                 break;
 659                         case 'N':
 660                                 /* nelson_c code */
 661                                 k.variant["nelson_c"]=ps->substr(2);
 662                                 break;
 663                         case 'O':
 664                                 /* oneill code */
 665                                 k.variant["oneill"]=ps->substr(2);
 666                                 break;
 667                         default:
 668                                 {
 669                                         ostringstream oss;
 670                                         oss << ERR_PREF << "Unknown entry \"" << *ps << "\" found!";
 671                                         el.Push(EL_Error, oss.str());
 672                                 }
 673                         }
 674                         break;
 675                 case 'Z':
 676                         sTemp = ps->substr(0,3);
 677                         if(sTemp == "ZBP")
 678                                 k.skipMisclass.push_back(
 679                                         pair<string,string>("stroke_and_posn", ps->substr(3)));
 680                         else if(sTemp == "ZPP") {
 681                                 k.skipMisclass.push_back(
 682                                         pair<string,string>("posn", ps->substr(3)));
 683                         } else if(sTemp == "ZRP") {
 684                                 k.skipMisclass.push_back(
 685                                         pair<string,string>("stroke_diff", ps->substr(3)));
 686                         } else if(sTemp == "ZSP") {
 687                                 k.skipMisclass.push_back(
 688                                         pair<string,string>("stroke_count", ps->substr(3)));
 689                         } else {
 690                                 ostringstream oss;
 691                                 oss << ERR_PREF << "Unknown entry \"" << *ps << "\" found!";
 692                                 el.Push(EL_Error, oss.str());
 693                         }
 694                         break;
 695                 /* Korean/Pinyin (Chinese) romanization */
 696                 case 'W':
 697                         k.korean_r.push_back(ps->substr(1));
 698                         break;
 699                 case 'Y':
 700                         k.pinyin.push_back(ps->substr(1));
 701                         break;
 702                 case '{':
 703                         /* MEANINGS */
 704                         sTemp = *ps;
 705                         /* Make sure we grab the whole meaning entry - pop more tokens and
 706                            append if necessary. */
 707                         while(*(sTemp.rbegin()) != '}') {
 708                                 tl.pop_front();
 709                                 if(tl.size()==0) break;
 710                                 sTemp.append(1, ' ');
 711                                 sTemp.append(tl.front());
 712                         }
 713                         if(*(sTemp.rbegin()) != '}') {
 714                                 /* Shouldn't happen, but I want to be safe. */
 715                                 ostringstream oss;
 716                                 oss << ERR_PREF << "Unable to find ending '}' character!\n"
 717                                         << "Entry responsible: [" << kanjidicEntry << "]";
 718                                 el.Push(EL_Error, oss.str());
 719                                 /* Strip only the starting {, since } is not present. */
 720                                 sTemp = sTemp.substr(1, sTemp.length()-1);
 721                         } else {
 722                                 /* Strip {} from around the string. */
 723                                 sTemp = sTemp.substr(1, sTemp.length()-2);
 724                         }
 725                         k.meaning["en"].push_back(sTemp);
 726                         break;
 727                 default:
 728                         switch(tmode) {
 729                         case 0:
 730                                 /* Check for readings */
 731                                 /* The first character may be 〜, but if so, it -will- be
 732                                    followed by a kana character. */
 733                                 wsTemp = utfconv_mw(*ps);
 734                                 if(wsTemp[0]==L'〜')
 735                                         cKanaTest = wsTemp[1];
 736                                 else cKanaTest = wsTemp[0];
 737
 738                                 if(IsHiragana(cKanaTest)) {
 739                                         k.kunyomi.push_back(*ps);
 740                                 } else if(IsKatakana(cKanaTest)) {
 741                                         k.onyomi.push_back(*ps);
 742                                 } else {
 743                                         ostringstream oss;
 744                                         oss << ERR_PREF
 745                                                 << "UNHANDLED entry \"" << *ps << "\" encountered!";
 746                                         el.Push(EL_Error, oss.str());
 747                                 }
 748
 749                                 break;
 750                         case 1:
 751                                 k.nanori.push_back(*ps);
 752                                 break;
 753                         case 2:
 754                                 k.radicalName = *ps;
 755                                 break;
 756                         default:
 757                                 {
 758                                         ostringstream oss;
 759                                         oss << ERR_PREF
 760                                                 << "Unknown tmode value (" << tmode << ") encountered!";
 761                                         el.Push(EL_Error, oss.str());
 762                                 }
 763                         }
 764
 765                         break;
 766                 }
 767                 tl.pop_front();
 768         }
 769 }
 770
 771 /* This could be sped up: copy the first UTF-8 character into a string, then
 772    run a conversion on that.  Trivial though. */
 773 void KDict::KanjidicParser(char* kanjidicRawData, const char* jisStd) {
 774         char* token = strtok(kanjidicRawData, "\n");
 775         wstring wToken;
 776         while(token) {
 777                 if( (strlen(token)>0) && (token[0]!='#') ) {
 778                         wToken = utfconv_mw(token);
 779                         /* Convert token to proper format */
 780                         wToken = ConvertKanjidicEntry(wToken);
 781                         /* Create new KInfo object.
 782                            If one already exists for this character, copy over the
 783                            information. */
 784                         KInfo k;
 785                         BoostHM<wchar_t, KInfo>::iterator it = kdictData.find(wToken[0]);
 786                         if(it!=kdictData.end()) k = it->second;
 787                         /* Fill the KInfo structure */
 788                         KanjidicToKInfo(utfconv_wm(wToken), k, jisStd);
 789
 790                         /* Add to hash table */
 791                         if(!kdictData.assign(wToken[0], k)) {
 792                                 ostringstream oss;
 793                                 string temp = utfconv_wm(wToken);
 794                                 oss << ERR_PREF << "Error assigning (" << temp[0]<< ", "
 795                                         << temp << ") to hash table!";
 796                                 el.Push(EL_Error, oss.str());
 797                         }
 798                 }
 799                 token = strtok(NULL, "\n");
 800         }
 801 }
 802
 803 KDict::~KDict() {
 804         /* Currently: nothing here. */
 805 }
 806
 807 /*
 808  * Performs transformations on a KANJIDIC string for our internal usage.
 809  * Currently, this includes the following:
 810  * - Changing あ.いう notation to あ(いう), a la JWPce/JFC.
 811  * - Changing -あい notation to 〜あい, also a la JWPce/JFC.
 812  */
 813 wstring KDict::ConvertKanjidicEntry(const wstring& s) {
 814         size_t index, lastIndex;
 815         wstring temp = s;
 816
 817         /* First conversion: あ.いう to あ(いう) */
 818         index = temp.find(L'.', 0);
 819         while(index!=wstring::npos) {
 820                 /* Proceed if the character preceding the "." is hiragana/katakana. */
 821                 if(IsFurigana(temp[index-1])) {
 822                         temp[index] = L'(';
 823                         index = temp.find(L' ', index+1);
 824                         if(index==wstring::npos) {
 825                                 temp.append(1, L')');
 826                                 break;
 827                         } else
 828                                 temp.insert(index, 1, L')');
 829                 }
 830                 lastIndex = index;
 831                 index = temp.find(L'.', lastIndex+1);
 832         }
 833
 834         /* Second conversion: - to 〜, when a neighboring
 835            character is hiragana/katakana */
 836         index = temp.find(L'-', 0);
 837         while(index!=wstring::npos) {
 838                 /* Proceed if the character before or after
 839                    the "-" is hiragana/katakana. */
 840                 if(IsFurigana(temp[index-1]) || IsFurigana(temp[index+1]))
 841                         temp[index]=L'〜';
 842
 843                 lastIndex = index;
 844                 index = temp.find(L'-', lastIndex+1);
 845         }
 846
 847         /* Return the converted string */
 848         return temp;
 849 }
 850
 851 wstring KDict::KInfoToHtml(const KInfo& kInfo) {
 852         Preferences* prefs = Preferences::Get();
 853         return KInfoToHtml(kInfo,
 854                                            prefs->kanjidicOptions,
 855                                            prefs->kanjidicDictionaries);
 856 }
 857
 858 wstring KDict::KInfoToHtml(const KInfo& kInfo,
 859                                                    long options, long dictionaries) {
 860 /*      return wstring(L"<p>")
 861                 .append(s[0])
 862                 .append(L"</p>");*/
 863         #warning KInfoToHtml currently is unimplemented!
 864 #if 0
 865         wostringstream result;
 866         wostringstream header;
 867         wstring onyomi, kunyomi, nanori, radicalReading, english;
 868         wstring dictionaryInfo;
 869         wstring lowRelevance;
 870         wstring unhandled;
 871         long grade = -1, frequency = -1, tmode = 0;
 872         wstring strokes;
 873         wstring koreanRomanization, pinyinRomanization, crossReferences, miscodes;
 874         wstring sTemp, token;
 875         list<wstring> t = StrTokenize<wchar_t>(kanjidicStr, L" ");
 876         wchar_t c, c2;
 877
 878         /* Special processing for the first 2 entries of the line. */
 879         if(t.size()>1) {
 880                 /* header = "<h1><font size=\"-6\">" + args[0] + "</font></h1>"; */
 881                 /*header.append(L"<p style=\"font-size:32pt\">") */
 882                 header << L"<p><font size=\"7\">" << t.front() << L"</font></p>";
 883                 t.pop_front();
 884                 lowRelevance.append(L"<li>JIS code: 0x")
 885                         .append(t.front())
 886                         .append(L"</li>");
 887                 t.pop_front();
 888         }
 889
 890         /* NEW!  Temporary code for loading in SODs and SODAs from KanjiCafe! */
 891         if(options & (KDO_SOD_STATIC | KDO_SOD_ANIM) != 0) {
 892                 string utfStr;
 893                 /* Get a UTF8-encoded string for the kanji. */
 894                 utfStr = utfconv_wm(kanjidicStr.substr(0,1));
 895
 896                 /* Convert to a low-to-high-byte hex string. */
 897                 ostringstream ss;
 898                 for(unsigned int i=0;i<utfStr.length();i++) {
 899                         ss << hex << setw(2) << setfill('0')
 900                            << (unsigned int)((unsigned char)utfStr[i]);
 901                 }
 902
 903                 wstringstream sod;
 904                 /* Load static SOD, if present */
 905                 if((options & KDO_SOD_STATIC) != 0) {
 906                         Preferences* p = Preferences::Get();
 907                         ostringstream fn;
 908                         string sodDir = p->GetSetting("sod_dir");
 909                         if(sodDir.length()==0) sodDir = "sods";
 910                         fn << sodDir << DSCHAR
 911                            << "sod-utf8-hex" << DSCHAR
 912                            << ss.str() << ".png";
 913
 914 #ifdef DEBUG
 915                         printf("DEBUG: Checking for existance of file \"%s\"...\n",
 916                                    fn.str().c_str());
 917 #endif
 918                         ifstream f(fn.str().c_str());
 919                         if(f.is_open()) {
 920                                 f.close();
 921                                 if(sod.str().length()>0) sod << L"<br />";
 922                                 sod << L"<img src=\"" << utfconv_mw(fn.str()) << L"\" />";
 923                         }
 924                 }
 925                 /* Load animated SOD, if present */
 926                 if((options & KDO_SOD_ANIM) != 0) {
 927                         ostringstream fn;
 928                         fn << "sods" << DSCHAR
 929                            << "soda-utf8-hex" << DSCHAR
 930                            << ss.str() << ".gif";
 931 #ifdef DEBUG
 932                         printf("DEBUG: Checking for existance of file \"%s\"...\n",
 933                                    fn.str().c_str());
 934 #endif
 935                         ifstream f(fn.str().c_str());
 936                         if(f.is_open()) {
 937                                 f.close();
 938                                 if(sod.str().length()>0) sod << L"<br />";
 939                                 sod << L"<img src=\"" << utfconv_mw(fn.str()) << L"\" />";
 940                         }
 941                 }
 942                 /* Append the chart(s) in a paragraph object. */
 943                 if(sod.str().length()>0) {
 944                         header << L"<p>" << sod.str() <<
 945                                 L"<br /><font size=\"1\">(Kanji stroke order graphics "
 946                                 L"used under license from KanjiCafe.com.)</font></p>";
 947                 }
 948         }
 949
 950         while(t.size()>0) {
 951                 token = t.front();
 952                 t.pop_front();
 953                 sTemp = token;
 954                 c = sTemp[0];
 955                 /* If a preceding character is detected, strip it */
 956                 if(c == L'(' || c == L'〜') {
 957                         sTemp = sTemp.substr(1);
 958                         c = sTemp[0];
 959                 }
 960                 if(tmode==0) {
 961                         if(IsKatakana(c)) {
 962                                 /* Onyomi reading detected */
 963                                 /*if(onyomi.length()>0) onyomi.append(L"　"); */
 964                                 if(onyomi.length()>0) onyomi.append(L"&nbsp; ");
 965                                 onyomi.append(token);   /* Copy the original string, including ()'s and 〜's */
 966                                 continue;
 967                         }
 968                         else if(IsHiragana(c)) {
 969                                 /* Kunyomi reading detected */
 970                                 if(kunyomi.length()>0) kunyomi.append(L"&nbsp; ");
 971                                 kunyomi.append(token);  /* Copy the original string, including ()'s and 〜's */
 972                                 continue;
 973                         }
 974                 } else if(tmode==1) {
 975                         if(IsFurigana(c)) {
 976                                 /* Nanori reading detected */
 977                                 if(nanori.length()>0) nanori.append(L"&nbsp; ");
 978                                 nanori.append(token);   /* Copy the original string, including ()'s and 〜's */
 979                                 continue;
 980                         }
 981                 } else if(tmode==2) {
 982                         if(IsFurigana(c)) {
 983                                 /* Special radical reading detected */
 984                                 if(radicalReading.length()>0) radicalReading.append(L"&nbsp; ");
 985                                 radicalReading.append(token);
 986                                 continue;
 987                         }
 988                 }
 989                 if(c == L'{') {
 990                         /* English meaning detected
 991                            Special handling is needed to take care of spaces, though.
 992                            We'll "cheat" and mess with our iterator a bit if a space is detected. */
 993                         while(t.size()>0 && sTemp[sTemp.length()-1] != L'}') {
 994                                 sTemp.append(L" ").append(t.front());
 995                                 t.pop_front();
 996                         }
 997                         if(english.length()>0) english.append(L", ");
 998                         english.append(sTemp.substr(1,sTemp.length()-2));  /* Strip the {} */
 999                 }
1000                 else {
1001                         switch(c) {
1002                         case L'T':  /* Change "t mode" */
1003                                 /*wstring(sTemp.substr(1)).ToLong(&tmode);*/
1004                                 wistringstream(sTemp.substr(1)) >> tmode;
1005 #ifdef DEBUG
1006                                 if(tmode>2) printf("WARNING: T-mode set to %d.\nT-modes above 2 are not currently documented!", (int)tmode);
1007 #endif
1008                                 break;
1009                         case L'B':  /* Bushu radical */
1010                                 lowRelevance.append(L"<li>Bushu radical: ").append(sTemp.substr(1)).append(L"</li>");
1011                                 break;
1012                         case L'C':  /* Classical radical */
1013                                 lowRelevance.append(L"<li>Classical radical: ").append(sTemp.substr(1)).append(L"</li>");
1014                                 break;
1015                         case L'F':  /* Frequency */
1016                                 /*wstring(sTemp.substr(1)).ToLong(&frequency);*/
1017                                 wistringstream(sTemp.substr(1)) >> frequency;
1018                                 break;
1019                         case L'G':  /* Grade level */
1020                                 /*wstring(sTemp.substr(1)).ToLong(&grade);*/
1021                                 wistringstream(sTemp.substr(1)) >> grade;
1022                                 break;
1023                         case L'S':  /* Stroke count */
1024                                 if(strokes.length()==0) {
1025                                         strokes = sTemp.substr(1);
1026                                 } else if(!strokes.find(L' ')!=wstring::npos) {
1027                                         strokes.append(L" (Miscounts: ")
1028                                                 .append(sTemp.substr(1))
1029                                                 .append(L")");
1030                                 } else {
1031                                         strokes = strokes.substr(0, strokes.length()-1)
1032                                                 .append(L", ")
1033                                                 .append(sTemp.substr(1))
1034                                                 .append(L")");
1035                                 }
1036                                 break;
1037                         case L'U':  /* Unicode value */
1038                                 lowRelevance.append(L"<li>Unicode: 0x").append(sTemp.substr(1)).append(L"</li>");
1039                                 break;
1040                         /* From here, it's all dictionary codes */
1041                         case L'H':
1042                                 if((dictionaries & KDD_NJECD)!=0)
1043                                         dictionaryInfo.append(L"<li>New Japanese-English Character Dictionary (Halpern): ")
1044                                                 .append(sTemp.substr(1)).append(L"</li>");
1045                                 break;
1046                         case L'N':
1047                                 if((dictionaries & KDD_MRJECD)!=0)
1048                                         dictionaryInfo.append(L"<li>Modern Reader's Japanese-English Character Dictionary (Nelson): ")
1049                                                 .append(sTemp.substr(1)).append(L"</li>");
1050                                 break;
1051                         case L'V':
1052                                 if((dictionaries & KDD_NNJECD)!=0)
1053                                         dictionaryInfo.append(L"<li>The New Nelson's Japanese-English Character Dictionary: ")
1054                                                 .append(sTemp.substr(1)).append(L"</li>");
1055                                 break;
1056                         case L'P':
1057                                 /* SKIP codes. */
1058                                 /* Thanks to changes in permissible SKIP code usage (change to
1059                                    Creative Commons licensing in January 2008), we can now use
1060                                    this without problems. */
1061                                 if((dictionaries & KDD_SKIP)!=0)
1062                                         dictionaryInfo.append(L"<li>SKIP code: ")
1063                                                 .append(sTemp.substr(1)).append(L"</li>");
1064                                 break;
1065                         case L'I':  /* Spahn/Hadamitzky dictionaries */
1066                                 if(sTemp[1]==L'N') {
1067                                         if((dictionaries & KDD_KK)!=0) {
1068                                                 dictionaryInfo.append(L"<li>Kanji & Kana (Spahn, Hadamitzky): ")
1069                                                         .append(sTemp.substr(2)).append(L"</li>");
1070                                         }
1071                                 } else {
1072                                         if((dictionaries & KDD_KD)!=0) {
1073                                                 dictionaryInfo.append(L"<li>Kanji Dictionary (Spahn, Hadamitzky): ")
1074                                                         .append(sTemp.substr(1)).append(L"</li>");
1075                                         }
1076                                 }
1077                                 break;
1078                         case L'Q':
1079                                 if((dictionaries & KDD_FC)!=0) {
1080                                         dictionaryInfo.append(L"<li>Four Corner code: ")
1081                                                 .append(sTemp.substr(1)).append(L"</li>");
1082                                 }
1083                                 break;
1084                         case L'M':
1085                                 c2 = sTemp[1];
1086                                 if(c2==L'N') {
1087                                         if((dictionaries & KDD_MOROI)!=0) {
1088                                                 dictionaryInfo.append(L"<li>Morohashi Daikanwajiten Index: ")
1089                                                         .append(sTemp.substr(2)).append(L"</li>");
1090                                         }
1091                                 } else if(c2==L'P') {
1092                                         if((dictionaries & KDD_MOROVP)!=0) {
1093                                                 dictionaryInfo.append(L"<li>Morohashi Daikanwajiten Volume/Page: ")
1094                                                         .append(sTemp.substr(2)).append(L"</li>");
1095                                         }
1096                                 }
1097                                 break;
1098                         case L'E':
1099                                 if((dictionaries & KDD_GRJC)!=0) {
1100                                         dictionaryInfo.append(L"<li>A Guide to Remembering Japanese Characters (Henshal): ")
1101                                                 .append(sTemp.substr(1)).append(L"</li>");
1102                                 }
1103                                 break;
1104                         case L'K':
1105                                 if((dictionaries & KDD_GKD)!=0) {
1106                                         dictionaryInfo.append(L"<li>Gakken Kanji Dictionary (\"A New Dictionary of Kanji Usage\"): ")
1107                                                 .append(sTemp.substr(1)).append(L"</li>");
1108                                 }
1109                                 break;
1110                         case L'L':
1111                                 if((dictionaries & KDD_RTK)!=0) {
1112                                         dictionaryInfo.append(L"<li>Remembering the Kanji (Heisig): ")
1113                                                 .append(sTemp.substr(1)).append(L"</li>");
1114                                 }
1115                                 break;
1116                         case L'O':
1117                                 if((dictionaries & KDD_JN)!=0) {
1118                                         dictionaryInfo.append(L"<li>Japanese Names (O'Neill): ")
1119                                                 .append(sTemp.substr(1)).append(L"</li>");
1120                                 }
1121                                 break;
1122                         case L'D':
1123                                 c2 = sTemp[1];
1124                                 switch(c2) {
1125                                 case L'B':
1126                                         if((dictionaries & KDD_JBP)!=0) {
1127                                                 dictionaryInfo.append(L"<li>Japanese for Busy People (AJLT): ")
1128                                                         .append(sTemp.substr(2)).append(L"</li>");
1129                                         }
1130                                         break;
1131                                 case L'C':
1132                                         if((dictionaries & KDD_KWJLP)!=0) {
1133                                                 dictionaryInfo.append(L"<li>The Kanji Way to Japanese Language Power (Crowley): ")
1134                                                         .append(sTemp.substr(2)).append(L"</li>");
1135                                         }
1136                                         break;
1137                                 case L'F':
1138                                         if((dictionaries & KDD_JKF)!=0) {
1139                                                 dictionaryInfo.append(L"<li>Japanese Kanji Flashcards (White Rabbit Press): ")
1140                                                         .append(sTemp.substr(2)).append(L"</li>");
1141                                         }
1142                                         break;
1143                                 case L'G':
1144                                         if((dictionaries & KDD_KCKG)!=0) {
1145                                                 dictionaryInfo.append(L"<li>Kodansha Compact Kanji Guide: ")
1146                                                         .append(sTemp.substr(2)).append(L"</li>");
1147                                         }
1148                                         break;
1149                                 case L'H':
1150                                         if((dictionaries & KDD_GTRWJH)!=0) {
1151                                                 dictionaryInfo.append(L"<li>A Guide To Reading and Writing Japanese (Hensall): ")
1152                                                         .append(sTemp.substr(2)).append(L"</li>");
1153                                         }
1154                                         break;
1155                                 case L'J':
1156                                         if((dictionaries & KDD_KIC)!=0) {
1157                                                 dictionaryInfo.append(L"<li>Kanji in Context (Nishiguchi and Kono): ")
1158                                                         .append(sTemp.substr(2)).append(L"</li>");
1159                                         }
1160                                         break;
1161                                 case L'K':
1162                                         if((dictionaries & KDD_KLD)!=0) {
1163                                                 dictionaryInfo.append(L"<li>Kanji Learner's Dictionary (Halpern): ")
1164                                                         .append(sTemp.substr(2)).append(L"</li>");
1165                                         }
1166                                         break;
1167                                 case L'O':
1168                                         if((dictionaries & KDD_EK)!=0) {
1169                                                 dictionaryInfo.append(L"<li>Essential Kanji (O'Neill): ")
1170                                                         .append(sTemp.substr(2)).append(L"</li>");
1171                                         }
1172                                         break;
1173                                 case L'R':
1174                                         if((dictionaries & KDD_DR)!=0) {
1175                                                 dictionaryInfo.append(L"<li>2001 Kanji (De Roo): ")
1176                                                         .append(sTemp.substr(2)).append(L"</li>");
1177                                         }
1178                                         break;
1179                                 case L'S':
1180                                         if((dictionaries & KDD_GTRWJS)!=0) {
1181                                                 dictionaryInfo.append(L"<li>A Guide to Reading and Writing Japanese (Sakade): ")
1182                                                         .append(sTemp.substr(2)).append(L"</li>");
1183                                         }
1184                                         break;
1185                                 case L'T':
1186                                         if((dictionaries & KDD_TKC)!=0) {
1187                                                 dictionaryInfo.append(L"<li>Tuttle Kanji Cards (Kask): ")
1188                                                         .append(sTemp.substr(2)).append(L"</li>");
1189                                         }
1190                                         break;
1191                                 default:
1192                                         if(unhandled.length()>0) unhandled.append(L" ");
1193                                         unhandled.append(sTemp);
1194                                         break;
1195                                 }
1196                                 break;
1197                         /* Crossreferences and miscodes */
1198                         case L'X':
1199                                 if(crossReferences.length()>0) crossReferences.append(L", ");
1200                                 crossReferences.append(sTemp.substr(1));
1201                                 break;
1202                         case L'Z':
1203                                 if(miscodes.length()>0) miscodes.append(L", ");
1204                                 miscodes.append(sTemp.substr(1));
1205                                 break;
1206                         /* Korean/Pinyin (Chinese) romanization */
1207                         case L'W':
1208                                 if(koreanRomanization.length()>0) koreanRomanization.append(L", ");
1209                                 koreanRomanization.append(sTemp.substr(1));
1210                                 break;
1211                         case L'Y':
1212                                 if(pinyinRomanization.length()>0) pinyinRomanization.append(L", ");
1213                                 pinyinRomanization.append(sTemp.substr(1));
1214                                 break;
1215                         default:
1216                                 if(unhandled.length()>0) unhandled.append(L" ");
1217                                 unhandled.append(sTemp);
1218                                 break;
1219                         }
1220                 }
1221         } /* while(t.HasMoreTokens()) */
1222
1223         if(header.str().length() > 0) result << header.str();
1224 #ifdef DEBUG
1225         printf("DEBUG: header=[%ls]\n", header.str().c_str());
1226 #endif
1227         result << L"<ul>";
1228         if((options & KDO_READINGS) != 0) {
1229                 if(onyomi.length() > 0)
1230                         result << L"<li>Onyomi Readings: " << onyomi << L"</li>";
1231                 if(kunyomi.length() > 0)
1232                         result << L"<li>Kunyomi Readings: " << kunyomi << L"</li>";
1233                 if(nanori.length() > 0)
1234                         result << L"<li>Nanori Readings: " << nanori << L"</li>";
1235                 if(radicalReading.length() > 0)
1236                         result << L"<li>Special Radical Reading: " << radicalReading <<
1237                                 L"</li>";
1238         }
1239         if((options & KDO_MEANINGS) != 0) {
1240                 if(english.length() > 0)
1241                         result << L"<li>English Meanings: " << english << L"</li>";
1242         }
1243         if((options & KDO_HIGHIMPORTANCE) != 0) {
1244                 if(strokes.length() > 0)
1245                         result << L"<li>Stroke count: " << strokes << L"</li>";
1246                 else
1247                         result << L"<li>Stroke count: not specified in KANJIDIC</li>";
1248                 result << L"<li>Grade Level: ";
1249                 if(grade<=6 && grade >= 1) {  /* Jouyou (Grade #) */
1250                         result << L"Jouyou (Grade " << grade << L")";
1251                 } else if(grade==8) {  /* Jouyou (General usage) */
1252                         result << L"Jouyou (General usage)";
1253                 } else if(grade==9) {  /* Jinmeiyou (Characters for names) */
1254                         result << L"Jinmeiyou (Characters for names)";
1255                 } else if(grade==-1) {  /* No flag specified in kanjidic string */
1256                         result << L"Unspecified";
1257                 } else {
1258                         result << L"Unhandled grade level (Grade " << grade << L")";
1259                 }
1260                 result << L"</li>";
1261                 if(frequency!=-1)
1262                         result << L"<li>Frequency Ranking: " << frequency << L"</li>";
1263                 else result << L"<li>Frequency Ranking: Unspecified</li>";
1264         }
1265         if((options & KDO_DICTIONARIES) != 0) {
1266                 if(dictionaryInfo.length()>0)
1267                         result << L"<li>Dictionary Codes:<ul>" << dictionaryInfo
1268                                    << L"</ul></li>";
1269         }
1270         if((options & KDO_VOCABCROSSREF) != 0) {
1271                 vector<wstring> *vList = &(jben->vocabList->GetVocabList());
1272                 wchar_t thisKanji = kanjidicStr[0];
1273                 vector<wstring> crossRefList;
1274                 vector<wstring>::iterator vIt;
1275                 for(vIt=vList->begin(); vIt!=vList->end(); vIt++) {
1276                         if(vIt->find(thisKanji)!=wstring::npos) {
1277                                 crossRefList.push_back(*vIt);
1278                         }
1279                 }
1280                 if(crossRefList.size()>0) {
1281                         result << L"<li>This kanji is used by words in your study list:<br><font size=\"7\">";
1282                         vIt = crossRefList.begin();
1283                         result << *vIt;
1284                         for(++vIt; vIt!=crossRefList.end(); vIt++) {
1285                                 result << L"&nbsp; " << *vIt;
1286                         }
1287                         result << L"</font></li>";
1288                 }
1289         }
1290         if((options & KDO_LOWIMPORTANCE) != 0) {
1291                 if(koreanRomanization.length()>0) lowRelevance.append(L"<li>Korean romanization: ").append(koreanRomanization).append(L"</li>");
1292                 if(pinyinRomanization.length()>0) lowRelevance.append(L"<li>Pinyin romanization: ").append(pinyinRomanization).append(L"</li>");
1293                 if(crossReferences.length()>0) lowRelevance.append(L"<li>Cross reference codes: ").append(crossReferences).append(L"</li>");
1294                 if(miscodes.length()>0) lowRelevance.append(L"<li>Miscodes: ").append(miscodes).append(L"</li>");
1295                 if(lowRelevance.length()>0)
1296                         result << L"<li>Extra Information:<ul>" << lowRelevance
1297                                    << L"</ul></li>";
1298         }
1299         if((options & KDO_UNHANDLED) != 0) {
1300                 if(unhandled.length()>0)
1301                         result << L"<li>Unhandled: " << unhandled << L"</li>";
1302         }
1303         result << L"</ul>";
1304
1305         return result.str();
1306 #endif
1307         return wstring();
1308 }
1309
1310 const BoostHM<wchar_t,KInfo>* KDict::GetHashTable() const {
1311         return &kdictData;
1312 }
1313
1314 bool KDict::MainDataLoaded() const {
1315         if(kdictData.size()>0) return true;
1316         return false;
1317 }
1318
1319 const KInfo* KDict::GetEntry(const wchar_t key) const {
1320         BoostHM<wchar_t, KInfo>::const_iterator kci = kdictData.find(key);
1321         if(kci != kdictData.end())
1322                 return &(kci->second);
1323         return NULL;;
1324 }