src/Trans.cpp

   1 /**
   2  * \file Trans.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Lars Gullik Bjønnes
   7  * \author Matthias Ettrich
   8  *
   9  * Full author contact details are available in file CREDITS.
  10  */
  11
  12 #include <config.h>
  13
  14 #include "Trans.h"
  15
  16 #include "Buffer.h"
  17 #include "BufferView.h"
  18 #include "Cursor.h"
  19 #include "CutAndPaste.h"
  20 #include "Lexer.h"
  21 #include "LyXRC.h"
  22 #include "Text.h"
  23
  24 #include "support/convert.h"
  25 #include "support/debug.h"
  26 #include "support/docstream.h"
  27 #include "support/FileName.h"
  28 #include "support/filetools.h"
  29 #include "support/lstrings.h"
  30
  31 using namespace std;
  32 using namespace lyx::support;
  33
  34 namespace lyx {
  35
  36 /////////////////////////////////////////////////////////////////////
  37 //
  38 // TeXAccents
  39 //
  40 /////////////////////////////////////////////////////////////////////
  41
  42 /* the names used by TeX and XWindows for deadkeys/accents are not the same
   43    so here follows a table to clarify the differences. Please correct this
  44    if I got it wrong
  45
  46    |------------------|------------------|------------------|--------------|
  47    |      TeX         |     XWindows     |   \bind/LFUN     | used by intl |
  48    |------------------|------------------|------------------|--------------|
  49    |    grave         |    grave         |LFUN_ACCENT_GRAVE        | grave
  50    |    acute         |    acute         |LFUN_ACCENT_ACUTE        | acute
  51    |    circumflex    |    circumflex    |LFUN_ACCENT_CIRCUMFLEX   | circumflex
  52    | umlaut/dieresis  |    diaeresis     |LFUN_ACCENT_UMLAUT       | umlaut
  53    |    tilde         |    tilde         |LFUN_ACCENT_TILDE        | tilde
   54    |    macron        |    macron        |LFUN_ACCENT_MACRON       | macron
  55    |    dot           |    abovedot      |LFUN_ACCENT_DOT          | dot
  56    |    cedilla       |    cedilla       |LFUN_ACCENT_CEDILLA      | cedilla
  57    |    underdot      |                  |LFUN_ACCENT_UNDERDOT     | underdot
  58    |    underbar      |                  |LFUN_ACCENT_UNDERBAR     | underbar
  59    |    hácek         |    caron         |LFUN_ACCENT_CARON        | caron
  60    |    breve         |    breve         |LFUN_ACCENT_BREVE        | breve
  61    |    tie           |                  |LFUN_ACCENT_TIE          | tie
  62    | Hungarian umlaut |    doubleacute   |LFUN_ACCENT_HUNGARIAN_UMLAUT  | hungarian umlaut
  63    |    circle        |    abovering     |LFUN_ACCENT_CIRCLE       | circle
  64    |                  |    ogonek        |                  |
  65    |                  |    iota          |                  |
  66    |                  |    voiced_sound  |                  |
  67    |                  | semivoiced_sound |                  |
  68    */
  69 static TeXAccent lyx_accent_table[] = {
  70         {TEX_NOACCENT,   0,      "",                LFUN_NOACTION},
  71         {TEX_ACUTE,      0x0301, "acute",           LFUN_ACCENT_ACUTE},
  72         {TEX_GRAVE,      0x0300, "grave",           LFUN_ACCENT_GRAVE},
  73         {TEX_MACRON,     0x0304, "macron",          LFUN_ACCENT_MACRON},
  74         {TEX_TILDE,      0x0303, "tilde",           LFUN_ACCENT_TILDE},
  75         {TEX_UNDERBAR,   0x0320, "underbar",        LFUN_ACCENT_UNDERBAR},
  76         {TEX_CEDILLA,    0x0327, "cedilla",         LFUN_ACCENT_CEDILLA},
  77         {TEX_UNDERDOT,   0x0323, "underdot",        LFUN_ACCENT_UNDERDOT},
  78         {TEX_CIRCUMFLEX, 0x0302, "circumflex",      LFUN_ACCENT_CIRCUMFLEX},
  79         {TEX_CIRCLE,     0x030a, "circle",          LFUN_ACCENT_CIRCLE},
  80         {TEX_TIE,        0x0361, "tie",             LFUN_ACCENT_TIE},
  81         {TEX_BREVE,      0x0306, "breve",           LFUN_ACCENT_BREVE},
  82         {TEX_CARON,      0x030c, "caron",           LFUN_ACCENT_CARON},
  83         // Don't fix this typo for compatibility reasons!
  84         {TEX_HUNGUML,    0x030b, "hugarian_umlaut", LFUN_ACCENT_HUNGARIAN_UMLAUT},
  85         {TEX_UMLAUT,     0x0308, "umlaut",          LFUN_ACCENT_UMLAUT},
  86         {TEX_DOT,        0x0307, "dot",             LFUN_ACCENT_DOT},
  87         {TEX_OGONEK,     0x0328, "ogonek",          LFUN_ACCENT_OGONEK}
  88 };
  89
  90
  91 TeXAccent get_accent(FuncCode action)
  92 {
  93         int i = 0;
  94         while (i <= TEX_MAX_ACCENT) {
  95                 if (lyx_accent_table[i].action == action)
  96                         return lyx_accent_table[i];
  97                 ++i;
  98         }
  99         struct TeXAccent temp = { static_cast<tex_accent>(0), 0,
 100                                           0, static_cast<FuncCode>(0)};
 101         return temp;
 102 }
 103
 104
 105 static docstring const doAccent(docstring const & s, tex_accent accent)
 106 {
 107         if (s.empty())
 108                 return docstring(1, lyx_accent_table[accent].ucs4);
 109
 110         odocstringstream os;
 111         os.put(s[0]);
 112         os.put(lyx_accent_table[accent].ucs4);
 113         if (s.length() > 1) {
 114                 if (accent != TEX_TIE || s.length() > 2)
 115                         lyxerr << "Warning: Too many characters given for accent "
 116                                << lyx_accent_table[accent].name << '.' << endl;
 117                 os << s.substr(1);
 118         }
 119         return normalize_c(os.str());
 120 }
 121
 122
 123 static docstring const doAccent(char_type c, tex_accent accent)
 124 {
 125         return doAccent(docstring(1, c), accent);
 126 }
 127
 128
 129
 130 /////////////////////////////////////////////////////////////////////
 131 //
 132 // Trans
 133 //
 134 /////////////////////////////////////////////////////////////////////
 135
 136
 137 void Trans::insertException(KmodException & exclist, char_type c,
 138         docstring const & data, bool flag, tex_accent accent)
 139 {
 140         Keyexc p;
 141         p.c = c;
 142         p.data = data;
 143         p.combined = flag;
 144         p.accent = accent;
 145         exclist.insert(exclist.begin(), p);
 146         // or just
 147         // exclist.push_back(p);
 148 }
 149
 150
 151 void Trans::freeException(KmodException & exclist)
 152 {
 153         exclist.clear();
 154 }
 155
 156
 157 void Trans::freeKeymap()
 158 {
 159         kmod_list_.clear();
 160         keymap_.clear();
 161 }
 162
 163
 164 bool Trans::isDefined() const
 165 {
 166         return !name_.empty();
 167 }
 168
 169
 170 enum {
 171         KCOMB = 1,
 172         KMOD,
 173         KMAP,
 174         KXMOD,
 175 };
 176
 177
 178 tex_accent getkeymod(string const &);
 179
 180
 181 void Trans::addDeadkey(tex_accent accent, docstring const & keys)
 182 {
 183         KmodInfo tmp;
 184         tmp.data = keys;
 185         tmp.accent = accent;
 186         kmod_list_[accent] = tmp;
 187
 188         for (docstring::size_type i = 0; i < keys.length(); ++i) {
 189                 // FIXME This is a hack.
 190                 // tmp is no valid UCS4 string, but misused to store the
 191                 // accent.
 192                 docstring tmp;
 193                 tmp += char_type(0);
 194                 tmp += char_type(accent);
 195                 keymap_[keys[i]] = tmp;
 196         }
 197 }
 198
 199
 200 int Trans::load(Lexer & lex)
 201 {
 202         bool error = false;
 203
 204         while (lex.isOK() && !error) {
 205                 switch (lex.lex()) {
 206                 case KMOD:
 207                 {
 208                         LYXERR(Debug::KBMAP, "KMOD:\t" << lex.getString());
 209                         if (!lex.next(true))
 210                                 return -1;
 211
 212                         LYXERR(Debug::KBMAP, "key\t`" << lex.getString() << '\'');
 213
 214                         docstring const keys = lex.getDocString();
 215
 216                         if (!lex.next(true))
 217                                 return -1;
 218
 219                         LYXERR(Debug::KBMAP, "accent\t`" << lex.getString() << '\'');
 220
 221                         tex_accent accent = getkeymod(lex.getString());
 222
 223                         if (accent == TEX_NOACCENT)
 224                                 return -1;
 225
 226 #if 1
 227                         // FIXME: This code should be removed...
 228                         // But we need to fix up all the kmap files first
 229                         // so that this field is not present anymore.
 230                         if (!lex.next(true))
 231                                 return -1;
 232
 233                         LYXERR(Debug::KBMAP, "allowed\t`" << lex.getString() << '\'');
 234
 235                         /* string const allowed = lex.getString(); */
 236                         addDeadkey(accent, keys /*, allowed*/);
 237 #else
 238                         addDeadkey(accent, keys);
 239 #endif
 240                         break;
 241                 }
 242                 case KCOMB: {
 243                         string str;
 244
 245                         LYXERR(Debug::KBMAP, "KCOMB:");
 246                         if (!lex.next(true))
 247                                 return -1;
 248
 249                         str = lex.getString();
 250                         LYXERR(Debug::KBMAP, str);
 251
 252                         tex_accent accent_1 = getkeymod(str);
 253                         if (accent_1 == TEX_NOACCENT)
 254                                 return -1;
 255
 256                         if (!lex.next(true))
 257                                 return -1;
 258
 259                         str = lex.getString();
 260                         LYXERR(Debug::KBMAP, str);
 261
 262                         tex_accent accent_2 = getkeymod(str);
 263                         if (accent_2 == TEX_NOACCENT) return -1;
 264
 265                         map<tex_accent, KmodInfo>::iterator it1 =
 266                                 kmod_list_.find(accent_1);
 267                         map<tex_accent, KmodInfo>::iterator it2 =
 268                                 kmod_list_.find(accent_2);
 269                         if (it1 == kmod_list_.end() || it2 == kmod_list_.end())
 270                                 return -1;
 271
 272                         // Find what key accent_2 is on - should
 273                         // check about accent_1 also
 274                         map<char_type, docstring>::iterator it = keymap_.begin();
 275                         map<char_type, docstring>::iterator end = keymap_.end();
 276                         for (; it != end; ++it) {
 277                                 if (!it->second.empty()
 278                                     && it->second[0] == 0
 279                                     && it->second[1] == accent_2)
 280                                         break;
 281                         }
 282                         docstring allowed;
 283                         if (!lex.next())
 284                                 return -1;
 285
 286                         allowed = lex.getDocString();
 287                         LYXERR(Debug::KBMAP, "allowed: " << to_utf8(allowed));
 288
 289                         insertException(kmod_list_[accent_1].exception_list,
 290                                         it->first, allowed, true, accent_2);
 291                 }
 292                 break;
 293                 case KMAP: {
 294                         unsigned char key_from;
 295
 296                         LYXERR(Debug::KBMAP, "KMAP:\t" << lex.getString());
 297
 298                         if (!lex.next(true))
 299                                 return -1;
 300
 301                         key_from = lex.getString()[0];
 302                         LYXERR(Debug::KBMAP, "\t`" << lex.getString() << '\'');
 303
 304                         if (!lex.next(true))
 305                                 return -1;
 306
 307                         docstring const string_to = lex.getDocString();
 308                         keymap_[key_from] = string_to;
 309                         LYXERR(Debug::KBMAP, "\t`" << to_utf8(string_to) << '\'');
 310                         break;
 311                 }
 312                 case KXMOD: {
 313                         tex_accent accent;
 314                         char_type key;
 315                         docstring str;
 316
 317                         LYXERR(Debug::KBMAP, "KXMOD:\t" << lex.getString());
 318
 319                         if (!lex.next(true))
 320                                 return -1;
 321
 322                         LYXERR(Debug::KBMAP, "\t`" << lex.getString() << '\'');
 323                         accent = getkeymod(lex.getString());
 324
 325                         if (!lex.next(true))
 326                                 return -1;
 327
 328                         LYXERR(Debug::KBMAP, "\t`" << lex.getString() << '\'');
 329                         key = lex.getDocString()[0];
 330
 331                         if (!lex.next(true))
 332                                 return -1;
 333
 334                         LYXERR(Debug::KBMAP, "\t`" << lex.getString() << '\'');
 335                         str = lex.getDocString();
 336
 337                         insertException(kmod_list_[accent].exception_list,
 338                                         key, str);
 339                         break;
 340                 }
 341                 case Lexer::LEX_FEOF:
 342                         LYXERR(Debug::PARSER, "End of parsing");
 343                         break;
 344                 default:
 345                         lex.printError("ParseKeymapFile: Unknown tag: `$$Token'");
 346                         return -1;
 347                 }
 348         }
 349         return 0;
 350 }
 351
 352
 353 bool Trans::isAccentDefined(tex_accent accent, KmodInfo & i) const
 354 {
 355         map<tex_accent, KmodInfo>::const_iterator cit = kmod_list_.find(accent);
 356         if (cit == kmod_list_.end())
 357                 return false;
 358         i = cit->second;
 359         return true;
 360 }
 361
 362
 363 docstring const Trans::process(char_type c, TransManager & k)
 364 {
 365         docstring const t = match(c);
 366
 367         if (t.empty() && c != 0)
 368                 return k.normalkey(c);
 369
 370         if (!t.empty() && t[0] != 0)
 371                 return t; //return k.normalkey(c);
 372
 373         return k.deadkey(c, kmod_list_[static_cast<tex_accent>(t[1])]);
 374 }
 375
 376
 377 int Trans::load(string const & language)
 378 {
 379         LexerKeyword kmapTags[] = {
 380                 {"\\kcomb", KCOMB },
 381                 { "\\kmap", KMAP },
 382                 { "\\kmod", KMOD },
 383                 { "\\kxmod", KXMOD }
 384         };
 385
 386         FileName const filename = libFileSearch("kbd", language, "kmap");
 387         if (filename.empty())
 388                 return -1;
 389
 390         freeKeymap();
 391         Lexer lex(kmapTags);
 392         lex.setFile(filename);
 393
 394         int const res = load(lex);
 395
 396         if (res == 0)
 397                 name_ = language;
 398         else
 399                 name_.erase();
 400
 401         return res;
 402 }
 403
 404
 405 tex_accent getkeymod(string const & p)
 406         /* return modifier - decoded from p and update p */
 407 {
 408         for (int i = 1; i <= TEX_MAX_ACCENT; ++i) {
 409                 LYXERR(Debug::KBMAP, "p = " << p
 410                        << ", lyx_accent_table[" << i
 411                        << "].name = `" << lyx_accent_table[i].name << '\'');
 412
 413                 if (lyx_accent_table[i].name
 414                      && contains(p, lyx_accent_table[i].name)) {
 415                         LYXERR(Debug::KBMAP, "Found it!");
 416                         return static_cast<tex_accent>(i);
 417                 }
 418         }
 419         return TEX_NOACCENT;
 420 }
 421
 422
 423 /////////////////////////////////////////////////////////////////////
 424 //
 425 // TransState
 426 //
 427 /////////////////////////////////////////////////////////////////////
 428
 429
 430 // TransFSMData
 431 TransFSMData::TransFSMData()
 432 {
 433         deadkey_ = deadkey2_ = 0;
 434         deadkey_info_.accent = deadkey2_info_.accent = TEX_NOACCENT;
 435 }
 436
 437
 438 // TransState
 439 char_type const TransState::TOKEN_SEP = 4;
 440
 441
 442 // TransInitState
 443 TransInitState::TransInitState()
 444 {
 445         init_state_ = this;
 446 }
 447
 448
 449 docstring const TransInitState::normalkey(char_type c)
 450 {
 451         docstring res;
 452         res = c;
 453         return res;
 454 }
 455
 456
 457 docstring const TransInitState::deadkey(char_type c, KmodInfo d)
 458 {
 459         deadkey_ = c;
 460         deadkey_info_ = d;
 461         currentState = deadkey_state_;
 462         return docstring();
 463 }
 464
 465
 466 // TransDeadkeyState
 467 TransDeadkeyState::TransDeadkeyState()
 468 {
 469         deadkey_state_ = this;
 470 }
 471
 472
 473 docstring const TransDeadkeyState::normalkey(char_type c)
 474 {
 475         docstring res;
 476
 477         KmodException::iterator it = deadkey_info_.exception_list.begin();
 478         KmodException::iterator end = deadkey_info_.exception_list.end();
 479
 480         for (; it != end; ++it) {
 481                 if (it->c == c) {
 482                         res = it->data;
 483                         break;
 484                 }
 485         }
 486         if (it == end) {
 487                 res = doAccent(c, deadkey_info_.accent);
 488         }
 489         currentState = init_state_;
 490         return res;
 491 }
 492
 493
 494 docstring const TransDeadkeyState::deadkey(char_type c, KmodInfo d)
 495 {
 496         docstring res;
 497
 498         // Check if the same deadkey was typed twice
 499         if (deadkey_ == c) {
 500                 res = deadkey_;
 501                 deadkey_ = 0;
 502                 deadkey_info_.accent = TEX_NOACCENT;
 503                 currentState = init_state_;
 504                 return res;
 505         }
 506
 507         // Check if it is a combination or an exception
 508         KmodException::const_iterator cit = deadkey_info_.exception_list.begin();
 509         KmodException::const_iterator end = deadkey_info_.exception_list.end();
 510         for (; cit != end; ++cit) {
 511                 if (cit->combined == true && cit->accent == d.accent) {
 512                         deadkey2_ = c;
 513                         deadkey2_info_ = d;
 514                         comb_info_ = (*cit);
 515                         currentState = combined_state_;
 516                         return docstring();
 517                 }
 518                 if (cit->c == c) {
 519                         res = cit->data;
 520                         deadkey_ = 0;
 521                         deadkey_info_.accent = TEX_NOACCENT;
 522                         currentState = init_state_;
 523                         return res;
 524                 }
 525         }
 526
 527         // Not a combination or an exception.
 528         // Output deadkey1 and keep deadkey2
 529
 530         if (deadkey_!= 0)
 531                 res = deadkey_;
 532         deadkey_ = c;
 533         deadkey_info_ = d;
 534         currentState = deadkey_state_;
 535         return res;
 536 }
 537
 538
 539 TransCombinedState::TransCombinedState()
 540 {
 541         combined_state_ = this;
 542 }
 543
 544
 545 docstring const TransCombinedState::normalkey(char_type c)
 546 {
 547         docstring const temp = doAccent(c, deadkey2_info_.accent);
 548         docstring const res = doAccent(temp, deadkey_info_.accent);
 549         currentState = init_state_;
 550         return res;
 551 }
 552
 553
 554 docstring const TransCombinedState::deadkey(char_type c, KmodInfo d)
 555 {
 556         // Third key in a row. Output the first one and
 557         // reenter with shifted deadkeys
 558         docstring res;
 559         if (deadkey_ != 0)
 560                 res = deadkey_;
 561         res += TOKEN_SEP;
 562         deadkey_ = deadkey2_;
 563         deadkey_info_ = deadkey2_info_;
 564         res += deadkey_state_->deadkey(c, d);
 565         return res;
 566 }
 567
 568
 569 // TransFSM
 570 TransFSM::TransFSM()
 571         : TransFSMData(), TransInitState(), TransDeadkeyState(), TransCombinedState()
 572 {
 573         currentState = init_state_;
 574 }
 575
 576
 577 // TransManager
 578
 579 // Initialize static member.
 580 Trans TransManager::default_;
 581
 582
 583 TransManager::TransManager()
 584         : active_(0)
 585 {}
 586
 587
 588 int TransManager::setPrimary(string const & language)
 589 {
 590         if (t1_.getName() == language)
 591                 return 0;
 592
 593         return t1_.load(language);
 594 }
 595
 596
 597 int TransManager::setSecondary(string const & language)
 598 {
 599         if (t2_.getName() == language)
 600                 return 0;
 601
 602         return t2_.load(language);
 603 }
 604
 605
 606 void TransManager::enablePrimary()
 607 {
 608         if (t1_.isDefined())
 609                 active_ = &t1_;
 610
 611         LYXERR(Debug::KBMAP, "Enabling primary keymap");
 612 }
 613
 614
 615 void TransManager::enableSecondary()
 616 {
 617         if (t2_.isDefined())
 618                 active_ = &t2_;
 619         LYXERR(Debug::KBMAP, "Enabling secondary keymap");
 620 }
 621
 622
 623 void TransManager::disableKeymap()
 624 {
 625         active_ = &default_;
 626         LYXERR(Debug::KBMAP, "Disabling keymap");
 627 }
 628
 629
 630 void  TransManager::translateAndInsert(char_type c, Text * text, Cursor & cur)
 631 {
 632         docstring res = active_->process(c, *this);
 633
 634         // Process with tokens
 635         docstring temp;
 636
 637         while (res.length() > 0) {
 638                 res = split(res, temp, TransState::TOKEN_SEP);
 639                 insert(temp, text, cur);
 640         }
 641 }
 642
 643
 644 void TransManager::insert(docstring const & str, Text * text, Cursor & cur)
 645 {
 646         for (size_t i = 0, n = str.size(); i != n; ++i)
 647                 text->insertChar(cur, str[i]);
 648 }
 649
 650
 651 void TransManager::deadkey(char_type c, tex_accent accent, Text * t, Cursor & cur)
 652 {
 653         if (c == 0 && active_ != &default_) {
 654                 // A deadkey was pressed that cannot be printed
 655                 // or a accent command was typed in the minibuffer
 656                 KmodInfo i;
 657                 if (active_->isAccentDefined(accent, i) == true) {
 658                         docstring const res = trans_fsm_
 659                                 .currentState->deadkey(c, i);
 660                         insert(res, t, cur);
 661                         return;
 662                 }
 663         }
 664
 665         if (active_ == &default_ || c == 0) {
 666                 KmodInfo i;
 667                 i.accent = accent;
 668                 i.data.erase();
 669                 docstring res = trans_fsm_.currentState->deadkey(c, i);
 670                 insert(res, t, cur);
 671         } else {
 672                 // Go through the translation
 673                 translateAndInsert(c, t, cur);
 674         }
 675 }
 676
 677
 678 } // namespace lyx