src/BiblioInfo.cpp

   1 /**
   2  * \file BiblioInfo.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Angus Leeming
   7  * \author Herbert Voß
   8  * \author Richard Heck
   9  *
  10  * Full author contact details are available in file CREDITS.
  11  */
  12
  13 #include <config.h>
  14
  15 #include "BiblioInfo.h"
  16 #include "Buffer.h"
  17 #include "BufferParams.h"
  18 #include "buffer_funcs.h"
  19 #include "Encoding.h"
  20 #include "InsetIterator.h"
  21 #include "Paragraph.h"
  22
  23 #include "insets/Inset.h"
  24 #include "insets/InsetBibitem.h"
  25 #include "insets/InsetBibtex.h"
  26 #include "insets/InsetInclude.h"
  27
  28 #include "support/docstream.h"
  29 #include "support/gettext.h"
  30 #include "support/lassert.h"
  31 #include "support/lstrings.h"
  32 #include "support/textutils.h"
  33
  34 #include "boost/regex.hpp"
  35
  36 using namespace std;
  37 using namespace lyx::support;
  38
  39
  40 namespace lyx {
  41
  42 namespace {
  43
  44 // gets the "family name" from an author-type string
  45 docstring familyName(docstring const & name)
  46 {
  47         if (name.empty())
  48                 return docstring();
  49
  50         // first we look for a comma, and take the last name to be everything
  51         // preceding the right-most one, so that we also get the "jr" part.
  52         docstring::size_type idx = name.rfind(',');
  53         if (idx != docstring::npos)
  54                 return ltrim(name.substr(0, idx));
  55
  56         // OK, so now we want to look for the last name. We're going to
  57         // include the "von" part. This isn't perfect.
  58         // Split on spaces, to get various tokens.
  59         vector<docstring> pieces = getVectorFromString(name, from_ascii(" "));
  60         // If we only get two, assume the last one is the last name
  61         if (pieces.size() <= 2)
  62                 return pieces.back();
  63
  64         // Now we look for the first token that begins with a lower case letter.
  65         vector<docstring>::const_iterator it = pieces.begin();
  66         vector<docstring>::const_iterator en = pieces.end();
  67         for (; it != en; ++it) {
  68                 if ((*it).size() == 0)
  69                         continue;
  70                 char_type const c = (*it)[0];
  71                 if (isLower(c))
  72                         break;
  73         }
  74
  75         if (it == en) // we never found a "von"
  76                 return pieces.back();
  77
  78         // reconstruct what we need to return
  79         docstring retval;
  80         bool first = true;
  81         for (; it != en; ++it) {
  82                 if (!first)
  83                         retval += " ";
  84                 else
  85                         first = false;
  86                 retval += *it;
  87         }
  88         return retval;
  89 }
  90
// Converts a string containing LaTeX commands into unicode for display.
//
// The input is consumed one character at a time from the front:
//   - text between unescaped '$' signs is copied verbatim, including the
//     '$' delimiters themselves;
//   - braces outside math are dropped;
//   - on a backslash, Encodings::fromLaTeXCommand() is given the rest of
//     the text; if it returns something non-empty, that is appended and
//     scanning continues with the remainder it reports;
//   - otherwise the backslash is dropped and either the alphabetic
//     command name following it is discarded, or the single
//     non-alphabetic character following it is output as is.
// NOTE(review): every step copies the rest of the string via substr(1),
// so the work grows quadratically with the input length; presumably
// acceptable for short bibliography fields.
docstring convertLaTeXCommands(docstring const & str)
{
        docstring val = str;
        docstring ret;

        // State of the scanner; all three flags persist across iterations.
        bool scanning_cmd = false;
        bool scanning_math = false;
        bool escaped = false; // used to catch \$, etc.
        while (val.size()) {
                char_type const ch = val[0];

                // if we're scanning math, we output everything until we
                // find an unescaped $, at which point we break out.
                // The closing $ itself is still output below.
                if (scanning_math) {
                        if (escaped)
                                escaped = false;
                        else if (ch == '\\')
                                escaped = true;
                        else if (ch == '$')
                                scanning_math = false;
                        ret += ch;
                        val = val.substr(1);
                        continue;
                }

                // if we're scanning a command name, then we just
                // discard characters until we hit something that
                // isn't alpha.
                if (scanning_cmd) {
                        if (isAlphaASCII(ch)) {
                                val = val.substr(1);
                                // a letter follows the backslash, so this
                                // is not an escaped single character
                                escaped = false;
                                continue;
                        }
                        // so we're done with this command.
                        // now we fall through and check this character.
                        scanning_cmd = false;
                }

                // was the last character a \? If so, then this is something like: \\,
                // or \$, so we'll just output it. That's probably not always right...
                if (escaped) {
                        ret += ch;
                        val = val.substr(1);
                        escaped = false;
                        continue;
                }

                // an unescaped $ opens math mode; the $ itself is kept
                if (ch == '$') {
                        ret += ch;
                        val = val.substr(1);
                        scanning_math = true;
                        continue;
                }

                // we just ignore braces
                if (ch == '{' || ch == '}') {
                        val = val.substr(1);
                        continue;
                }

                // we're going to check things that look like commands, so if
                // this doesn't, just output it.
                if (ch != '\\') {
                        ret += ch;
                        val = val.substr(1);
                        continue;
                }

                // ok, could be a command of some sort
                // let's see if it corresponds to some unicode
                // unicodesymbols has things in the form: \"{u},
                // whereas we may see things like: \"u. So we'll
                // look for that and change it, if necessary.
                // The pattern matches, at the start of val, a backslash
                // followed by a non-word character and a word character.
                static boost::regex const reg("^\\\\\\W\\w");
                if (boost::regex_search(to_utf8(val), reg)) {
                        // insert the closing brace first, so that index 2
                        // still refers to the character to be wrapped
                        val.insert(3, from_ascii("}"));
                        val.insert(2, from_ascii("{"));
                }
                docstring rem;
                docstring const cnvtd = Encodings::fromLaTeXCommand(val, rem,
                                                        Encodings::TEXT_CMD);
                if (!cnvtd.empty()) {
                        // it did, so we'll take that bit and proceed with what's left
                        ret += cnvtd;
                        val = rem;
                        continue;
                }
                // it's a command of some sort
                // Drop the backslash; the flags make the next iterations
                // either skip the command name or output one escaped
                // character.
                scanning_cmd = true;
                escaped = true;
                val = val.substr(1);
        }
        return ret;
}
 188
 189 } // anon namespace
 190
 191
 192 //////////////////////////////////////////////////////////////////////
 193 //
 194 // BibTeXInfo
 195 //
 196 //////////////////////////////////////////////////////////////////////
 197
 198 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
 199         : is_bibtex_(true), bib_key_(key), entry_type_(type), info_()
 200 {}
 201
 202
 203 bool BibTeXInfo::hasField(docstring const & field) const
 204 {
 205         return count(field) == 1;
 206 }
 207
 208
 209 docstring const BibTeXInfo::getAbbreviatedAuthor() const
 210 {
 211         if (!is_bibtex_) {
 212                 docstring const opt = label();
 213                 if (opt.empty())
 214                         return docstring();
 215
 216                 docstring authors;
 217                 split(opt, authors, '(');
 218                 return authors;
 219         }
 220
 221         docstring author = convertLaTeXCommands(operator[]("author"));
 222         if (author.empty()) {
 223                 author = convertLaTeXCommands(operator[]("editor"));
 224                 if (author.empty())
 225                         return bib_key_;
 226         }
 227
 228         // OK, we've got some names. Let's format them.
 229         // Try to split the author list on " and "
 230         vector<docstring> const authors =
 231                 getVectorFromString(author, from_ascii(" and "));
 232
 233         if (authors.size() == 2)
 234                 return bformat(_("%1$s and %2$s"),
 235                         familyName(authors[0]), familyName(authors[1]));
 236
 237         if (authors.size() > 2)
 238                 return bformat(_("%1$s et al."), familyName(authors[0]));
 239
 240         return familyName(authors[0]);
 241 }
 242
 243
 244 docstring const BibTeXInfo::getYear() const
 245 {
 246         if (is_bibtex_)
 247                 return operator[]("year");
 248
 249         docstring const opt = label();
 250         if (opt.empty())
 251                 return docstring();
 252
 253         docstring authors;
 254         docstring const tmp = split(opt, authors, '(');
 255         docstring year;
 256         split(tmp, year, ')');
 257         return year;
 258 }
 259
 260
 261 docstring const BibTeXInfo::getXRef() const
 262 {
 263         if (!is_bibtex_)
 264                 return docstring();
 265         return operator[]("crossref");
 266 }
 267
 268
 269 docstring const & BibTeXInfo::getInfo(BibTeXInfo const * const xref) const
 270 {
 271         if (!info_.empty())
 272                 return info_;
 273
 274         if (!is_bibtex_) {
 275                 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
 276                 info_ = it->second;
 277                 return info_;
 278         }
 279
 280         // FIXME
 281         // This could be made a lot better using the entry_type_
 282         // field to customize the output based upon entry type.
 283
 284         // Search for all possible "required" fields
 285         docstring author = getValueForKey("author", xref);
 286         if (author.empty())
 287                 author = getValueForKey("editor", xref);
 288
 289         docstring year   = getValueForKey("year", xref);
 290         docstring title  = getValueForKey("title", xref);
 291         docstring docLoc = getValueForKey("pages", xref);
 292         if (docLoc.empty()) {
 293                 docLoc = getValueForKey("chapter", xref);
 294                 if (!docLoc.empty())
 295                         docLoc = _("Ch. ") + docLoc;
 296         }       else {
 297                 docLoc = _("pp. ") + docLoc;
 298         }
 299
 300         docstring media = getValueForKey("journal", xref);
 301         if (media.empty()) {
 302                 media = getValueForKey("publisher", xref);
 303                 if (media.empty()) {
 304                         media = getValueForKey("school", xref);
 305                         if (media.empty())
 306                                 media = getValueForKey("institution");
 307                 }
 308         }
 309         docstring volume = getValueForKey("volume", xref);
 310
 311         odocstringstream result;
 312         if (!author.empty())
 313                 result << author << ", ";
 314         if (!title.empty())
 315                 result << title;
 316         if (!media.empty())
 317                 result << ", " << media;
 318         if (!year.empty())
 319                 result << " (" << year << ")";
 320         if (!docLoc.empty())
 321                 result << ", " << docLoc;
 322
 323         docstring const result_str = rtrim(result.str());
 324         if (!result_str.empty()) {
 325                 info_ = convertLaTeXCommands(result_str);
 326                 return info_;
 327         }
 328
 329         // This should never happen (or at least be very unusual!)
 330         static docstring e = docstring();
 331         return e;
 332 }
 333
 334
 335 docstring const & BibTeXInfo::operator[](docstring const & field) const
 336 {
 337         BibTeXInfo::const_iterator it = find(field);
 338         if (it != end())
 339                 return it->second;
 340         static docstring const empty_value = docstring();
 341         return empty_value;
 342 }
 343
 344
 345 docstring const & BibTeXInfo::operator[](string const & field) const
 346 {
 347         return operator[](from_ascii(field));
 348 }
 349
 350
 351 docstring BibTeXInfo::getValueForKey(string const & key,
 352                 BibTeXInfo const * const xref) const
 353 {
 354         docstring const ret = operator[](key);
 355         if (!ret.empty() || !xref)
 356                 return ret;
 357         return (*xref)[key];
 358 }
 359
 360
 361 //////////////////////////////////////////////////////////////////////
 362 //
 363 // BiblioInfo
 364 //
 365 //////////////////////////////////////////////////////////////////////
 366
 367 namespace {
 368 // A functor for use with sort, leading to case insensitive sorting
 369         class compareNoCase: public binary_function<docstring, docstring, bool>
 370         {
 371                 public:
 372                         bool operator()(docstring const & s1, docstring const & s2) const {
 373                                 return compare_no_case(s1, s2) < 0;
 374                         }
 375         };
 376 } // namespace anon
 377
 378
 379 vector<docstring> const BiblioInfo::getKeys() const
 380 {
 381         vector<docstring> bibkeys;
 382         BiblioInfo::const_iterator it  = begin();
 383         for (; it != end(); ++it)
 384                 bibkeys.push_back(it->first);
 385         sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
 386         return bibkeys;
 387 }
 388
 389
 390 vector<docstring> const BiblioInfo::getFields() const
 391 {
 392         vector<docstring> bibfields;
 393         set<docstring>::const_iterator it = field_names_.begin();
 394         set<docstring>::const_iterator end = field_names_.end();
 395         for (; it != end; ++it)
 396                 bibfields.push_back(*it);
 397         sort(bibfields.begin(), bibfields.end());
 398         return bibfields;
 399 }
 400
 401
 402 vector<docstring> const BiblioInfo::getEntries() const
 403 {
 404         vector<docstring> bibentries;
 405         set<docstring>::const_iterator it = entry_types_.begin();
 406         set<docstring>::const_iterator end = entry_types_.end();
 407         for (; it != end; ++it)
 408                 bibentries.push_back(*it);
 409         sort(bibentries.begin(), bibentries.end());
 410         return bibentries;
 411 }
 412
 413
 414 docstring const BiblioInfo::getAbbreviatedAuthor(docstring const & key) const
 415 {
 416         BiblioInfo::const_iterator it = find(key);
 417         if (it == end())
 418                 return docstring();
 419         BibTeXInfo const & data = it->second;
 420         return data.getAbbreviatedAuthor();
 421 }
 422
 423
 424 docstring const BiblioInfo::getYear(docstring const & key) const
 425 {
 426         BiblioInfo::const_iterator it = find(key);
 427         if (it == end())
 428                 return docstring();
 429         BibTeXInfo const & data = it->second;
 430         docstring year = data.getYear();
 431         if (!year.empty())
 432                 return year;
 433         // let's try the crossref
 434         docstring const xref = data.getXRef();
 435         if (xref.empty())
 436                 return _("No year"); // no luck
 437         BiblioInfo::const_iterator const xrefit = find(xref);
 438         if (xrefit == end())
 439                 return _("No year"); // no luck again
 440         BibTeXInfo const & xref_data = xrefit->second;
 441         return xref_data.getYear();
 442         return data.getYear();
 443 }
 444
 445
 446 docstring const BiblioInfo::getInfo(docstring const & key) const
 447 {
 448         BiblioInfo::const_iterator it = find(key);
 449         if (it == end())
 450                 return docstring();
 451         BibTeXInfo const & data = it->second;
 452         BibTeXInfo const * xrefptr = 0;
 453         docstring const xref = data.getXRef();
 454         if (!xref.empty()) {
 455                 BiblioInfo::const_iterator const xrefit = find(xref);
 456                 if (xrefit != end())
 457                         xrefptr = &(xrefit->second);
 458         }
 459         return data.getInfo(xrefptr);
 460 }
 461
 462
 463 vector<docstring> const BiblioInfo::getCiteStrings(
 464         docstring const & key, Buffer const & buf) const
 465 {
 466         CiteEngine const engine = buf.params().citeEngine();
 467         if (engine == ENGINE_BASIC || engine == ENGINE_NATBIB_NUMERICAL)
 468                 return getNumericalStrings(key, buf);
 469         else
 470                 return getAuthorYearStrings(key, buf);
 471 }
 472
 473
 474 vector<docstring> const BiblioInfo::getNumericalStrings(
 475         docstring const & key, Buffer const & buf) const
 476 {
 477         if (empty())
 478                 return vector<docstring>();
 479
 480         docstring const author = getAbbreviatedAuthor(key);
 481         docstring const year   = getYear(key);
 482         if (author.empty() || year.empty())
 483                 return vector<docstring>();
 484
 485         vector<CiteStyle> const & styles = citeStyles(buf.params().citeEngine());
 486
 487         vector<docstring> vec(styles.size());
 488         for (size_t i = 0; i != vec.size(); ++i) {
 489                 docstring str;
 490
 491                 switch (styles[i]) {
 492                         case CITE:
 493                         case CITEP:
 494                                 str = from_ascii("[#ID]");
 495                                 break;
 496
 497                         case NOCITE:
 498                                 str = _("Add to bibliography only.");
 499                                 break;
 500
 501                         case CITET:
 502                                 str = author + " [#ID]";
 503                                 break;
 504
 505                         case CITEALT:
 506                                 str = author + " #ID";
 507                                 break;
 508
 509                         case CITEALP:
 510                                 str = from_ascii("#ID");
 511                                 break;
 512
 513                         case CITEAUTHOR:
 514                                 str = author;
 515                                 break;
 516
 517                         case CITEYEAR:
 518                                 str = year;
 519                                 break;
 520
 521                         case CITEYEARPAR:
 522                                 str = '(' + year + ')';
 523                                 break;
 524                 }
 525
 526                 vec[i] = str;
 527         }
 528
 529         return vec;
 530 }
 531
 532
 533 vector<docstring> const BiblioInfo::getAuthorYearStrings(
 534         docstring const & key, Buffer const & buf) const
 535 {
 536         if (empty())
 537                 return vector<docstring>();
 538
 539         docstring const author = getAbbreviatedAuthor(key);
 540         docstring const year   = getYear(key);
 541         if (author.empty() || year.empty())
 542                 return vector<docstring>();
 543
 544         vector<CiteStyle> const & styles = citeStyles(buf.params().citeEngine());
 545
 546         vector<docstring> vec(styles.size());
 547         for (size_t i = 0; i != vec.size(); ++i) {
 548                 docstring str;
 549
 550                 switch (styles[i]) {
 551                         case CITE:
 552                 // jurabib only: Author/Annotator
 553                 // (i.e. the "before" field, 2nd opt arg)
 554                                 str = author + "/<" + _("before") + '>';
 555                                 break;
 556
 557                         case NOCITE:
 558                                 str = _("Add to bibliography only.");
 559                                 break;
 560
 561                         case CITET:
 562                                 str = author + " (" + year + ')';
 563                                 break;
 564
 565                         case CITEP:
 566                                 str = '(' + author + ", " + year + ')';
 567                                 break;
 568
 569                         case CITEALT:
 570                                 str = author + ' ' + year ;
 571                                 break;
 572
 573                         case CITEALP:
 574                                 str = author + ", " + year ;
 575                                 break;
 576
 577                         case CITEAUTHOR:
 578                                 str = author;
 579                                 break;
 580
 581                         case CITEYEAR:
 582                                 str = year;
 583                                 break;
 584
 585                         case CITEYEARPAR:
 586                                 str = '(' + year + ')';
 587                                 break;
 588                 }
 589                 vec[i] = str;
 590         }
 591         return vec;
 592 }
 593
 594
 595 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
 596 {
 597         bimap_.insert(info.begin(), info.end());
 598 }
 599
 600
 601 //////////////////////////////////////////////////////////////////////
 602 //
 603 // CitationStyle
 604 //
 605 //////////////////////////////////////////////////////////////////////
 606
 607 namespace {
 608
 609
 610 char const * const citeCommands[] = {
 611         "cite", "citet", "citep", "citealt", "citealp",
 612         "citeauthor", "citeyear", "citeyearpar", "nocite" };
 613
 614 unsigned int const nCiteCommands =
 615                 sizeof(citeCommands) / sizeof(char *);
 616
 617 CiteStyle const citeStylesArray[] = {
 618         CITE, CITET, CITEP, CITEALT, CITEALP,
 619         CITEAUTHOR, CITEYEAR, CITEYEARPAR, NOCITE };
 620
 621 unsigned int const nCiteStyles =
 622                 sizeof(citeStylesArray) / sizeof(CiteStyle);
 623
 624 CiteStyle const citeStylesFull[] = {
 625         CITET, CITEP, CITEALT, CITEALP, CITEAUTHOR };
 626
 627 unsigned int const nCiteStylesFull =
 628                 sizeof(citeStylesFull) / sizeof(CiteStyle);
 629
 630 CiteStyle const citeStylesUCase[] = {
 631         CITET, CITEP, CITEALT, CITEALP, CITEAUTHOR };
 632
 633 unsigned int const nCiteStylesUCase =
 634         sizeof(citeStylesUCase) / sizeof(CiteStyle);
 635
 636 } // namespace anon
 637
 638
 639 CitationStyle citationStyleFromString(string const & command)
 640 {
 641         CitationStyle s;
 642         if (command.empty())
 643                 return s;
 644
 645         string cmd = command;
 646         if (cmd[0] == 'C') {
 647                 s.forceUpperCase = true;
 648                 cmd[0] = 'c';
 649         }
 650
 651         size_t const n = cmd.size() - 1;
 652         if (cmd != "cite" && cmd[n] == '*') {
 653                 s.full = true;
 654                 cmd = cmd.substr(0, n);
 655         }
 656
 657         char const * const * const last = citeCommands + nCiteCommands;
 658         char const * const * const ptr = find(citeCommands, last, cmd);
 659
 660         if (ptr != last) {
 661                 size_t idx = ptr - citeCommands;
 662                 s.style = citeStylesArray[idx];
 663         }
 664         return s;
 665 }
 666
 667
 668 string citationStyleToString(const CitationStyle & s)
 669 {
 670         string cite = citeCommands[s.style];
 671         if (s.full) {
 672                 CiteStyle const * last = citeStylesFull + nCiteStylesFull;
 673                 if (std::find(citeStylesFull, last, s.style) != last)
 674                         cite += '*';
 675         }
 676
 677         if (s.forceUpperCase) {
 678                 CiteStyle const * last = citeStylesUCase + nCiteStylesUCase;
 679                 if (std::find(citeStylesUCase, last, s.style) != last)
 680                         cite[0] = 'C';
 681         }
 682
 683         return cite;
 684 }
 685
 686 vector<CiteStyle> citeStyles(CiteEngine engine)
 687 {
 688         unsigned int nStyles = 0;
 689         unsigned int start = 0;
 690
 691         switch (engine) {
 692                 case ENGINE_BASIC:
 693                         nStyles = 2;
 694                         start = 0;
 695                         break;
 696                 case ENGINE_NATBIB_AUTHORYEAR:
 697                 case ENGINE_NATBIB_NUMERICAL:
 698                         nStyles = nCiteStyles - 1;
 699                         start = 1;
 700                         break;
 701                 case ENGINE_JURABIB:
 702                         nStyles = nCiteStyles;
 703                         start = 0;
 704                         break;
 705         }
 706
 707         vector<CiteStyle> styles(nStyles);
 708         size_t i = 0;
 709         int j = start;
 710         for (; i != styles.size(); ++i, ++j)
 711                 styles[i] = citeStylesArray[j];
 712
 713         return styles;
 714 }
 715
 716 } // namespace lyx
 717