3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Alejandro Aguilar Sierra
7 * \author Lars Gullik Bjønnes
8 * \author Jean-Marc Lasgouttes
11 * Full author contact details are available in file CREDITS.
18 #include "support/convert.h"
19 #include "support/debug.h"
20 #include "support/FileName.h"
21 #include "support/filetools.h"
22 #include "support/gzstream.h"
23 #include "support/lassert.h"
24 #include "support/lstrings.h"
25 #include "support/lyxalgo.h"
26 #include "support/types.h"
34 using namespace lyx::support
;
38 //////////////////////////////////////////////////////////////////////
42 //////////////////////////////////////////////////////////////////////
49 Pimpl(LexerKeyword
* tab
, int num
);
51 string
const getString() const;
53 docstring
const getDocString() const;
55 void printError(string
const & message
) const;
57 void printTable(ostream
& os
);
59 void pushTable(LexerKeyword
* tab
, int num
);
63 bool setFile(FileName
const & filename
);
65 void setStream(istream
& i
);
67 void setCommentChar(char c
);
69 bool next(bool esc
= false);
71 int searchKeyword(char const * const tag
) const;
78 /// test if there is a pushed token or the stream is ok
79 bool inputAvailable();
81 void pushToken(string
const &);
82 /// fb_ is only used to open files, the stream is accessed through is.
85 /// gz_ is only used to open files, the stream is accessed through is.
88 /// the stream that we use.
106 /// used for error messages
110 Pimpl(Pimpl
const &);
111 void operator=(Pimpl
const &);
120 : table_elem(0), table_siz(0) {}
122 PushedTable(LexerKeyword
* ki
, int siz
)
123 : table_elem(ki
), table_siz(siz
) {}
125 LexerKeyword
* table_elem
;
130 stack
<PushedTable
> pushed
;
138 : public binary_function
<LexerKeyword
, LexerKeyword
, bool> {
140 // used by lower_bound, sort and sorted
141 bool operator()(LexerKeyword
const & a
, LexerKeyword
const & b
) const
143 // We use the ASCII version because, in Turkish, 'i'
144 // is not the lowercase version of 'I', and thus the
145 // Turkish locale breaks parsing of tags.
146 return compare_ascii_no_case(a
.tag
, b
.tag
) < 0;
150 } // end of anon namespace
153 Lexer::Pimpl::Pimpl(LexerKeyword
* tab
, int num
)
154 : is(&fb_
), table(tab
), no_items(num
),
155 status(0), lineno(0), commentChar('#')
161 string
const Lexer::Pimpl::getString() const
167 docstring
const Lexer::Pimpl::getDocString() const
169 return from_utf8(buff
);
173 void Lexer::Pimpl::printError(string
const & message
) const
175 string
const tmpmsg
= subst(message
, "$$Token", getString());
176 lyxerr
<< "LyX: " << tmpmsg
<< " [around line " << lineno
177 << " of file " << to_utf8(makeDisplayPath(name
))
178 << " current token: '" << getString() << "'"
179 << " context: '" << context
<< "']" << endl
;
183 void Lexer::Pimpl::printTable(ostream
& os
)
185 os
<< "\nNumber of tags: " << no_items
<< endl
;
186 for (int i
= 0; i
< no_items
; ++i
)
188 << "]: tag: `" << table
[i
].tag
189 << "' code:" << table
[i
].code
<< '\n';
194 void Lexer::Pimpl::verifyTable()
196 // Check if the table is sorted and if not, sort it.
198 && !lyx::sorted(table
, table
+ no_items
, CompareTags())) {
199 lyxerr
<< "The table passed to Lexer is not sorted!\n"
200 << "Tell the developers to fix it!" << endl
;
201 // We sort it anyway to avoid problems.
202 lyxerr
<< "\nUnsorted:" << endl
;
205 sort(table
, table
+ no_items
, CompareTags());
206 lyxerr
<< "\nSorted:" << endl
;
212 void Lexer::Pimpl::pushTable(LexerKeyword
* tab
, int num
)
214 PushedTable
tmppu(table
, no_items
);
224 void Lexer::Pimpl::popTable()
226 if (pushed
.empty()) {
227 lyxerr
<< "Lexer error: nothing to pop!" << endl
;
231 PushedTable tmp
= pushed
.top();
233 table
= tmp
.table_elem
;
234 no_items
= tmp
.table_siz
;
238 bool Lexer::Pimpl::setFile(FileName
const & filename
)
240 // Check the format of the file.
241 string
const format
= filename
.guessFormatFromContents();
243 if (format
== "gzip" || format
== "zip" || format
== "compress") {
244 LYXERR(Debug::LYXLEX
, "lyxlex: compressed");
245 // The check only outputs a debug message, because it triggers
246 // a bug in compaq cxx 6.2, where is_open() returns 'true' for
247 // a fresh new filebuf. (JMarc)
248 if (gz_
.is_open() || istream::off_type(is
.tellg()) > -1)
249 LYXERR(Debug::LYXLEX
, "Error in LyXLex::setFile: "
250 "file or stream already set.");
251 gz_
.open(filename
.toFilesystemEncoding().c_str(), ios::in
);
253 name
= filename
.absFilename();
255 if (!gz_
.is_open() || !is
.good())
258 LYXERR(Debug::LYXLEX
, "lyxlex: UNcompressed");
260 // The check only outputs a debug message, because it triggers
261 // a bug in compaq cxx 6.2, where is_open() returns 'true' for
262 // a fresh new filebuf. (JMarc)
263 if (fb_
.is_open() || istream::off_type(is
.tellg()) > 0) {
264 LYXERR(Debug::LYXLEX
, "Error in Lexer::setFile: "
265 "file or stream already set.");
267 fb_
.open(filename
.toFilesystemEncoding().c_str(), ios::in
);
269 name
= filename
.absFilename();
271 if (!fb_
.is_open() || !is
.good())
275 // Skip byte order mark.
276 if (is
.peek() == 0xef) {
278 if (is
.peek() == 0xbb) {
280 LASSERT(is
.get() == 0xbf, /**/);
289 void Lexer::Pimpl::setStream(istream
& i
)
291 if (fb_
.is_open() || istream::off_type(is
.tellg()) > 0) {
292 LYXERR(Debug::LYXLEX
, "Error in Lexer::setStream: "
293 "file or stream already set.");
300 void Lexer::Pimpl::setCommentChar(char c
)
306 bool Lexer::Pimpl::next(bool esc
/* = false */)
308 if (!pushTok
.empty()) {
309 // A whole line may have been pushed, so
310 // we extract the first word and leave the rest
312 if (pushTok
[0] == '\\' && pushTok
.find(' ') != string::npos
) {
314 pushTok
= split(pushTok
, buff
, ' ');
324 unsigned char c
= 0; // getc() returns an int
327 while (is
&& !status
) {
331 if (c
== commentChar
) {
332 // Read rest of line (fast :-)
334 // That is not fast... (Lgb)
338 LYXERR(Debug::LYXLEX
, "Comment read: `" << c
<< dummy
<< '\'');
340 // unfortunately ignore is buggy (Lgb)
341 is
.ignore(100, '\n');
352 bool escaped
= false;
357 if (c
== '\r') continue;
359 // escape the next char
362 if (c
== '\"' || c
== '\\')
365 buff
.push_back('\\');
369 if (!escaped
&& c
== '\"')
371 } while (c
!= '\n' && is
);
380 } while (c
!= '\"' && c
!= '\n' && is
);
385 printError("Missing quote");
390 buff
.resize(buff
.size() - 1);
396 continue; /* Skip ','s */
398 // Using relational operators with chars other
399 // than == and != is not safe. And if it is done,
400 // the type _has_ to be unsigned. It is usually a
401 // lot better to use the functions from cctype.
406 if (esc
&& c
== '\\') {
407 // escape the next char
415 } while (c
> ' ' && c
!= ',' && is
);
419 if (c
== '\r' && is
) {
420 // The Windows support has led to the
421 // possibility of "\r\n" at the end of
422 // a line. This will stop LyX choking
423 // when it expected to find a '\n'
435 status
= is
.eof() ? LEX_FEOF
: LEX_UNDEF
;
441 int Lexer::Pimpl::searchKeyword(char const * const tag
) const
443 LexerKeyword search_tag
= { tag
, 0 };
445 lower_bound(table
, table
+ no_items
,
446 search_tag
, CompareTags());
447 // Use compare_ascii_no_case instead of compare_no_case,
448 // because in Turkish, 'i' is not the lowercase version of 'I',
449 // and thus the Turkish locale breaks parsing of tags.
450 if (res
!= table
+ no_items
451 && !compare_ascii_no_case(res
->tag
, tag
))
457 int Lexer::Pimpl::lex()
459 //NOTE: possible bug.
460 if (next() && status
== LEX_TOKEN
)
461 return searchKeyword(getString().c_str());
466 bool Lexer::Pimpl::eatLine()
470 unsigned char c
= '\0';
472 while (is
&& c
!= '\n') {
475 //LYXERR(Debug::LYXLEX, "Lexer::EatLine read char: `" << c << '\'');
482 buff
.resize(buff
.size() - 1);
485 } else if (buff
.length() > 0) { // last line
494 bool Lexer::Pimpl::nextToken()
496 if (!pushTok
.empty()) {
497 // A whole line may have been pushed, so
498 // we extract the first word and leave the rest
500 if (pushTok
[0] == '\\' && pushTok
.find(' ') != string::npos
) {
502 pushTok
= split(pushTok
, buff
, ' ');
512 while (is
&& !status
) {
517 if ((c
>= ' ' || c
== '\t') && is
) {
520 if (c
== '\\') { // first char == '\\'
525 } while (c
> ' ' && c
!= '\\' && is
);
531 } while ((c
>= ' ' || c
== '\t') && c
!= '\\' && is
);
535 is
.putback(c
); // put it back
546 status
= is
.eof() ? LEX_FEOF
: LEX_UNDEF
;
552 bool Lexer::Pimpl::inputAvailable()
558 void Lexer::Pimpl::pushToken(string
const & pt
)
566 //////////////////////////////////////////////////////////////////////
570 //////////////////////////////////////////////////////////////////////
573 : pimpl_(new Pimpl(0, 0))
577 void Lexer::init(LexerKeyword
* tab
, int num
)
579 pimpl_
= new Pimpl(tab
, num
);
589 bool Lexer::isOK() const
591 return pimpl_
->inputAvailable();
595 void Lexer::setLineNumber(int l
)
601 int Lexer::lineNumber() const
603 return pimpl_
->lineno
;
607 istream
& Lexer::getStream()
613 void Lexer::pushTable(LexerKeyword
* tab
, int num
)
615 pimpl_
->pushTable(tab
, num
);
619 void Lexer::popTable()
625 void Lexer::printTable(ostream
& os
)
627 pimpl_
->printTable(os
);
631 void Lexer::printError(string
const & message
) const
633 pimpl_
->printError(message
);
637 bool Lexer::setFile(FileName
const & filename
)
639 return pimpl_
->setFile(filename
);
643 void Lexer::setStream(istream
& i
)
645 pimpl_
->setStream(i
);
649 void Lexer::setCommentChar(char c
)
651 pimpl_
->setCommentChar(c
);
657 return pimpl_
->lex();
661 int Lexer::getInteger() const
663 lastReadOk_
= pimpl_
->status
== LEX_DATA
|| pimpl_
->status
== LEX_TOKEN
;
665 pimpl_
->printError("integer token missing");
669 if (isStrInt(pimpl_
->getString()))
670 return convert
<int>(pimpl_
->getString());
673 pimpl_
->printError("Bad integer `$$Token'");
678 double Lexer::getFloat() const
680 // replace comma with dot in case the file was written with
681 // the wrong locale (should be rare, but is easy enough to
683 lastReadOk_
= pimpl_
->status
== LEX_DATA
|| pimpl_
->status
== LEX_TOKEN
;
685 pimpl_
->printError("float token missing");
689 string
const str
= subst(pimpl_
->getString(), ",", ".");
691 return convert
<double>(str
);
694 pimpl_
->printError("Bad float `$$Token'");
699 string
const Lexer::getString() const
701 lastReadOk_
= pimpl_
->status
== LEX_DATA
|| pimpl_
->status
== LEX_TOKEN
;
704 return pimpl_
->getString();
710 docstring
const Lexer::getDocString() const
712 lastReadOk_
= pimpl_
->status
== LEX_DATA
|| pimpl_
->status
== LEX_TOKEN
;
715 return pimpl_
->getDocString();
721 // I would prefer to give a tag number instead of an explicit token
722 // here, but it is not possible because Buffer::readDocument uses
723 // explicit tokens (JMarc)
724 string
const Lexer::getLongString(string
const & endtoken
)
728 bool firstline
= true;
730 while (pimpl_
->is
) { //< eatLine only reads from is, not from pushTok
732 // blank line in the file being read
735 string
const token
= trim(getString(), " \t");
737 LYXERR(Debug::PARSER
, "LongString: `" << getString() << '\'');
739 // We do a case independent comparison, like searchKeyword does.
740 if (compare_ascii_no_case(token
, endtoken
) == 0)
743 string tmpstr
= getString();
745 size_t i
= tmpstr
.find_first_not_of(' ');
746 if (i
!= string::npos
)
747 prefix
= tmpstr
.substr(0, i
);
749 LYXERR(Debug::PARSER
, "Prefix = `" << prefix
<< "\'");
752 // further lines in long strings may have the same
753 // whitespace prefix as the first line. Remove it.
754 if (prefix
.length() && prefixIs(tmpstr
, prefix
))
755 tmpstr
.erase(0, prefix
.length() - 1);
757 str
+= ltrim(tmpstr
, "\t") + '\n';
761 printError("Long string not ended by `" + endtoken
+ '\'');
767 bool Lexer::getBool() const
769 string
const s
= pimpl_
->getString();
770 if (s
== "false" || s
== "0") {
774 if (s
== "true" || s
== "1") {
778 pimpl_
->printError("Bad boolean `$$Token'. "
779 "Use \"false\" or \"true\"");
785 bool Lexer::eatLine()
787 return pimpl_
->eatLine();
791 bool Lexer::next(bool esc
)
793 return pimpl_
->next(esc
);
797 bool Lexer::nextToken()
799 return pimpl_
->nextToken();
803 void Lexer::pushToken(string
const & pt
)
805 pimpl_
->pushToken(pt
);
809 Lexer::operator void const *() const
811 // This behaviour is NOT the same as the streams which would
812 // use fail() here. However, our implementation of getString() et al.
813 // can cause the eof() and fail() bits to be set, even though we
814 // haven't tried to read 'em.
815 return lastReadOk_
? this : 0;
819 bool Lexer::operator!() const
825 Lexer
& Lexer::operator>>(string
& s
)
837 Lexer
& Lexer::operator>>(docstring
& s
)
849 Lexer
& Lexer::operator>>(double & s
)
861 Lexer
& Lexer::operator>>(int & s
)
873 Lexer
& Lexer::operator>>(unsigned int & s
)
885 Lexer
& Lexer::operator>>(bool & s
)
897 Lexer
& Lexer::operator>>(char & c
)
907 // quotes a string, e.g. for use in preferences files or as an argument
908 // of the "log" dialog
909 string
Lexer::quoteString(string
const & arg
)
913 res
+= subst(subst(arg
, "\\", "\\\\"), "\"", "\\\"");
919 // same for docstring
920 docstring
Lexer::quoteString(docstring
const & arg
)
924 res
+= subst(subst(arg
, from_ascii("\\"), from_ascii("\\\\")),
925 from_ascii("\""), from_ascii("\\\""));
931 Lexer
& Lexer::operator>>(char const * required
)
935 if (token
!= required
) {
936 LYXERR0("Missing '" << required
<< "'-tag in " << pimpl_
->context
937 << ". Got " << token
<< " instead. Line: " << lineNumber());
944 bool Lexer::checkFor(char const * required
)
948 if (token
== required
)
955 void Lexer::setContext(std::string
const & str
)
957 pimpl_
->context
= str
;