3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Alejandro Aguilar Sierra
7 * \author Lars Gullik Bjønnes
8 * \author Jean-Marc Lasgouttes
11 * Full author contact details are available in file CREDITS.
18 #include "support/convert.h"
19 #include "support/debug.h"
20 #include "support/FileName.h"
21 #include "support/filetools.h"
22 #include "support/gzstream.h"
23 #include "support/lstrings.h"
24 #include "support/lyxalgo.h"
25 #include "support/types.h"
33 using namespace lyx::support
;
37 //////////////////////////////////////////////////////////////////////
41 //////////////////////////////////////////////////////////////////////
48 Pimpl(LexerKeyword
* tab
, int num
);
50 string
const getString() const;
52 docstring
const getDocString() const;
54 void printError(string
const & message
) const;
56 void printTable(ostream
& os
);
58 void pushTable(LexerKeyword
* tab
, int num
);
62 bool setFile(FileName
const & filename
);
64 void setStream(istream
& i
);
66 void setCommentChar(char c
);
68 bool next(bool esc
= false);
70 int searchKeyword(char const * const tag
) const;
77 /// test if there is a pushed token or the stream is ok
78 bool inputAvailable();
80 void pushToken(string
const &);
81 /// fb_ is only used to open files, the stream is accessed through is.
84 /// gz_ is only used to open files, the stream is accessed through is.
87 /// the stream that we use.
105 /// used for error messages
109 Pimpl(Pimpl
const &);
110 void operator=(Pimpl
const &);
119 : table_elem(0), table_siz(0) {}
121 PushedTable(LexerKeyword
* ki
, int siz
)
122 : table_elem(ki
), table_siz(siz
) {}
124 LexerKeyword
* table_elem
;
129 stack
<PushedTable
> pushed
;
137 : public binary_function
<LexerKeyword
, LexerKeyword
, bool> {
139 // used by lower_bound, sort and sorted
140 bool operator()(LexerKeyword
const & a
, LexerKeyword
const & b
) const
142 // we use the ascii version, because in turkish, 'i'
143 // is not the lowercase version of 'I', and thus
144 // turkish locale breaks parsing of tags.
145 return compare_ascii_no_case(a
.tag
, b
.tag
) < 0;
149 } // end of anon namespace
/// Construct the lexer implementation for the keyword table \p tab with
/// \p num entries. The stream member `is` is constructed from the address
/// of fb_, status and lineno start at 0, and '#' is the initial comment
/// character.
152 Lexer::Pimpl::Pimpl(LexerKeyword
* tab
, int num
)
153 : is(&fb_
), table(tab
), no_items(num
),
154 status(0), lineno(0), commentChar('#')
160 string
const Lexer::Pimpl::getString() const
/// Return the token buffer (buff) converted with from_utf8().
166 docstring
const Lexer::Pimpl::getDocString() const
168 return from_utf8(buff
);
/// Write a diagnostic to lyxerr.
/// \param message text of the diagnostic; the placeholder "$$Token" is
///        substituted with the current token (getString()).
/// The output also carries the line number, the display path of the file
/// name, the current token and the context string.
172 void Lexer::Pimpl::printError(string
const & message
) const
174 string
const tmpmsg
= subst(message
, "$$Token", getString());
175 lyxerr
<< "LyX: " << tmpmsg
<< " [around line " << lineno
176 << " of file " << to_utf8(makeDisplayPath(name
))
177 << " current token: '" << getString() << "'"
178 << " context: '" << context
<< "']" << endl
;
/// Dump the keyword table to \p os: first the number of tags (no_items),
/// then the tag and code of every entry, one entry per line.
182 void Lexer::Pimpl::printTable(ostream
& os
)
184 os
<< "\nNumber of tags: " << no_items
<< endl
;
185 for (int i
= 0; i
< no_items
; ++i
)
187 << "]: tag: `" << table
[i
].tag
188 << "' code:" << table
[i
].code
<< '\n';
/// Ensure the keyword table is ordered according to CompareTags.
/// An unsorted table is reported on lyxerr and then sorted in place, so
/// that lookups relying on the ordering keep working.
193 void Lexer::Pimpl::verifyTable()
195 // Check if the table is sorted and if not, sort it.
197 && !lyx::sorted(table
, table
+ no_items
, CompareTags())) {
198 lyxerr
<< "The table passed to Lexer is not sorted!\n"
199 << "Tell the developers to fix it!" << endl
;
200 // We sort it anyway to avoid problems.
201 lyxerr
<< "\nUnsorted:" << endl
;
204 sort(table
, table
+ no_items
, CompareTags());
205 lyxerr
<< "\nSorted:" << endl
;
/// Capture the current table and no_items in a PushedTable before the new
/// table \p tab / \p num is handled.
// NOTE(review): presumably the captured entry is pushed so that popTable()
// can restore it; the remaining statements are not visible here - confirm.
211 void Lexer::Pimpl::pushTable(LexerKeyword
* tab
, int num
)
213 PushedTable
tmppu(table
, no_items
);
/// Restore table and no_items from the entry on top of the `pushed` stack.
/// An empty stack is reported on lyxerr.
223 void Lexer::Pimpl::popTable()
225 if (pushed
.empty()) {
226 lyxerr
<< "Lexer error: nothing to pop!" << endl
;
230 PushedTable tmp
= pushed
.top();
232 table
= tmp
.table_elem
;
233 no_items
= tmp
.table_siz
;
/// Open \p filename for lexing.
/// The format is guessed from the file contents: "gzip", "zip" and
/// "compress" are opened through gz_, everything else through fb_.
/// name is set to the absolute file name in both cases.
/// \return true when the buffer that was opened is open and is.good().
237 bool Lexer::Pimpl::setFile(FileName
const & filename
)
239 // Check the format of the file.
240 string
const format
= filename
.guessFormatFromContents();
242 if (format
== "gzip" || format
== "zip" || format
== "compress") {
243 LYXERR(Debug::LYXLEX
, "lyxlex: compressed");
244 // The check only outputs a debug message, because it triggers
245 // a bug in compaq cxx 6.2, where is_open() returns 'true' for
246 // a fresh new filebuf. (JMarc)
// NOTE(review): this branch tests tellg() > -1 and names "LyXLex::setFile"
// in its message, while the uncompressed branch below tests tellg() > 0
// and names "Lexer::setFile" - confirm whether the difference is intended.
247 if (gz_
.is_open() || istream::off_type(is
.tellg()) > -1)
248 LYXERR(Debug::LYXLEX
, "Error in LyXLex::setFile: "
249 "file or stream already set.");
250 gz_
.open(filename
.toFilesystemEncoding().c_str(), ios::in
);
252 name
= filename
.absFilename();
254 return gz_
.is_open() && is
.good();
256 LYXERR(Debug::LYXLEX
, "lyxlex: UNcompressed");
258 // The check only outputs a debug message, because it triggers
259 // a bug in compaq cxx 6.2, where is_open() returns 'true' for
260 // a fresh new filebuf. (JMarc)
261 if (fb_
.is_open() || istream::off_type(is
.tellg()) > 0) {
262 LYXERR(Debug::LYXLEX
, "Error in Lexer::setFile: "
263 "file or stream already set.");
265 fb_
.open(filename
.toFilesystemEncoding().c_str(), ios::in
);
267 name
= filename
.absFilename();
269 return fb_
.is_open() && is
.good();
/// Set the input stream from \p i.
/// A Debug::LYXLEX message is logged when fb_ is already open or the
/// current stream position is past 0.
// NOTE(review): the statements that actually adopt \p i are not visible
// in this excerpt.
274 void Lexer::Pimpl::setStream(istream
& i
)
276 if (fb_
.is_open() || istream::off_type(is
.tellg()) > 0) {
277 LYXERR(Debug::LYXLEX
, "Error in Lexer::setStream: "
278 "file or stream already set.");
285 void Lexer::Pimpl::setCommentChar(char c
)
291 bool Lexer::Pimpl::next(bool esc
/* = false */)
293 if (!pushTok
.empty()) {
294 // A whole line may have been pushed, so
295 // we extract the first word and leave the rest
297 if (pushTok
[0] == '\\' && pushTok
.find(' ') != string::npos
) {
299 pushTok
= split(pushTok
, buff
, ' ');
309 unsigned char c
= 0; // getc() returns an int
312 while (is
&& !status
) {
316 if (c
== commentChar
) {
317 // Read rest of line (fast :-)
319 // That is not fast... (Lgb)
323 LYXERR(Debug::LYXLEX
, "Comment read: `" << c
<< dummy
<< '\'');
325 // unfortunately ignore is buggy (Lgb)
326 is
.ignore(100, '\n');
337 bool escaped
= false;
342 if (c
== '\r') continue;
344 // escape the next char
347 if (c
== '\"' || c
== '\\')
350 buff
.push_back('\\');
354 if (!escaped
&& c
== '\"')
356 } while (c
!= '\n' && is
);
365 } while (c
!= '\"' && c
!= '\n' && is
);
370 printError("Missing quote");
375 buff
.resize(buff
.size() - 1);
381 continue; /* Skip ','s */
383 // using relational operators with chars other
384 // than == and != is not safe. And if it is done
385 // the type _has_ to be unsigned. It is usually a
386 // lot better to use the functions from cctype
391 if (esc
&& c
== '\\') {
392 // escape the next char
400 } while (c
> ' ' && c
!= ',' && is
);
404 if (c
== '\r' && is
) {
405 // The Windows support has led to the
406 // possibility of "\r\n" at the end of
407 // a line. This will stop LyX choking
408 // when it expected to find a '\n'
420 status
= is
.eof() ? LEX_FEOF
: LEX_UNDEF
;
/// Look up \p tag in the keyword table.
/// The candidate is located with lower_bound() ordered by CompareTags
/// (which requires a sorted table) and is then confirmed with an ASCII
/// case-insensitive comparison against \p tag.
// NOTE(review): the return statements are not visible in this excerpt.
426 int Lexer::Pimpl::searchKeyword(char const * const tag
) const
428 LexerKeyword search_tag
= { tag
, 0 };
430 lower_bound(table
, table
+ no_items
,
431 search_tag
, CompareTags());
432 // use the compare_ascii_no_case instead of compare_no_case,
433 // because in turkish, 'i' is not the lowercase version of 'I',
434 // and thus turkish locale breaks parsing of tags.
435 if (res
!= table
+ no_items
436 && !compare_ascii_no_case(res
->tag
, tag
))
/// Read the next token with next(); when that succeeds and status is
/// LEX_TOKEN, the token text is translated through searchKeyword().
// NOTE(review): the fall-through return is not visible in this excerpt.
442 int Lexer::Pimpl::lex()
444 //NOTE: possible bug.
445 if (next() && status
== LEX_TOKEN
)
446 return searchKeyword(getString().c_str());
451 bool Lexer::Pimpl::eatLine()
455 unsigned char c
= '\0';
457 while (is
&& c
!= '\n') {
460 //LYXERR(Debug::LYXLEX, "Lexer::EatLine read char: `" << c << '\'');
467 buff
.resize(buff
.size() - 1);
470 } else if (buff
.length() > 0) { // last line
479 bool Lexer::Pimpl::nextToken()
481 if (!pushTok
.empty()) {
482 // A whole line may have been pushed, so
483 // we extract the first word and leave the rest
485 if (pushTok
[0] == '\\' && pushTok
.find(' ') != string::npos
) {
487 pushTok
= split(pushTok
, buff
, ' ');
497 while (is
&& !status
) {
502 if (c
>= ' ' && is
) {
505 if (c
== '\\') { // first char == '\\'
510 } while (c
> ' ' && c
!= '\\' && is
);
516 } while (c
>= ' ' && c
!= '\\' && is
);
520 is
.putback(c
); // put it back
531 status
= is
.eof() ? LEX_FEOF
: LEX_UNDEF
;
537 bool Lexer::Pimpl::inputAvailable()
543 void Lexer::Pimpl::pushToken(string
const & pt
)
551 //////////////////////////////////////////////////////////////////////
555 //////////////////////////////////////////////////////////////////////
558 : pimpl_(new Pimpl(0, 0))
/// Create a new Pimpl for the keyword table \p tab with \p num entries and
/// store it in pimpl_.
// NOTE(review): the previous value of pimpl_ is overwritten without being
// released in the visible code, while the constructor above allocates a
// Pimpl(0, 0) - confirm who owns the old object when init() is called.
562 void Lexer::init(LexerKeyword
* tab
, int num
)
564 pimpl_
= new Pimpl(tab
, num
);
/// Report whether input is available, as answered by
/// Pimpl::inputAvailable().
574 bool Lexer::isOK() const
576 return pimpl_
->inputAvailable();
580 void Lexer::setLineNumber(int l
)
/// Return the line counter (lineno) kept by the implementation object.
586 int Lexer::lineNumber() const
588 return pimpl_
->lineno
;
592 istream
& Lexer::getStream()
/// Forward to Pimpl::pushTable() with the keyword table \p tab and its
/// entry count \p num.
598 void Lexer::pushTable(LexerKeyword
* tab
, int num
)
600 pimpl_
->pushTable(tab
, num
);
604 void Lexer::popTable()
/// Forward to Pimpl::printTable(), which writes the keyword table to \p os.
610 void Lexer::printTable(ostream
& os
)
612 pimpl_
->printTable(os
);
/// Forward \p message to Pimpl::printError(), which reports it on lyxerr.
616 void Lexer::printError(string
const & message
) const
618 pimpl_
->printError(message
);
/// Forward to Pimpl::setFile() for \p filename.
/// \return the result of Pimpl::setFile().
622 bool Lexer::setFile(FileName
const & filename
)
624 return pimpl_
->setFile(filename
);
/// Forward the stream \p i to Pimpl::setStream().
628 void Lexer::setStream(istream
& i
)
630 pimpl_
->setStream(i
);
/// Forward the comment character \p c to Pimpl::setCommentChar().
634 void Lexer::setCommentChar(char c
)
636 pimpl_
->setCommentChar(c
);
642 return pimpl_
->lex();
/// Interpret the current token as an integer.
/// lastReadOk_ records whether status is LEX_DATA or LEX_TOKEN. A token
/// accepted by isStrInt() is converted with convert<int>; the failure
/// paths report "integer token missing" or "Bad integer" via printError().
// NOTE(review): the values returned on the failure paths are not visible
// in this excerpt.
646 int Lexer::getInteger() const
648 lastReadOk_
= pimpl_
->status
== LEX_DATA
|| pimpl_
->status
== LEX_TOKEN
;
650 pimpl_
->printError("integer token missing");
654 if (isStrInt(pimpl_
->getString()))
655 return convert
<int>(pimpl_
->getString());
658 pimpl_
->printError("Bad integer `$$Token'");
/// Interpret the current token as a floating point number.
/// lastReadOk_ records whether status is LEX_DATA or LEX_TOKEN. Commas in
/// the token are replaced by dots before convert<double> is applied; the
/// failure paths report "float token missing" or "Bad float" via
/// printError().
// NOTE(review): the validity check guarding the conversion and the values
// returned on failure are not visible in this excerpt.
663 double Lexer::getFloat() const
665 // replace comma with dot in case the file was written with
666 // the wrong locale (should be rare, but is easy enough to
668 lastReadOk_
= pimpl_
->status
== LEX_DATA
|| pimpl_
->status
== LEX_TOKEN
;
670 pimpl_
->printError("float token missing");
674 string
const str
= subst(pimpl_
->getString(), ",", ".");
676 return convert
<double>(str
);
679 pimpl_
->printError("Bad float `$$Token'");
/// Return the current token as a string.
/// lastReadOk_ records whether status is LEX_DATA or LEX_TOKEN.
// NOTE(review): lines between the assignment and the return are missing
// from this excerpt, so the return shown may be conditional.
684 string
const Lexer::getString() const
686 lastReadOk_
= pimpl_
->status
== LEX_DATA
|| pimpl_
->status
== LEX_TOKEN
;
689 return pimpl_
->getString();
/// Return the current token as a docstring (via Pimpl::getDocString()).
/// lastReadOk_ records whether status is LEX_DATA or LEX_TOKEN.
// NOTE(review): lines between the assignment and the return are missing
// from this excerpt, so the return shown may be conditional.
695 docstring
const Lexer::getDocString() const
697 lastReadOk_
= pimpl_
->status
== LEX_DATA
|| pimpl_
->status
== LEX_TOKEN
;
700 return pimpl_
->getDocString();
706 // I would prefer to give a tag number instead of an explicit token
707 // here, but it is not possible because Buffer::readDocument uses
708 // explicit tokens (JMarc)
/// Collect lines from the stream until a line that equals \p endtoken
/// (after trimming spaces and tabs, compared ASCII case-insensitively).
/// The leading-space prefix of a line is remembered, and lines that start
/// with that prefix have it removed before being appended, each followed
/// by '\n'. If the stream ends first, "Long string not ended by ..." is
/// reported through printError().
709 string
const Lexer::getLongString(string
const & endtoken
)
713 bool firstline
= true;
715 while (pimpl_
->is
) { //< eatLine only reads from is, not from pushTok
717 // blank line in the file being read
720 string
const token
= trim(getString(), " \t");
722 LYXERR(Debug::PARSER
, "LongString: `" << getString() << '\'');
724 // We do a case independent comparison, like searchKeyword does.
725 if (compare_ascii_no_case(token
, endtoken
) == 0)
728 string tmpstr
= getString();
730 size_t i
= tmpstr
.find_first_not_of(' ');
731 if (i
!= string::npos
)
732 prefix
= tmpstr
.substr(0, i
);
734 LYXERR(Debug::PARSER
, "Prefix = `" << prefix
<< "\'");
737 // further lines in long strings may have the same
738 // whitespace prefix as the first line. Remove it.
// NOTE(review): erase() below removes prefix.length() - 1 characters, one
// fewer than the prefix, and the following ltrim() strips only tabs, so
// one prefix character appears to survive - confirm this is intended.
739 if (prefix
.length() && prefixIs(tmpstr
, prefix
))
740 tmpstr
.erase(0, prefix
.length() - 1);
742 str
+= ltrim(tmpstr
, "\t") + '\n';
746 printError("Long string not ended by `" + endtoken
+ '\'');
/// Interpret the current token as a boolean.
/// "false"/"0" and "true"/"1" are the recognised spellings; any other
/// token is reported through printError().
// NOTE(review): the branch bodies and return values are not visible in
// this excerpt.
752 bool Lexer::getBool() const
754 string
const s
= pimpl_
->getString();
755 if (s
== "false" || s
== "0") {
759 if (s
== "true" || s
== "1") {
763 pimpl_
->printError("Bad boolean `$$Token'. "
764 "Use \"false\" or \"true\"");
/// Forward to Pimpl::eatLine() and return its result.
770 bool Lexer::eatLine()
772 return pimpl_
->eatLine();
/// Forward to Pimpl::next(), passing \p esc through, and return its result.
776 bool Lexer::next(bool esc
)
778 return pimpl_
->next(esc
);
/// Forward to Pimpl::nextToken() and return its result.
782 bool Lexer::nextToken()
784 return pimpl_
->nextToken();
/// Forward the text \p pt to Pimpl::pushToken().
788 void Lexer::pushToken(string
const & pt
)
790 pimpl_
->pushToken(pt
);
/// Conversion used to test the lexer as a condition: yields `this` when
/// lastReadOk_ is set and 0 otherwise.
794 Lexer::operator void const *() const
796 // This behaviour is NOT the same as the streams which would
797 // use fail() here. However, our implementation of getString() et al.
798 // can cause the eof() and fail() bits to be set, even though we
799 // haven't tried to read 'em.
800 return lastReadOk_
? this : 0;
804 bool Lexer::operator!() const
810 Lexer
& Lexer::operator>>(string
& s
)
822 Lexer
& Lexer::operator>>(docstring
& s
)
834 Lexer
& Lexer::operator>>(double & s
)
846 Lexer
& Lexer::operator>>(int & s
)
858 Lexer
& Lexer::operator>>(unsigned int & s
)
870 Lexer
& Lexer::operator>>(bool & s
)
882 Lexer
& Lexer::operator>>(char & c
)
892 // quotes a string, e.g. for use in preferences files or as an argument
893 // of the "log" dialog
894 string
Lexer::quoteString(string
const & arg
)
898 res
+= subst(subst(arg
, "\\", "\\\\"), "\"", "\\\"");
904 Lexer
& Lexer::operator>>(char const * required
)
908 if (token
!= required
) {
909 LYXERR0("Missing '" << required
<< "'-tag in " << pimpl_
->context
910 << ". Got " << token
<< " instead. Line: " << lineNumber());
917 bool Lexer::checkFor(char const * required
)
921 if (token
== required
)
/// Store \p str as the context string of the implementation object; it is
/// the text Pimpl::printError() prints after "context:".
928 void Lexer::setContext(std::string
const & str
)
930 pimpl_
->context
= str
;