3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
8 * Full author contact details are available in file CREDITS.
24 CatCode theCatcode
[256];
28 static bool init_done
= false;
33 fill(theCatcode
, theCatcode
+ 256, catOther
);
34 fill(theCatcode
+ 'a', theCatcode
+ 'z' + 1, catLetter
);
35 fill(theCatcode
+ 'A', theCatcode
+ 'Z' + 1, catLetter
);
37 theCatcode
[int('\\')] = catEscape
;
38 theCatcode
[int('{')] = catBegin
;
39 theCatcode
[int('}')] = catEnd
;
40 theCatcode
[int('$')] = catMath
;
41 theCatcode
[int('&')] = catAlign
;
42 theCatcode
[int('\n')] = catNewline
;
43 theCatcode
[int('#')] = catParameter
;
44 theCatcode
[int('^')] = catSuper
;
45 theCatcode
[int('_')] = catSub
;
46 theCatcode
[0x7f] = catIgnore
;
47 theCatcode
[int(' ')] = catSpace
;
48 theCatcode
[int('\t')] = catSpace
;
49 theCatcode
[int('\r')] = catNewline
;
50 theCatcode
[int('~')] = catActive
;
51 theCatcode
[int('%')] = catComment
;
54 theCatcode
[int('@')] = catLetter
;
58 * Translate a line ending to '\n'.
59 * \p c must have catcode catNewline, and it must be the last character read
62 char_type
getNewline(idocstream
& is
, char_type c
)
64 // we have to handle 3 different line endings:
71 if (is
.get(wc
) && wc
!= '\n') {
81 CatCode
catcode(char_type c
)
84 return theCatcode
[(unsigned char)c
];
95 ostream
& operator<<(ostream
& os
, Token
const & t
)
97 if (t
.cat() == catComment
)
98 os
<< '%' << t
.cs() << '\n';
99 else if (t
.cat() == catSpace
)
101 else if (t
.cat() == catEscape
)
102 os
<< '\\' << t
.cs() << ' ';
103 else if (t
.cat() == catLetter
)
105 else if (t
.cat() == catNewline
)
106 os
<< "[" << t
.cs().size() << "\\n," << t
.cat() << "]\n";
108 os
<< '[' << t
.cs() << ',' << t
.cat() << ']';
113 string
Token::asString() const
119 string
Token::asInput() const
121 if (cat_
== catComment
)
122 return '%' + cs_
+ '\n';
123 if (cat_
== catEscape
)
134 Parser::Parser(idocstream
& is
)
135 : lineno_(0), pos_(0), iss_(0), is_(is
), encoding_latex_("utf8")
140 Parser::Parser(string
const & s
)
141 : lineno_(0), pos_(0),
142 iss_(new idocstringstream(from_utf8(s
))), is_(*iss_
),
143 encoding_latex_("utf8")
154 void Parser::setEncoding(std::string
const & e
)
156 Encoding
const * enc
= encodings
.fromLaTeXName(e
);
158 cerr
<< "Unknown encoding " << e
<< ". Ignoring." << std::endl
;
161 //cerr << "setting encoding to " << enc->iconvName() << std::endl;
162 is_
<< lyx::setEncoding(enc
->iconvName());
167 void Parser::push_back(Token
const & t
)
169 tokens_
.push_back(t
);
173 // We return a copy here because the tokens_ vector may get reallocated
174 Token
const Parser::prev_token() const
176 static const Token dummy
;
177 return pos_
> 1 ? tokens_
[pos_
- 2] : dummy
;
181 // We return a copy here because the tokens_ vector may get reallocated
182 Token
const Parser::curr_token() const
184 static const Token dummy
;
185 return pos_
> 0 ? tokens_
[pos_
- 1] : dummy
;
189 // We return a copy here because the tokens_ vector may get reallocated
190 Token
const Parser::next_token()
192 static const Token dummy
;
193 return good() ? tokens_
[pos_
] : dummy
;
197 // We return a copy here because the tokens_ vector may get reallocated
198 Token
const Parser::get_token()
200 static const Token dummy
;
201 //cerr << "looking at token " << tokens_[pos_] << " pos: " << pos_ << '\n';
202 return good() ? tokens_
[pos_
++] : dummy
;
206 bool Parser::isParagraph()
208 // A new paragraph in TeX is started
209 // - either by a newline, following any amount of whitespace
210 // characters (including zero), and another newline
211 // - or the token \par
212 if (curr_token().cat() == catNewline
&&
213 (curr_token().cs().size() > 1 ||
214 (next_token().cat() == catSpace
&&
215 pos_
< tokens_
.size() - 1 &&
216 tokens_
[pos_
+ 1].cat() == catNewline
)))
218 if (curr_token().cat() == catEscape
&& curr_token().cs() == "par")
224 void Parser::skip_spaces(bool skip_comments
)
226 // We just silently return if we have no more tokens.
227 // skip_spaces() should be callable at any time,
228 // the caller must check p::good() anyway.
235 if ( curr_token().cat() == catSpace
||
236 curr_token().cat() == catNewline
||
237 (curr_token().cat() == catComment
&& curr_token().cs().empty()))
239 if (skip_comments
&& curr_token().cat() == catComment
)
240 cerr
<< " Ignoring comment: " << curr_token().asInput();
249 void Parser::unskip_spaces(bool skip_comments
)
252 if ( curr_token().cat() == catSpace
||
253 (curr_token().cat() == catNewline
&& curr_token().cs().size() == 1))
255 else if (skip_comments
&& curr_token().cat() == catComment
) {
256 // TODO: Get rid of this
257 cerr
<< "Unignoring comment: " << curr_token().asInput();
266 void Parser::putback()
274 if (pos_
< tokens_
.size())
277 return pos_
< tokens_
.size();
281 char Parser::getChar()
284 error("The input stream is not well...");
285 return get_token().character();
289 Parser::Arg
Parser::getFullArg(char left
, char right
)
293 // This is needed if a partial file ends with a command without arguments,
296 return make_pair(false, string());
303 return make_pair(false, string());
305 while ((c
= getChar()) != right
&& good()) {
307 if (curr_token().cat() == catComment
) {
308 if (!curr_token().cs().empty())
309 cerr
<< "Ignoring comment: " << curr_token().asInput();
312 result
+= curr_token().asInput();
315 return make_pair(true, result
);
319 string
Parser::getArg(char left
, char right
)
321 return getFullArg(left
, right
).second
;
325 string
Parser::getFullOpt()
327 Arg arg
= getFullArg('[', ']');
329 return '[' + arg
.second
+ ']';
334 string
Parser::getOpt()
336 string
const res
= getArg('[', ']');
337 return res
.empty() ? string() : '[' + res
+ ']';
341 string
Parser::getOptContent()
342 // the same as getOpt but without the brackets
344 string
const res
= getArg('[', ']');
345 return res
.empty() ? string() : res
;
349 string
Parser::getFullParentheseArg()
351 Arg arg
= getFullArg('(', ')');
353 return '(' + arg
.second
+ ')';
358 string
const Parser::verbatimEnvironment(string
const & name
)
364 for (Token t
= get_token(); good(); t
= get_token()) {
365 if (t
.cat() == catBegin
) {
367 os
<< '{' << verbatim_item() << '}';
368 } else if (t
.asInput() == "\\begin") {
369 string
const env
= getArg('{', '}');
370 os
<< "\\begin{" << env
<< '}'
371 << verbatimEnvironment(env
)
372 << "\\end{" << env
<< '}';
373 } else if (t
.asInput() == "\\end") {
374 string
const end
= getArg('{', '}');
376 cerr
<< "\\end{" << end
377 << "} does not match \\begin{" << name
383 cerr
<< "unexpected end of input" << endl
;
388 void Parser::tokenize_one()
395 switch (catcode(c
)) {
398 while (is_
.get(c
) && catcode(c
) == catSpace
)
400 if (catcode(c
) != catSpace
)
402 push_back(Token(s
, catSpace
));
408 docstring
s(1, getNewline(is_
, c
));
409 while (is_
.get(c
) && catcode(c
) == catNewline
) {
411 s
+= getNewline(is_
, c
);
413 if (catcode(c
) != catNewline
)
415 push_back(Token(s
, catNewline
));
420 // We don't treat "%\n" combinations here specially because
421 // we want to preserve them in the preamble
423 while (is_
.get(c
) && catcode(c
) != catNewline
)
425 // handle possible DOS line ending
426 if (catcode(c
) == catNewline
)
427 c
= getNewline(is_
, c
);
428 // Note: The '%' at the beginning and the '\n' at the end
429 // of the comment are not stored.
431 push_back(Token(s
, catComment
));
438 error("unexpected end of input");
441 if (catcode(c
) == catLetter
) {
443 while (is_
.get(c
) && catcode(c
) == catLetter
)
445 if (catcode(c
) != catLetter
)
448 push_back(Token(s
, catEscape
));
454 cerr
<< "ignoring a char: " << c
<< "\n";
459 push_back(Token(docstring(1, c
), catcode(c
)));
461 //cerr << tokens_.back();
465 void Parser::dump() const
467 cerr
<< "\nTokens: ";
468 for (unsigned i
= 0; i
< tokens_
.size(); ++i
) {
473 cerr
<< " pos: " << pos_
<< "\n";
477 void Parser::error(string
const & msg
)
479 cerr
<< "Line ~" << lineno_
<< ": parse error: " << msg
<< endl
;
485 string
Parser::verbatimOption()
488 if (next_token().character() == '[') {
489 Token t
= get_token();
490 for (t
= get_token(); t
.character() != ']' && good(); t
= get_token()) {
491 if (t
.cat() == catBegin
) {
493 res
+= '{' + verbatim_item() + '}';
502 string
Parser::verbatim_item()
507 if (next_token().cat() == catBegin
) {
508 Token t
= get_token(); // skip brace
510 for (Token t
= get_token(); t
.cat() != catEnd
&& good(); t
= get_token()) {
511 if (t
.cat() == catBegin
) {
513 res
+= '{' + verbatim_item() + '}';
520 return get_token().asInput();
530 void Parser::setCatCode(char c
, CatCode cat
)
532 theCatcode
[(unsigned char)c
] = cat
;
536 CatCode
Parser::getCatCode(char c
) const
538 return theCatcode
[(unsigned char)c
];