1 // Scintilla source code edit control
5 // Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
8 // Modified by G. HU in 2013. Added folding, syntax highlighting inside math environments, and changed some minor behaviors.
19 #include "Scintilla.h"
22 #include "PropSetSimple.h"
24 #include "LexAccessor.h"
26 #include "StyleContext.h"
27 #include "CharacterSet.h"
28 #include "LexerModule.h"
29 #include "LexerBase.h"
32 using namespace Scintilla
;
// Folding state remembered for one line: a structure level (structLev) and
// eight counters (openBegins[0..7]).
// NOTE(review): the counters appear to track unmatched \begin commands per
// sectioning level, judging by how Fold() increments/decrements them - confirm.
37 struct latexFoldSave
{
// Default state: structure level 0 and all eight counters cleared.
38 latexFoldSave() : structLev(0) {
39 for (int i
= 0; i
< 8; ++i
) openBegins
[i
] = 0;
// Copy constructor: copies the structure level and each of the eight counters.
41 latexFoldSave(const latexFoldSave
&save
) : structLev(save
.structLev
) {
42 for (int i
= 0; i
< 8; ++i
) openBegins
[i
] = save
.openBegins
[i
];
// LaTeX lexer object derived from LexerBase. Besides Lex() and Fold() it keeps
// two per-line tables: `modes` (an int per line, read back by Lex() through
// getMode()) and `saves` (a latexFoldSave per line, read back by Fold()
// through getSave()).
48 class LexerLaTeX
: public LexerBase
{
// Makes sure `modes` has an entry for `line`, zero-filling any newly added slots.
51 void setMode(int line
, int mode
) {
52 if (line
>= static_cast<int>(modes
.size())) modes
.resize(line
+ 1, 0);
// Returns the mode stored for `line` when `line` is a valid index. The
// `line >= 0` test matters because Lex() asks for the line before the first one.
55 int getMode(int line
) {
56 if (line
>= 0 && line
< static_cast<int>(modes
.size())) return modes
[line
];
// Shrinks `modes` to numLines + 128 entries, but only once it has grown past
// numLines * 2 + 256 entries, so small fluctuations do not cause resizing.
59 void truncModes(int numLines
) {
60 if (static_cast<int>(modes
.size()) > numLines
* 2 + 256)
61 modes
.resize(numLines
+ 128);
// Per-line folding state, indexed by line number.
64 vector
<latexFoldSave
> saves
;
// Makes sure `saves` has an entry for `line` (new slots are default-constructed).
65 void setSave(int line
, const latexFoldSave
&save
) {
66 if (line
>= static_cast<int>(saves
.size())) saves
.resize(line
+ 1);
// Copies the state stored for `line` into `save` when `line` is a valid index.
69 void getSave(int line
, latexFoldSave
&save
) {
70 if (line
>= 0 && line
< static_cast<int>(saves
.size())) save
= saves
[line
];
// NOTE(review): presumably the out-of-range fallback - clears all eight counters; confirm.
73 for (int i
= 0; i
< 8; ++i
) save
.openBegins
[i
] = 0;
// Same shrinking policy as truncModes(), applied to `saves`.
76 void truncSaves(int numLines
) {
77 if (static_cast<int>(saves
.size()) > numLines
* 2 + 256)
78 saves
.resize(numLines
+ 128);
// Factory handed to the LexerModule registration; returns a newly allocated lexer.
81 static ILexer
*LexerFactoryLaTeX() {
82 return new LexerLaTeX();
// Styling entry point (defined out of class).
84 void SCI_METHOD
Lex(unsigned int startPos
, int length
, int initStyle
, IDocument
*pAccess
);
// Folding entry point (defined out of class).
85 void SCI_METHOD
Fold(unsigned int startPos
, int length
, int initStyle
, IDocument
*pAccess
);
// Returns true when `ch` is one of the characters the lexer treats as a
// "special" escape target: # $ % & _ { } or a space.
// The lexer uses this on the character that follows the current one to decide
// whether the pair is styled as SCE_L_SPECIAL.
static bool latexIsSpecial(int ch) {
	return (ch == '#') || (ch == '$') || (ch == '%') || (ch == '&') || (ch == '_') ||
	       (ch == '{') || (ch == '}') || (ch == ' ');
}
// Returns true for a horizontal blank: a space or a tab. Line-end characters
// are deliberately not included (see latexIsBlankAndNL for that variant).
static bool latexIsBlank(int ch) {
	return (ch == ' ') || (ch == '\t');
}
// Returns true for a space, a tab, or either line-end character (CR or LF).
static bool latexIsBlankAndNL(int ch) {
	return (ch == ' ') || (ch == '\t') || (ch == '\r') || (ch == '\n');
}
// Returns true when `ch` is an ASCII letter (A-Z or a-z).
// The range test is written out explicitly so the result does not depend on
// the C locale and so that negative or non-ASCII values are simply rejected
// instead of being handed to a <cctype> classifier.
static bool latexIsLetter(int ch) {
	return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
}
// Examines the document text at position `i` for a brace-delimited tag such as
// the `{name}` that follows \begin or \end. `i` is taken by reference.
// NOTE(review): callers colour up to `i` and compare the text ending at `i`
// with strings like "{verbatim}" after a successful call, so `i` presumably
// ends on the closing brace - confirm. `l` is passed the end of the lexed range.
105 static bool latexIsTagValid(int &i
, int l
, Accessor
&styler
) {
// The tag has to open with '{'.
107 if (styler
.SafeGetCharAt(i
) == '{') {
// A '}' closes the tag.
110 if (styler
.SafeGetCharAt(i
) == '}') {
// Inside the braces only ASCII letters and '*' are treated as name characters.
112 } else if (!latexIsLetter(styler
.SafeGetCharAt(i
)) &&
113 styler
.SafeGetCharAt(i
)!='*') {
// Characters other than spaces/tabs get separate handling here.
117 } else if (!latexIsBlank(styler
.SafeGetCharAt(i
))) {
// Scans forward from position `i` to the end of the document looking for the
// first character that is not a space, tab, CR, LF or '*'.
// NOTE(review): presumably the result is whether that character equals
// `needle` (Lex() calls this with '[' to detect an optional argument) - confirm.
125 static bool latexNextNotBlankIs(int i
, Accessor
&styler
, char needle
) {
127 while (i
< styler
.Length()) {
128 ch
= styler
.SafeGetCharAt(i
);
// '*' is skipped along with whitespace, so a starred command does not hide what follows.
129 if (!latexIsBlankAndNL(ch
) && ch
!= '*') {
// Compares a run of document characters with the NUL-terminated string `needle`
// and returns true when they are identical.
// NOTE(review): callers pass the position of the last character of the word,
// so `ini` is presumably start - strlen(needle) + 1 - confirm.
140 static bool latexLastWordIs(int start
, Accessor
&styler
, const char *needle
) {
142 unsigned int l
= static_cast<unsigned int>(strlen(needle
));
// Copy at most 31 characters; a needle longer than that can therefore never
// compare equal in the strcmp below.
146 while (i
< l
&& i
< 31) {
147 s
[i
] = styler
.SafeGetCharAt(ini
+ i
);
152 return (strcmp(s
, needle
) == 0);
// Returns true when the brace group ending at `pos` names one of the math
// environments listed in mathEnvs; a trailing '*' on the name is ignored.
155 static bool latexLastWordIsMathEnv(int pos
, Accessor
&styler
) {
// NOTE(review): amsmath spells its environment "multline"; the "multiline"
// entry below looks like a typo that would keep \begin{multline} from being
// recognised - confirm before changing the literal.
158 const char *mathEnvs
[] = { "align", "alignat", "flalign", "gather",
159 "multiline", "displaymath", "eqnarray", "equation" };
// `pos` must be the closing brace of the group.
160 if (styler
.SafeGetCharAt(pos
) != '}') return false;
// Walk backwards to the matching '{', giving up once 20 characters have been scanned.
161 for (i
= pos
- 1; i
>= 0; --i
) {
162 if (styler
.SafeGetCharAt(i
) == '{') break;
163 if (pos
- i
>= 20) return false;
// Reject when no '{' was found or when the braces are empty ("{}").
165 if (i
< 0 || i
== pos
- 1) return false;
// Copy the characters from i up to (not including) pos into s.
167 for (j
= 0; i
+ j
< pos
; ++j
)
168 s
[j
] = styler
.SafeGetCharAt(i
+ j
);
170 if (j
== 0) return false;
// Drop a trailing '*' so starred variants match the table entries.
171 if (s
[j
- 1] == '*') s
[--j
] = '\0';
// Linear search through the table of environment names.
172 for (i
= 0; i
< static_cast<int>(sizeof(mathEnvs
) / sizeof(const char *)); ++i
)
173 if (strcmp(s
, mathEnvs
[i
]) == 0) return true;
// Sets `state` back to a base style: SCE_L_MATH for 1, SCE_L_MATH2 for 2 and
// SCE_L_DEFAULT otherwise.
// NOTE(review): the selector is presumably `mode` (the current math mode) - confirm.
177 static inline void latexStateReset(int &mode
, int &state
) {
179 case 1: state
= SCE_L_MATH
; break;
180 case 2: state
= SCE_L_MATH2
; break;
181 default: state
= SCE_L_DEFAULT
; break;
185 // There are cases not handled correctly, like $abcd\textrm{what is $x+y$}z+w$.
186 // But I think it's already good enough.
// Styles the characters in [startPos, startPos + length).
//   startPos  - first position to style
//   length    - number of characters to style
//   initStyle - style in effect at startPos
//   pAccess   - document, wrapped in an Accessor for reading and colouring
// `state` holds the current SCE_L_* style and `mode` the current math mode
// (see latexStateReset for how a mode maps to a base style). The mode is saved
// per line with setMode() so that a later call can resume via getMode().
187 void SCI_METHOD
LexerLaTeX::Lex(unsigned int startPos
, int length
, int initStyle
, IDocument
*pAccess
) {
188 // startPos is assumed to be the first character of a line
189 Accessor
styler(pAccess
, &props
);
190 styler
.StartAt(startPos
);
// Resume with the mode recorded for the line before the first lexed line.
191 int mode
= getMode(styler
.GetLine(startPos
) - 1);
192 int state
= initStyle
;
// These three styles are only ever applied to short, self-contained spans, so
// a range starting in one of them is replaced by the base style for `mode`.
193 if (state
== SCE_L_ERROR
|| state
== SCE_L_SHORTCMD
|| state
== SCE_L_SPECIAL
) // should not happen
194 latexStateReset(mode
, state
);
196 char chNext
= styler
.SafeGetCharAt(startPos
);
// Delimiter of an active \verb<delim>...<delim>; '\0' when none is active.
197 char chVerbatimDelim
= '\0';
198 styler
.StartSegment(startPos
);
// Despite its name this is the end of the range being lexed, not of the document.
199 int lengthDoc
= startPos
+ length
;
201 for (int i
= startPos
; i
< lengthDoc
; i
++) {
203 chNext
= styler
.SafeGetCharAt(i
+ 1);
205 if (styler
.IsLeadByte(ch
)) {
207 chNext
= styler
.SafeGetCharAt(i
+ 1);
// At every line end, remember the math mode in effect for that line.
211 if (ch
== '\r' || ch
== '\n')
212 setMode(styler
.GetLine(i
), mode
);
218 styler
.ColourTo(i
- 1, state
);
219 if (latexIsLetter(chNext
)) {
220 state
= SCE_L_COMMAND
;
221 } else if (latexIsSpecial(chNext
)) {
222 styler
.ColourTo(i
+ 1, SCE_L_SPECIAL
);
224 chNext
= styler
.SafeGetCharAt(i
+ 1);
225 } else if (chNext
== '\r' || chNext
== '\n') {
226 styler
.ColourTo(i
, SCE_L_ERROR
);
227 } else if (IsASCII(chNext
)) {
228 styler
.ColourTo(i
+ 1, SCE_L_SHORTCMD
);
232 } else if (chNext
== '[') {
237 chNext
= styler
.SafeGetCharAt(i
+ 1);
241 styler
.ColourTo(i
- 1, state
);
243 styler
.ColourTo(i
+ 1, SCE_L_SHORTCMD
);
247 chNext
= styler
.SafeGetCharAt(i
+ 1);
249 styler
.ColourTo(i
, SCE_L_SHORTCMD
);
255 styler
.ColourTo(i
- 1, state
);
256 state
= SCE_L_COMMENT
;
260 // These 3 will never be reached.
// The next character is not a letter, so the command word ends at i; what
// comes after the word decides the following state.
266 if (!latexIsLetter(chNext
)) {
267 styler
.ColourTo(i
, state
);
268 if (latexNextNotBlankIs(i
+ 1, styler
, '[' )) {
269 state
= SCE_L_CMDOPT
;
270 } else if (latexLastWordIs(i
, styler
, "\\begin")) {
272 } else if (latexLastWordIs(i
, styler
, "\\end")) {
// \verb takes the character right after it as its delimiter; '*' and ' ' are excluded.
274 } else if (latexLastWordIs(i
, styler
, "\\verb") && chNext
!= '*' && chNext
!= ' ') {
275 chVerbatimDelim
= chNext
;
276 state
= SCE_L_VERBATIM
;
278 latexStateReset(mode
, state
);
284 styler
.ColourTo(i
, state
);
285 latexStateReset(mode
, state
);
// A valid tag was read: colour it, then check whether it names one of the
// environments that need their own state.
289 if (latexIsTagValid(i
, lengthDoc
, styler
)) {
290 styler
.ColourTo(i
, state
);
291 latexStateReset(mode
, state
);
292 if (latexLastWordIs(i
, styler
, "{verbatim}")) {
293 state
= SCE_L_VERBATIM
;
294 } else if (latexLastWordIs(i
, styler
, "{comment}")) {
295 state
= SCE_L_COMMENT2
;
// Math environments are only recognised when not already in a math mode.
296 } else if (latexLastWordIs(i
, styler
, "{math}") && mode
== 0) {
299 } else if (latexLastWordIsMathEnv(i
, styler
) && mode
== 0) {
304 styler
.ColourTo(i
, SCE_L_ERROR
);
305 latexStateReset(mode
, state
);
// latexIsTagValid may have moved i, so re-read the current character and
// record the mode if i now sits on a line end.
306 ch
= styler
.SafeGetCharAt(i
);
307 if (ch
== '\r' || ch
== '\n') setMode(styler
.GetLine(i
), mode
);
309 chNext
= styler
.SafeGetCharAt(i
+1);
312 if (latexIsTagValid(i
, lengthDoc
, styler
)) {
313 styler
.ColourTo(i
, state
);
314 latexStateReset(mode
, state
);
316 styler
.ColourTo(i
, SCE_L_ERROR
);
317 latexStateReset(mode
, state
);
318 ch
= styler
.SafeGetCharAt(i
);
319 if (ch
== '\r' || ch
== '\n') setMode(styler
.GetLine(i
), mode
);
321 chNext
= styler
.SafeGetCharAt(i
+1);
326 styler
.ColourTo(i
- 1, state
);
327 if (latexIsLetter(chNext
)) {
// Look ahead for "\end{math}" using a scratch position (`match`).
329 if (latexLastWordIs(match
, styler
, "\\end")) {
331 if (latexIsTagValid(match
, lengthDoc
, styler
)) {
332 if (latexLastWordIs(match
, styler
, "{math}"))
336 state
= SCE_L_COMMAND
;
337 } else if (latexIsSpecial(chNext
)) {
338 styler
.ColourTo(i
+ 1, SCE_L_SPECIAL
);
340 chNext
= styler
.SafeGetCharAt(i
+ 1);
341 } else if (chNext
== '\r' || chNext
== '\n') {
342 styler
.ColourTo(i
, SCE_L_ERROR
);
343 } else if (IsASCII(chNext
)) {
346 state
= SCE_L_DEFAULT
;
348 styler
.ColourTo(i
+ 1, SCE_L_SHORTCMD
);
350 chNext
= styler
.SafeGetCharAt(i
+ 1);
354 styler
.ColourTo(i
- 1, state
);
355 styler
.ColourTo(i
, SCE_L_SHORTCMD
);
357 state
= SCE_L_DEFAULT
;
360 styler
.ColourTo(i
- 1, state
);
361 state
= SCE_L_COMMENT
;
368 styler
.ColourTo(i
- 1, state
);
369 if (latexIsLetter(chNext
)) {
// Look ahead for "\end{<math environment>}" using a scratch position (`match`).
371 if (latexLastWordIs(match
, styler
, "\\end")) {
373 if (latexIsTagValid(match
, lengthDoc
, styler
)) {
374 if (latexLastWordIsMathEnv(match
, styler
))
378 state
= SCE_L_COMMAND
;
379 } else if (latexIsSpecial(chNext
)) {
380 styler
.ColourTo(i
+ 1, SCE_L_SPECIAL
);
382 chNext
= styler
.SafeGetCharAt(i
+ 1);
383 } else if (chNext
== '\r' || chNext
== '\n') {
384 styler
.ColourTo(i
, SCE_L_ERROR
);
385 } else if (IsASCII(chNext
)) {
388 state
= SCE_L_DEFAULT
;
390 styler
.ColourTo(i
+ 1, SCE_L_SHORTCMD
);
392 chNext
= styler
.SafeGetCharAt(i
+ 1);
396 styler
.ColourTo(i
- 1, state
);
398 styler
.ColourTo(i
+ 1, SCE_L_SHORTCMD
);
400 chNext
= styler
.SafeGetCharAt(i
+ 1);
402 state
= SCE_L_DEFAULT
;
403 } else { // This may not be an error, e.g. \begin{equation}\text{$a$}\end{equation}
404 styler
.ColourTo(i
, SCE_L_SHORTCMD
);
408 styler
.ColourTo(i
- 1, state
);
409 state
= SCE_L_COMMENT
;
// A line end closes the current span and returns to the base style for `mode`.
414 if (ch
== '\r' || ch
== '\n') {
415 styler
.ColourTo(i
- 1, state
);
416 latexStateReset(mode
, state
);
// Inside a comment environment: only "\end{comment}" leaves this state.
419 case SCE_L_COMMENT2
:
422 if (latexLastWordIs(match
, styler
, "\\end")) {
424 if (latexIsTagValid(match
, lengthDoc
, styler
)) {
425 if (latexLastWordIs(match
, styler
, "{comment}")) {
426 styler
.ColourTo(i
- 1, state
);
427 state
= SCE_L_COMMAND
;
// Verbatim text: left either by "\end{verbatim}" or, for \verb, by the delimiter.
433 case SCE_L_VERBATIM
:
436 if (latexLastWordIs(match
, styler
, "\\end")) {
438 if (latexIsTagValid(match
, lengthDoc
, styler
)) {
439 if (latexLastWordIs(match
, styler
, "{verbatim}")) {
440 styler
.ColourTo(i
- 1, state
);
441 state
= SCE_L_COMMAND
;
// The next character is the \verb delimiter: include it and end the verbatim span.
445 } else if (chNext
== chVerbatimDelim
) {
446 styler
.ColourTo(i
+ 1, state
);
447 latexStateReset(mode
, state
);
448 chVerbatimDelim
= '\0';
450 chNext
= styler
.SafeGetCharAt(i
+ 1);
// A \verb span that reaches a line end without its delimiter is marked as an error.
451 } else if (chVerbatimDelim
!= '\0' && (ch
== '\n' || ch
== '\r')) {
452 styler
.ColourTo(i
, SCE_L_ERROR
);
453 latexStateReset(mode
, state
);
454 chVerbatimDelim
= '\0';
// When the lexed range ends at the end of the document, trim the per-line mode table.
459 if (lengthDoc
== styler
.Length()) truncModes(styler
.GetLine(lengthDoc
- 1));
// Flush whatever remains in the current style.
460 styler
.ColourTo(lengthDoc
- 1, state
);
// Converts a folding state into a fold level number: the sum of
// openBegins[0..structLev] plus structLev plus SC_FOLDLEVELBASE, masked with
// SC_FOLDLEVELNUMBERMASK.
464 static int latexFoldSaveToInt(const latexFoldSave
&save
) {
// Inclusive upper bound: the counter at the current structure level is included.
466 for (int i
= 0; i
<= save
.structLev
; ++i
)
467 sum
+= save
.openBegins
[i
];
468 return ((sum
+ save
.structLev
+ SC_FOLDLEVELBASE
) & SC_FOLDLEVELNUMBERMASK
);
471 // Change folding state while processing a line
472 // Return the level before the first relevant command
// Computes fold levels line by line for [startPos, startPos + length).
// The third (style) parameter is unused. Folding state is carried from line to
// line in a latexFoldSave, stored with setSave() and restored with getSave().
// Only backslashes already styled as SCE_L_COMMAND are examined, so this
// depends on Lex() having styled the text.
473 void SCI_METHOD
LexerLaTeX::Fold(unsigned int startPos
, int length
, int, IDocument
*pAccess
) {
// Sectioning commands; the index in this table is used as the structure level.
474 const char *structWords
[7] = {"part", "chapter", "section", "subsection",
475 "subsubsection", "paragraph", "subparagraph"};
476 Accessor
styler(pAccess
, &props
);
477 unsigned int endPos
= startPos
+ length
;
478 int curLine
= styler
.GetLine(startPos
);
// Start from the state saved for the previous line.
480 getSave(curLine
- 1, save
);
484 bool needFold
= false;
// Scan one line, stopping at its line end.
485 for (i
= static_cast<int>(startPos
); i
< static_cast<int>(endPos
); ++i
) {
486 ch
= styler
.SafeGetCharAt(i
);
487 if (ch
== '\r' || ch
== '\n') break;
488 if (ch
!= '\\' || styler
.StyleAt(i
) != SCE_L_COMMAND
) continue;
// Collect the letters following the backslash (at most 15) into buf.
489 for (j
= 0; j
< 15 && i
+ 1 < static_cast<int>(endPos
); ++j
, ++i
) {
490 buf
[j
] = styler
.SafeGetCharAt(i
+ 1);
491 if (!latexIsLetter(buf
[j
])) break;
// \begin: capture the line's level if not yet captured, then count one more
// open environment at the current structure level.
494 if (strcmp(buf
, "begin") == 0) {
495 if (lev
< 0) lev
= latexFoldSaveToInt(save
);
496 ++save
.openBegins
[save
.structLev
];
// \end: the loop condition looks for a structure level that still has an open
// environment; one is then closed if any is open.
499 else if (strcmp(buf
, "end") == 0) {
500 while (save
.structLev
> 0 && save
.openBegins
[save
.structLev
] == 0)
502 if (lev
< 0) lev
= latexFoldSaveToInt(save
);
503 if (save
.openBegins
[save
.structLev
] > 0) --save
.openBegins
[save
.structLev
];
// Otherwise look the word up among the sectioning commands; anything else is ignored.
506 for (j
= 0; j
< 7; ++j
)
507 if (strcmp(buf
, structWords
[j
]) == 0) break;
508 if (j
>= 7) continue;
509 save
.structLev
= j
; // level before the command
// Merge the open-environment counts of all deeper levels into this level and clear them.
510 for (j
= save
.structLev
+ 1; j
< 8; ++j
) {
511 save
.openBegins
[save
.structLev
] += save
.openBegins
[j
];
512 save
.openBegins
[j
] = 0;
514 if (lev
< 0) lev
= latexFoldSaveToInt(save
);
515 ++save
.structLev
; // level after the command
// No relevant command on this line: its level is simply the current state.
519 if (lev
< 0) lev
= latexFoldSaveToInt(save
);
520 if (needFold
) lev
|= SC_FOLDLEVELHEADERFLAG
;
521 styler
.SetLevel(curLine
, lev
);
522 setSave(curLine
, save
);
524 startPos
= styler
.LineStart(curLine
);
// When the next line starts exactly at the end of the document, give that
// final (empty) line a level and a saved state as well.
525 if (static_cast<int>(startPos
) == styler
.Length()) {
526 lev
= latexFoldSaveToInt(save
);
527 styler
.SetLevel(curLine
, lev
);
528 setSave(curLine
, save
);
531 } while (startPos
< endPos
);
// Word-list description table passed to the LexerModule registration below.
535 static const char *const emptyWordListDesc
[] = {
// Registers this lexer under the name "latex" for SCLEX_LATEX, using
// LexerLaTeX::LexerFactoryLaTeX to create instances.
539 LexerModule
lmLatex(SCLEX_LATEX
, LexerLaTeX::LexerFactoryLaTeX
, "latex", emptyWordListDesc
);