1 // Scintilla source code edit control
5 // Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
8 // Modified by G. HU in 2013. Added folding and syntax highlighting inside math environments, and changed some minor behaviors.
19 #include "Scintilla.h"
22 #include "PropSetSimple.h"
24 #include "LexAccessor.h"
26 #include "StyleContext.h"
27 #include "CharacterSet.h"
28 #include "LexerModule.h"
29 #include "DefaultLexer.h"
30 #include "LexerBase.h"
32 using namespace Scintilla
;
36 struct latexFoldSave
{
37 latexFoldSave() : structLev(0) {
38 for (int i
= 0; i
< 8; ++i
) openBegins
[i
] = 0;
40 latexFoldSave(const latexFoldSave
&save
) : structLev(save
.structLev
) {
41 for (int i
= 0; i
< 8; ++i
) openBegins
[i
] = save
.openBegins
[i
];
44 Sci_Position structLev
;
47 class LexerLaTeX
: public LexerBase
{
50 void setMode(Sci_Position line
, int mode
) {
51 if (line
>= static_cast<Sci_Position
>(modes
.size())) modes
.resize(line
+ 1, 0);
54 int getMode(Sci_Position line
) {
55 if (line
>= 0 && line
< static_cast<Sci_Position
>(modes
.size())) return modes
[line
];
58 void truncModes(Sci_Position numLines
) {
59 if (static_cast<Sci_Position
>(modes
.size()) > numLines
* 2 + 256)
60 modes
.resize(numLines
+ 128);
63 vector
<latexFoldSave
> saves
;
64 void setSave(Sci_Position line
, const latexFoldSave
&save
) {
65 if (line
>= static_cast<Sci_Position
>(saves
.size())) saves
.resize(line
+ 1);
68 void getSave(Sci_Position line
, latexFoldSave
&save
) {
69 if (line
>= 0 && line
< static_cast<Sci_Position
>(saves
.size())) save
= saves
[line
];
72 for (int i
= 0; i
< 8; ++i
) save
.openBegins
[i
] = 0;
75 void truncSaves(Sci_Position numLines
) {
76 if (static_cast<Sci_Position
>(saves
.size()) > numLines
* 2 + 256)
77 saves
.resize(numLines
+ 128);
80 static ILexer4
*LexerFactoryLaTeX() {
81 return new LexerLaTeX();
83 void SCI_METHOD
Lex(Sci_PositionU startPos
, Sci_Position length
, int initStyle
, IDocument
*pAccess
) override
;
84 void SCI_METHOD
Fold(Sci_PositionU startPos
, Sci_Position length
, int initStyle
, IDocument
*pAccess
) override
;
// Returns true when ch is one of  # $ % & _ { }  or a space, i.e. a character
// that the lexer styles as SCE_L_SPECIAL when it directly follows a backslash.
static bool latexIsSpecial(int ch) {
	switch (ch) {
	case '#':
	case '$':
	case '%':
	case '&':
	case '_':
	case '{':
	case '}':
	case ' ':
		return true;
	default:
		return false;
	}
}
// Returns true for horizontal white space: a space or a tab.
static bool latexIsBlank(int ch) {
	return ch == ' ' || ch == '\t';
}
// Returns true for a space, a tab, or either line-end character (CR or LF).
static bool latexIsBlankAndNL(int ch) {
	switch (ch) {
	case ' ':
	case '\t':
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
100 static bool latexIsLetter(int ch
) {
101 return IsASCII(ch
) && isalpha(ch
);
104 static bool latexIsTagValid(Sci_Position
&i
, Sci_Position l
, Accessor
&styler
) {
106 if (styler
.SafeGetCharAt(i
) == '{') {
109 if (styler
.SafeGetCharAt(i
) == '}') {
111 } else if (!latexIsLetter(styler
.SafeGetCharAt(i
)) &&
112 styler
.SafeGetCharAt(i
)!='*') {
116 } else if (!latexIsBlank(styler
.SafeGetCharAt(i
))) {
124 static bool latexNextNotBlankIs(Sci_Position i
, Accessor
&styler
, char needle
) {
126 while (i
< styler
.Length()) {
127 ch
= styler
.SafeGetCharAt(i
);
128 if (!latexIsBlankAndNL(ch
) && ch
!= '*') {
139 static bool latexLastWordIs(Sci_Position start
, Accessor
&styler
, const char *needle
) {
141 Sci_PositionU l
= static_cast<Sci_PositionU
>(strlen(needle
));
142 Sci_Position ini
= start
-l
+1;
145 while (i
< l
&& i
< 31) {
146 s
[i
] = styler
.SafeGetCharAt(ini
+ i
);
151 return (strcmp(s
, needle
) == 0);
154 static bool latexLastWordIsMathEnv(Sci_Position pos
, Accessor
&styler
) {
157 const char *mathEnvs
[] = { "align", "alignat", "flalign", "gather",
158 "multiline", "displaymath", "eqnarray", "equation" };
159 if (styler
.SafeGetCharAt(pos
) != '}') return false;
160 for (i
= pos
- 1; i
>= 0; --i
) {
161 if (styler
.SafeGetCharAt(i
) == '{') break;
162 if (pos
- i
>= 20) return false;
164 if (i
< 0 || i
== pos
- 1) return false;
166 for (j
= 0; i
+ j
< pos
; ++j
)
167 s
[j
] = styler
.SafeGetCharAt(i
+ j
);
169 if (j
== 0) return false;
170 if (s
[j
- 1] == '*') s
[--j
] = '\0';
171 for (i
= 0; i
< static_cast<int>(sizeof(mathEnvs
) / sizeof(const char *)); ++i
)
172 if (strcmp(s
, mathEnvs
[i
]) == 0) return true;
176 static inline void latexStateReset(int &mode
, int &state
) {
178 case 1: state
= SCE_L_MATH
; break;
179 case 2: state
= SCE_L_MATH2
; break;
180 default: state
= SCE_L_DEFAULT
; break;
184 // There are cases not handled correctly, like $abcd\textrm{what is $x+y$}z+w$.
185 // But I think it's already good enough.
186 void SCI_METHOD
LexerLaTeX::Lex(Sci_PositionU startPos
, Sci_Position length
, int initStyle
, IDocument
*pAccess
) {
187 // startPos is assumed to be the first character of a line
188 Accessor
styler(pAccess
, &props
);
189 styler
.StartAt(startPos
);
190 int mode
= getMode(styler
.GetLine(startPos
) - 1);
191 int state
= initStyle
;
192 if (state
== SCE_L_ERROR
|| state
== SCE_L_SHORTCMD
|| state
== SCE_L_SPECIAL
) // should not happen
193 latexStateReset(mode
, state
);
195 char chNext
= styler
.SafeGetCharAt(startPos
);
196 char chVerbatimDelim
= '\0';
197 styler
.StartSegment(startPos
);
198 Sci_Position lengthDoc
= startPos
+ length
;
200 for (Sci_Position i
= startPos
; i
< lengthDoc
; i
++) {
202 chNext
= styler
.SafeGetCharAt(i
+ 1);
204 if (styler
.IsLeadByte(ch
)) {
206 chNext
= styler
.SafeGetCharAt(i
+ 1);
210 if (ch
== '\r' || ch
== '\n')
211 setMode(styler
.GetLine(i
), mode
);
217 styler
.ColourTo(i
- 1, state
);
218 if (latexIsLetter(chNext
)) {
219 state
= SCE_L_COMMAND
;
220 } else if (latexIsSpecial(chNext
)) {
221 styler
.ColourTo(i
+ 1, SCE_L_SPECIAL
);
223 chNext
= styler
.SafeGetCharAt(i
+ 1);
224 } else if (chNext
== '\r' || chNext
== '\n') {
225 styler
.ColourTo(i
, SCE_L_ERROR
);
226 } else if (IsASCII(chNext
)) {
227 styler
.ColourTo(i
+ 1, SCE_L_SHORTCMD
);
231 } else if (chNext
== '[') {
236 chNext
= styler
.SafeGetCharAt(i
+ 1);
240 styler
.ColourTo(i
- 1, state
);
242 styler
.ColourTo(i
+ 1, SCE_L_SHORTCMD
);
246 chNext
= styler
.SafeGetCharAt(i
+ 1);
248 styler
.ColourTo(i
, SCE_L_SHORTCMD
);
254 styler
.ColourTo(i
- 1, state
);
255 state
= SCE_L_COMMENT
;
259 // These 3 will never be reached.
265 if (!latexIsLetter(chNext
)) {
266 styler
.ColourTo(i
, state
);
267 if (latexNextNotBlankIs(i
+ 1, styler
, '[' )) {
268 state
= SCE_L_CMDOPT
;
269 } else if (latexLastWordIs(i
, styler
, "\\begin")) {
271 } else if (latexLastWordIs(i
, styler
, "\\end")) {
273 } else if (latexLastWordIs(i
, styler
, "\\verb") && chNext
!= '*' && chNext
!= ' ') {
274 chVerbatimDelim
= chNext
;
275 state
= SCE_L_VERBATIM
;
277 latexStateReset(mode
, state
);
283 styler
.ColourTo(i
, state
);
284 latexStateReset(mode
, state
);
288 if (latexIsTagValid(i
, lengthDoc
, styler
)) {
289 styler
.ColourTo(i
, state
);
290 latexStateReset(mode
, state
);
291 if (latexLastWordIs(i
, styler
, "{verbatim}")) {
292 state
= SCE_L_VERBATIM
;
293 } else if (latexLastWordIs(i
, styler
, "{comment}")) {
294 state
= SCE_L_COMMENT2
;
295 } else if (latexLastWordIs(i
, styler
, "{math}") && mode
== 0) {
298 } else if (latexLastWordIsMathEnv(i
, styler
) && mode
== 0) {
303 styler
.ColourTo(i
, SCE_L_ERROR
);
304 latexStateReset(mode
, state
);
305 ch
= styler
.SafeGetCharAt(i
);
306 if (ch
== '\r' || ch
== '\n') setMode(styler
.GetLine(i
), mode
);
308 chNext
= styler
.SafeGetCharAt(i
+1);
311 if (latexIsTagValid(i
, lengthDoc
, styler
)) {
312 styler
.ColourTo(i
, state
);
313 latexStateReset(mode
, state
);
315 styler
.ColourTo(i
, SCE_L_ERROR
);
316 latexStateReset(mode
, state
);
317 ch
= styler
.SafeGetCharAt(i
);
318 if (ch
== '\r' || ch
== '\n') setMode(styler
.GetLine(i
), mode
);
320 chNext
= styler
.SafeGetCharAt(i
+1);
325 styler
.ColourTo(i
- 1, state
);
326 if (latexIsLetter(chNext
)) {
327 Sci_Position match
= i
+ 3;
328 if (latexLastWordIs(match
, styler
, "\\end")) {
330 if (latexIsTagValid(match
, lengthDoc
, styler
)) {
331 if (latexLastWordIs(match
, styler
, "{math}"))
335 state
= SCE_L_COMMAND
;
336 } else if (latexIsSpecial(chNext
)) {
337 styler
.ColourTo(i
+ 1, SCE_L_SPECIAL
);
339 chNext
= styler
.SafeGetCharAt(i
+ 1);
340 } else if (chNext
== '\r' || chNext
== '\n') {
341 styler
.ColourTo(i
, SCE_L_ERROR
);
342 } else if (IsASCII(chNext
)) {
345 state
= SCE_L_DEFAULT
;
347 styler
.ColourTo(i
+ 1, SCE_L_SHORTCMD
);
349 chNext
= styler
.SafeGetCharAt(i
+ 1);
353 styler
.ColourTo(i
- 1, state
);
354 styler
.ColourTo(i
, SCE_L_SHORTCMD
);
356 state
= SCE_L_DEFAULT
;
359 styler
.ColourTo(i
- 1, state
);
360 state
= SCE_L_COMMENT
;
367 styler
.ColourTo(i
- 1, state
);
368 if (latexIsLetter(chNext
)) {
369 Sci_Position match
= i
+ 3;
370 if (latexLastWordIs(match
, styler
, "\\end")) {
372 if (latexIsTagValid(match
, lengthDoc
, styler
)) {
373 if (latexLastWordIsMathEnv(match
, styler
))
377 state
= SCE_L_COMMAND
;
378 } else if (latexIsSpecial(chNext
)) {
379 styler
.ColourTo(i
+ 1, SCE_L_SPECIAL
);
381 chNext
= styler
.SafeGetCharAt(i
+ 1);
382 } else if (chNext
== '\r' || chNext
== '\n') {
383 styler
.ColourTo(i
, SCE_L_ERROR
);
384 } else if (IsASCII(chNext
)) {
387 state
= SCE_L_DEFAULT
;
389 styler
.ColourTo(i
+ 1, SCE_L_SHORTCMD
);
391 chNext
= styler
.SafeGetCharAt(i
+ 1);
395 styler
.ColourTo(i
- 1, state
);
397 styler
.ColourTo(i
+ 1, SCE_L_SHORTCMD
);
399 chNext
= styler
.SafeGetCharAt(i
+ 1);
401 state
= SCE_L_DEFAULT
;
402 } else { // This may not be an error, e.g. \begin{equation}\text{$a$}\end{equation}
403 styler
.ColourTo(i
, SCE_L_SHORTCMD
);
407 styler
.ColourTo(i
- 1, state
);
408 state
= SCE_L_COMMENT
;
413 if (ch
== '\r' || ch
== '\n') {
414 styler
.ColourTo(i
- 1, state
);
415 latexStateReset(mode
, state
);
418 case SCE_L_COMMENT2
:
420 Sci_Position match
= i
+ 3;
421 if (latexLastWordIs(match
, styler
, "\\end")) {
423 if (latexIsTagValid(match
, lengthDoc
, styler
)) {
424 if (latexLastWordIs(match
, styler
, "{comment}")) {
425 styler
.ColourTo(i
- 1, state
);
426 state
= SCE_L_COMMAND
;
432 case SCE_L_VERBATIM
:
434 Sci_Position match
= i
+ 3;
435 if (latexLastWordIs(match
, styler
, "\\end")) {
437 if (latexIsTagValid(match
, lengthDoc
, styler
)) {
438 if (latexLastWordIs(match
, styler
, "{verbatim}")) {
439 styler
.ColourTo(i
- 1, state
);
440 state
= SCE_L_COMMAND
;
444 } else if (chNext
== chVerbatimDelim
) {
445 styler
.ColourTo(i
+ 1, state
);
446 latexStateReset(mode
, state
);
447 chVerbatimDelim
= '\0';
449 chNext
= styler
.SafeGetCharAt(i
+ 1);
450 } else if (chVerbatimDelim
!= '\0' && (ch
== '\n' || ch
== '\r')) {
451 styler
.ColourTo(i
, SCE_L_ERROR
);
452 latexStateReset(mode
, state
);
453 chVerbatimDelim
= '\0';
458 if (lengthDoc
== styler
.Length()) truncModes(styler
.GetLine(lengthDoc
- 1));
459 styler
.ColourTo(lengthDoc
- 1, state
);
463 static int latexFoldSaveToInt(const latexFoldSave
&save
) {
465 for (int i
= 0; i
<= save
.structLev
; ++i
)
466 sum
+= save
.openBegins
[i
];
467 return ((sum
+ save
.structLev
+ SC_FOLDLEVELBASE
) & SC_FOLDLEVELNUMBERMASK
);
470 // Change folding state while processing a line
471 // Return the level before the first relevant command
472 void SCI_METHOD
LexerLaTeX::Fold(Sci_PositionU startPos
, Sci_Position length
, int, IDocument
*pAccess
) {
473 const char *structWords
[7] = {"part", "chapter", "section", "subsection",
474 "subsubsection", "paragraph", "subparagraph"};
475 Accessor
styler(pAccess
, &props
);
476 Sci_PositionU endPos
= startPos
+ length
;
477 Sci_Position curLine
= styler
.GetLine(startPos
);
479 getSave(curLine
- 1, save
);
484 bool needFold
= false;
485 for (i
= static_cast<Sci_Position
>(startPos
); i
< static_cast<Sci_Position
>(endPos
); ++i
) {
486 ch
= styler
.SafeGetCharAt(i
);
487 if (ch
== '\r' || ch
== '\n') break;
488 if (ch
!= '\\' || styler
.StyleAt(i
) != SCE_L_COMMAND
) continue;
489 for (j
= 0; j
< 15 && i
+ 1 < static_cast<Sci_Position
>(endPos
); ++j
, ++i
) {
490 buf
[j
] = styler
.SafeGetCharAt(i
+ 1);
491 if (!latexIsLetter(buf
[j
])) break;
494 if (strcmp(buf
, "begin") == 0) {
495 if (lev
< 0) lev
= latexFoldSaveToInt(save
);
496 ++save
.openBegins
[save
.structLev
];
499 else if (strcmp(buf
, "end") == 0) {
500 while (save
.structLev
> 0 && save
.openBegins
[save
.structLev
] == 0)
502 if (lev
< 0) lev
= latexFoldSaveToInt(save
);
503 if (save
.openBegins
[save
.structLev
] > 0) --save
.openBegins
[save
.structLev
];
506 for (j
= 0; j
< 7; ++j
)
507 if (strcmp(buf
, structWords
[j
]) == 0) break;
508 if (j
>= 7) continue;
509 save
.structLev
= j
; // level before the command
510 for (j
= save
.structLev
+ 1; j
< 8; ++j
) {
511 save
.openBegins
[save
.structLev
] += save
.openBegins
[j
];
512 save
.openBegins
[j
] = 0;
514 if (lev
< 0) lev
= latexFoldSaveToInt(save
);
515 ++save
.structLev
; // level after the command
519 if (lev
< 0) lev
= latexFoldSaveToInt(save
);
520 if (needFold
) lev
|= SC_FOLDLEVELHEADERFLAG
;
521 styler
.SetLevel(curLine
, lev
);
522 setSave(curLine
, save
);
524 startPos
= styler
.LineStart(curLine
);
525 if (static_cast<Sci_Position
>(startPos
) == styler
.Length()) {
526 lev
= latexFoldSaveToInt(save
);
527 styler
.SetLevel(curLine
, lev
);
528 setSave(curLine
, save
);
531 } while (startPos
< endPos
);
535 static const char *const emptyWordListDesc
[] = {
539 LexerModule
lmLatex(SCLEX_LATEX
, LexerLaTeX::LexerFactoryLaTeX
, "latex", emptyWordListDesc
);