/******************************************************************
 * A simple Markdown lexer for Scintilla.
 *
 * Includes highlighting for some extra features from the
 * Pandoc implementation: strikeout, using '#.' as a default
 * ordered list item marker, and delimited code blocks.
 *
 * Standard indented code blocks are not highlighted at all,
 * as that would conflict with other indentation schemes. Use
 * delimited code blocks for blanket highlighting of an
 * entire code block. Embedded HTML is not highlighted either.
 * Blanket HTML highlighting has issues, because some Markdown
 * implementations allow Markdown markup inside of the HTML. Also,
 * there is a following-blank-line issue that can't be ignored,
 * explained in the next paragraph. Embedded HTML and code
 * blocks would be better supported with language-specific
 * highlighting.
 *
 * The highlighting aims to accurately reflect correct syntax,
 * but a few restrictions are relaxed. Delimited code blocks are
 * highlighted, even if the line following the code block is not blank.
 * Requiring a blank line after a block breaks the highlighting
 * in certain cases, because of the way Scintilla ends up calling
 * the lexer.
 *
30 * Written by Jon Strait - jstrait@moonloop.net
32 * The License.txt file describes the conditions under which this
33 * software may be distributed.
35 *****************************************************************/
44 #include "Scintilla.h"
48 #include "LexAccessor.h"
50 #include "StyleContext.h"
51 #include "CharacterSet.h"
52 #include "LexerModule.h"
55 using namespace Scintilla
;
// True if ch is a line terminator character: line feed or carriage return.
static inline bool IsNewline(const int ch) {
    return (ch == '\n' || ch == '\r');
}

62 // True if can follow ch down to the end with possibly trailing whitespace
63 static bool FollowToLineEnd(const int ch
, const int state
, const unsigned int endPos
, StyleContext
&sc
) {
65 while (sc
.GetRelative(++i
) == ch
)
67 // Skip over whitespace
68 while (IsASpaceOrTab(sc
.GetRelative(i
)) && sc
.currentPos
+ i
< endPos
)
70 if (IsNewline(sc
.GetRelative(i
)) || sc
.currentPos
+ i
== endPos
) {
72 sc
.ChangeState(state
);
73 sc
.SetState(SCE_MARKDOWN_LINE_BEGIN
);
79 // Set the state on text section from current to length characters,
80 // then set the rest until the newline to default, except for any characters matching token
81 static void SetStateAndZoom(const int state
, const int length
, const int token
, StyleContext
&sc
) {
84 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
87 while (sc
.More() && !IsNewline(sc
.ch
)) {
88 if (sc
.ch
== token
&& !started
) {
92 else if (sc
.ch
!= token
) {
93 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
98 sc
.SetState(SCE_MARKDOWN_LINE_BEGIN
);
101 // Does the previous line have more than spaces and tabs?
102 static bool HasPrevLineContent(StyleContext
&sc
) {
104 // Go back to the previous newline
105 while ((--i
+ (int)sc
.currentPos
) >= 0 && !IsNewline(sc
.GetRelative(i
)))
107 while ((--i
+ (int)sc
.currentPos
) >= 0) {
108 if (IsNewline(sc
.GetRelative(i
)))
110 if (!IsASpaceOrTab(sc
.GetRelative(i
)))
116 static bool AtTermStart(StyleContext
&sc
) {
117 return sc
.currentPos
== 0 || isspacechar(sc
.chPrev
);
120 static bool IsValidHrule(const unsigned int endPos
, StyleContext
&sc
) {
125 int c
= sc
.GetRelative(i
);
128 // hit a terminating character
129 else if (!IsASpaceOrTab(c
) || sc
.currentPos
+ i
== endPos
) {
130 // Are we a valid HRULE
131 if ((IsNewline(c
) || sc
.currentPos
+ i
== endPos
) &&
132 count
>= 3 && !HasPrevLineContent(sc
)) {
133 sc
.SetState(SCE_MARKDOWN_HRULE
);
135 sc
.SetState(SCE_MARKDOWN_LINE_BEGIN
);
139 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
146 static void ColorizeMarkdownDoc(unsigned int startPos
, int length
, int initStyle
,
147 WordList
**, Accessor
&styler
) {
148 unsigned int endPos
= startPos
+ length
;
149 int precharCount
= 0;
150 // Don't advance on a new loop iteration and retry at the same position.
151 // Useful in the corner case of having to start at the beginning file position
152 // in the default state.
153 bool freezeCursor
= false;
155 StyleContext
sc(startPos
, length
, initStyle
, styler
);
158 // Skip past escaped characters
164 // A blockquotes resets the line semantics
165 if (sc
.state
== SCE_MARKDOWN_BLOCKQUOTE
)
166 sc
.SetState(SCE_MARKDOWN_LINE_BEGIN
);
168 // Conditional state-based actions
169 if (sc
.state
== SCE_MARKDOWN_CODE2
) {
170 if (sc
.Match("``") && sc
.GetRelative(-2) != ' ') {
172 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
175 else if (sc
.state
== SCE_MARKDOWN_CODE
) {
176 if (sc
.ch
== '`' && sc
.chPrev
!= ' ')
177 sc
.ForwardSetState(SCE_MARKDOWN_DEFAULT
);
179 /* De-activated because it gets in the way of other valid indentation
180 * schemes, for example multiple paragraphs inside a list item.
182 else if (sc.state == SCE_MARKDOWN_CODEBK) {
184 if (IsNewline(sc.ch)) {
185 if (sc.chNext != '\t') {
186 for (int c = 1; c < 5; ++c) {
187 if (sc.GetRelative(c) != ' ')
192 else if (sc.atLineStart) {
193 if (sc.ch != '\t' ) {
194 for (int i = 0; i < 4; ++i) {
195 if (sc.GetRelative(i) != ' ')
201 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
205 else if (sc
.state
== SCE_MARKDOWN_STRONG1
) {
206 if (sc
.Match("**") && sc
.chPrev
!= ' ') {
208 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
211 else if (sc
.state
== SCE_MARKDOWN_STRONG2
) {
212 if (sc
.Match("__") && sc
.chPrev
!= ' ') {
214 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
218 else if (sc
.state
== SCE_MARKDOWN_EM1
) {
219 if (sc
.ch
== '*' && sc
.chPrev
!= ' ')
220 sc
.ForwardSetState(SCE_MARKDOWN_DEFAULT
);
222 else if (sc
.state
== SCE_MARKDOWN_EM2
) {
223 if (sc
.ch
== '_' && sc
.chPrev
!= ' ')
224 sc
.ForwardSetState(SCE_MARKDOWN_DEFAULT
);
226 else if (sc
.state
== SCE_MARKDOWN_CODEBK
) {
227 if (sc
.atLineStart
&& sc
.Match("~~~")) {
229 while (!IsNewline(sc
.GetRelative(i
)) && sc
.currentPos
+ i
< endPos
)
232 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
235 else if (sc
.state
== SCE_MARKDOWN_STRIKEOUT
) {
236 if (sc
.Match("~~") && sc
.chPrev
!= ' ') {
238 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
241 else if (sc
.state
== SCE_MARKDOWN_LINE_BEGIN
) {
243 if (sc
.Match("######"))
244 SetStateAndZoom(SCE_MARKDOWN_HEADER6
, 6, '#', sc
);
245 else if (sc
.Match("#####"))
246 SetStateAndZoom(SCE_MARKDOWN_HEADER5
, 5, '#', sc
);
247 else if (sc
.Match("####"))
248 SetStateAndZoom(SCE_MARKDOWN_HEADER4
, 4, '#', sc
);
249 else if (sc
.Match("###"))
250 SetStateAndZoom(SCE_MARKDOWN_HEADER3
, 3, '#', sc
);
251 else if (sc
.Match("##"))
252 SetStateAndZoom(SCE_MARKDOWN_HEADER2
, 2, '#', sc
);
253 else if (sc
.Match("#")) {
254 // Catch the special case of an unordered list
255 if (sc
.chNext
== '.' && IsASpaceOrTab(sc
.GetRelative(2))) {
257 sc
.SetState(SCE_MARKDOWN_PRECHAR
);
260 SetStateAndZoom(SCE_MARKDOWN_HEADER1
, 1, '#', sc
);
263 else if (sc
.Match("~~~")) {
264 if (!HasPrevLineContent(sc
))
265 sc
.SetState(SCE_MARKDOWN_CODEBK
);
267 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
269 else if (sc
.ch
== '=') {
270 if (HasPrevLineContent(sc
) && FollowToLineEnd('=', SCE_MARKDOWN_HEADER1
, endPos
, sc
))
273 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
275 else if (sc
.ch
== '-') {
276 if (HasPrevLineContent(sc
) && FollowToLineEnd('-', SCE_MARKDOWN_HEADER2
, endPos
, sc
))
280 sc
.SetState(SCE_MARKDOWN_PRECHAR
);
283 else if (IsNewline(sc
.ch
))
284 sc
.SetState(SCE_MARKDOWN_LINE_BEGIN
);
287 sc
.SetState(SCE_MARKDOWN_PRECHAR
);
291 // The header lasts until the newline
292 else if (sc
.state
== SCE_MARKDOWN_HEADER1
|| sc
.state
== SCE_MARKDOWN_HEADER2
||
293 sc
.state
== SCE_MARKDOWN_HEADER3
|| sc
.state
== SCE_MARKDOWN_HEADER4
||
294 sc
.state
== SCE_MARKDOWN_HEADER5
|| sc
.state
== SCE_MARKDOWN_HEADER6
) {
295 if (IsNewline(sc
.ch
))
296 sc
.SetState(SCE_MARKDOWN_LINE_BEGIN
);
299 // New state only within the initial whitespace
300 if (sc
.state
== SCE_MARKDOWN_PRECHAR
) {
302 if (sc
.ch
== '>' && precharCount
< 5)
303 sc
.SetState(SCE_MARKDOWN_BLOCKQUOTE
);
305 // Begin of code block
306 else if (!HasPrevLineContent(sc) && (sc.chPrev == '\t' || precharCount >= 4))
307 sc.SetState(SCE_MARKDOWN_CODEBK);
309 // HRule - Total of three or more hyphens, asterisks, or underscores
310 // on a line by themselves
311 else if ((sc
.ch
== '-' || sc
.ch
== '*' || sc
.ch
== '_') && IsValidHrule(endPos
, sc
))
314 else if ((sc
.ch
== '-' || sc
.ch
== '*' || sc
.ch
== '+') && IsASpaceOrTab(sc
.chNext
)) {
315 sc
.SetState(SCE_MARKDOWN_ULIST_ITEM
);
316 sc
.ForwardSetState(SCE_MARKDOWN_DEFAULT
);
319 else if (IsADigit(sc
.ch
)) {
321 while (IsADigit(sc
.GetRelative(++digitCount
)))
323 if (sc
.GetRelative(digitCount
) == '.' &&
324 IsASpaceOrTab(sc
.GetRelative(digitCount
+ 1))) {
325 sc
.SetState(SCE_MARKDOWN_OLIST_ITEM
);
326 sc
.Forward(digitCount
+ 1);
327 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
330 // Alternate Ordered list
331 else if (sc
.ch
== '#' && sc
.chNext
== '.' && IsASpaceOrTab(sc
.GetRelative(2))) {
332 sc
.SetState(SCE_MARKDOWN_OLIST_ITEM
);
334 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
336 else if (sc
.ch
!= ' ' || precharCount
> 2)
337 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
342 // New state anywhere in doc
343 if (sc
.state
== SCE_MARKDOWN_DEFAULT
) {
344 if (sc
.atLineStart
&& sc
.ch
== '#') {
345 sc
.SetState(SCE_MARKDOWN_LINE_BEGIN
);
349 if (sc
.Match("![") || sc
.ch
== '[') {
350 int i
= 0, j
= 0, k
= 0;
351 int len
= endPos
- sc
.currentPos
;
352 while (i
< len
&& (sc
.GetRelative(++i
) != ']' || sc
.GetRelative(i
- 1) == '\\'))
354 if (sc
.GetRelative(i
) == ']') {
356 if (sc
.GetRelative(++i
) == '(') {
357 while (i
< len
&& (sc
.GetRelative(++i
) != ')' || sc
.GetRelative(i
- 1) == '\\'))
359 if (sc
.GetRelative(i
) == ')')
362 else if (sc
.GetRelative(i
) == '[' || sc
.GetRelative(++i
) == '[') {
363 while (i
< len
&& (sc
.GetRelative(++i
) != ']' || sc
.GetRelative(i
- 1) == '\\'))
365 if (sc
.GetRelative(i
) == ']')
369 // At least a link text
371 sc
.SetState(SCE_MARKDOWN_LINK
);
373 // Also has a URL or reference portion
376 sc
.ForwardSetState(SCE_MARKDOWN_DEFAULT
);
379 // Code - also a special case for alternate inside spacing
380 if (sc
.Match("``") && sc
.GetRelative(3) != ' ' && AtTermStart(sc
)) {
381 sc
.SetState(SCE_MARKDOWN_CODE2
);
384 else if (sc
.ch
== '`' && sc
.chNext
!= ' ' && AtTermStart(sc
)) {
385 sc
.SetState(SCE_MARKDOWN_CODE
);
388 else if (sc
.Match("**") && sc
.GetRelative(2) != ' ' && AtTermStart(sc
)) {
389 sc
.SetState(SCE_MARKDOWN_STRONG1
);
392 else if (sc
.Match("__") && sc
.GetRelative(2) != ' ' && AtTermStart(sc
)) {
393 sc
.SetState(SCE_MARKDOWN_STRONG2
);
397 else if (sc
.ch
== '*' && sc
.chNext
!= ' ' && AtTermStart(sc
)) {
398 sc
.SetState(SCE_MARKDOWN_EM1
);
400 else if (sc
.ch
== '_' && sc
.chNext
!= ' ' && AtTermStart(sc
)) {
401 sc
.SetState(SCE_MARKDOWN_EM2
);
404 else if (sc
.Match("~~") && sc
.GetRelative(2) != ' ' && AtTermStart(sc
)) {
405 sc
.SetState(SCE_MARKDOWN_STRIKEOUT
);
409 else if (IsNewline(sc
.ch
)) {
410 sc
.SetState(SCE_MARKDOWN_LINE_BEGIN
);
413 // Advance if not holding back the cursor for this iteration.
416 freezeCursor
= false;
421 LexerModule
lmMarkdown(SCLEX_MARKDOWN
, ColorizeMarkdownDoc
, "markdown");