/******************************************************************
 *  A simple Markdown lexer for scintilla.
 *
 *  Includes highlighting for some extra features from the
 *  Pandoc implementation; strikeout, using '#.' as a default
 *  ordered list item marker, and delimited code blocks.
 *
 *  Standard indented code blocks are not highlighted at all,
 *  as it would conflict with other indentation schemes. Use
 *  delimited code blocks for blanket highlighting of an
 *  entire code block.  Embedded HTML is not highlighted either.
 *  Blanket HTML highlighting has issues, because some Markdown
 *  implementations allow Markdown markup inside of the HTML. Also,
 *  there is a following blank line issue that can't be ignored,
 *  explained in the next paragraph. Embedded HTML and code
 *  blocks would be better supported with language specific
 *  highlighting.
 *
 *  The highlighting aims to accurately reflect correct syntax,
 *  but a few restrictions are relaxed. Delimited code blocks are
 *  highlighted, even if the line following the code block is not blank.
 *  Requiring a blank line after a block breaks the highlighting
 *  in certain cases, because of the way Scintilla ends up calling
 *  the lexer.
 *
 *  Written by Jon Strait - jstrait@moonloop.net
 *
 *  The License.txt file describes the conditions under which this
 *  software may be distributed.
 *
 *****************************************************************/
44 #include "Scintilla.h"
48 #include "LexAccessor.h"
50 #include "StyleContext.h"
51 #include "CharacterSet.h"
52 #include "LexerModule.h"
55 using namespace Scintilla
;
// Returns true when ch is a line-break character: line feed ('\n') or
// carriage return ('\r').
static inline bool IsNewline(const int ch) {
    return (ch == '\n' || ch == '\r');
}
62 // True if can follow ch down to the end with possibly trailing whitespace
63 static bool FollowToLineEnd(const int ch
, const int state
, const unsigned int endPos
, StyleContext
&sc
) {
65 while (sc
.GetRelative(++i
) == ch
)
67 // Skip over whitespace
68 while (IsASpaceOrTab(sc
.GetRelative(i
)) && sc
.currentPos
+ i
< endPos
)
70 if (IsNewline(sc
.GetRelative(i
)) || sc
.currentPos
+ i
== endPos
) {
72 sc
.ChangeState(state
);
73 sc
.SetState(SCE_MARKDOWN_LINE_BEGIN
);
79 // Set the state on text section from current to length characters,
80 // then set the rest until the newline to default, except for any characters matching token
81 static void SetStateAndZoom(const int state
, const int length
, const int token
, StyleContext
&sc
) {
84 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
87 while (sc
.More() && !IsNewline(sc
.ch
)) {
88 if (sc
.ch
== token
&& !started
) {
92 else if (sc
.ch
!= token
) {
93 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
98 sc
.SetState(SCE_MARKDOWN_LINE_BEGIN
);
101 // Does the previous line have more than spaces and tabs?
102 static bool HasPrevLineContent(StyleContext
&sc
) {
104 // Go back to the previous newline
105 while ((--i
+ (int)sc
.currentPos
) >= 0 && !IsNewline(sc
.GetRelative(i
)))
107 while ((--i
+ (int)sc
.currentPos
) >= 0) {
108 if (IsNewline(sc
.GetRelative(i
)))
110 if (!IsASpaceOrTab(sc
.GetRelative(i
)))
116 static bool IsValidHrule(const unsigned int endPos
, StyleContext
&sc
) {
120 c
= sc
.GetRelative(i
);
123 // hit a terminating character
124 else if (!IsASpaceOrTab(c
) || sc
.currentPos
+ i
== endPos
) {
125 // Are we a valid HRULE
126 if ((IsNewline(c
) || sc
.currentPos
+ i
== endPos
) &&
127 count
>= 3 && !HasPrevLineContent(sc
)) {
128 sc
.SetState(SCE_MARKDOWN_HRULE
);
130 sc
.SetState(SCE_MARKDOWN_LINE_BEGIN
);
134 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
142 static void ColorizeMarkdownDoc(unsigned int startPos
, int length
, int initStyle
,
143 WordList
**, Accessor
&styler
) {
144 unsigned int endPos
= startPos
+ length
;
145 int precharCount
= 0;
146 // Don't advance on a new loop iteration and retry at the same position.
147 // Useful in the corner case of having to start at the beginning file position
148 // in the default state.
149 bool freezeCursor
= false;
151 StyleContext
sc(startPos
, length
, initStyle
, styler
);
154 // Skip past escaped characters
160 // A blockquotes resets the line semantics
161 if (sc
.state
== SCE_MARKDOWN_BLOCKQUOTE
)
162 sc
.SetState(SCE_MARKDOWN_LINE_BEGIN
);
164 // Conditional state-based actions
165 if (sc
.state
== SCE_MARKDOWN_CODE2
) {
166 if (sc
.Match("``") && sc
.GetRelative(-2) != ' ') {
168 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
171 else if (sc
.state
== SCE_MARKDOWN_CODE
) {
172 if (sc
.ch
== '`' && sc
.chPrev
!= ' ')
173 sc
.ForwardSetState(SCE_MARKDOWN_DEFAULT
);
175 /* De-activated because it gets in the way of other valid indentation
176 * schemes, for example multiple paragraphs inside a list item.
178 else if (sc.state == SCE_MARKDOWN_CODEBK) {
180 if (IsNewline(sc.ch)) {
181 if (sc.chNext != '\t') {
182 for (int c = 1; c < 5; ++c) {
183 if (sc.GetRelative(c) != ' ')
188 else if (sc.atLineStart) {
189 if (sc.ch != '\t' ) {
190 for (int i = 0; i < 4; ++i) {
191 if (sc.GetRelative(i) != ' ')
197 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
201 else if (sc
.state
== SCE_MARKDOWN_STRONG1
) {
202 if (sc
.Match("**") && sc
.chPrev
!= ' ') {
204 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
207 else if (sc
.state
== SCE_MARKDOWN_STRONG2
) {
208 if (sc
.Match("__") && sc
.chPrev
!= ' ') {
210 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
214 else if (sc
.state
== SCE_MARKDOWN_EM1
) {
215 if (sc
.ch
== '*' && sc
.chPrev
!= ' ')
216 sc
.ForwardSetState(SCE_MARKDOWN_DEFAULT
);
218 else if (sc
.state
== SCE_MARKDOWN_EM2
) {
219 if (sc
.ch
== '_' && sc
.chPrev
!= ' ')
220 sc
.ForwardSetState(SCE_MARKDOWN_DEFAULT
);
222 else if (sc
.state
== SCE_MARKDOWN_CODEBK
) {
223 if (sc
.atLineStart
&& sc
.Match("~~~")) {
225 while (!IsNewline(sc
.GetRelative(i
)) && sc
.currentPos
+ i
< endPos
)
228 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
231 else if (sc
.state
== SCE_MARKDOWN_STRIKEOUT
) {
232 if (sc
.Match("~~") && sc
.chPrev
!= ' ') {
234 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
237 else if (sc
.state
== SCE_MARKDOWN_LINE_BEGIN
) {
239 if (sc
.Match("######"))
240 SetStateAndZoom(SCE_MARKDOWN_HEADER6
, 6, '#', sc
);
241 else if (sc
.Match("#####"))
242 SetStateAndZoom(SCE_MARKDOWN_HEADER5
, 5, '#', sc
);
243 else if (sc
.Match("####"))
244 SetStateAndZoom(SCE_MARKDOWN_HEADER4
, 4, '#', sc
);
245 else if (sc
.Match("###"))
246 SetStateAndZoom(SCE_MARKDOWN_HEADER3
, 3, '#', sc
);
247 else if (sc
.Match("##"))
248 SetStateAndZoom(SCE_MARKDOWN_HEADER2
, 2, '#', sc
);
249 else if (sc
.Match("#")) {
250 // Catch the special case of an unordered list
251 if (sc
.chNext
== '.' && IsASpaceOrTab(sc
.GetRelative(2))) {
253 sc
.SetState(SCE_MARKDOWN_PRECHAR
);
256 SetStateAndZoom(SCE_MARKDOWN_HEADER1
, 1, '#', sc
);
259 else if (sc
.Match("~~~")) {
260 if (!HasPrevLineContent(sc
))
261 sc
.SetState(SCE_MARKDOWN_CODEBK
);
263 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
265 else if (sc
.ch
== '=') {
266 if (HasPrevLineContent(sc
) && FollowToLineEnd('=', SCE_MARKDOWN_HEADER1
, endPos
, sc
))
269 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
271 else if (sc
.ch
== '-') {
272 if (HasPrevLineContent(sc
) && FollowToLineEnd('-', SCE_MARKDOWN_HEADER2
, endPos
, sc
))
276 sc
.SetState(SCE_MARKDOWN_PRECHAR
);
279 else if (IsNewline(sc
.ch
))
280 sc
.SetState(SCE_MARKDOWN_LINE_BEGIN
);
283 sc
.SetState(SCE_MARKDOWN_PRECHAR
);
287 // The header lasts until the newline
288 else if (sc
.state
== SCE_MARKDOWN_HEADER1
|| sc
.state
== SCE_MARKDOWN_HEADER2
||
289 sc
.state
== SCE_MARKDOWN_HEADER3
|| sc
.state
== SCE_MARKDOWN_HEADER4
||
290 sc
.state
== SCE_MARKDOWN_HEADER5
|| sc
.state
== SCE_MARKDOWN_HEADER6
) {
291 if (IsNewline(sc
.ch
))
292 sc
.SetState(SCE_MARKDOWN_LINE_BEGIN
);
295 // New state only within the initial whitespace
296 if (sc
.state
== SCE_MARKDOWN_PRECHAR
) {
298 if (sc
.ch
== '>' && precharCount
< 5)
299 sc
.SetState(SCE_MARKDOWN_BLOCKQUOTE
);
301 // Begin of code block
302 else if (!HasPrevLineContent(sc) && (sc.chPrev == '\t' || precharCount >= 4))
303 sc.SetState(SCE_MARKDOWN_CODEBK);
305 // HRule - Total of three or more hyphens, asterisks, or underscores
306 // on a line by themselves
307 else if ((sc
.ch
== '-' || sc
.ch
== '*' || sc
.ch
== '_') && IsValidHrule(endPos
, sc
))
310 else if ((sc
.ch
== '-' || sc
.ch
== '*' || sc
.ch
== '+') && IsASpaceOrTab(sc
.chNext
)) {
311 sc
.SetState(SCE_MARKDOWN_ULIST_ITEM
);
312 sc
.ForwardSetState(SCE_MARKDOWN_DEFAULT
);
315 else if (IsADigit(sc
.ch
)) {
317 while (IsADigit(sc
.GetRelative(++digitCount
)))
319 if (sc
.GetRelative(digitCount
) == '.' &&
320 IsASpaceOrTab(sc
.GetRelative(digitCount
+ 1))) {
321 sc
.SetState(SCE_MARKDOWN_OLIST_ITEM
);
322 sc
.Forward(digitCount
+ 1);
323 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
326 // Alternate Ordered list
327 else if (sc
.ch
== '#' && sc
.chNext
== '.' && IsASpaceOrTab(sc
.GetRelative(2))) {
328 sc
.SetState(SCE_MARKDOWN_OLIST_ITEM
);
330 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
332 else if (sc
.ch
!= ' ' || precharCount
> 2)
333 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
338 // New state anywhere in doc
339 if (sc
.state
== SCE_MARKDOWN_DEFAULT
) {
340 if (sc
.atLineStart
&& sc
.ch
== '#') {
341 sc
.SetState(SCE_MARKDOWN_LINE_BEGIN
);
345 if (sc
.Match("![") || sc
.ch
== '[') {
346 int i
= 0, j
= 0, k
= 0;
347 int len
= endPos
- sc
.currentPos
;
348 while (i
< len
&& (sc
.GetRelative(++i
) != ']' || sc
.GetRelative(i
- 1) == '\\'))
350 if (sc
.GetRelative(i
) == ']') {
352 if (sc
.GetRelative(++i
) == '(') {
353 while (i
< len
&& (sc
.GetRelative(++i
) != ')' || sc
.GetRelative(i
- 1) == '\\'))
355 if (sc
.GetRelative(i
) == ')')
358 else if (sc
.GetRelative(i
) == '[' || sc
.GetRelative(++i
) == '[') {
359 while (i
< len
&& (sc
.GetRelative(++i
) != ']' || sc
.GetRelative(i
- 1) == '\\'))
361 if (sc
.GetRelative(i
) == ']')
365 // At least a link text
367 sc
.SetState(SCE_MARKDOWN_LINK
);
369 // Also has a URL or reference portion
372 sc
.ForwardSetState(SCE_MARKDOWN_DEFAULT
);
375 // Code - also a special case for alternate inside spacing
376 if (sc
.Match("``") && sc
.GetRelative(3) != ' ') {
377 sc
.SetState(SCE_MARKDOWN_CODE2
);
380 else if (sc
.ch
== '`' && sc
.chNext
!= ' ') {
381 sc
.SetState(SCE_MARKDOWN_CODE
);
384 else if (sc
.Match("**") && sc
.GetRelative(2) != ' ') {
385 sc
.SetState(SCE_MARKDOWN_STRONG1
);
388 else if (sc
.Match("__") && sc
.GetRelative(2) != ' ') {
389 sc
.SetState(SCE_MARKDOWN_STRONG2
);
393 else if (sc
.ch
== '*' && sc
.chNext
!= ' ')
394 sc
.SetState(SCE_MARKDOWN_EM1
);
395 else if (sc
.ch
== '_' && sc
.chNext
!= ' ')
396 sc
.SetState(SCE_MARKDOWN_EM2
);
398 else if (sc
.Match("~~") && sc
.GetRelative(2) != ' ') {
399 sc
.SetState(SCE_MARKDOWN_STRIKEOUT
);
403 else if (IsNewline(sc
.ch
))
404 sc
.SetState(SCE_MARKDOWN_LINE_BEGIN
);
406 // Advance if not holding back the cursor for this iteration.
409 freezeCursor
= false;
414 LexerModule
lmMarkdown(SCLEX_MARKDOWN
, ColorizeMarkdownDoc
, "markdown");