1 /******************************************************************
4 * A simple Markdown lexer for scintilla.
6 * Includes highlighting for some extra features from the
7 * Pandoc implementation; strikeout, using '#.' as a default
8 * ordered list item marker, and delimited code blocks.
12 * Standard indented code blocks are not highlighted at all,
13 * as it would conflict with other indentation schemes. Use
14 * delimited code blocks for blanket highlighting of an
15 * entire code block. Embedded HTML is not highlighted either.
16 * Blanket HTML highlighting has issues, because some Markdown
17 * implementations allow Markdown markup inside of the HTML. Also,
18 * there is a following blank line issue that can't be ignored,
19 * explained in the next paragraph. Embedded HTML and code
20 * blocks would be better supported with language specific
21 * highlighting; something Scintilla isn't really architected to support yet.
24 * The highlighting aims to accurately reflect correct syntax,
25 * but a few restrictions are relaxed. Delimited code blocks are
26 * highlighted, even if the line following the code block is not blank.
27 * Requiring a blank line after a block, breaks the highlighting
28 * in certain cases, because of the way Scintilla ends up calling the lexer.
31 * Written by Jon Strait - jstrait@moonloop.net
33 * This source code is released for free distribution under the
34 * terms of the GNU General Public License.
36 *****************************************************************/
48 #include "StyleContext.h"
50 #include "Scintilla.h"
54 using namespace Scintilla
;
// Returns true when ch is a line feed or a carriage return.
57 static inline bool IsNewline(const int ch
) {
58 return (ch
== '\n' || ch
== '\r');
61 // True if can follow ch down to the end with possibly trailing whitespace
//   ch     - character that must repeat from the current position onwards
//   state  - style handed to sc.ChangeState once the run is accepted
//   endPos - end of the range being styled; reaching it counts as a line end
//   sc     - style context; the scan starts at offset 1, so the caller is
//            expected to have matched ch at the current position already
// When the run is accepted the context is left in SCE_MARKDOWN_LINE_BEGIN.
62 static bool FollowToLineEnd(const int ch
, const int state
, const int endPos
, StyleContext
&sc
) {
// Step over every further occurrence of ch.
64 while (sc
.GetRelative(++i
) == ch
)
66 // Skip over whitespace
67 while (IsASpaceOrTab(sc
.GetRelative(i
)) && sc
.currentPos
+ i
< endPos
)
// Accept only if nothing but a newline (or the end of the range) follows.
69 if (IsNewline(sc
.GetRelative(i
)) || sc
.currentPos
+ i
== endPos
) {
71 sc
.ChangeState(state
);
72 sc
.SetState(SCE_MARKDOWN_LINE_BEGIN
);
78 // Set the state on text section from current to length characters,
79 // then set the rest until the newline to default, except for any characters matching token
//   state  - style for the leading section (the callers in this file pass
//            one of the SCE_MARKDOWN_HEADER1..6 styles)
//   length - number of characters in the leading section
//   token  - character that keeps the special style later on the line
//            (the callers in this file pass the # character)
//   sc     - style context to advance
// The loop stops at the first newline or when sc.More() is false, and the
// context is then left in SCE_MARKDOWN_LINE_BEGIN.
80 static void SetStateAndZoom(const int state
, const int length
, const int token
, StyleContext
&sc
) {
84 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
86 while (sc
.More() && !IsNewline(sc
.ch
)) {
// started marks that a run of token characters is already in progress.
88 if (sc
.ch
== token
&& !started
) {
92 else if (sc
.ch
!= token
) {
93 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
98 sc
.SetState(SCE_MARKDOWN_LINE_BEGIN
);
101 // Does the previous line have more than spaces and tabs?
// Walks backwards from the current position with negative offsets: the
// first loop rewinds to the newline that ends the previous line, the second
// loop then inspects the previous line one character at a time.
// NOTE(review): both loops end as soon as i + sc.currentPos is zero, so the
// character at document offset 0 is never tested - confirm this is intended.
102 static bool HasPrevLineContent(StyleContext
&sc
) {
104 // Go back to the previous newline
105 while ((--i
+ sc
.currentPos
) && !IsNewline(sc
.GetRelative(i
)))
// Scan the previous line: a newline ends the scan, anything other than a
// space or tab counts as content.
107 while (--i
+ sc
.currentPos
) {
108 if (IsNewline(sc
.GetRelative(i
)))
110 if (!IsASpaceOrTab(sc
.GetRelative(i
)))
// Decides whether the text starting at the current character forms a
// horizontal rule and sets the style state accordingly.
//   endPos - end of the range being styled; reaching it counts as a line end
//   sc     - style context; the caller in this file invokes this only when
//            sc.ch is a hyphen, asterisk or underscore
// A candidate is accepted only when the scan stops at a newline or at
// endPos, count is at least 3 and the previous line has no content. The
// text is then styled SCE_MARKDOWN_HRULE and the context moves on to
// SCE_MARKDOWN_LINE_BEGIN.
116 static bool IsValidHrule(const int endPos
, StyleContext
&sc
) {
// count starts at 1, presumably to account for the character at sc.ch.
117 int c
, i
= 0, count
= 1;
119 c
= sc
.GetRelative(i
);
122 // hit a terminating character
123 else if (!IsASpaceOrTab(c
) || sc
.currentPos
+ i
== endPos
) {
124 // Are we a valid HRULE
125 if ((IsNewline(c
) || sc
.currentPos
+ i
== endPos
) &&
126 count
>= 3 && !HasPrevLineContent(sc
)) {
127 sc
.SetState(SCE_MARKDOWN_HRULE
);
129 sc
.SetState(SCE_MARKDOWN_LINE_BEGIN
);
// Presumably the rejected case: fall back to the default style.
133 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
140 // Only consume if already valid. Doesn't work for delimiting multiple lines.
//   state   - NOTE(review): not referenced in the visible statements;
//             presumably the style for the consumed span - confirm
//   origPos - position the context is rewound to before consuming
//   endPos  - scan limit; the search runs while sc.currentPos + 1 < endPos
//   token   - closing delimiter as a C string
//   sc      - style context to scan and advance
// A closing token only counts when the character before it is neither a
// backslash nor a space. On a hit sc.currentPos is reset to origPos, the
// context is forwarded to just past the token and then switched to
// SCE_MARKDOWN_DEFAULT.
141 static void ConsumeEnd(const int state
, const int origPos
, const int endPos
,
142 const char *token
, StyleContext
&sc
) {
144 while (sc
.currentPos
+ 1 < endPos
) {
146 if (sc
.Match(token
) && sc
.chPrev
!= '\\' && sc
.chPrev
!= ' ') {
// Remember where the token ends, then rewind so the whole span from
// origPos is walked again by Forward below.
147 targetPos
= sc
.currentPos
+ strlen(token
);
148 sc
.currentPos
= origPos
;
150 sc
.Forward(targetPos
- origPos
);
151 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
// Lexer entry point: styles the Markdown text in the range that starts at
// startPos and spans length characters, through a StyleContext that begins
// in initStyle and writes to styler.
// The body is a state machine with three stages, tested in this order:
//   1. states that are already open (code spans, emphasis, strikeout,
//      delimited code blocks, headers, line begin) look for their end;
//   2. SCE_MARKDOWN_PRECHAR classifies the first text of a line
//      (blockquote, horizontal rule, list items);
//   3. SCE_MARKDOWN_DEFAULT looks for inline openers (links, code, strong,
//      emphasis, strikeout) and for the newline that begins a new line.
// keywordlists is not referenced anywhere in the visible code.
157 static void ColorizeMarkdownDoc(unsigned int startPos
, int length
, int initStyle
,
158 WordList
*keywordlists
[], Accessor
&styler
) {
// endPos is the first position past the range to style.
161 int endPos
= startPos
+ length
;
163 // Don't advance on a new loop iteration and retry at the same position.
164 // Useful in the corner case of having to start at the beginning file position
165 // in the default state.
166 bool freezeCursor
= false;
168 StyleContext
sc(startPos
, length
, initStyle
, styler
);
171 // Skip past escaped characters
177 // A blockquote resets the line semantics
178 if (sc
.state
== SCE_MARKDOWN_BLOCKQUOTE
)
179 sc
.SetState(SCE_MARKDOWN_LINE_BEGIN
);
181 // Conditional state-based actions
182 if (sc
.state
== SCE_MARKDOWN_CODE2
) {
183 if (sc
.Match("``") && sc
.GetRelative(-2) != ' ') {
185 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
188 else if (sc
.state
== SCE_MARKDOWN_CODE
) {
189 if (sc
.ch
== '`' && sc
.chPrev
!= ' ')
190 sc
.ForwardSetState(SCE_MARKDOWN_DEFAULT
);
192 /* De-activated because it gets in the way of other valid indentation
193 * schemes, for example multiple paragraphs inside a list item.
195 else if (sc.state == SCE_MARKDOWN_CODEBK) {
197 if (IsNewline(sc.ch)) {
198 if (sc.chNext != '\t') {
199 for (int c = 1; c < 5; ++c) {
200 if (sc.GetRelative(c) != ' ')
205 else if (sc.atLineStart) {
206 if (sc.ch != '\t' ) {
207 for (int i = 0; i < 4; ++i) {
208 if (sc.GetRelative(i) != ' ')
214 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
218 else if (sc
.state
== SCE_MARKDOWN_STRONG1
) {
219 if (sc
.Match("**") && sc
.chPrev
!= ' ') {
221 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
224 else if (sc
.state
== SCE_MARKDOWN_STRONG2
) {
225 if (sc
.Match("__") && sc
.chPrev
!= ' ') {
227 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
231 else if (sc
.state
== SCE_MARKDOWN_EM1
) {
232 if (sc
.ch
== '*' && sc
.chPrev
!= ' ')
233 sc
.ForwardSetState(SCE_MARKDOWN_DEFAULT
);
235 else if (sc
.state
== SCE_MARKDOWN_EM2
) {
236 if (sc
.ch
== '_' && sc
.chPrev
!= ' ')
237 sc
.ForwardSetState(SCE_MARKDOWN_DEFAULT
);
239 else if (sc
.state
== SCE_MARKDOWN_CODEBK
) {
240 if (sc
.atLineStart
&& sc
.Match("~~~")) {
242 while (!IsNewline(sc
.GetRelative(i
)) && sc
.currentPos
+ i
< endPos
)
245 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
248 else if (sc
.state
== SCE_MARKDOWN_STRIKEOUT
) {
249 if (sc
.Match("~~") && sc
.chPrev
!= ' ') {
251 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
254 else if (sc
.state
== SCE_MARKDOWN_LINE_BEGIN
) {
// Longest header marker first, so that ###### is not taken for a shorter one.
256 if (sc
.Match("######"))
257 SetStateAndZoom(SCE_MARKDOWN_HEADER6
, 6, '#', sc
);
258 else if (sc
.Match("#####"))
259 SetStateAndZoom(SCE_MARKDOWN_HEADER5
, 5, '#', sc
);
260 else if (sc
.Match("####"))
261 SetStateAndZoom(SCE_MARKDOWN_HEADER4
, 4, '#', sc
);
262 else if (sc
.Match("###"))
263 SetStateAndZoom(SCE_MARKDOWN_HEADER3
, 3, '#', sc
);
264 else if (sc
.Match("##"))
265 SetStateAndZoom(SCE_MARKDOWN_HEADER2
, 2, '#', sc
);
266 else if (sc
.Match("#")) {
267 // Catch the special case of an ordered list item marked with #.
268 if (sc
.chNext
== '.' && IsASpaceOrTab(sc
.GetRelative(2))) {
270 sc
.SetState(SCE_MARKDOWN_PRECHAR
);
273 SetStateAndZoom(SCE_MARKDOWN_HEADER1
, 1, '#', sc
);
276 else if (sc
.Match("~~~")) {
277 if (!HasPrevLineContent(sc
))
278 sc
.SetState(SCE_MARKDOWN_CODEBK
);
280 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
282 else if (sc
.ch
== '=') {
283 if (HasPrevLineContent(sc
) && FollowToLineEnd('=', SCE_MARKDOWN_HEADER1
, endPos
, sc
))
286 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
288 else if (sc
.ch
== '-') {
289 if (HasPrevLineContent(sc
) && FollowToLineEnd('-', SCE_MARKDOWN_HEADER2
, endPos
, sc
))
293 sc
.SetState(SCE_MARKDOWN_PRECHAR
);
296 else if (IsNewline(sc
.ch
))
297 sc
.SetState(SCE_MARKDOWN_LINE_BEGIN
);
300 sc
.SetState(SCE_MARKDOWN_PRECHAR
);
304 // The header lasts until the newline
305 else if (sc
.state
== SCE_MARKDOWN_HEADER1
|| sc
.state
== SCE_MARKDOWN_HEADER2
||
306 sc
.state
== SCE_MARKDOWN_HEADER3
|| sc
.state
== SCE_MARKDOWN_HEADER4
||
307 sc
.state
== SCE_MARKDOWN_HEADER5
|| sc
.state
== SCE_MARKDOWN_HEADER6
) {
308 if (IsNewline(sc
.ch
))
309 sc
.SetState(SCE_MARKDOWN_LINE_BEGIN
);
312 // New state only within the initial whitespace
313 if (sc
.state
== SCE_MARKDOWN_PRECHAR
) {
315 if (sc
.ch
== '>' && precharCount
< 5)
316 sc
.SetState(SCE_MARKDOWN_BLOCKQUOTE
);
318 // Begin of code block
319 else if (!HasPrevLineContent(sc) && (sc.chPrev == '\t' || precharCount >= 4))
320 sc.SetState(SCE_MARKDOWN_CODEBK);
322 // HRule - Total of three or more hyphens, asterisks, or underscores
323 // on a line by themselves
324 else if ((sc
.ch
== '-' || sc
.ch
== '*' || sc
.ch
== '_') && IsValidHrule(endPos
, sc
))
327 else if ((sc
.ch
== '-' || sc
.ch
== '*' || sc
.ch
== '+') && IsASpaceOrTab(sc
.chNext
)) {
328 sc
.SetState(SCE_MARKDOWN_ULIST_ITEM
);
329 sc
.ForwardSetState(SCE_MARKDOWN_DEFAULT
);
332 else if (IsADigit(sc
.ch
)) {
334 while (IsADigit(sc
.GetRelative(++digitCount
)))
336 if (sc
.GetRelative(digitCount
) == '.' &&
337 IsASpaceOrTab(sc
.GetRelative(digitCount
+ 1))) {
338 sc
.SetState(SCE_MARKDOWN_OLIST_ITEM
);
339 sc
.Forward(digitCount
+ 1);
340 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
343 // Alternate Ordered list
344 else if (sc
.ch
== '#' && sc
.chNext
== '.' && IsASpaceOrTab(sc
.GetRelative(2))) {
345 sc
.SetState(SCE_MARKDOWN_OLIST_ITEM
);
347 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
349 else if (sc
.ch
!= ' ' || precharCount
> 2)
350 sc
.SetState(SCE_MARKDOWN_DEFAULT
);
355 // New state anywhere in doc
356 if (sc
.state
== SCE_MARKDOWN_DEFAULT
) {
357 int origPos
= sc
.currentPos
;
// A # at the very start of a line is handed back to the line-begin rules.
358 if (sc
.atLineStart
&& sc
.ch
== '#') {
359 sc
.SetState(SCE_MARKDOWN_LINE_BEGIN
);
// Links and images: i scans ahead for the closing ] of the link text and
// then for an inline (url) part or a [reference] part. A delimiter that is
// preceded by a backslash is treated as escaped and skipped.
363 if (sc
.Match("![") || sc
.ch
== '[') {
364 int i
= 0, j
= 0, k
= 0;
365 int len
= endPos
- sc
.currentPos
;
366 while (i
< len
&& (sc
.GetRelative(++i
) != ']' || sc
.GetRelative(i
- 1) == '\\'))
368 if (sc
.GetRelative(i
) == ']') {
370 if (sc
.GetRelative(++i
) == '(') {
371 while (i
< len
&& (sc
.GetRelative(++i
) != ')' || sc
.GetRelative(i
- 1) == '\\'))
373 if (sc
.GetRelative(i
) == ')')
376 else if (sc
.GetRelative(i
) == '[' || sc
.GetRelative(++i
) == '[') {
377 while (i
< len
&& (sc
.GetRelative(++i
) != ']' || sc
.GetRelative(i
- 1) == '\\'))
379 if (sc
.GetRelative(i
) == ']')
383 // At least a link text
385 sc
.SetState(SCE_MARKDOWN_LINK
);
387 // Also has a URL or reference portion
390 sc
.ForwardSetState(SCE_MARKDOWN_DEFAULT
);
393 // Code - also a special case for alternate inside spacing
// NOTE(review): this opener tests offset 3 for a space, whereas the two
// character openers for strong and strikeout below test offset 2 - confirm
// that the difference is intended.
394 if (sc
.Match("``") && sc
.GetRelative(3) != ' ') {
395 sc
.SetState(SCE_MARKDOWN_CODE2
);
398 else if (sc
.ch
== '`' && sc
.chNext
!= ' ') {
399 sc
.SetState(SCE_MARKDOWN_CODE
);
402 else if (sc
.Match("**") && sc
.GetRelative(2) != ' ') {
403 sc
.SetState(SCE_MARKDOWN_STRONG1
);
406 else if (sc
.Match("__") && sc
.GetRelative(2) != ' ') {
407 sc
.SetState(SCE_MARKDOWN_STRONG2
);
411 else if (sc
.ch
== '*' && sc
.chNext
!= ' ')
412 sc
.SetState(SCE_MARKDOWN_EM1
);
413 else if (sc
.ch
== '_' && sc
.chNext
!= ' ')
414 sc
.SetState(SCE_MARKDOWN_EM2
);
416 else if (sc
.Match("~~") && sc
.GetRelative(2) != ' ') {
417 sc
.SetState(SCE_MARKDOWN_STRIKEOUT
);
421 else if (IsNewline(sc
.ch
))
422 sc
.SetState(SCE_MARKDOWN_LINE_BEGIN
);
424 // Advance if not holding back the cursor for this iteration.
427 freezeCursor
= false;
// Defines the lmMarkdown LexerModule, constructed from the SCLEX_MARKDOWN
// constant, the ColorizeMarkdownDoc function and the name string markdown.
// Presumably this object is how the lexer is made known to Scintilla.
432 LexerModule
lmMarkdown(SCLEX_MARKDOWN
, ColorizeMarkdownDoc
, "markdown");