scintilla: Update scintilla with changeset 3662:1d1c06df8a2f using gtk+3
[anjuta-extras.git] / plugins / scintilla / scintilla / LexMarkdown.cxx
blob393712033c6626be9489e5baf4dca482ec0ff10f
/******************************************************************
 *  LexMarkdown.cxx
 *
 *  A simple Markdown lexer for scintilla.
 *
 *  Includes highlighting for some extra features from the
 *  Pandoc implementation; strikeout, using '#.' as a default
 *  ordered list item marker, and delimited code blocks.
 *
 *  Limitations:
 *
 *  Standard indented code blocks are not highlighted at all,
 *  as it would conflict with other indentation schemes. Use
 *  delimited code blocks for blanket highlighting of an
 *  entire code block.  Embedded HTML is not highlighted either.
 *  Blanket HTML highlighting has issues, because some Markdown
 *  implementations allow Markdown markup inside of the HTML. Also,
 *  there is a following blank line issue that can't be ignored,
 *  explained in the next paragraph. Embedded HTML and code
 *  blocks would be better supported with language specific
 *  highlighting.
 *
 *  The highlighting aims to accurately reflect correct syntax,
 *  but a few restrictions are relaxed. Delimited code blocks are
 *  highlighted, even if the line following the code block is not blank.
 *  Requiring a blank line after a block breaks the highlighting
 *  in certain cases, because of the way Scintilla ends up calling
 *  the lexer.
 *
 *  Written by Jon Strait - jstrait@moonloop.net
 *
 *  The License.txt file describes the conditions under which this
 *  software may be distributed.
 *
 *****************************************************************/
37 #include <stdlib.h>
38 #include <string.h>
39 #include <stdio.h>
40 #include <stdarg.h>
41 #include <assert.h>
43 #include "ILexer.h"
44 #include "Scintilla.h"
45 #include "SciLexer.h"
47 #include "WordList.h"
48 #include "LexAccessor.h"
49 #include "Accessor.h"
50 #include "StyleContext.h"
51 #include "CharacterSet.h"
52 #include "LexerModule.h"
54 #ifdef SCI_NAMESPACE
55 using namespace Scintilla;
56 #endif
// True when ch is one of the two line terminator characters, '\n' or '\r'.
static inline bool IsNewline(const int ch) {
    return (ch == '\n' || ch == '\r');
}

62 // True if can follow ch down to the end with possibly trailing whitespace
63 static bool FollowToLineEnd(const int ch, const int state, const unsigned int endPos, StyleContext &sc) {
64 unsigned int i = 0;
65 while (sc.GetRelative(++i) == ch)
67 // Skip over whitespace
68 while (IsASpaceOrTab(sc.GetRelative(i)) && sc.currentPos + i < endPos)
69 ++i;
70 if (IsNewline(sc.GetRelative(i)) || sc.currentPos + i == endPos) {
71 sc.Forward(i);
72 sc.ChangeState(state);
73 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
74 return true;
76 else return false;
79 // Set the state on text section from current to length characters,
80 // then set the rest until the newline to default, except for any characters matching token
81 static void SetStateAndZoom(const int state, const int length, const int token, StyleContext &sc) {
82 sc.SetState(state);
83 sc.Forward(length);
84 sc.SetState(SCE_MARKDOWN_DEFAULT);
85 sc.Forward();
86 bool started = false;
87 while (sc.More() && !IsNewline(sc.ch)) {
88 if (sc.ch == token && !started) {
89 sc.SetState(state);
90 started = true;
92 else if (sc.ch != token) {
93 sc.SetState(SCE_MARKDOWN_DEFAULT);
94 started = false;
96 sc.Forward();
98 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
101 // Does the previous line have more than spaces and tabs?
102 static bool HasPrevLineContent(StyleContext &sc) {
103 int i = 0;
104 // Go back to the previous newline
105 while ((--i + (int)sc.currentPos) >= 0 && !IsNewline(sc.GetRelative(i)))
107 while ((--i + (int)sc.currentPos) >= 0) {
108 if (IsNewline(sc.GetRelative(i)))
109 break;
110 if (!IsASpaceOrTab(sc.GetRelative(i)))
111 return true;
113 return false;
116 static bool IsValidHrule(const unsigned int endPos, StyleContext &sc) {
117 int c, count = 1;
118 unsigned int i = 0;
119 while (++i) {
120 c = sc.GetRelative(i);
121 if (c == sc.ch)
122 ++count;
123 // hit a terminating character
124 else if (!IsASpaceOrTab(c) || sc.currentPos + i == endPos) {
125 // Are we a valid HRULE
126 if ((IsNewline(c) || sc.currentPos + i == endPos) &&
127 count >= 3 && !HasPrevLineContent(sc)) {
128 sc.SetState(SCE_MARKDOWN_HRULE);
129 sc.Forward(i);
130 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
131 return true;
133 else {
134 sc.SetState(SCE_MARKDOWN_DEFAULT);
135 return false;
139 return false;
142 static void ColorizeMarkdownDoc(unsigned int startPos, int length, int initStyle,
143 WordList **, Accessor &styler) {
144 unsigned int endPos = startPos + length;
145 int precharCount = 0;
146 // Don't advance on a new loop iteration and retry at the same position.
147 // Useful in the corner case of having to start at the beginning file position
148 // in the default state.
149 bool freezeCursor = false;
151 StyleContext sc(startPos, length, initStyle, styler);
153 while (sc.More()) {
154 // Skip past escaped characters
155 if (sc.ch == '\\') {
156 sc.Forward();
157 continue;
160 // A blockquotes resets the line semantics
161 if (sc.state == SCE_MARKDOWN_BLOCKQUOTE)
162 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
164 // Conditional state-based actions
165 if (sc.state == SCE_MARKDOWN_CODE2) {
166 if (sc.Match("``") && sc.GetRelative(-2) != ' ') {
167 sc.Forward(2);
168 sc.SetState(SCE_MARKDOWN_DEFAULT);
171 else if (sc.state == SCE_MARKDOWN_CODE) {
172 if (sc.ch == '`' && sc.chPrev != ' ')
173 sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
175 /* De-activated because it gets in the way of other valid indentation
176 * schemes, for example multiple paragraphs inside a list item.
177 // Code block
178 else if (sc.state == SCE_MARKDOWN_CODEBK) {
179 bool d = true;
180 if (IsNewline(sc.ch)) {
181 if (sc.chNext != '\t') {
182 for (int c = 1; c < 5; ++c) {
183 if (sc.GetRelative(c) != ' ')
184 d = false;
188 else if (sc.atLineStart) {
189 if (sc.ch != '\t' ) {
190 for (int i = 0; i < 4; ++i) {
191 if (sc.GetRelative(i) != ' ')
192 d = false;
196 if (!d)
197 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
200 // Strong
201 else if (sc.state == SCE_MARKDOWN_STRONG1) {
202 if (sc.Match("**") && sc.chPrev != ' ') {
203 sc.Forward(2);
204 sc.SetState(SCE_MARKDOWN_DEFAULT);
207 else if (sc.state == SCE_MARKDOWN_STRONG2) {
208 if (sc.Match("__") && sc.chPrev != ' ') {
209 sc.Forward(2);
210 sc.SetState(SCE_MARKDOWN_DEFAULT);
213 // Emphasis
214 else if (sc.state == SCE_MARKDOWN_EM1) {
215 if (sc.ch == '*' && sc.chPrev != ' ')
216 sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
218 else if (sc.state == SCE_MARKDOWN_EM2) {
219 if (sc.ch == '_' && sc.chPrev != ' ')
220 sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
222 else if (sc.state == SCE_MARKDOWN_CODEBK) {
223 if (sc.atLineStart && sc.Match("~~~")) {
224 int i = 1;
225 while (!IsNewline(sc.GetRelative(i)) && sc.currentPos + i < endPos)
226 i++;
227 sc.Forward(i);
228 sc.SetState(SCE_MARKDOWN_DEFAULT);
231 else if (sc.state == SCE_MARKDOWN_STRIKEOUT) {
232 if (sc.Match("~~") && sc.chPrev != ' ') {
233 sc.Forward(2);
234 sc.SetState(SCE_MARKDOWN_DEFAULT);
237 else if (sc.state == SCE_MARKDOWN_LINE_BEGIN) {
238 // Header
239 if (sc.Match("######"))
240 SetStateAndZoom(SCE_MARKDOWN_HEADER6, 6, '#', sc);
241 else if (sc.Match("#####"))
242 SetStateAndZoom(SCE_MARKDOWN_HEADER5, 5, '#', sc);
243 else if (sc.Match("####"))
244 SetStateAndZoom(SCE_MARKDOWN_HEADER4, 4, '#', sc);
245 else if (sc.Match("###"))
246 SetStateAndZoom(SCE_MARKDOWN_HEADER3, 3, '#', sc);
247 else if (sc.Match("##"))
248 SetStateAndZoom(SCE_MARKDOWN_HEADER2, 2, '#', sc);
249 else if (sc.Match("#")) {
250 // Catch the special case of an unordered list
251 if (sc.chNext == '.' && IsASpaceOrTab(sc.GetRelative(2))) {
252 precharCount = 0;
253 sc.SetState(SCE_MARKDOWN_PRECHAR);
255 else
256 SetStateAndZoom(SCE_MARKDOWN_HEADER1, 1, '#', sc);
258 // Code block
259 else if (sc.Match("~~~")) {
260 if (!HasPrevLineContent(sc))
261 sc.SetState(SCE_MARKDOWN_CODEBK);
262 else
263 sc.SetState(SCE_MARKDOWN_DEFAULT);
265 else if (sc.ch == '=') {
266 if (HasPrevLineContent(sc) && FollowToLineEnd('=', SCE_MARKDOWN_HEADER1, endPos, sc))
268 else
269 sc.SetState(SCE_MARKDOWN_DEFAULT);
271 else if (sc.ch == '-') {
272 if (HasPrevLineContent(sc) && FollowToLineEnd('-', SCE_MARKDOWN_HEADER2, endPos, sc))
274 else {
275 precharCount = 0;
276 sc.SetState(SCE_MARKDOWN_PRECHAR);
279 else if (IsNewline(sc.ch))
280 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
281 else {
282 precharCount = 0;
283 sc.SetState(SCE_MARKDOWN_PRECHAR);
287 // The header lasts until the newline
288 else if (sc.state == SCE_MARKDOWN_HEADER1 || sc.state == SCE_MARKDOWN_HEADER2 ||
289 sc.state == SCE_MARKDOWN_HEADER3 || sc.state == SCE_MARKDOWN_HEADER4 ||
290 sc.state == SCE_MARKDOWN_HEADER5 || sc.state == SCE_MARKDOWN_HEADER6) {
291 if (IsNewline(sc.ch))
292 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
295 // New state only within the initial whitespace
296 if (sc.state == SCE_MARKDOWN_PRECHAR) {
297 // Blockquote
298 if (sc.ch == '>' && precharCount < 5)
299 sc.SetState(SCE_MARKDOWN_BLOCKQUOTE);
301 // Begin of code block
302 else if (!HasPrevLineContent(sc) && (sc.chPrev == '\t' || precharCount >= 4))
303 sc.SetState(SCE_MARKDOWN_CODEBK);
305 // HRule - Total of three or more hyphens, asterisks, or underscores
306 // on a line by themselves
307 else if ((sc.ch == '-' || sc.ch == '*' || sc.ch == '_') && IsValidHrule(endPos, sc))
309 // Unordered list
310 else if ((sc.ch == '-' || sc.ch == '*' || sc.ch == '+') && IsASpaceOrTab(sc.chNext)) {
311 sc.SetState(SCE_MARKDOWN_ULIST_ITEM);
312 sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
314 // Ordered list
315 else if (IsADigit(sc.ch)) {
316 int digitCount = 0;
317 while (IsADigit(sc.GetRelative(++digitCount)))
319 if (sc.GetRelative(digitCount) == '.' &&
320 IsASpaceOrTab(sc.GetRelative(digitCount + 1))) {
321 sc.SetState(SCE_MARKDOWN_OLIST_ITEM);
322 sc.Forward(digitCount + 1);
323 sc.SetState(SCE_MARKDOWN_DEFAULT);
326 // Alternate Ordered list
327 else if (sc.ch == '#' && sc.chNext == '.' && IsASpaceOrTab(sc.GetRelative(2))) {
328 sc.SetState(SCE_MARKDOWN_OLIST_ITEM);
329 sc.Forward(2);
330 sc.SetState(SCE_MARKDOWN_DEFAULT);
332 else if (sc.ch != ' ' || precharCount > 2)
333 sc.SetState(SCE_MARKDOWN_DEFAULT);
334 else
335 ++precharCount;
338 // New state anywhere in doc
339 if (sc.state == SCE_MARKDOWN_DEFAULT) {
340 if (sc.atLineStart && sc.ch == '#') {
341 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
342 freezeCursor = true;
344 // Links and Images
345 if (sc.Match("![") || sc.ch == '[') {
346 int i = 0, j = 0, k = 0;
347 int len = endPos - sc.currentPos;
348 while (i < len && (sc.GetRelative(++i) != ']' || sc.GetRelative(i - 1) == '\\'))
350 if (sc.GetRelative(i) == ']') {
351 j = i;
352 if (sc.GetRelative(++i) == '(') {
353 while (i < len && (sc.GetRelative(++i) != ')' || sc.GetRelative(i - 1) == '\\'))
355 if (sc.GetRelative(i) == ')')
356 k = i;
358 else if (sc.GetRelative(i) == '[' || sc.GetRelative(++i) == '[') {
359 while (i < len && (sc.GetRelative(++i) != ']' || sc.GetRelative(i - 1) == '\\'))
361 if (sc.GetRelative(i) == ']')
362 k = i;
365 // At least a link text
366 if (j) {
367 sc.SetState(SCE_MARKDOWN_LINK);
368 sc.Forward(j);
369 // Also has a URL or reference portion
370 if (k)
371 sc.Forward(k - j);
372 sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
375 // Code - also a special case for alternate inside spacing
376 if (sc.Match("``") && sc.GetRelative(3) != ' ') {
377 sc.SetState(SCE_MARKDOWN_CODE2);
378 sc.Forward();
380 else if (sc.ch == '`' && sc.chNext != ' ') {
381 sc.SetState(SCE_MARKDOWN_CODE);
383 // Strong
384 else if (sc.Match("**") && sc.GetRelative(2) != ' ') {
385 sc.SetState(SCE_MARKDOWN_STRONG1);
386 sc.Forward();
388 else if (sc.Match("__") && sc.GetRelative(2) != ' ') {
389 sc.SetState(SCE_MARKDOWN_STRONG2);
390 sc.Forward();
392 // Emphasis
393 else if (sc.ch == '*' && sc.chNext != ' ')
394 sc.SetState(SCE_MARKDOWN_EM1);
395 else if (sc.ch == '_' && sc.chNext != ' ')
396 sc.SetState(SCE_MARKDOWN_EM2);
397 // Strikeout
398 else if (sc.Match("~~") && sc.GetRelative(2) != ' ') {
399 sc.SetState(SCE_MARKDOWN_STRIKEOUT);
400 sc.Forward();
402 // Beginning of line
403 else if (IsNewline(sc.ch))
404 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
406 // Advance if not holding back the cursor for this iteration.
407 if (!freezeCursor)
408 sc.Forward();
409 freezeCursor = false;
411 sc.Complete();
414 LexerModule lmMarkdown(SCLEX_MARKDOWN, ColorizeMarkdownDoc, "markdown");