s/geany_find_text/search_find_text
[geany-mirror.git] / scintilla / LexMarkdown.cxx
blob34f79ac2256e4c1a43e9addb6ade5521009163a4
/******************************************************************
 *  LexMarkdown.cxx
 *
 *  A simple Markdown lexer for scintilla.
 *
 *  Includes highlighting for some extra features from the
 *  Pandoc implementation; strikeout, using '#.' as a default
 *  ordered list item marker, and delimited code blocks.
 *
 *  Limitations:
 *
 *  Standard indented code blocks are not highlighted at all,
 *  as it would conflict with other indentation schemes. Use
 *  delimited code blocks for blanket highlighting of an
 *  entire code block.  Embedded HTML is not highlighted either.
 *  Blanket HTML highlighting has issues, because some Markdown
 *  implementations allow Markdown markup inside of the HTML. Also,
 *  there is a following-blank-line issue that can't be ignored,
 *  explained in the next paragraph. Embedded HTML and code
 *  blocks would be better supported with language specific
 *  highlighting; something Scintilla isn't really architected
 *  to support yet.
 *
 *  The highlighting aims to accurately reflect correct syntax,
 *  but a few restrictions are relaxed. Delimited code blocks are
 *  highlighted, even if the line following the code block is not blank.
 *  Requiring a blank line after a block breaks the highlighting
 *  in certain cases, because of the way Scintilla ends up calling
 *  the lexer.
 *
 *  Written by Jon Strait - jstrait@moonloop.net
 *
 *  This source code is released for free distribution under the
 *  terms of the GNU General Public License.
 *
 *****************************************************************/
38 #include <stdlib.h>
39 #include <string.h>
40 #include <ctype.h>
41 #include <stdio.h>
42 #include <stdarg.h>
44 #include "Platform.h"
46 #include "PropSet.h"
47 #include "Accessor.h"
48 #include "StyleContext.h"
49 #include "KeyWords.h"
50 #include "Scintilla.h"
51 #include "SciLexer.h"
53 #ifdef SCI_NAMESPACE
54 using namespace Scintilla;
55 #endif
// True if ch is one of the two line terminator characters, LF or CR.
static inline bool IsNewline(const int ch) {
    return ch == '\n' || ch == '\r';
}
61 // True if can follow ch down to the end with possibly trailing whitespace
62 static bool FollowToLineEnd(const int ch, const int state, const int endPos, StyleContext &sc) {
63 int i = 0;
64 while (sc.GetRelative(++i) == ch)
66 // Skip over whitespace
67 while (IsASpaceOrTab(sc.GetRelative(i)) && sc.currentPos + i < endPos)
68 ++i;
69 if (IsNewline(sc.GetRelative(i)) || sc.currentPos + i == endPos) {
70 sc.Forward(i);
71 sc.ChangeState(state);
72 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
73 return true;
75 else return false;
78 // Set the state on text section from current to length characters,
79 // then set the rest until the newline to default, except for any characters matching token
80 static void SetStateAndZoom(const int state, const int length, const int token, StyleContext &sc) {
81 int i = 0;
82 sc.SetState(state);
83 sc.Forward(length);
84 sc.SetState(SCE_MARKDOWN_DEFAULT);
85 sc.Forward();
86 while (sc.More() && !IsNewline(sc.ch)) {
87 bool started = false;
88 if (sc.ch == token && !started) {
89 sc.SetState(state);
90 started = true;
92 else if (sc.ch != token) {
93 sc.SetState(SCE_MARKDOWN_DEFAULT);
94 started = false;
96 sc.Forward();
98 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
101 // Does the previous line have more than spaces and tabs?
102 static bool HasPrevLineContent(StyleContext &sc) {
103 int i = 0;
104 // Go back to the previous newline
105 while ((--i + sc.currentPos) && !IsNewline(sc.GetRelative(i)))
107 while (--i + sc.currentPos) {
108 if (IsNewline(sc.GetRelative(i)))
109 break;
110 if (!IsASpaceOrTab(sc.GetRelative(i)))
111 return true;
113 return false;
116 static bool IsValidHrule(const int endPos, StyleContext &sc) {
117 int c, i = 0, count = 1;
118 while (++i) {
119 c = sc.GetRelative(i);
120 if (c == sc.ch)
121 ++count;
122 // hit a terminating character
123 else if (!IsASpaceOrTab(c) || sc.currentPos + i == endPos) {
124 // Are we a valid HRULE
125 if ((IsNewline(c) || sc.currentPos + i == endPos) &&
126 count >= 3 && !HasPrevLineContent(sc)) {
127 sc.SetState(SCE_MARKDOWN_HRULE);
128 sc.Forward(i);
129 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
130 return true;
132 else {
133 sc.SetState(SCE_MARKDOWN_DEFAULT);
134 return false;
140 // Only consume if already valid. Doesn't work for delimiting multiple lines.
141 static void ConsumeEnd(const int state, const int origPos, const int endPos,
142 const char *token, StyleContext &sc) {
143 int targetPos;
144 while (sc.currentPos + 1 < endPos) {
145 sc.Forward();
146 if (sc.Match(token) && sc.chPrev != '\\' && sc.chPrev != ' ') {
147 targetPos = sc.currentPos + strlen(token);
148 sc.currentPos = origPos;
149 sc.SetState(state);
150 sc.Forward(targetPos - origPos);
151 sc.SetState(SCE_MARKDOWN_DEFAULT);
152 break;
157 static void ColorizeMarkdownDoc(unsigned int startPos, int length, int initStyle,
158 WordList *keywordlists[], Accessor &styler) {
160 int digitCount = 0;
161 int endPos = startPos + length;
162 int precharCount;
163 // Don't advance on a new loop iteration and retry at the same position.
164 // Useful in the corner case of having to start at the beginning file position
165 // in the default state.
166 bool freezeCursor = false;
168 StyleContext sc(startPos, length, initStyle, styler);
170 while (sc.More()) {
171 // Skip past escaped characters
172 if (sc.ch == '\\') {
173 sc.Forward();
174 continue;
177 // A blockquotes resets the line semantics
178 if (sc.state == SCE_MARKDOWN_BLOCKQUOTE)
179 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
181 // Conditional state-based actions
182 if (sc.state == SCE_MARKDOWN_CODE2) {
183 if (sc.Match("``") && sc.GetRelative(-2) != ' ') {
184 sc.Forward(2);
185 sc.SetState(SCE_MARKDOWN_DEFAULT);
188 else if (sc.state == SCE_MARKDOWN_CODE) {
189 if (sc.ch == '`' && sc.chPrev != ' ')
190 sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
192 /* De-activated because it gets in the way of other valid indentation
193 * schemes, for example multiple paragraphs inside a list item.
194 // Code block
195 else if (sc.state == SCE_MARKDOWN_CODEBK) {
196 bool d = true;
197 if (IsNewline(sc.ch)) {
198 if (sc.chNext != '\t') {
199 for (int c = 1; c < 5; ++c) {
200 if (sc.GetRelative(c) != ' ')
201 d = false;
205 else if (sc.atLineStart) {
206 if (sc.ch != '\t' ) {
207 for (int i = 0; i < 4; ++i) {
208 if (sc.GetRelative(i) != ' ')
209 d = false;
213 if (!d)
214 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
217 // Strong
218 else if (sc.state == SCE_MARKDOWN_STRONG1) {
219 if (sc.Match("**") && sc.chPrev != ' ') {
220 sc.Forward(2);
221 sc.SetState(SCE_MARKDOWN_DEFAULT);
224 else if (sc.state == SCE_MARKDOWN_STRONG2) {
225 if (sc.Match("__") && sc.chPrev != ' ') {
226 sc.Forward(2);
227 sc.SetState(SCE_MARKDOWN_DEFAULT);
230 // Emphasis
231 else if (sc.state == SCE_MARKDOWN_EM1) {
232 if (sc.ch == '*' && sc.chPrev != ' ')
233 sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
235 else if (sc.state == SCE_MARKDOWN_EM2) {
236 if (sc.ch == '_' && sc.chPrev != ' ')
237 sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
239 else if (sc.state == SCE_MARKDOWN_CODEBK) {
240 if (sc.atLineStart && sc.Match("~~~")) {
241 int i = 1;
242 while (!IsNewline(sc.GetRelative(i)) && sc.currentPos + i < endPos)
243 i++;
244 sc.Forward(i);
245 sc.SetState(SCE_MARKDOWN_DEFAULT);
248 else if (sc.state == SCE_MARKDOWN_STRIKEOUT) {
249 if (sc.Match("~~") && sc.chPrev != ' ') {
250 sc.Forward(2);
251 sc.SetState(SCE_MARKDOWN_DEFAULT);
254 else if (sc.state == SCE_MARKDOWN_LINE_BEGIN) {
255 // Header
256 if (sc.Match("######"))
257 SetStateAndZoom(SCE_MARKDOWN_HEADER6, 6, '#', sc);
258 else if (sc.Match("#####"))
259 SetStateAndZoom(SCE_MARKDOWN_HEADER5, 5, '#', sc);
260 else if (sc.Match("####"))
261 SetStateAndZoom(SCE_MARKDOWN_HEADER4, 4, '#', sc);
262 else if (sc.Match("###"))
263 SetStateAndZoom(SCE_MARKDOWN_HEADER3, 3, '#', sc);
264 else if (sc.Match("##"))
265 SetStateAndZoom(SCE_MARKDOWN_HEADER2, 2, '#', sc);
266 else if (sc.Match("#")) {
267 // Catch the special case of an unordered list
268 if (sc.chNext == '.' && IsASpaceOrTab(sc.GetRelative(2))) {
269 precharCount = 0;
270 sc.SetState(SCE_MARKDOWN_PRECHAR);
272 else
273 SetStateAndZoom(SCE_MARKDOWN_HEADER1, 1, '#', sc);
275 // Code block
276 else if (sc.Match("~~~")) {
277 if (!HasPrevLineContent(sc))
278 sc.SetState(SCE_MARKDOWN_CODEBK);
279 else
280 sc.SetState(SCE_MARKDOWN_DEFAULT);
282 else if (sc.ch == '=') {
283 if (HasPrevLineContent(sc) && FollowToLineEnd('=', SCE_MARKDOWN_HEADER1, endPos, sc))
285 else
286 sc.SetState(SCE_MARKDOWN_DEFAULT);
288 else if (sc.ch == '-') {
289 if (HasPrevLineContent(sc) && FollowToLineEnd('-', SCE_MARKDOWN_HEADER2, endPos, sc))
291 else {
292 precharCount = 0;
293 sc.SetState(SCE_MARKDOWN_PRECHAR);
296 else if (IsNewline(sc.ch))
297 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
298 else {
299 precharCount = 0;
300 sc.SetState(SCE_MARKDOWN_PRECHAR);
304 // The header lasts until the newline
305 else if (sc.state == SCE_MARKDOWN_HEADER1 || sc.state == SCE_MARKDOWN_HEADER2 ||
306 sc.state == SCE_MARKDOWN_HEADER3 || sc.state == SCE_MARKDOWN_HEADER4 ||
307 sc.state == SCE_MARKDOWN_HEADER5 || sc.state == SCE_MARKDOWN_HEADER6) {
308 if (IsNewline(sc.ch))
309 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
312 // New state only within the initial whitespace
313 if (sc.state == SCE_MARKDOWN_PRECHAR) {
314 // Blockquote
315 if (sc.ch == '>' && precharCount < 5)
316 sc.SetState(SCE_MARKDOWN_BLOCKQUOTE);
318 // Begin of code block
319 else if (!HasPrevLineContent(sc) && (sc.chPrev == '\t' || precharCount >= 4))
320 sc.SetState(SCE_MARKDOWN_CODEBK);
322 // HRule - Total of three or more hyphens, asterisks, or underscores
323 // on a line by themselves
324 else if ((sc.ch == '-' || sc.ch == '*' || sc.ch == '_') && IsValidHrule(endPos, sc))
326 // Unordered list
327 else if ((sc.ch == '-' || sc.ch == '*' || sc.ch == '+') && IsASpaceOrTab(sc.chNext)) {
328 sc.SetState(SCE_MARKDOWN_ULIST_ITEM);
329 sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
331 // Ordered list
332 else if (IsADigit(sc.ch)) {
333 digitCount = 0;
334 while (IsADigit(sc.GetRelative(++digitCount)))
336 if (sc.GetRelative(digitCount) == '.' &&
337 IsASpaceOrTab(sc.GetRelative(digitCount + 1))) {
338 sc.SetState(SCE_MARKDOWN_OLIST_ITEM);
339 sc.Forward(digitCount + 1);
340 sc.SetState(SCE_MARKDOWN_DEFAULT);
343 // Alternate Ordered list
344 else if (sc.ch == '#' && sc.chNext == '.' && IsASpaceOrTab(sc.GetRelative(2))) {
345 sc.SetState(SCE_MARKDOWN_OLIST_ITEM);
346 sc.Forward(2);
347 sc.SetState(SCE_MARKDOWN_DEFAULT);
349 else if (sc.ch != ' ' || precharCount > 2)
350 sc.SetState(SCE_MARKDOWN_DEFAULT);
351 else
352 ++precharCount;
355 // New state anywhere in doc
356 if (sc.state == SCE_MARKDOWN_DEFAULT) {
357 int origPos = sc.currentPos;
358 if (sc.atLineStart && sc.ch == '#') {
359 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
360 freezeCursor = true;
362 // Links and Images
363 if (sc.Match("![") || sc.ch == '[') {
364 int i = 0, j = 0, k = 0;
365 int len = endPos - sc.currentPos;
366 while (i < len && (sc.GetRelative(++i) != ']' || sc.GetRelative(i - 1) == '\\'))
368 if (sc.GetRelative(i) == ']') {
369 j = i;
370 if (sc.GetRelative(++i) == '(') {
371 while (i < len && (sc.GetRelative(++i) != ')' || sc.GetRelative(i - 1) == '\\'))
373 if (sc.GetRelative(i) == ')')
374 k = i;
376 else if (sc.GetRelative(i) == '[' || sc.GetRelative(++i) == '[') {
377 while (i < len && (sc.GetRelative(++i) != ']' || sc.GetRelative(i - 1) == '\\'))
379 if (sc.GetRelative(i) == ']')
380 k = i;
383 // At least a link text
384 if (j) {
385 sc.SetState(SCE_MARKDOWN_LINK);
386 sc.Forward(j);
387 // Also has a URL or reference portion
388 if (k)
389 sc.Forward(k - j);
390 sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
393 // Code - also a special case for alternate inside spacing
394 if (sc.Match("``") && sc.GetRelative(3) != ' ') {
395 sc.SetState(SCE_MARKDOWN_CODE2);
396 sc.Forward();
398 else if (sc.ch == '`' && sc.chNext != ' ') {
399 sc.SetState(SCE_MARKDOWN_CODE);
401 // Strong
402 else if (sc.Match("**") && sc.GetRelative(2) != ' ') {
403 sc.SetState(SCE_MARKDOWN_STRONG1);
404 sc.Forward();
406 else if (sc.Match("__") && sc.GetRelative(2) != ' ') {
407 sc.SetState(SCE_MARKDOWN_STRONG2);
408 sc.Forward();
410 // Emphasis
411 else if (sc.ch == '*' && sc.chNext != ' ')
412 sc.SetState(SCE_MARKDOWN_EM1);
413 else if (sc.ch == '_' && sc.chNext != ' ')
414 sc.SetState(SCE_MARKDOWN_EM2);
415 // Strikeout
416 else if (sc.Match("~~") && sc.GetRelative(2) != ' ') {
417 sc.SetState(SCE_MARKDOWN_STRIKEOUT);
418 sc.Forward();
420 // Beginning of line
421 else if (IsNewline(sc.ch))
422 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
424 // Advance if not holding back the cursor for this iteration.
425 if (!freezeCursor)
426 sc.Forward();
427 freezeCursor = false;
429 sc.Complete();
432 LexerModule lmMarkdown(SCLEX_MARKDOWN, ColorizeMarkdownDoc, "markdown");