Remove unused variable
[geany-mirror.git] / ctags / parsers / markdown.c
blob3510d756d3cfe2f9b60281e023c0423ba8255bc5
/*
 *
 *   Copyright (c) 2007-2011, Nick Treleaven
 *   Copyright (c) 2012, Lex Trotman
 *   Copyright (c) 2021, Jiri Techet
 *
 * This source code is released for free distribution under the terms of the
 * GNU General Public License version 2 or (at your option) any later version.
 *
 * This module contains functions for generating tags for markdown files.
 *
 * This parser was based on the asciidoc parser.
 *
 * Extended syntax like footnotes is described in
 * https://www.markdownguide.org/extended-syntax/
 */
/*
 *   INCLUDE FILES
 */
21 #include "general.h" /* must always come first */
23 #include <ctype.h>
24 #include <string.h>
26 #include "debug.h"
27 #include "entry.h"
28 #include "parse.h"
29 #include "read.h"
30 #include "vstring.h"
31 #include "nestlevel.h"
32 #include "routines.h"
33 #include "promise.h"
34 #include "htable.h"
36 #include "markdown.h"
/*
 *   DATA DEFINITIONS
 */
/*
 * Tag kinds emitted by this parser.
 *
 * The heading kinds are ordered from the outermost level (chapter) to the
 * innermost one, so a numerically smaller kind is a shallower heading.
 * K_SECTION_COUNT is the number of heading kinds; the footnote kind reuses
 * that value and therefore comes right after the last heading kind.
 */
typedef enum {
	K_CHAPTER = 0,                /* outermost heading level */
	K_SECTION,
	K_SUBSECTION,
	K_SUBSUBSECTION,
	K_LEVEL4SECTION,
	K_LEVEL5SECTION,              /* innermost heading level */
	K_SECTION_COUNT,              /* number of heading kinds above */
	K_FOOTNOTE = K_SECTION_COUNT, /* first (and only) non-heading kind */
} markdownKind;
52 static kindDefinition MarkdownKinds[] = {
53 { true, 'c', "chapter", "chapters"},
54 { true, 's', "section", "sections" },
55 { true, 'S', "subsection", "level 2 sections" },
56 { true, 't', "subsubsection", "level 3 sections" },
57 { true, 'T', "l4subsection", "level 4 sections" },
58 { true, 'u', "l5subsection", "level 5 sections" },
59 { true, 'n', "footnote", "footnotes" },
62 static fieldDefinition MarkdownFields [] = {
64 .enabled = false,
65 .name = "sectionMarker",
66 .description = "character used for declaring section(#, ##, =, or -)",
/* Indices into MarkdownFields. */
typedef enum {
	F_MARKER = 0, /* the "sectionMarker" field */
} markdownField;
74 static NestingLevels *nestingLevels = NULL;
/*
 *   FUNCTION DEFINITIONS
 */
80 static NestingLevel *getNestingLevel (const int kind, unsigned long adjustmentWhenPop)
82 NestingLevel *nl;
83 tagEntryInfo *e;
84 unsigned long line = getInputLineNumber ();
86 line = (line > adjustmentWhenPop)? (line - adjustmentWhenPop): 0;
88 while (1)
90 nl = nestingLevelsGetCurrent (nestingLevels);
91 e = getEntryOfNestingLevel (nl);
92 if ((nl && (e == NULL)) || (e && (e->kindIndex >= kind)))
93 nestingLevelsPop (nestingLevels);
94 else
95 break;
97 return nl;
101 static int makeMarkdownTag (const vString* const name, const int kind, const bool twoLine)
103 int r = CORK_NIL;
105 if (vStringLength (name) > 0)
107 const NestingLevel *const nl = getNestingLevel (kind, twoLine? 2: 1);
108 tagEntryInfo *parent = getEntryOfNestingLevel (nl);
109 tagEntryInfo e;
111 initTagEntry (&e, vStringValue (name), kind);
113 if (twoLine)
115 /* we want the line before the '---' underline chars */
116 const unsigned long line = getInputLineNumber ();
117 Assert (line > 0);
118 if (line > 0)
120 e.lineNumber--;
121 e.filePosition = getInputFilePositionForLine (line - 1);
125 if (parent && (parent->kindIndex < kind))
126 e.extensionFields.scopeIndex = nl->corkIndex;
128 r = makeTagEntry (&e);
130 return r;
134 static int makeSectionMarkdownTag (const vString* const name, const int kind, const char *marker)
136 int r = makeMarkdownTag (name, kind, marker[0] != '#');
137 attachParserFieldToCorkEntry (r, MarkdownFields [F_MARKER].ftype, marker);
139 nestingLevelsPush (nestingLevels, r);
140 return r;
144 static vString *getHeading (const int kind, const unsigned char *line,
145 const int lineLen, bool *delimited)
147 int pos = 0;
148 int start = kind + 1;
149 int end = lineLen - 1;
150 vString *name = vStringNew ();
152 Assert (kind >= 0 && kind < K_SECTION_COUNT);
153 Assert (lineLen > start);
155 *delimited = false;
156 while (isspace (line[pos])) ++pos;
157 while (line[end] == line[pos] && end - 1 >= 0 && line[end - 1] != '\\')
159 --end;
160 *delimited = true;
162 while (isspace (line[start])) ++start;
163 while (isspace (line[end])) --end;
165 if (start <= end)
166 vStringNCatS (name, (const char*)(&(line[start])), end - start + 1);
168 return name;
/*
 * Find the first non-whitespace character of a line.
 *
 * line:     the line to scan.
 * lineLen:  number of characters of `line` that may be examined.
 * indented: set to true when the leading whitespace is at least 4 columns
 *           wide, counting a tab as 4 columns and any other whitespace
 *           character as 1.
 *
 * Returns the offset of the first non-whitespace character, or lineLen when
 * the line holds nothing but whitespace.
 */
static int getFirstCharPos (const unsigned char *line, int lineLen, bool *indented)
{
	int width = 0;
	int offset = 0;

	while (offset < lineLen && isspace (line[offset]))
	{
		if (line[offset] == '\t')
			width += 4;
		else
			width += 1;
		++offset;
	}

	*indented = (width >= 4);
	return offset;
}
183 static void getFootnoteMaybe (const char *line)
185 const char *start = strstr (line, "[^");
186 const char *end = start? strstr(start + 2, "]:"): NULL;
188 if (! (start && end))
189 return;
190 if (! (end > (start + 2)))
191 return;
193 vString * footnote = vStringNewNInit (start + 2, end - (start + 2));
194 const NestingLevel *const nl = nestingLevelsGetCurrent (nestingLevels);
195 tagEntryInfo e;
197 initTagEntry (&e, vStringValue (footnote), K_FOOTNOTE);
198 if (nl)
199 e.extensionFields.scopeIndex = nl->corkIndex;
200 makeTagEntry (&e);
202 vStringDelete (footnote);
205 static bool extractLanguageForCodeBlock (const char *langMarker,
206 vString *codeLang)
208 subparser *s;
209 bool r = false;
211 foreachSubparser (s, false)
213 markdownSubparser *m = (markdownSubparser *)s;
214 enterSubparser(s);
215 if (m->extractLanguageForCodeBlock)
216 r = m->extractLanguageForCodeBlock (m, langMarker, codeLang);
217 leaveSubparser();
218 if (r)
219 break;
222 return r;
225 static void findMarkdownTags (void)
227 vString *prevLine = vStringNew ();
228 vString *codeLang = vStringNew ();
229 const unsigned char *line;
230 char inCodeChar = 0;
231 long startSourceLineNumber = 0;
232 long startLineNumber = 0;
233 bool inPreambule = false;
234 bool inComment = false;
236 subparser *sub = getSubparserRunningBaseparser();
237 if (sub)
238 chooseExclusiveSubparser (sub, NULL);
240 nestingLevels = nestingLevelsNew (0);
242 while ((line = readLineFromInputFile ()) != NULL)
244 int lineLen = strlen ((const char*) line);
245 bool lineProcessed = false;
246 bool indented;
247 int pos = getFirstCharPos (line, lineLen, &indented);
248 const int lineNum = getInputLineNumber ();
250 if (lineNum == 1 || inPreambule)
252 if (line[pos] == '-' && line[pos + 1] == '-' && line[pos + 2] == '-')
254 if (inPreambule)
256 long endLineNumber = lineNum;
257 if (startLineNumber < endLineNumber)
258 makePromise ("FrontMatter", startLineNumber, 0,
259 endLineNumber, 0, startSourceLineNumber);
261 else
262 startSourceLineNumber = startLineNumber = lineNum;
263 inPreambule = !inPreambule;
267 if (inPreambule)
268 continue;
270 /* fenced code block */
271 if (line[pos] == '`' || line[pos] == '~')
273 char c = line[pos];
274 char otherC = c == '`' ? '~' : '`';
275 int nSame;
276 for (nSame = 1; line[nSame] == line[pos]; ++nSame);
278 if (inCodeChar != otherC && nSame >= 3)
280 inCodeChar = inCodeChar ? 0 : c;
281 if (inCodeChar == c && strstr ((const char *)(line + pos + nSame), "```") != NULL)
282 inCodeChar = 0;
283 else if (inCodeChar)
285 const char *langMarker = (const char *)(line + pos + nSame);
286 startLineNumber = startSourceLineNumber = lineNum + 1;
288 vStringClear (codeLang);
289 if (! extractLanguageForCodeBlock (langMarker, codeLang))
291 vStringCopyS (codeLang, langMarker);
292 vStringStripLeading (codeLang);
293 vStringStripTrailing (codeLang);
296 else
298 long endLineNumber = lineNum;
299 if (vStringLength (codeLang) > 0
300 && startLineNumber < endLineNumber)
301 makePromise (vStringValue (codeLang), startLineNumber, 0,
302 endLineNumber, 0, startSourceLineNumber);
305 lineProcessed = true;
308 /* XML comment start */
309 else if (lineLen >= pos + 4 && line[pos] == '<' && line[pos + 1] == '!' &&
310 line[pos + 2] == '-' && line[pos + 3] == '-')
312 if (strstr ((const char *)(line + pos + 4), "-->") == NULL)
313 inComment = true;
314 lineProcessed = true;
316 /* XML comment end */
317 else if (inComment && strstr ((const char *)(line + pos), "-->"))
319 inComment = false;
320 lineProcessed = true;
323 /* code block or comment */
324 if (inCodeChar || inComment)
325 lineProcessed = true;
327 /* code block using indent */
328 else if (indented)
329 lineProcessed = true;
331 /* if it's a title underline, or a delimited block marking character */
332 else if (line[pos] == '=' || line[pos] == '-' || line[pos] == '#' || line[pos] == '>')
334 int nSame;
335 for (nSame = 1; line[nSame] == line[pos]; ++nSame);
337 /* quote */
338 if (line[pos] == '>')
339 ; /* just to make sure lineProcessed = true so it won't be in a heading */
340 /* is it a two line title */
341 else if (line[pos] == '=' || line[pos] == '-')
343 char marker[2] = { line[pos], '\0' };
344 int kind = line[pos] == '=' ? K_CHAPTER : K_SECTION;
345 bool whitespaceTerminated = true;
347 for (int i = pos + nSame; i < lineLen; i++)
349 if (!isspace (line[i]))
351 whitespaceTerminated = false;
352 break;
356 vStringStripLeading (prevLine);
357 vStringStripTrailing (prevLine);
358 if (whitespaceTerminated && vStringLength (prevLine) > 0)
359 makeSectionMarkdownTag (prevLine, kind, marker);
361 /* otherwise is it a one line title */
362 else if (line[pos] == '#' && nSame <= K_SECTION_COUNT && isspace (line[nSame]))
364 int kind = nSame - 1;
365 bool delimited = false;
366 vString *name = getHeading (kind, line, lineLen, &delimited);
367 if (vStringLength (name) > 0)
368 makeSectionMarkdownTag (name, kind, delimited ? "##" : "#");
369 vStringDelete (name);
372 lineProcessed = true;
375 vStringClear (prevLine);
376 if (!lineProcessed)
378 getFootnoteMaybe ((const char *)line);
379 vStringCatS (prevLine, (const char*) line);
382 vStringDelete (prevLine);
383 vStringDelete (codeLang);
385 unsigned int line = (unsigned int)getInputLineNumber ();
386 nestingLevelsFree (nestingLevels);
390 extern parserDefinition* MarkdownParser (void)
392 parserDefinition* const def = parserNew ("Markdown");
393 static const char *const extensions [] = { "md", "markdown", NULL };
395 def->enabled = true;
396 def->extensions = extensions;
397 def->useCork = CORK_QUEUE;
398 def->kindTable = MarkdownKinds;
399 def->kindCount = ARRAY_SIZE (MarkdownKinds);
400 def->fieldTable = MarkdownFields;
401 def->fieldCount = ARRAY_SIZE (MarkdownFields);
402 def->defaultScopeSeparator = "\"\"";
403 def->parser = findMarkdownTags;
406 * This setting (useMemoryStreamInput) is for running
407 * Yaml parser from YamlFrontMatter as subparser.
408 * YamlFrontMatter is run from FrontMatter as a gust parser.
409 * FrontMatter is run from Markdown as a guest parser.
410 * This stacked structure hits the limitation of the main
411 * part: subparser's requirement for memory based input stream
412 * is not propagated to the main part.
414 * TODO: instead of setting useMemoryStreamInput here, we
415 * should remove the limitation.
417 def->useMemoryStreamInput = true;
419 return def;