3 * Copyright (c) 2007-2011, Nick Treleaven
4 * Copyright (c) 2012, Lex Trotman
5 * Copyright (c) 2021, Jiri Techet
7 * This source code is released for free distribution under the terms of the
8 * GNU General Public License version 2 or (at your option) any later version.
10 * This module contains functions for generating tags for markdown files.
12 * This parser was based on the asciidoc parser.
14 * Extended syntax like footnotes is described in
15 * https://www.markdownguide.org/extended-syntax/
21 #include "general.h" /* must always come first */
31 #include "nestlevel.h"
/* Footnote kind; its value is K_SECTION_COUNT. The enclosing enum header is not visible in this excerpt. */
49 K_FOOTNOTE
= K_SECTION_COUNT
,
/* Kind table of the Markdown parser: six heading kinds (letters c, s, S, t, T, u)
 * followed by the footnote kind (letter n). All kinds are enabled (first field true). */
52 static kindDefinition MarkdownKinds
[] = {
53 { true, 'c', "chapter", "chapters"},
54 { true, 's', "section", "sections" },
55 { true, 'S', "subsection", "level 2 sections" },
56 { true, 't', "subsubsection", "level 3 sections" },
57 { true, 'T', "l4subsection", "level 4 sections" },
58 { true, 'u', "l5subsection", "level 5 sections" },
59 { true, 'n', "footnote", "footnotes" },
/* Parser-specific field table. The visible entry, sectionMarker, records the marker
 * that declared a section; makeSectionMarkdownTag attaches it through MarkdownFields[F_MARKER]. */
62 static fieldDefinition MarkdownFields
[] = {
65 .name
= "sectionMarker",
66 .description
= "character used for declaring section(#, ##, =, or -)",
/* File-scope nesting-level container shared by the tag helpers. It is created with
 * nestingLevelsNew and released with nestingLevelsFree inside findMarkdownTags. */
74 static NestingLevels
*nestingLevels
= NULL
;
77 * FUNCTION DEFINITIONS
/* Pops the current nesting level when it has no tag entry, or when its entry has a
 * kindIndex greater than or equal to kind.
 *
 * kind:              kind index of the heading about to be tagged.
 * adjustmentWhenPop: number of lines subtracted from the current input line number;
 *                    the result is clamped to 0 instead of underflowing.
 *
 * NOTE(review): the enclosing loop, any use of the adjusted line value, and the
 * return statement are not visible in this excerpt; presumably the level left on
 * top is returned -- confirm against the full file. */
80 static NestingLevel
*getNestingLevel (const int kind
, unsigned long adjustmentWhenPop
)
84 unsigned long line
= getInputLineNumber ();
/* clamp: never go below line 0 */
86 line
= (line
> adjustmentWhenPop
)? (line
- adjustmentWhenPop
): 0;
90 nl
= nestingLevelsGetCurrent (nestingLevels
);
91 e
= getEntryOfNestingLevel (nl
);
/* discard levels without an entry and levels at the same or a deeper heading kind */
92 if ((nl
&& (e
== NULL
)) || (e
&& (e
->kindIndex
>= kind
)))
93 nestingLevelsPop (nestingLevels
);
/* Builds and emits a tag for name with the given kind; nothing in the visible code
 * runs when name is empty.
 *
 * name:    tag name.
 * kind:    kind index of the tag.
 * twoLine: selects the adjustment passed to getNestingLevel (2 when true, 1 otherwise).
 *
 * The result of makeTagEntry is stored in r. NOTE(review): the declaration of r, its
 * value for an empty name, and the return statement are not visible in this excerpt. */
101 static int makeMarkdownTag (const vString
* const name
, const int kind
, const bool twoLine
)
105 if (vStringLength (name
) > 0)
/* drop nesting levels that cannot be a parent of this kind, then look at what is left */
107 const NestingLevel
*const nl
= getNestingLevel (kind
, twoLine
? 2: 1);
108 tagEntryInfo
*parent
= getEntryOfNestingLevel (nl
);
111 initTagEntry (&e
, vStringValue (name
), kind
);
115 /* we want the line before the '---' underline chars */
116 const unsigned long line
= getInputLineNumber ();
/* NOTE(review): the condition guarding this position adjustment is not visible here */
121 e
.filePosition
= getInputFilePositionForLine (line
- 1);
/* only an entry with a smaller kindIndex becomes the scope of the new tag */
125 if (parent
&& (parent
->kindIndex
< kind
))
126 e
.extensionFields
.scopeIndex
= nl
->corkIndex
;
128 r
= makeTagEntry (&e
);
/* Emits a section tag and makes it the current nesting level.
 *
 * name:   heading text.
 * kind:   kind index of the heading.
 * marker: marker string; when it does not start with a hash character the heading is
 *         treated as a two-line title by makeMarkdownTag.
 *
 * The marker is attached to the new entry through MarkdownFields[F_MARKER], and the
 * cork index r is pushed onto nestingLevels. NOTE(review): the return statement is
 * not visible in this excerpt. */
134 static int makeSectionMarkdownTag (const vString
* const name
, const int kind
, const char *marker
)
136 int r
= makeMarkdownTag (name
, kind
, marker
[0] != '#');
137 attachParserFieldToCorkEntry (r
, MarkdownFields
[F_MARKER
].ftype
, marker
);
139 nestingLevelsPush (nestingLevels
, r
);
/* Extracts the text of a one-line heading from line into a newly created vString.
 *
 * kind:      heading kind, asserted to be in [0, K_SECTION_COUNT).
 * line:      the input line.
 * lineLen:   length of line, asserted to be greater than kind + 1.
 * delimited: out flag; NOTE(review): where it is set is not visible in this excerpt.
 *
 * start begins at kind + 1 and end at the last character. Both are then moved inward
 * over whitespace, and the characters from start to end inclusive are appended to
 * name. The caller in findMarkdownTags releases the string with vStringDelete. */
144 static vString
*getHeading (const int kind
, const unsigned char *line
,
145 const int lineLen
, bool *delimited
)
148 int start
= kind
+ 1;
149 int end
= lineLen
- 1;
150 vString
*name
= vStringNew ();
152 Assert (kind
>= 0 && kind
< K_SECTION_COUNT
);
153 Assert (lineLen
> start
);
/* pos skips leading whitespace; NOTE(review): its declaration is not visible here */
156 while (isspace (line
[pos
])) ++pos
;
/* trailing characters equal to line[pos] and not preceded by a backslash; the loop
 * body is not visible here */
157 while (line
[end
] == line
[pos
] && end
- 1 >= 0 && line
[end
- 1] != '\\')
/* trim whitespace on both sides of the heading text */
162 while (isspace (line
[start
])) ++start
;
163 while (isspace (line
[end
])) --end
;
166 vStringNCatS (name
, (const char*)(&(line
[start
])), end
- start
+ 1);
/* Scans the leading whitespace of line and measures its indentation.
 *
 * line:     the input line.
 * lineLen:  number of characters that may be inspected.
 * indented: out flag, set to true when the indentation is 4 or more.
 *
 * A tab counts as 4 and any other whitespace character as 1. NOTE(review): the return
 * statement is not visible in this excerpt; the caller uses the result as the index
 * of the first non-whitespace character, so presumably i is returned. */
172 static int getFirstCharPos (const unsigned char *line
, int lineLen
, bool *indented
)
176 for (i
= 0; i
< lineLen
&& isspace (line
[i
]); i
++)
177 indent
+= line
[i
] == '\t' ? 4 : 1;
178 *indented
= indent
>= 4;
/* Looks for a footnote definition in line: an opening bracket-caret pair followed
 * later by a closing bracket-colon pair. The text between the two becomes the name
 * of a K_FOOTNOTE tag entry whose scope is the current nesting level.
 *
 * NOTE(review): the bodies of the two guards below, any NULL check on nl, and the
 * call that emits the entry are not visible in this excerpt. */
183 static void getFootnoteMaybe (const char *line
)
185 const char *start
= strstr (line
, "[^");
186 const char *end
= start
? strstr(start
+ 2, "]:"): NULL
;
/* both markers must be present */
188 if (! (start
&& end
))
/* the label between the markers must not be empty */
190 if (! (end
> (start
+ 2)))
193 vString
* footnote
= vStringNewNInit (start
+ 2, end
- (start
+ 2));
194 const NestingLevel
*const nl
= nestingLevelsGetCurrent (nestingLevels
);
197 initTagEntry (&e
, vStringValue (footnote
), K_FOOTNOTE
);
199 e
.extensionFields
.scopeIndex
= nl
->corkIndex
;
202 vStringDelete (footnote
);
/* Lets subparsers derive the language of a fenced code block.
 *
 * Iterates the subparsers with foreachSubparser, and for each one that provides an
 * extractLanguageForCodeBlock callback, calls it with langMarker and codeLang and
 * stores the result in r.
 *
 * NOTE(review): the rest of the parameter list, the handling of r after a callback
 * returns, and the return statement are not visible in this excerpt. */
205 static bool extractLanguageForCodeBlock (const char *langMarker
,
211 foreachSubparser (s
, false)
213 markdownSubparser
*m
= (markdownSubparser
*)s
;
215 if (m
->extractLanguageForCodeBlock
)
216 r
= m
->extractLanguageForCodeBlock (m
, langMarker
, codeLang
);
/* Parser entry point: reads the input line by line and creates tags for headings
 * (two-line titles underlined with = or -, and one-line titles starting with hash
 * characters) and for footnotes. It also calls makePromise for front matter and for
 * fenced code blocks, and tracks XML comments and indented code.
 *
 * State kept across lines:
 *   prevLine     - text accumulated from preceding lines; used as the name of a
 *                  two-line title.
 *   codeLang     - language string of the fenced code block being read.
 *   inPreambule  - toggled when a line starting with three dashes is seen while the
 *                  line number is 1 or the flag is already set.
 *   inComment    - consulted below when handling XML comments.
 *
 * NOTE(review): many structural lines (braces, else branches, some declarations such
 * as indented, inCodeChar, c and nSame) are not visible in this excerpt, so the exact
 * nesting of the branches below must be confirmed against the full file. */
225 static void findMarkdownTags (void)
227 vString
*prevLine
= vStringNew ();
228 vString
*codeLang
= vStringNew ();
229 const unsigned char *line
;
231 long startSourceLineNumber
= 0;
232 long startLineNumber
= 0;
233 bool inPreambule
= false;
234 bool inComment
= false;
236 subparser
*sub
= getSubparserRunningBaseparser();
238 chooseExclusiveSubparser (sub
, NULL
);
240 nestingLevels
= nestingLevelsNew (0);
242 while ((line
= readLineFromInputFile ()) != NULL
)
244 int lineLen
= strlen ((const char*) line
);
245 bool lineProcessed
= false;
247 int pos
= getFirstCharPos (line
, lineLen
, &indented
);
248 const int lineNum
= getInputLineNumber ();
/* front matter: only considered on the first line or while inPreambule is set */
250 if (lineNum
== 1 || inPreambule
)
252 if (line
[pos
] == '-' && line
[pos
+ 1] == '-' && line
[pos
+ 2] == '-')
256 long endLineNumber
= lineNum
;
/* hand a non-empty range to the FrontMatter parser */
257 if (startLineNumber
< endLineNumber
)
258 makePromise ("FrontMatter", startLineNumber
, 0,
259 endLineNumber
, 0, startSourceLineNumber
);
262 startSourceLineNumber
= startLineNumber
= lineNum
;
263 inPreambule
= !inPreambule
;
270 /* fenced code block */
271 if (line
[pos
] == '`' || line
[pos
] == '~')
274 char otherC
= c
== '`' ? '~' : '`';
/* NOTE(review): the run is counted with line[nSame], not line[pos + nSame]; when the
 * line has leading whitespace (pos > 0) this compares characters from the start of
 * the line rather than from the marker -- confirm whether pos should be added. */
276 for (nSame
= 1; line
[nSame
] == line
[pos
]; ++nSame
);
/* a fence needs at least three marker characters and must not be of the other kind
 * than the fence currently open */
278 if (inCodeChar
!= otherC
&& nSame
>= 3)
280 inCodeChar
= inCodeChar
? 0 : c
;
/* a further triple backtick later on the same line; the action taken for this case
 * is not visible in this excerpt */
281 if (inCodeChar
== c
&& strstr ((const char *)(line
+ pos
+ nSame
), "```") != NULL
)
285 const char *langMarker
= (const char *)(line
+ pos
+ nSame
);
286 startLineNumber
= startSourceLineNumber
= lineNum
+ 1;
/* subparsers get the first chance to name the language; otherwise the text after
 * the fence, stripped of surrounding whitespace, is used */
288 vStringClear (codeLang
);
289 if (! extractLanguageForCodeBlock (langMarker
, codeLang
))
291 vStringCopyS (codeLang
, langMarker
);
292 vStringStripLeading (codeLang
);
293 vStringStripTrailing (codeLang
);
298 long endLineNumber
= lineNum
;
/* only a named language and a non-empty line range produce a promise */
299 if (vStringLength (codeLang
) > 0
300 && startLineNumber
< endLineNumber
)
301 makePromise (vStringValue (codeLang
), startLineNumber
, 0,
302 endLineNumber
, 0, startSourceLineNumber
);
305 lineProcessed
= true;
308 /* XML comment start */
309 else if (lineLen
>= pos
+ 4 && line
[pos
] == '<' && line
[pos
+ 1] == '!' &&
310 line
[pos
+ 2] == '-' && line
[pos
+ 3] == '-')
/* a comment that is not closed on the same line; the statement executed for this
 * case is not visible in this excerpt */
312 if (strstr ((const char *)(line
+ pos
+ 4), "-->") == NULL
)
314 lineProcessed
= true;
316 /* XML comment end */
317 else if (inComment
&& strstr ((const char *)(line
+ pos
), "-->"))
320 lineProcessed
= true;
323 /* code block or comment */
324 if (inCodeChar
|| inComment
)
325 lineProcessed
= true;
327 /* code block using indent */
329 lineProcessed
= true;
331 /* if it's a title underline, or a delimited block marking character */
332 else if (line
[pos
] == '=' || line
[pos
] == '-' || line
[pos
] == '#' || line
[pos
] == '>')
/* NOTE(review): same indexing as in the fence scan above -- line[nSame] ignores pos */
335 for (nSame
= 1; line
[nSame
] == line
[pos
]; ++nSame
);
338 if (line
[pos
] == '>')
339 ; /* just to make sure lineProcessed = true so it won't be in a heading */
340 /* is it a two line title */
341 else if (line
[pos
] == '=' || line
[pos
] == '-')
343 char marker
[2] = { line
[pos
], '\0' };
344 int kind
= line
[pos
] == '=' ? K_CHAPTER
: K_SECTION
;
345 bool whitespaceTerminated
= true;
/* the underline may only be followed by whitespace */
347 for (int i
= pos
+ nSame
; i
< lineLen
; i
++)
349 if (!isspace (line
[i
]))
351 whitespaceTerminated
= false;
/* the preceding text, trimmed, is the title */
356 vStringStripLeading (prevLine
);
357 vStringStripTrailing (prevLine
);
358 if (whitespaceTerminated
&& vStringLength (prevLine
) > 0)
359 makeSectionMarkdownTag (prevLine
, kind
, marker
);
361 /* otherwise is it a one line title */
/* NOTE(review): isspace is applied to line[nSame], again without adding pos */
362 else if (line
[pos
] == '#' && nSame
<= K_SECTION_COUNT
&& isspace (line
[nSame
]))
/* the number of hash characters selects the kind */
364 int kind
= nSame
- 1;
365 bool delimited
= false;
366 vString
*name
= getHeading (kind
, line
, lineLen
, &delimited
);
367 if (vStringLength (name
) > 0)
368 makeSectionMarkdownTag (name
, kind
, delimited
? "##" : "#");
369 vStringDelete (name
);
372 lineProcessed
= true;
375 vStringClear (prevLine
);
/* NOTE(review): the condition under which these two calls run is not visible here */
378 getFootnoteMaybe ((const char *)line
);
379 vStringCatS (prevLine
, (const char*) line
);
382 vStringDelete (prevLine
);
383 vStringDelete (codeLang
);
/* NOTE(review): the use of this line value is not visible in this excerpt */
385 unsigned int line
= (unsigned int)getInputLineNumber ();
386 nestingLevelsFree (nestingLevels
);
/* Creates the parser definition for the Markdown language.
 *
 * Registers the file extensions md and markdown, the kind and field tables, and
 * findMarkdownTags as the parse function, and sets useCork to CORK_QUEUE.
 * NOTE(review): the return statement is not visible in this excerpt. */
390 extern parserDefinition
* MarkdownParser (void)
392 parserDefinition
* const def
= parserNew ("Markdown");
393 static const char *const extensions
[] = { "md", "markdown", NULL
};
396 def
->extensions
= extensions
;
397 def
->useCork
= CORK_QUEUE
;
398 def
->kindTable
= MarkdownKinds
;
399 def
->kindCount
= ARRAY_SIZE (MarkdownKinds
);
400 def
->fieldTable
= MarkdownFields
;
401 def
->fieldCount
= ARRAY_SIZE (MarkdownFields
);
/* the scope separator is a string made of two double-quote characters */
402 def
->defaultScopeSeparator
= "\"\"";
403 def
->parser
= findMarkdownTags
;
406 * This setting (useMemoryStreamInput) is for running
407 * Yaml parser from YamlFrontMatter as subparser.
408 * YamlFrontMatter is run from FrontMatter as a guest parser.
409 * FrontMatter is run from Markdown as a guest parser.
410 * This stacked structure hits the limitation of the main
411 * part: subparser's requirement for memory based input stream
412 * is not propagated to the main part.
414 * TODO: instead of setting useMemoryStreamInput here, we
415 * should remove the limitation.
417 def
->useMemoryStreamInput
= true;