Merge pull request #3196 from techee/anon_rename_fix2
[geany-mirror.git] / ctags / parsers / rst.c
blobe708ea5f402a518d57293681cf40ddbe668ef0c4
1 /*
3 * Copyright (c) 2007-2011, Nick Treleaven
5 * This source code is released for free distribution under the terms of the
6 * GNU General Public License version 2 or (at your option) any later version.
8 * This module contains functions for generating tags for reStructuredText (reST) files.
10 * This module was ported from geany.
14 * INCLUDE FILES
16 #include "general.h" /* must always come first */
18 #include <ctype.h>
19 #include <string.h>
21 #include "parse.h"
22 #include "read.h"
23 #include "vstring.h"
24 #include "nestlevel.h"
25 #include "entry.h"
26 #include "routines.h"
27 #include "field.h"
30 * DATA DEFINITIONS
/*
 * Tag kinds produced by this parser.  The values index RstKinds[].
 *
 * Only the kinds below SECTION_COUNT are section levels; they are handed
 * out by get_kind() in the order in which new underline characters are
 * met.  SECTION_COUNT must therefore stop before the markup kinds
 * (citation, target, substitution definition): otherwise a document that
 * uses more than four distinct underline characters would have its
 * deeper sections tagged as citations/targets/substdefs.
 *
 * K_EOF is not a real kind; getNestingLevel() uses it to close every
 * open section at the end of input.
 */
typedef enum {
	K_EOF = -1,
	K_CHAPTER = 0,
	K_SECTION,
	K_SUBSECTION,
	K_SUBSUBSECTION,
	SECTION_COUNT,			/* number of section levels above */
	K_CITATION = SECTION_COUNT,
	K_TARGET,
	K_SUBSTDEF,
} rstKind;
44 static kindDefinition RstKinds[] = {
45 { true, 'c', "chapter", "chapters"},
46 { true, 's', "section", "sections" },
47 { true, 'S', "subsection", "subsections" },
48 { true, 't', "subsubsection", "subsubsections" },
49 { true, 'C', "citation", "citations"},
50 { true, 'T', "target", "targets" },
51 { true, 'd', "substdef", "substitute definitions" },
54 typedef enum {
55 F_SECTION_MARKER,
56 } rstField;
58 static fieldDefinition RstFields [] = {
60 .name = "sectionMarker",
61 .description = "character used for declaring section",
62 .enabled = false,
66 static char kindchars[SECTION_COUNT];
68 static NestingLevels *nestingLevels = NULL;
71 * FUNCTION DEFINITIONS
74 static NestingLevel *getNestingLevel(const int kind)
76 NestingLevel *nl;
77 tagEntryInfo *e;
79 int d = 0;
81 if (kind > K_EOF)
83 d++;
84 /* 1. we want the line before the '---' underline chars */
85 d++;
86 /* 2. we want the line before the next section/chapter title. */
89 while (1)
91 nl = nestingLevelsGetCurrent(nestingLevels);
92 e = getEntryOfNestingLevel (nl);
93 if ((nl && (e == NULL)) || (e && e->kindIndex >= kind))
95 if (e)
96 e->extensionFields.endLine = (getInputLineNumber() - d);
97 nestingLevelsPop(nestingLevels);
99 else
100 break;
102 return nl;
105 static int makeTargetRstTag(const vString* const name, rstKind kindex)
107 tagEntryInfo e;
109 initTagEntry (&e, vStringValue (name), kindex);
111 const NestingLevel *nl = nestingLevelsGetCurrent(nestingLevels);
112 tagEntryInfo *parent = NULL;
113 if (nl)
114 parent = getEntryOfNestingLevel (nl);
116 if (parent)
118 e.extensionFields.scopeKindIndex = parent->kindIndex;
119 e.extensionFields.scopeName = parent->name;
122 return makeTagEntry (&e);
125 static void makeSectionRstTag(const vString* const name, const int kind, const MIOPos filepos,
126 char marker)
128 const NestingLevel *const nl = getNestingLevel(kind);
129 tagEntryInfo *parent;
131 int r = CORK_NIL;
133 if (vStringLength (name) > 0)
135 tagEntryInfo e;
136 char m [2] = { [1] = '\0' };
138 initTagEntry (&e, vStringValue (name), kind);
140 e.lineNumber--; /* we want the line before the '---' underline chars */
141 e.filePosition = filepos;
143 parent = getEntryOfNestingLevel (nl);
144 if (parent && (parent->kindIndex < kind))
146 #if 1
147 e.extensionFields.scopeKindIndex = parent->kindIndex;
148 e.extensionFields.scopeName = parent->name;
149 #else
150 /* TODO
152 Following code makes the scope information full qualified form.
153 Do users want the full qualified form?
154 --- ./Units/rst.simple.d/expected.tags 2015-12-18 01:32:35.574255617 +0900
155 +++ /home/yamato/var/ctags-github/Units/rst.simple.d/FILTERED.tmp 2016-05-05 03:05:38.165604756 +0900
156 @@ -5,2 +5,2 @@
157 -Subsection 1.1.1 input.rst /^Subsection 1.1.1$/;" S section:Section 1.1
158 -Subsubsection 1.1.1.1 input.rst /^Subsubsection 1.1.1.1$/;" t subsection:Subsection 1.1.1
159 +Subsection 1.1.1 input.rst /^Subsection 1.1.1$/;" S section:Chapter 1.Section 1.1
160 +Subsubsection 1.1.1.1 input.rst /^Subsubsection 1.1.1.1$/;" t subsection:Chapter 1.Section 1.1.Subsection 1.1.1
162 e.extensionFields.scopeIndex = nl->corkIndex;
163 #endif
166 m[0] = marker;
167 attachParserField (&e, false, RstFields [F_SECTION_MARKER].ftype, m);
168 r = makeTagEntry (&e);
170 nestingLevelsPush(nestingLevels, r);
/* Checks whether `str' consists of one single character repeated.
 * An empty string counts as "all the same" and yields true. */
static bool issame(const char *str)
{
	/* For an empty string the accept set is empty as well, strspn()
	 * returns 0 and str[0] is the terminator, so the result is true. */
	const char accept [2] = { *str, '\0' };

	/* The whole string is made of the first character exactly when the
	 * run of that character reaches the terminating NUL. */
	return str [strspn (str, accept)] == '\0';
}
192 static int get_kind(char c)
194 int i;
196 for (i = 0; i < SECTION_COUNT; i++)
198 if (kindchars[i] == c)
199 return i;
201 if (kindchars[i] == 0)
203 kindchars[i] = c;
204 return i;
207 return -1;
/* Computes the length, in characters, of the UTF-8 string made of the
 * first `buf_len' bytes of `buf'.
 *
 * If the string doesn't look like UTF-8 (invalid leading byte, or a
 * sequence cut short by the end of the buffer), returns -1.
 *
 * The validation is quick and naive: only leading bytes are inspected,
 * continuation bytes are skipped without being checked. */
static int utf8_strlen(const char *buf, int buf_len)
{
	int len = 0;
	int remaining = buf_len;

	while (remaining > 0)
	{
		/* Inspect the byte as unsigned so the masks below do not depend
		 * on the signedness of plain char. */
		const unsigned char lead = (unsigned char) *buf;
		int seq_len;

		if (! (lead & 0x80))
			seq_len = 1;
		else if ((lead & 0xe0) == 0xc0)
			seq_len = 2;
		else if ((lead & 0xf0) == 0xe0)
			seq_len = 3;
		else if ((lead & 0xf8) == 0xf0)
			seq_len = 4;
		else /* not a valid leading UTF-8 byte, abort */
			return -1;

		/* Check before advancing: stepping the pointer past the end of
		 * the buffer and comparing afterwards would be undefined. */
		if (seq_len > remaining) /* incomplete last character */
			return -1;

		buf += seq_len;
		remaining -= seq_len;
		len++;
	}

	return len;
}
/*
 * Tests whether `line' starts with the explicit markup prefix ".. "
 * immediately followed by the character `reftype'.
 *
 * Returns a pointer to the character after `reftype' on a match,
 * NULL otherwise.
 */
static const unsigned char *is_markup_line (const unsigned char *line, char reftype)
{
	static const char prefix [] = ".. ";
	const size_t prefix_len = sizeof prefix - 1;

	/* strncmp() stops at the terminating NUL, so a short line is never
	 * read past its end. */
	if (strncmp ((const char *) line, prefix, prefix_len) != 0)
		return NULL;

	if (line [prefix_len] != reftype)
		return NULL;

	return line + prefix_len + 1;
}
248 static int capture_markup (const unsigned char *target_line, char defaultTerminator, rstKind kindex)
250 vString *name = vStringNew ();
251 unsigned char terminator;
252 int r = CORK_NIL;
254 if (*target_line == '`')
255 terminator = '`';
256 else if (!isspace (*target_line) && *target_line != '\0')
258 /* "Simple reference names are single words consisting of
259 * alphanumerics plus isolated (no two adjacent) internal
260 * hyphens, underscores, periods, colons and plus signs; no
261 * whitespace or other characters are allowed."
262 * -- http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#reference-names
264 vStringPut (name, *target_line);
265 terminator = defaultTerminator;
267 else
268 goto out;
270 target_line++;
273 bool escaped = false;
274 while (*target_line != '\0')
276 if (escaped)
278 vStringPut (name, *target_line);
279 escaped = false;
281 else
283 if (*target_line == '\\')
285 vStringPut (name, *target_line);
286 escaped = true;
288 else if (*target_line == terminator)
289 break;
290 else
291 vStringPut (name, *target_line);
293 target_line++;
296 if (vStringLength (name) == 0)
297 goto out;
299 r = makeTargetRstTag (name, kindex);
301 out:
302 vStringDelete (name);
303 return r;
306 /* TODO: parse overlining & underlining as distinct sections. */
307 static void findRstTags (void)
309 vString *name = vStringNew ();
310 MIOPos filepos;
311 const unsigned char *line;
312 const unsigned char *markup_line;
314 memset(&filepos, 0, sizeof(filepos));
315 memset(kindchars, 0, sizeof kindchars);
316 nestingLevels = nestingLevelsNew(0);
318 while ((line = readLineFromInputFile ()) != NULL)
320 if ((markup_line = is_markup_line (line, '_')) != NULL)
322 /* Handle .. _target:
323 * http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#hyperlink-targets
325 if (capture_markup (markup_line, ':', K_TARGET) != CORK_NIL)
327 vStringClear (name);
328 continue;
331 else if ((markup_line = is_markup_line (line, '[')) != NULL)
333 /* Handle .. [citation]
334 * https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#citations
336 if (capture_markup (markup_line, ']', K_CITATION) != CORK_NIL)
338 vStringClear (name);
339 continue;
342 else if ((markup_line = is_markup_line (line, '|')) != NULL)
344 /* Hanle .. |substitute definition|
345 * https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#substitution-definitions
347 if (capture_markup (markup_line, '|', K_SUBSTDEF) != CORK_NIL)
349 vStringClear (name);
350 continue;
354 int line_len = strlen((const char*) line);
355 int name_len_bytes = vStringLength(name);
356 /* FIXME: this isn't right, actually we need the real display width,
357 * taking into account double-width characters and stuff like that.
358 * But duh. */
359 int name_len = utf8_strlen(vStringValue(name), name_len_bytes);
361 /* if the name doesn't look like UTF-8, assume one-byte charset */
362 if (name_len < 0)
363 name_len = name_len_bytes;
365 /* underlines must be the same length or more */
366 if (line_len >= name_len && name_len > 0 &&
367 ispunct(line[0]) && issame((const char*) line))
369 char c = line[0];
370 int kind = get_kind(c);
372 if (kind >= 0)
374 makeSectionRstTag(name, kind, filepos, c);
375 continue;
378 vStringClear (name);
379 if (!isspace(*line))
381 vStringCatS(name, (const char*)line);
382 filepos = getInputFilePosition();
385 /* Force popping all nesting levels */
386 getNestingLevel (K_EOF);
387 vStringDelete (name);
388 nestingLevelsFree(nestingLevels);
391 extern parserDefinition* RstParser (void)
393 static const char *const extensions [] = { "rest", "reST", "rst", NULL };
394 parserDefinition* const def = parserNew ("ReStructuredText");
395 static const char *const aliases[] = {
396 "rst", /* The name of emacs's mode */
397 NULL
400 def->kindTable = RstKinds;
401 def->kindCount = ARRAY_SIZE (RstKinds);
402 def->extensions = extensions;
403 def->aliases = aliases;
404 def->parser = findRstTags;
406 def->fieldTable = RstFields;
407 def->fieldCount = ARRAY_SIZE (RstFields);
409 def->useCork = CORK_QUEUE;
411 return def;