Merge pull request #3560 from techee/cancel_popups
[geany-mirror.git] / ctags / parsers / bibtex.c
blobe23a3774565cfd3ebcab3ca36c8e3615ad15601e
/*
 *   Copyright (c) 2008, David Fishburn
 *   Copyright (c) 2012, Jan Larres
 *   Copyright (c) 2019, Mirco Schönfeld
 *
 *   This source code is released for free distribution under the terms of the
 *   GNU General Public License version 2 or (at your option) any later version.
 *
 *   This module contains functions for generating identifiers of entries of
 *   BibTeX language files.
 *
 *   BibTeX language "reference":
 *       https://en.wikipedia.org/wiki/BibTeX
 */
/*
 *   INCLUDE FILES
 */
18 #include "general.h" /* must always come first */
19 #include <ctype.h> /* to define isalpha () */
20 #include <string.h>
22 #include "debug.h"
23 #include "bibtex.h"
24 #include "entry.h"
25 #include "keyword.h"
26 #include "parse.h"
27 #include "read.h"
28 #include "routines.h"
29 #include "vstring.h"
/*
 *   MACROS
 */
/* True when the token's `type` field equals t. */
#define isType(token,t)		(bool) ((token)->type == (t))

/* True when the token's `keyword` field equals k. */
#define isKeyword(token,k)	(bool) ((token)->keyword == (k))

/*
 * True when c may appear inside an identifier as this parser reads it:
 * a letter, a digit, or one of  _ - + : . /
 * The argument is evaluated more than once, so pass a plain variable.
 */
#define isIdentChar(c) \
	(isalnum (c) \
	 || (c) == '_' || (c) == '-' || (c) == '+' \
	 || (c) == ':' || (c) == '.' || (c) == '/')
/*
 *   DATA DECLARATIONS
 */
/*
 * Identifies which BibTeX entry keyword a token holds.
 * There is one enumerator for every name listed in BibKeywordTable.
 */
enum eKeywordId {
	KEYWORD_article = 0,
	KEYWORD_book,
	KEYWORD_booklet,
	KEYWORD_conference,
	KEYWORD_inbook,
	KEYWORD_incollection,
	KEYWORD_inproceedings,
	KEYWORD_manual,
	KEYWORD_mastersthesis,
	KEYWORD_misc,
	KEYWORD_phdthesis,
	KEYWORD_proceedings,
	KEYWORD_string,
	KEYWORD_techreport,
	KEYWORD_unpublished
};

/* Plain int rather than the enum type, to allow KEYWORD_NONE. */
typedef int keywordId;
/*
 * Token classification.  Values 0..255 are the value of the byte that was
 * read; a few of them are named for convenience.  Values from 256 upwards
 * are the parser's own token classes.
 */
enum eTokenType {
	TOKEN_OPEN_CURLY = '{',

	/* everything below is a special (non-byte) type */
	TOKEN_UNDEFINED  = 256,
	TOKEN_KEYWORD,
	TOKEN_IDENTIFIER
};

/* Plain int so a token type can also hold any raw byte value. */
typedef int tokenType;
75 typedef struct sTokenInfo {
76 tokenType type;
77 keywordId keyword;
78 vString * string;
79 unsigned long lineNumber;
80 MIOPos filePosition;
81 } tokenInfo;
/*
 *   DATA DEFINITIONS
 */
87 static langType Lang_bib;
/*
 * Tag kinds emitted by this parser.  BIBTAG_COUNT is not a kind: it is the
 * number of kinds and is checked against the size of BibKinds.
 */
typedef enum {
	BIBTAG_ARTICLE = 0,
	BIBTAG_BOOK,
	BIBTAG_BOOKLET,
	BIBTAG_CONFERENCE,
	BIBTAG_INBOOK,
	BIBTAG_INCOLLECTION,
	BIBTAG_INPROCEEDINGS,
	BIBTAG_MANUAL,
	BIBTAG_MASTERSTHESIS,
	BIBTAG_MISC,
	BIBTAG_PHDTHESIS,
	BIBTAG_PROCEEDINGS,
	BIBTAG_STRING,
	BIBTAG_TECHREPORT,
	BIBTAG_UNPUBLISHED,
	BIBTAG_COUNT
} bibKind;
108 static kindDefinition BibKinds [] = {
109 { true, 'a', "article", "article" },
110 { true, 'b', "book", "book" },
111 { true, 'B', "booklet", "booklet" },
112 { true, 'c', "conference", "conference" },
113 { true, 'i', "inbook", "inbook" },
114 { true, 'I', "incollection", "incollection" },
115 { true, 'j', "inproceedings", "inproceedings" },
116 { true, 'm', "manual", "manual" },
117 { true, 'M', "mastersthesis", "mastersthesis" },
118 { true, 'n', "misc", "misc" },
119 { true, 'p', "phdthesis", "phdthesis" },
120 { true, 'P', "proceedings", "proceedings" },
121 { true, 's', "string", "string" },
122 { true, 't', "techreport", "techreport" },
123 { true, 'u', "unpublished", "unpublished" }
126 static const keywordTable BibKeywordTable [] = {
127 /* keyword keyword ID */
128 { "article", KEYWORD_article },
129 { "book", KEYWORD_book },
130 { "booklet", KEYWORD_booklet },
131 { "conference", KEYWORD_conference },
132 { "inbook", KEYWORD_inbook },
133 { "incollection", KEYWORD_incollection },
134 { "inproceedings",KEYWORD_inproceedings },
135 { "manual", KEYWORD_manual },
136 { "mastersthesis",KEYWORD_mastersthesis },
137 { "misc", KEYWORD_misc },
138 { "phdthesis", KEYWORD_phdthesis },
139 { "proceedings", KEYWORD_proceedings },
140 { "string", KEYWORD_string },
141 { "techreport", KEYWORD_techreport },
142 { "unpublished", KEYWORD_unpublished }
/*
 *   FUNCTION DEFINITIONS
 */
149 static tokenInfo *newToken (void)
151 tokenInfo *const token = xMalloc (1, tokenInfo);
153 token->type = TOKEN_UNDEFINED;
154 token->keyword = KEYWORD_NONE;
155 token->string = vStringNew ();
156 token->lineNumber = getInputLineNumber ();
157 token->filePosition = getInputFilePosition ();
159 return token;
162 static void deleteToken (tokenInfo *const token)
164 vStringDelete (token->string);
165 eFree (token);
/*
 *   Tag generation functions
 */
171 static void makeBibTag (tokenInfo *const token, bibKind kind)
173 const char *const name = vStringValue (token->string);
174 tagEntryInfo e;
175 initTagEntry (&e, name, kind);
177 updateTagLine (&e, token->lineNumber, token->filePosition);
179 makeTagEntry (&e);
/*
 *   Parsing functions
 */
187 * Read a C identifier beginning with "firstChar" and places it into
188 * "name".
190 static void parseIdentifier (vString *const string, const int firstChar)
192 int c = firstChar;
193 Assert (isIdentChar (c));
196 vStringPut (string, c);
197 c = getcFromInputFile ();
198 } while (c != EOF && isIdentChar (c));
199 if (c != EOF)
200 ungetcToInputFile (c); /* unget non-identifier character */
203 static bool readToken (tokenInfo *const token)
205 int c;
207 token->type = TOKEN_UNDEFINED;
208 token->keyword = KEYWORD_NONE;
209 vStringClear (token->string);
211 getNextChar:
215 c = getcFromInputFile ();
217 while (c == '\t' || c == ' ' || c == '\n');
219 token->lineNumber = getInputLineNumber ();
220 token->filePosition = getInputFilePosition ();
222 token->type = (unsigned char) c;
223 switch (c)
225 case EOF: return false;
227 case '@':
229 * All Bib entries start with an at symbol.
230 * Check if the next character is an alpha character
231 * else it is not a potential tex tag.
233 c = getcFromInputFile ();
234 if (! isalpha (c))
235 ungetcToInputFile (c);
236 else
238 vStringPut (token->string, '@');
239 parseIdentifier (token->string, c);
240 token->keyword = lookupCaseKeyword (vStringValue (token->string) + 1, Lang_bib);
241 if (isKeyword (token, KEYWORD_NONE))
242 token->type = TOKEN_IDENTIFIER;
243 else
244 token->type = TOKEN_KEYWORD;
246 break;
247 case '%':
248 skipToCharacterInInputFile ('\n'); /* % are single line comments */
249 goto getNextChar;
250 break;
251 default:
252 if (isIdentChar (c))
254 parseIdentifier (token->string, c);
255 token->type = TOKEN_IDENTIFIER;
257 break;
259 return true;
262 static void copyToken (tokenInfo *const dest, tokenInfo *const src)
264 dest->lineNumber = src->lineNumber;
265 dest->filePosition = src->filePosition;
266 dest->type = src->type;
267 dest->keyword = src->keyword;
268 vStringCopy (dest->string, src->string);
/*
 *   Scanning functions
 */
275 static bool parseTag (tokenInfo *const token, bool foreignKeyword, int kind)
277 tokenInfo * const name = newToken ();
278 vString * currentid;
279 bool eof = false;
281 currentid = vStringNew ();
283 * Bib entries are of these formats:
284 * @article{identifier,
285 * author="John Doe"}
287 * When a keyword is found, loop through all words up to
288 * a comma brace for the tag name.
291 if (isType (token, TOKEN_KEYWORD) || foreignKeyword)
293 copyToken (name, token);
294 if (!readToken (token))
296 eof = true;
297 goto out;
301 if (isType (token, TOKEN_OPEN_CURLY))
303 if (!readToken (token))
305 eof = true;
306 goto out;
308 if (isType (token, TOKEN_IDENTIFIER)){
309 vStringCat (currentid, token->string);
310 vStringStripTrailing (currentid);
311 if (vStringLength (currentid) > 0)
313 vStringCopy (name->string, currentid);
314 makeBibTag (name, kind);
317 else
318 { // should find an identifier for bib item at first place
319 eof = true;
320 goto out;
324 out:
325 deleteToken (name);
326 vStringDelete (currentid);
327 return eof;
330 static bool mayParseTokenInSubparser (tokenInfo *const token)
332 bool eof = false;
333 subparser *sub;
335 if (*vStringValue (token->string) != '@')
336 return eof;
338 foreachSubparser (sub, true)
340 bibTexSubparser *bibsub = (bibTexSubparser *)sub;
341 if (bibsub->isKeywordForTagging)
343 int kind;
344 enterSubparser (sub);
345 kind = bibsub->isKeywordForTagging (bibsub,
346 vStringValue (token->string) + 1);
347 if (kind != KIND_GHOST_INDEX)
348 eof = parseTag (token, true, kind);
349 leaveSubparser ();
350 if (kind != KIND_GHOST_INDEX)
351 break;
355 return eof;
358 static void parseBibFile (tokenInfo *const token)
360 bool eof = false;
364 if (!readToken (token))
365 break;
367 bibKind kind = KIND_GHOST_INDEX;;
369 if (isType (token, TOKEN_KEYWORD))
371 switch (token->keyword)
373 case KEYWORD_article:
374 kind = BIBTAG_ARTICLE;
375 break;
376 case KEYWORD_book:
377 kind = BIBTAG_BOOK;
378 break;
379 case KEYWORD_booklet:
380 kind = BIBTAG_BOOKLET;
381 break;
382 case KEYWORD_conference:
383 kind = BIBTAG_CONFERENCE;
384 break;
385 case KEYWORD_inbook:
386 kind = BIBTAG_INBOOK;
387 break;
388 case KEYWORD_incollection:
389 kind = BIBTAG_INCOLLECTION;
390 break;
391 case KEYWORD_inproceedings:
392 kind = BIBTAG_INPROCEEDINGS;
393 break;
394 case KEYWORD_manual:
395 kind = BIBTAG_MANUAL;
396 break;
397 case KEYWORD_mastersthesis:
398 kind = BIBTAG_MASTERSTHESIS;
399 break;
400 case KEYWORD_misc:
401 kind = BIBTAG_MISC;
402 break;
403 case KEYWORD_phdthesis:
404 kind = BIBTAG_PHDTHESIS;
405 break;
406 case KEYWORD_proceedings:
407 kind = BIBTAG_PROCEEDINGS;
408 break;
409 case KEYWORD_string:
410 kind = BIBTAG_STRING;
411 break;
412 case KEYWORD_techreport:
413 kind = BIBTAG_TECHREPORT;
414 break;
415 case KEYWORD_unpublished:
416 kind = BIBTAG_UNPUBLISHED;
417 break;
421 if (kind != KIND_GHOST_INDEX)
422 eof = parseTag (token, false, kind);
423 else
424 eof = mayParseTokenInSubparser(token);
426 } while (!eof);
429 static void initialize (const langType language)
431 Lang_bib = language;
434 static void findBibTags (void)
436 tokenInfo *const token = newToken ();
438 parseBibFile (token);
440 deleteToken (token);
443 /* Create parser definition structure */
444 extern parserDefinition* BibtexParser (void)
446 Assert (ARRAY_SIZE (BibKinds) == BIBTAG_COUNT);
447 static const char *const extensions [] = { "bib", NULL };
448 parserDefinition *const def = parserNew ("BibTeX");
449 def->extensions = extensions;
451 * New definitions for parsing instead of regex
453 def->kindTable = BibKinds;
454 def->kindCount = ARRAY_SIZE (BibKinds);
455 def->parser = findBibTags;
456 def->initialize = initialize;
457 def->keywordTable = BibKeywordTable;
458 def->keywordCount = ARRAY_SIZE (BibKeywordTable);
459 return def;