Use g_*list_free_full() instead of g_*list_foreach()
[geany-mirror.git] / ctags / parsers / bibtex.c
blob088481260f196daefb63cff1ce2629089d325f91
1 /*
2 * Copyright (c) 2008, David Fishburn
3 * Copyright (c) 2012, Jan Larres
4 * Copyright (c) 2019, Mirco Schönfeld
6 * This source code is released for free distribution under the terms of the
7 * GNU General Public License version 2 or (at your option) any later version.
9 * This module contains functions for generating identifiers of entries of Bibtex language files.
11 * BibTex language "reference":
12 * https://en.wikipedia.org/wiki/BibTeX
/*
 *   INCLUDE FILES
 */
18 #include "general.h" /* must always come first */
19 #include <ctype.h> /* to define isalpha () */
20 #include <string.h>
22 #include "debug.h"
23 #include "entry.h"
24 #include "keyword.h"
25 #include "parse.h"
26 #include "read.h"
27 #include "routines.h"
28 #include "vstring.h"
/*
 *   MACROS
 */
/* True when the `type` member of TOKEN equals T. */
#define isType(token,t) \
	((bool) ((token)->type == (t)))

/* True when the `keyword` member of TOKEN equals K. */
#define isKeyword(token,k) \
	((bool) ((token)->keyword == (k)))

/*
 * True when C may appear inside an identifier: a letter, a digit or one of
 * '_', '-', '+', ':'.  C is evaluated several times, so pass a plain variable.
 */
#define isIdentChar(c) \
	(isalpha (c) || isdigit (c) || \
	 (c) == '_' || (c) == '-' || (c) == '+' || (c) == ':')
/*
 *   DATA DECLARATIONS
 */
/*
 * Identifies which BibTeX entry-type keyword a token holds.
 * One enumerator per name listed in BibKeywordTable.
 */
enum eKeywordId {
	KEYWORD_article,
	KEYWORD_book,
	KEYWORD_booklet,
	KEYWORD_conference,
	KEYWORD_inbook,
	KEYWORD_incollection,
	KEYWORD_inproceedings,
	KEYWORD_manual,
	KEYWORD_mastersthesis,
	KEYWORD_misc,
	KEYWORD_phdthesis,
	KEYWORD_proceedings,
	KEYWORD_string,
	KEYWORD_techreport,
	KEYWORD_unpublished
};

/* Plain int rather than the enum type, to allow KEYWORD_NONE. */
typedef int keywordId;
/*
 * Token categories.  Values 0..255 stand for the raw byte that was read;
 * a few of them are named for convenience.  Values from 256 upwards are
 * the synthetic categories assigned by the tokenizer.
 */
enum eTokenType {
	/* named byte values */
	TOKEN_OPEN_CURLY = '{',

	/* synthetic categories, outside the byte range */
	TOKEN_UNDEFINED = 256,
	TOKEN_KEYWORD,
	TOKEN_IDENTIFIER
};

/* Plain int so that any byte value can be stored as a token type. */
typedef int tokenType;
74 typedef struct sTokenInfo {
75 tokenType type;
76 keywordId keyword;
77 vString * string;
78 unsigned long lineNumber;
79 MIOPos filePosition;
80 } tokenInfo;
/*
 *   DATA DEFINITIONS
 */
/*
 * Language id of this parser.  Stored by initialize () and passed to
 * lookupCaseKeyword () when readToken () classifies an "@name" token.
 */
static langType Lang_bib;
/*
 * Tag kinds produced by this parser, used as indices into BibKinds.
 * BIBTAG_COUNT is not a kind; it is the number of kinds.
 */
typedef enum {
	BIBTAG_ARTICLE,
	BIBTAG_BOOK,
	BIBTAG_BOOKLET,
	BIBTAG_CONFERENCE,
	BIBTAG_INBOOK,
	BIBTAG_INCOLLECTION,
	BIBTAG_INPROCEEDINGS,
	BIBTAG_MANUAL,
	BIBTAG_MASTERSTHESIS,
	BIBTAG_MISC,
	BIBTAG_PHDTHESIS,
	BIBTAG_PROCEEDINGS,
	BIBTAG_STRING,
	BIBTAG_TECHREPORT,
	BIBTAG_UNPUBLISHED,
	BIBTAG_COUNT
} bibKind;
107 static kindDefinition BibKinds [] = {
108 { true, 'a', "article", "article" },
109 { true, 'b', "book", "book" },
110 { true, 'B', "booklet", "booklet" },
111 { true, 'c', "conference", "conference" },
112 { true, 'i', "inbook", "inbook" },
113 { true, 'I', "incollection", "incollection" },
114 { true, 'j', "inproceedings", "inproceedings" },
115 { true, 'm', "manual", "manual" },
116 { true, 'M', "mastersthesis", "mastersthesis" },
117 { true, 'n', "misc", "misc" },
118 { true, 'p', "phdthesis", "phdthesis" },
119 { true, 'P', "proceedings", "proceedings" },
120 { true, 's', "string", "string" },
121 { true, 't', "techreport", "techreport" },
122 { true, 'u', "unpublished", "unpublished" }
125 static const keywordTable BibKeywordTable [] = {
126 /* keyword keyword ID */
127 { "article", KEYWORD_article },
128 { "book", KEYWORD_book },
129 { "booklet", KEYWORD_booklet },
130 { "conference", KEYWORD_conference },
131 { "inbook", KEYWORD_inbook },
132 { "incollection", KEYWORD_incollection },
133 { "inproceedings",KEYWORD_inproceedings },
134 { "manual", KEYWORD_manual },
135 { "mastersthesis",KEYWORD_mastersthesis },
136 { "misc", KEYWORD_misc },
137 { "phdthesis", KEYWORD_phdthesis },
138 { "proceedings", KEYWORD_proceedings },
139 { "string", KEYWORD_string },
140 { "techreport", KEYWORD_techreport },
141 { "unpublished", KEYWORD_unpublished }
/*
 *   FUNCTION DEFINITIONS
 */
148 static tokenInfo *newToken (void)
150 tokenInfo *const token = xMalloc (1, tokenInfo);
152 token->type = TOKEN_UNDEFINED;
153 token->keyword = KEYWORD_NONE;
154 token->string = vStringNew ();
155 token->lineNumber = getInputLineNumber ();
156 token->filePosition = getInputFilePosition ();
158 return token;
161 static void deleteToken (tokenInfo *const token)
163 vStringDelete (token->string);
164 eFree (token);
/*
 *   Tag generation functions
 */
170 static void makeBibTag (tokenInfo *const token, bibKind kind)
172 if (BibKinds [kind].enabled)
174 const char *const name = vStringValue (token->string);
175 tagEntryInfo e;
176 initTagEntry (&e, name, kind);
178 e.lineNumber = token->lineNumber;
179 e.filePosition = token->filePosition;
181 makeTagEntry (&e);
/*
 *   Parsing functions
 */
190 * Read a C identifier beginning with "firstChar" and places it into
191 * "name".
193 static void parseIdentifier (vString *const string, const int firstChar)
195 int c = firstChar;
196 Assert (isIdentChar (c));
199 vStringPut (string, c);
200 c = getcFromInputFile ();
201 } while (c != EOF && isIdentChar (c));
202 if (c != EOF)
203 ungetcToInputFile (c); /* unget non-identifier character */
206 static bool readToken (tokenInfo *const token)
208 int c;
210 token->type = TOKEN_UNDEFINED;
211 token->keyword = KEYWORD_NONE;
212 vStringClear (token->string);
214 getNextChar:
218 c = getcFromInputFile ();
220 while (c == '\t' || c == ' ' || c == '\n');
222 token->lineNumber = getInputLineNumber ();
223 token->filePosition = getInputFilePosition ();
225 token->type = (unsigned char) c;
226 switch (c)
228 case EOF: return false;
230 case '@':
232 * All Bib entries start with an at symbol.
233 * Check if the next character is an alpha character
234 * else it is not a potential tex tag.
236 c = getcFromInputFile ();
237 if (! isalpha (c))
238 ungetcToInputFile (c);
239 else
241 vStringPut (token->string, '@');
242 parseIdentifier (token->string, c);
243 token->keyword = lookupCaseKeyword (vStringValue (token->string) + 1, Lang_bib);
244 if (isKeyword (token, KEYWORD_NONE))
245 token->type = TOKEN_IDENTIFIER;
246 else
247 token->type = TOKEN_KEYWORD;
249 break;
250 case '%':
251 skipToCharacterInInputFile ('\n'); /* % are single line comments */
252 goto getNextChar;
253 break;
254 default:
255 if (isIdentChar (c))
257 parseIdentifier (token->string, c);
258 token->type = TOKEN_IDENTIFIER;
260 break;
262 return true;
265 static void copyToken (tokenInfo *const dest, tokenInfo *const src)
267 dest->lineNumber = src->lineNumber;
268 dest->filePosition = src->filePosition;
269 dest->type = src->type;
270 dest->keyword = src->keyword;
271 vStringCopy (dest->string, src->string);
/*
 *   Scanning functions
 */
278 static bool parseTag (tokenInfo *const token, bibKind kind)
280 tokenInfo * const name = newToken ();
281 vString * currentid;
282 bool eof = false;
284 currentid = vStringNew ();
286 * Bib entries are of these formats:
287 * @article{identifier,
288 * author="John Doe"}
290 * When a keyword is found, loop through all words up to
291 * a comma brace for the tag name.
294 if (isType (token, TOKEN_KEYWORD))
296 copyToken (name, token);
297 if (!readToken (token))
299 eof = true;
300 goto out;
304 if (isType (token, TOKEN_OPEN_CURLY))
306 if (!readToken (token))
308 eof = true;
309 goto out;
311 if (isType (token, TOKEN_IDENTIFIER)){
312 vStringCat (currentid, token->string);
313 vStringStripTrailing (currentid);
314 if (vStringLength (currentid) > 0)
316 vStringCopy (name->string, currentid);
317 makeBibTag (name, kind);
320 else
321 { // should find an identifier for bib item at first place
322 eof = true;
323 goto out;
327 out:
328 deleteToken (name);
329 vStringDelete (currentid);
330 return eof;
333 static void parseBibFile (tokenInfo *const token)
335 bool eof = false;
339 if (!readToken (token))
340 break;
342 if (isType (token, TOKEN_KEYWORD))
344 switch (token->keyword)
346 case KEYWORD_article:
347 eof = parseTag (token, BIBTAG_ARTICLE);
348 break;
349 case KEYWORD_book:
350 eof = parseTag (token, BIBTAG_BOOK);
351 break;
352 case KEYWORD_booklet:
353 eof = parseTag (token, BIBTAG_BOOKLET);
354 break;
355 case KEYWORD_conference:
356 eof = parseTag (token, BIBTAG_CONFERENCE);
357 break;
358 case KEYWORD_inbook:
359 eof = parseTag (token, BIBTAG_INBOOK);
360 break;
361 case KEYWORD_incollection:
362 eof = parseTag (token, BIBTAG_INCOLLECTION);
363 break;
364 case KEYWORD_inproceedings:
365 eof = parseTag (token, BIBTAG_INPROCEEDINGS);
366 break;
367 case KEYWORD_manual:
368 eof = parseTag (token, BIBTAG_MANUAL);
369 break;
370 case KEYWORD_mastersthesis:
371 eof = parseTag (token, BIBTAG_MASTERSTHESIS);
372 break;
373 case KEYWORD_misc:
374 eof = parseTag (token, BIBTAG_MISC);
375 break;
376 case KEYWORD_phdthesis:
377 eof = parseTag (token, BIBTAG_PHDTHESIS);
378 break;
379 case KEYWORD_proceedings:
380 eof = parseTag (token, BIBTAG_PROCEEDINGS);
381 break;
382 case KEYWORD_string:
383 eof = parseTag (token, BIBTAG_STRING);
384 break;
385 case KEYWORD_techreport:
386 eof = parseTag (token, BIBTAG_TECHREPORT);
387 break;
388 case KEYWORD_unpublished:
389 eof = parseTag (token, BIBTAG_UNPUBLISHED);
390 break;
391 default:
392 break;
395 if (eof)
396 break;
397 } while (true);
400 static void initialize (const langType language)
402 Lang_bib = language;
405 static void findBibTags (void)
407 tokenInfo *const token = newToken ();
409 parseBibFile (token);
411 deleteToken (token);
414 /* Create parser definition structure */
415 extern parserDefinition* BibtexParser (void)
417 Assert (ARRAY_SIZE (BibKinds) == BIBTAG_COUNT);
418 static const char *const extensions [] = { "bib", NULL };
419 parserDefinition *const def = parserNew ("BibTeX");
420 def->extensions = extensions;
422 * New definitions for parsing instead of regex
424 def->kindTable = BibKinds;
425 def->kindCount = ARRAY_SIZE (BibKinds);
426 def->parser = findBibTags;
427 def->initialize = initialize;
428 def->keywordTable = BibKeywordTable;
429 def->keywordCount = ARRAY_SIZE (BibKeywordTable);
430 return def;