2 * Copyright (c) 2008, David Fishburn
3 * Copyright (c) 2012, Jan Larres
4 * Copyright (c) 2019, Mirco Schönfeld
6 * This source code is released for free distribution under the terms of the
7 * GNU General Public License version 2 or (at your option) any later version.
9 * This module contains functions for generating tags for the entries of BibTeX files.
11 * BibTeX language "reference":
12 * https://en.wikipedia.org/wiki/BibTeX
18 #include "general.h" /* must always come first */
19 #include <ctype.h> /* to define isalpha () */
/* Token predicates: isType compares the token's `type` member with t and
 * isKeyword compares its `keyword` member with k; both results are cast to
 * bool. */
33 #define isType(token,t) (bool) ((token)->type == (t))
34 #define isKeyword(token,k) (bool) ((token)->keyword == (k))
/* True when c is accepted as an identifier character: a letter, a digit, or
 * one of '_', '-', '+', ':'.  The argument is expanded several times, so it
 * must be free of side effects. */
35 #define isIdentChar(c) \
36 (isalpha (c) || isdigit (c) || (c) == '_' || (c) == '-' || (c) == '+' || (c) == ':')
43 * Used to specify type of keyword.
/* Keyword identifiers, one per entry type listed in BibKeywordTable (only
 * part of the enumerator list is visible in this view). */
52 KEYWORD_inproceedings
,
54 KEYWORD_mastersthesis
,
/* Keyword ids are carried as a plain int rather than the enum type so that
 * a variable can also hold KEYWORD_NONE. */
62 typedef int keywordId
; /* to allow KEYWORD_NONE */
65 /* Values 0..255 are the byte's own value; some are given names for convenience. */
66 TOKEN_OPEN_CURLY
= '{',
67 /* The named byte values end here; the remaining token types start at 256 so
 * they cannot collide with any byte value. */
68 TOKEN_UNDEFINED
= 256,
/* Token types are stored as a plain int: readToken assigns the raw byte
 * value of the character it read to the `type` member. */
72 typedef int tokenType
;
/* Per-token state used by the parser (only part of the member list is
 * visible in this view). */
74 typedef struct sTokenInfo
{
/* Input line number recorded for the token (filled from
 * getInputLineNumber () in newToken and readToken). */
78 unsigned long lineNumber
;
/* Language handle handed to lookupCaseKeyword () when readToken classifies
 * the word that follows '@'.
 * NOTE(review): presumably assigned in initialize (); its body is not
 * visible here - confirm. */
86 static langType Lang_bib
;
/* Kind table installed as the parser's kindTable in BibtexParser ().
 * It is indexed by bibKind (see makeBibTag) and BibtexParser () asserts that
 * its length equals BIBTAG_COUNT, so the rows must stay in the same order as
 * the bibKind enumerators.
 * NOTE(review): the row layout looks like { enabled, one-letter kind, name,
 * description }; only the `enabled` member is referenced in this file -
 * confirm against the kindDefinition declaration. */
107 static kindDefinition BibKinds
[] = {
108 { true, 'a', "article", "article" },
109 { true, 'b', "book", "book" },
110 { true, 'B', "booklet", "booklet" },
111 { true, 'c', "conference", "conference" },
112 { true, 'i', "inbook", "inbook" },
113 { true, 'I', "incollection", "incollection" },
114 { true, 'j', "inproceedings", "inproceedings" },
115 { true, 'm', "manual", "manual" },
116 { true, 'M', "mastersthesis", "mastersthesis" },
117 { true, 'n', "misc", "misc" },
118 { true, 'p', "phdthesis", "phdthesis" },
119 { true, 'P', "proceedings", "proceedings" },
120 { true, 's', "string", "string" },
121 { true, 't', "techreport", "techreport" },
122 { true, 'u', "unpublished", "unpublished" }
/* Maps each recognised entry-type name (the word that follows '@', without
 * the '@' itself) to its keyword id.  The table is installed as the parser's
 * keywordTable in BibtexParser (); readToken queries it through
 * lookupCaseKeyword (). */
125 static const keywordTable BibKeywordTable
[] = {
126 /* keyword keyword ID */
127 { "article", KEYWORD_article
},
128 { "book", KEYWORD_book
},
129 { "booklet", KEYWORD_booklet
},
130 { "conference", KEYWORD_conference
},
131 { "inbook", KEYWORD_inbook
},
132 { "incollection", KEYWORD_incollection
},
133 { "inproceedings",KEYWORD_inproceedings
},
134 { "manual", KEYWORD_manual
},
135 { "mastersthesis",KEYWORD_mastersthesis
},
136 { "misc", KEYWORD_misc
},
137 { "phdthesis", KEYWORD_phdthesis
},
138 { "proceedings", KEYWORD_proceedings
},
139 { "string", KEYWORD_string
},
140 { "techreport", KEYWORD_techreport
},
141 { "unpublished", KEYWORD_unpublished
}
145 * FUNCTION DEFINITIONS
/* Allocates a tokenInfo with xMalloc and puts it into its initial state:
 * type TOKEN_UNDEFINED, keyword KEYWORD_NONE, a freshly created string, and
 * the current input line number and file position.
 * The result is declared as a tokenInfo pointer; callers (parseTag,
 * findBibTags) store it in a local. */
148 static tokenInfo
*newToken (void)
150 tokenInfo
*const token
= xMalloc (1, tokenInfo
);
/* Start with "no token read yet". */
152 token
->type
= TOKEN_UNDEFINED
;
153 token
->keyword
= KEYWORD_NONE
;
154 token
->string
= vStringNew ();
/* Remember where the input currently stands. */
155 token
->lineNumber
= getInputLineNumber ();
156 token
->filePosition
= getInputFilePosition ();
/* Releases the string owned by `token` with vStringDelete ().
 * NOTE(review): the release of the tokenInfo allocation itself is not
 * visible in this view - confirm it follows. */
161 static void deleteToken (tokenInfo
*const token
)
163 vStringDelete (token
->string
);
168 * Tag generation functions
/* Prepares a tag entry for `token` of the given kind.
 * Nothing is done when the kind is disabled in BibKinds.  Otherwise the tag
 * entry `e` is initialised with the token's string as its name and `kind`
 * as its kind, and the token's line number and file position are copied
 * into it.
 * NOTE(review): the declaration of `e` and the call that finally emits the
 * entry are not visible in this view. */
170 static void makeBibTag (tokenInfo
*const token
, bibKind kind
)
/* `kind` doubles as the index into the kind table. */
172 if (BibKinds
[kind
].enabled
)
174 const char *const name
= vStringValue (token
->string
);
176 initTagEntry (&e
, name
, kind
);
/* Point the entry at the place recorded in the token. */
178 e
.lineNumber
= token
->lineNumber
;
179 e
.filePosition
= token
->filePosition
;
190 * Reads an identifier (as accepted by isIdentChar) beginning with "firstChar" and places it into
/* Appends an identifier to `string`.
 * `firstChar` is the identifier's first character, already consumed by the
 * caller.  The character `c` is asserted to satisfy isIdentChar; characters
 * are then appended and read from the input for as long as the next one is
 * neither EOF nor rejected by isIdentChar.  The character that ended the
 * identifier is pushed back onto the input.
 * NOTE(review): the declaration of `c` (presumably initialised from
 * `firstChar`) and any guard around the push-back are not visible in this
 * view. */
193 static void parseIdentifier (vString
*const string
, const int firstChar
)
196 Assert (isIdentChar (c
));
/* Store the current character, then fetch the next one. */
199 vStringPut (string
, c
);
200 c
= getcFromInputFile ();
201 } while (c
!= EOF
&& isIdentChar (c
));
203 ungetcToInputFile (c
); /* unget non-identifier character */
/* Reads the next token from the input into `token`.
 * The token is first reset (type TOKEN_UNDEFINED, keyword KEYWORD_NONE,
 * empty string).  Tabs, spaces and newlines are skipped, then the token's
 * line number and file position are recorded and its type is preset to the
 * byte value of the character read.
 * Returns false on EOF.
 * NOTE(review): several lines of the function (the declaration of `c`, the
 * switch header, most case labels and the final return) are not visible in
 * this view; the comments below describe only the visible statements. */
206 static bool readToken (tokenInfo
*const token
)
/* Reset the token before reading. */
210 token
->type
= TOKEN_UNDEFINED
;
211 token
->keyword
= KEYWORD_NONE
;
212 vStringClear (token
->string
);
/* Skip tabs, spaces and newlines. */
218 c
= getcFromInputFile ();
220 while (c
== '\t' || c
== ' ' || c
== '\n');
/* Record where the token starts. */
222 token
->lineNumber
= getInputLineNumber ();
223 token
->filePosition
= getInputFilePosition ();
/* Default: a single-character token whose type is the byte itself. */
225 token
->type
= (unsigned char) c
;
/* End of input: report that no token was read. */
228 case EOF
: return false;
232 * All Bib entries start with an at symbol.
233 * Check if the next character is an alpha character
234 * else it is not a potential tex tag.
236 c
= getcFromInputFile ();
238 ungetcToInputFile (c
);
/* The stored string keeps the leading '@' followed by the identifier. */
241 vStringPut (token
->string
, '@');
242 parseIdentifier (token
->string
, c
);
/* Look the word up without its leading '@' (hence the + 1). */
243 token
->keyword
= lookupCaseKeyword (vStringValue (token
->string
) + 1, Lang_bib
);
/* Words that are not in the keyword table become plain identifiers. */
244 if (isKeyword (token
, KEYWORD_NONE
))
245 token
->type
= TOKEN_IDENTIFIER
;
247 token
->type
= TOKEN_KEYWORD
;
251 skipToCharacterInInputFile ('\n'); /* % are single line comments */
/* Any other identifier is read as-is and typed TOKEN_IDENTIFIER. */
257 parseIdentifier (token
->string
, c
);
258 token
->type
= TOKEN_IDENTIFIER
;
/* Copies the state of `src` into `dest`: line number, file position, type
 * and keyword are assigned, and the string contents are copied with
 * vStringCopy () so that each token keeps its own string. */
265 static void copyToken (tokenInfo
*const dest
, tokenInfo
*const src
)
267 dest
->lineNumber
= src
->lineNumber
;
268 dest
->filePosition
= src
->filePosition
;
269 dest
->type
= src
->type
;
270 dest
->keyword
= src
->keyword
;
/* Copy the characters, not the vString pointer. */
271 vStringCopy (dest
->string
, src
->string
);
/* Handles one entry whose keyword token is in `token` and creates a tag of
 * the given kind for the entry's identifier.
 * A scratch token `name` and a scratch string `currentid` are created.  If
 * `token` is a keyword it is copied into `name` and the next token is read;
 * if that token is an opening curly brace the following token is read; if
 * that one is an identifier it is appended to `currentid`, stripped with
 * vStringStripTrailing (), and - when non-empty - copied into `name` and
 * passed to makeBibTag ().  `currentid` is deleted before leaving.
 * NOTE(review): the result is stored in a variable named `eof` by
 * parseBibFile, so it presumably reports end of input; the error branches,
 * the release of `name` and the return statement are not visible in this
 * view. */
278 static bool parseTag (tokenInfo
*const token
, bibKind kind
)
280 tokenInfo
* const name
= newToken ();
284 currentid
= vStringNew ();
286 * Bib entries are of these formats:
287 * @article{identifier,
290 * When a keyword is found, loop through all words up to
291 * a comma brace for the tag name.
294 if (isType (token
, TOKEN_KEYWORD
))
/* Keep the keyword token's position for the tag, then move on. */
296 copyToken (name
, token
);
297 if (!readToken (token
))
304 if (isType (token
, TOKEN_OPEN_CURLY
))
306 if (!readToken (token
))
311 if (isType (token
, TOKEN_IDENTIFIER
)){
312 vStringCat (currentid
, token
->string
);
313 vStringStripTrailing (currentid
);
/* Only a non-empty identifier produces a tag. */
314 if (vStringLength (currentid
) > 0)
316 vStringCopy (name
->string
, currentid
);
317 makeBibTag (name
, kind
);
321 { // should find an identifier for bib item at first place
329 vStringDelete (currentid
);
/* Top-level parse routine: reads tokens with readToken () and, for every
 * keyword token, calls parseTag () with the tag kind that corresponds to the
 * keyword, storing the result in `eof`.
 * NOTE(review): the loop construct, the declaration of `eof` and several
 * case labels are not visible in this view; the visible calls pair each
 * keyword with the like-named BIBTAG_ kind. */
333 static void parseBibFile (tokenInfo
*const token
)
339 if (!readToken (token
))
/* Only entry-type keywords start a tag. */
342 if (isType (token
, TOKEN_KEYWORD
))
344 switch (token
->keyword
)
346 case KEYWORD_article
:
347 eof
= parseTag (token
, BIBTAG_ARTICLE
);
350 eof
= parseTag (token
, BIBTAG_BOOK
);
352 case KEYWORD_booklet
:
353 eof
= parseTag (token
, BIBTAG_BOOKLET
);
355 case KEYWORD_conference
:
356 eof
= parseTag (token
, BIBTAG_CONFERENCE
);
359 eof
= parseTag (token
, BIBTAG_INBOOK
);
361 case KEYWORD_incollection
:
362 eof
= parseTag (token
, BIBTAG_INCOLLECTION
);
364 case KEYWORD_inproceedings
:
365 eof
= parseTag (token
, BIBTAG_INPROCEEDINGS
);
368 eof
= parseTag (token
, BIBTAG_MANUAL
);
370 case KEYWORD_mastersthesis
:
371 eof
= parseTag (token
, BIBTAG_MASTERSTHESIS
);
374 eof
= parseTag (token
, BIBTAG_MISC
);
376 case KEYWORD_phdthesis
:
377 eof
= parseTag (token
, BIBTAG_PHDTHESIS
);
379 case KEYWORD_proceedings
:
380 eof
= parseTag (token
, BIBTAG_PROCEEDINGS
);
383 eof
= parseTag (token
, BIBTAG_STRING
);
385 case KEYWORD_techreport
:
386 eof
= parseTag (token
, BIBTAG_TECHREPORT
);
388 case KEYWORD_unpublished
:
389 eof
= parseTag (token
, BIBTAG_UNPUBLISHED
);
/* Parser initialisation hook; BibtexParser () installs it as the parser
 * definition's `initialize` callback.
 * NOTE(review): the body is not visible in this view; presumably it records
 * `language` in Lang_bib - confirm. */
400 static void initialize (const langType language
)
/* Parser entry point; BibtexParser () installs it as the parser definition's
 * `parser` callback.  It creates one token with newToken () and hands it to
 * parseBibFile (), which reuses it for the whole input.
 * NOTE(review): the release of the token is not visible in this view. */
405 static void findBibTags (void)
407 tokenInfo
*const token
= newToken ();
409 parseBibFile (token
);
414 /* Creates and fills in the parser definition for the "BibTeX" language. */
415 extern parserDefinition
* BibtexParser (void)
/* The kind table is indexed by bibKind, so the two must have the same size. */
417 Assert (ARRAY_SIZE (BibKinds
) == BIBTAG_COUNT
);
/* File name extensions handled by this parser; the list ends with NULL. */
418 static const char *const extensions
[] = { "bib", NULL
};
419 parserDefinition
*const def
= parserNew ("BibTeX");
420 def
->extensions
= extensions
;
422 * New definitions for parsing instead of regex
/* Tag kinds, the parse and initialisation callbacks, and the keyword table
 * with its length. */
424 def
->kindTable
= BibKinds
;
425 def
->kindCount
= ARRAY_SIZE (BibKinds
);
426 def
->parser
= findBibTags
;
427 def
->initialize
= initialize
;
428 def
->keywordTable
= BibKeywordTable
;
429 def
->keywordCount
= ARRAY_SIZE (BibKeywordTable
);