2 * Copyright (c) 2008, David Fishburn
3 * Copyright (c) 2012, Jan Larres
4 * Copyright (c) 2019, Mirco Schönfeld
6 * This source code is released for free distribution under the terms of the
7 * GNU General Public License version 2 or (at your option) any later version.
9 * This module contains functions for generating tags for entries in BibTeX language files.
11 * BibTeX language "reference":
12 * https://en.wikipedia.org/wiki/BibTeX
18 #include "general.h" /* must always come first */
19 #include <ctype.h> /* to define isalpha () */
/* Evaluates to true when the token's `type` field equals `t`. */
34 #define isType(token,t) (bool) ((token)->type == (t))
/* Evaluates to true when the token's `keyword` field equals `k`. */
35 #define isKeyword(token,k) (bool) ((token)->keyword == (k))
/*
 * Evaluates to non-zero when `c` may appear in an identifier: anything
 * accepted by isalpha () or isdigit (), or one of '_', '-', '+', ':', '.', '/'.
 * The argument is expanded several times, so pass an expression without
 * side effects.
 */
36 #define isIdentChar(c) \
37 (isalpha (c) || isdigit (c) || (c) == '_' || (c) == '-' || (c) == '+' || (c) == ':' || (c) == '.' || (c) == '/')
44 * Used to specify type of keyword.
53 KEYWORD_inproceedings
,
55 KEYWORD_mastersthesis
,
63 typedef int keywordId
; /* to allow KEYWORD_NONE */
66 /* 0..255 are the byte's value. Some are named for convenience */
67 TOKEN_OPEN_CURLY
= '{',
68 /* above is special types */
69 TOKEN_UNDEFINED
= 256,
73 typedef int tokenType
;
75 typedef struct sTokenInfo
{
79 unsigned long lineNumber
;
87 static langType Lang_bib
;
108 static kindDefinition BibKinds
[] = {
109 { true, 'a', "article", "article" },
110 { true, 'b', "book", "book" },
111 { true, 'B', "booklet", "booklet" },
112 { true, 'c', "conference", "conference" },
113 { true, 'i', "inbook", "inbook" },
114 { true, 'I', "incollection", "incollection" },
115 { true, 'j', "inproceedings", "inproceedings" },
116 { true, 'm', "manual", "manual" },
117 { true, 'M', "mastersthesis", "mastersthesis" },
118 { true, 'n', "misc", "misc" },
119 { true, 'p', "phdthesis", "phdthesis" },
120 { true, 'P', "proceedings", "proceedings" },
121 { true, 's', "string", "string" },
122 { true, 't', "techreport", "techreport" },
123 { true, 'u', "unpublished", "unpublished" }
126 static const keywordTable BibKeywordTable
[] = {
127 /* keyword keyword ID */
128 { "article", KEYWORD_article
},
129 { "book", KEYWORD_book
},
130 { "booklet", KEYWORD_booklet
},
131 { "conference", KEYWORD_conference
},
132 { "inbook", KEYWORD_inbook
},
133 { "incollection", KEYWORD_incollection
},
134 { "inproceedings",KEYWORD_inproceedings
},
135 { "manual", KEYWORD_manual
},
136 { "mastersthesis",KEYWORD_mastersthesis
},
137 { "misc", KEYWORD_misc
},
138 { "phdthesis", KEYWORD_phdthesis
},
139 { "proceedings", KEYWORD_proceedings
},
140 { "string", KEYWORD_string
},
141 { "techreport", KEYWORD_techreport
},
142 { "unpublished", KEYWORD_unpublished
}
146 * FUNCTION DEFINITIONS
149 static tokenInfo
*newToken (void)
151 tokenInfo
*const token
= xMalloc (1, tokenInfo
);
153 token
->type
= TOKEN_UNDEFINED
;
154 token
->keyword
= KEYWORD_NONE
;
155 token
->string
= vStringNew ();
156 token
->lineNumber
= getInputLineNumber ();
157 token
->filePosition
= getInputFilePosition ();
162 static void deleteToken (tokenInfo
*const token
)
164 vStringDelete (token
->string
);
169 * Tag generation functions
171 static void makeBibTag (tokenInfo
*const token
, bibKind kind
)
173 const char *const name
= vStringValue (token
->string
);
175 initTagEntry (&e
, name
, kind
);
177 updateTagLine (&e
, token
->lineNumber
, token
->filePosition
);
187 * Reads an identifier beginning with "firstChar" and places it into
190 static void parseIdentifier (vString
*const string
, const int firstChar
)
193 Assert (isIdentChar (c
));
196 vStringPut (string
, c
);
197 c
= getcFromInputFile ();
198 } while (c
!= EOF
&& isIdentChar (c
));
200 ungetcToInputFile (c
); /* unget non-identifier character */
203 static bool readToken (tokenInfo
*const token
)
207 token
->type
= TOKEN_UNDEFINED
;
208 token
->keyword
= KEYWORD_NONE
;
209 vStringClear (token
->string
);
215 c
= getcFromInputFile ();
217 while (c
== '\t' || c
== ' ' || c
== '\n');
219 token
->lineNumber
= getInputLineNumber ();
220 token
->filePosition
= getInputFilePosition ();
222 token
->type
= (unsigned char) c
;
225 case EOF
: return false;
229 * All Bib entries start with an at symbol.
230 * Check if the next character is an alpha character
231 * else it is not a potential tex tag.
233 c
= getcFromInputFile ();
235 ungetcToInputFile (c
);
238 vStringPut (token
->string
, '@');
239 parseIdentifier (token
->string
, c
);
240 token
->keyword
= lookupCaseKeyword (vStringValue (token
->string
) + 1, Lang_bib
);
241 if (isKeyword (token
, KEYWORD_NONE
))
242 token
->type
= TOKEN_IDENTIFIER
;
244 token
->type
= TOKEN_KEYWORD
;
248 skipToCharacterInInputFile ('\n'); /* % are single line comments */
254 parseIdentifier (token
->string
, c
);
255 token
->type
= TOKEN_IDENTIFIER
;
262 static void copyToken (tokenInfo
*const dest
, tokenInfo
*const src
)
264 dest
->lineNumber
= src
->lineNumber
;
265 dest
->filePosition
= src
->filePosition
;
266 dest
->type
= src
->type
;
267 dest
->keyword
= src
->keyword
;
268 vStringCopy (dest
->string
, src
->string
);
275 static bool parseTag (tokenInfo
*const token
, bool foreignKeyword
, int kind
)
277 tokenInfo
* const name
= newToken ();
281 currentid
= vStringNew ();
283 * Bib entries are of these formats:
284 * @article{identifier,
287 * When a keyword is found, loop through all words up to
288 * a comma brace for the tag name.
291 if (isType (token
, TOKEN_KEYWORD
) || foreignKeyword
)
293 copyToken (name
, token
);
294 if (!readToken (token
))
301 if (isType (token
, TOKEN_OPEN_CURLY
))
303 if (!readToken (token
))
308 if (isType (token
, TOKEN_IDENTIFIER
)){
309 vStringCat (currentid
, token
->string
);
310 vStringStripTrailing (currentid
);
311 if (vStringLength (currentid
) > 0)
313 vStringCopy (name
->string
, currentid
);
314 makeBibTag (name
, kind
);
318 { // should find an identifier for bib item at first place
326 vStringDelete (currentid
);
330 static bool mayParseTokenInSubparser (tokenInfo
*const token
)
335 if (*vStringValue (token
->string
) != '@')
338 foreachSubparser (sub
, true)
340 bibTexSubparser
*bibsub
= (bibTexSubparser
*)sub
;
341 if (bibsub
->isKeywordForTagging
)
344 enterSubparser (sub
);
345 kind
= bibsub
->isKeywordForTagging (bibsub
,
346 vStringValue (token
->string
) + 1);
347 if (kind
!= KIND_GHOST_INDEX
)
348 eof
= parseTag (token
, true, kind
);
350 if (kind
!= KIND_GHOST_INDEX
)
358 static void parseBibFile (tokenInfo
*const token
)
364 if (!readToken (token
))
367 bibKind kind
= KIND_GHOST_INDEX
;;
369 if (isType (token
, TOKEN_KEYWORD
))
371 switch (token
->keyword
)
373 case KEYWORD_article
:
374 kind
= BIBTAG_ARTICLE
;
379 case KEYWORD_booklet
:
380 kind
= BIBTAG_BOOKLET
;
382 case KEYWORD_conference
:
383 kind
= BIBTAG_CONFERENCE
;
386 kind
= BIBTAG_INBOOK
;
388 case KEYWORD_incollection
:
389 kind
= BIBTAG_INCOLLECTION
;
391 case KEYWORD_inproceedings
:
392 kind
= BIBTAG_INPROCEEDINGS
;
395 kind
= BIBTAG_MANUAL
;
397 case KEYWORD_mastersthesis
:
398 kind
= BIBTAG_MASTERSTHESIS
;
403 case KEYWORD_phdthesis
:
404 kind
= BIBTAG_PHDTHESIS
;
406 case KEYWORD_proceedings
:
407 kind
= BIBTAG_PROCEEDINGS
;
410 kind
= BIBTAG_STRING
;
412 case KEYWORD_techreport
:
413 kind
= BIBTAG_TECHREPORT
;
415 case KEYWORD_unpublished
:
416 kind
= BIBTAG_UNPUBLISHED
;
421 if (kind
!= KIND_GHOST_INDEX
)
422 eof
= parseTag (token
, false, kind
);
424 eof
= mayParseTokenInSubparser(token
);
429 static void initialize (const langType language
)
434 static void findBibTags (void)
436 tokenInfo
*const token
= newToken ();
438 parseBibFile (token
);
443 /* Create parser definition structure */
444 extern parserDefinition
* BibtexParser (void)
446 Assert (ARRAY_SIZE (BibKinds
) == BIBTAG_COUNT
);
447 static const char *const extensions
[] = { "bib", NULL
};
448 parserDefinition
*const def
= parserNew ("BibTeX");
449 def
->extensions
= extensions
;
451 * New definitions for parsing instead of regex
453 def
->kindTable
= BibKinds
;
454 def
->kindCount
= ARRAY_SIZE (BibKinds
);
455 def
->parser
= findBibTags
;
456 def
->initialize
= initialize
;
457 def
->keywordTable
= BibKeywordTable
;
458 def
->keywordCount
= ARRAY_SIZE (BibKeywordTable
);