indented BibTeX styles
[texmacs.git] / src / src / Data / Convert / BibTeX / parsebib.cpp
blob5bbea54432228e838c7181b19ba38b1fda303b94
/******************************************************************************
* MODULE     : parsebib.cpp
* DESCRIPTION: conversion of bibtex strings into logical bibtex trees
* COPYRIGHT  : (C) 2010 David MICHEL
*******************************************************************************
* This software falls under the GNU general public license version 3 or later.
* It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE
* in the root directory or <http://www.gnu.org/licenses/gpl-3.0.html>.
******************************************************************************/
12 #include "convert.hpp"
13 #include "analyze.hpp"
14 #include "list.hpp"
15 #include "tree_traverse.hpp"
16 #include "Bibtex/bibtex_functions.hpp"
18 bool
19 bib_ok (string s, int pos) {
20 return 0 <= pos && pos < N(s);
23 void
24 bib_char (string s, int& pos, char c) {
25 if (!bib_ok (s, pos)) return;
26 if (s[pos] == c) pos++;
27 else {
28 cerr << "TeXmacs] Error: invalid BibTeX file.\n";
29 if (c) cerr << "TeXmacs] Invalid char: \'" << s[pos]
30 << "\', expected \'" << c << "\'\n";
31 pos= -1;
35 bool
36 bib_is_in (char c, string cs) {
37 int i= 0;
38 while (i < N(cs) && cs[i] != c) i++;
39 return i != N(cs);
42 void
43 bib_blank (string s, int& pos) {
44 if (!bib_ok (s, pos)) return;
45 string cs= " \t\n\r";
46 while (bib_ok (s, pos) && bib_is_in (s[pos], cs)) pos++;
49 void
50 bib_within (string s, int& pos, char cbegin, char cend, string& content) {
51 if (!bib_ok (s, pos)) return;
52 int depth= 0;
53 bib_char (s, pos, cbegin);
54 while (bib_ok (s, pos) && (s[pos] != cend || depth > 0)) {
55 if (cbegin != cend) {
56 if (s[pos] == cbegin) depth++;
57 else if (s[pos] == cend) depth--;
59 if (s[pos] == '\\' && bib_ok (s, pos+1)) {
60 content << '\\';
61 pos++;
63 content << s[pos];
64 pos++;
66 bib_char (s, pos, cend);
69 void
70 bib_until (string s, int& pos, string cs, string& content) {
71 if (!bib_ok (s, pos)) return;
72 while (bib_ok (s, pos) && !bib_is_in (s[pos], cs)) {
73 content << s[pos];
74 pos++;
78 void
79 bib_comment (string s, int& pos, tree& t) {
80 if (!bib_ok (s, pos)) return;
81 string content;
82 while (bib_ok (s, pos) && s[pos] == '%') {
83 bib_char (s, pos, '%');
84 while (bib_ok (s, pos) && s[pos] != '\n') {
85 content << s[pos];
86 pos++;
88 t << compound ("bib-line", content);
89 content= "";
90 pos++;
94 void bib_atomic_arg (string s, int& pos, string ce, tree& a) {
95 if (!bib_ok (s, pos)) return;
96 string sa;
97 string f, v, j, l;
98 switch (s[pos]) {
99 case '\"': {
100 bib_within (s, pos, '\"', '\"', sa);
101 a= sa;
102 break;
104 case '{': {
105 bib_within (s, pos, '{', '}', sa);
106 a= sa;
107 break;
109 default: {
110 string cs= ", \t\n\r";
111 cs << ce;
112 if (!is_digit (s[pos])) {
113 bib_until (s, pos, cs, sa);
114 a= compound ("bib-var", sa);
116 else {
117 bib_until (s, pos, cs, sa);
118 a= sa;
120 break;
125 void
126 bib_arg (string s, int& pos, string ce, tree& arg) {
127 if (!bib_ok (s, pos)) return;
128 string cs= ",";
129 cs << ce;
130 while (bib_ok (s, pos) && !bib_is_in (s[pos], cs)) {
131 tree a;
132 bib_atomic_arg (s, pos, ce, a);
133 arg << a;
134 bib_blank (s, pos);
135 if (bib_ok (s, pos) && s[pos] == '#') pos++;
136 bib_blank (s, pos);
140 void
141 bib_fields (string s, int& pos, string ce, string tag, tree& fields) {
142 if (!bib_ok (s, pos)) return;
143 int savpos;
144 bib_blank (s, pos);
145 while (bib_ok (s, pos) && s[pos] == ',') {
146 pos++;
147 bib_blank (s, pos);
149 while (bib_ok (s, pos) && !bib_is_in (s[pos], ce)) {
150 savpos= pos;
151 string param;
152 tree arg (CONCAT);
153 bib_until (s, pos, string ("={( \t\n\r"), param);
154 if (bib_ok (s, pos) && (s[pos]=='{' || s[pos]=='(')) {
155 pos= savpos;
156 return;
158 bib_blank (s, pos);
159 bib_char (s, pos, '=');
160 bib_blank (s, pos);
161 bib_arg (s, pos, ce, arg);
162 if (tag == "bib-field") param= locase_all (param);
163 arg= simplify_correct (arg);
164 fields << compound (tag, param, arg);
165 bib_blank (s, pos);
166 while (bib_ok (s, pos) && s[pos] == ',') {
167 pos++;
168 bib_blank (s, pos);
173 void
174 bib_string (string s, int& pos, tree& t) {
175 if (!bib_ok (s, pos)) return;
176 tree fields= tree (DOCUMENT);
177 string cs= ", \t\n\r";
178 char cend;
179 switch (s[pos]) {
180 case '{': cend= '}'; break;
181 case '(': cend= ')'; break;
182 default: pos= -1; return;
184 pos++;
185 cs << cend;
186 bib_blank (s, pos);
187 string ce;
188 ce << cend;
189 bib_fields (s, pos, ce, string ("bib-assign"), fields);
190 bib_blank (s, pos);
191 bib_char (s, pos, cend);
192 t << A (fields);
195 void
196 bib_preamble (string s, int& pos, tree& t) {
197 if (!bib_ok (s, pos)) return;
198 string cs= ",";
199 char cend;
200 switch (s[pos]) {
201 case '{': cend= '}'; break;
202 case '(': cend= ')'; break;
203 default: pos= -1; return;
205 pos++;
206 cs << cend;
207 bib_blank (s, pos);
208 while (bib_ok (s, pos) && s[pos] == ',') {
209 pos++;
210 bib_blank (s, pos);
212 while (bib_ok (s, pos) && s[pos] != cend) {
213 bib_blank (s, pos);
214 tree arg (CONCAT);
215 bib_arg (s, pos, cs, arg);
216 arg= simplify_correct (arg);
217 t << compound ("bib-latex", arg);
218 bib_blank (s, pos);
219 while (bib_ok (s, pos) && s[pos] == ',') {
220 pos++;
221 bib_blank (s, pos);
224 bib_blank (s, pos);
225 bib_char (s, pos, cend);
228 void
229 bib_entry (string s, int& pos, tree type, tree& t) {
230 if (!bib_ok (s, pos)) return;
231 tree entry;
232 tree fields= tree (DOCUMENT);
233 string cs= ", \t\n\r";
234 char cend;
235 switch (s[pos]) {
236 case '{': cend= '}'; break;
237 case '(': cend= ')'; break;
238 default: pos= -1; return;
240 pos++;
241 cs << cend;
242 bib_blank (s, pos);
243 string tag;
244 bib_until (s, pos, cs, tag);
245 bib_blank (s, pos);
246 string ce;
247 ce << cend;
248 bib_fields (s, pos, ce, string ("bib-field"), fields);
249 bib_blank (s, pos);
250 bib_char (s, pos, cend);
251 entry= compound ("bib-entry");
252 entry << type << tag << fields;
253 t << entry;
256 void
257 bib_list (string s, int& pos, tree& t) {
258 if (!bib_ok (s, pos)) return;
259 tree tentry (DOCUMENT);
260 tree tpreamble (DOCUMENT);
261 tree tstring (DOCUMENT);
262 string type;
263 bool comment= true;
264 int savpos;
265 while (bib_ok (s, pos)) {
266 bib_blank (s, pos);
267 if (!bib_ok (s, pos)) break;
268 switch (s[pos]) {
269 case '%': {
270 tree tc= tree (DOCUMENT);
271 bib_comment (s, pos, tc);
272 tentry << compound ("bib-comment", tc);
273 break;
275 case '@': {
276 pos++;
277 comment= false;
279 default: {
280 bib_blank (s, pos);
281 savpos= pos;
282 type= "";
283 bib_until (s, pos, string ("{(= \t\n\r"), type);
284 bib_blank (s, pos);
285 if (bib_ok (s, pos) && s[pos] == '=') {
286 tree fields (DOCUMENT);
287 pos= savpos;
288 bib_fields (s, pos, string (")}@"), string ("bib-field"), tentry);
289 bib_blank (s, pos);
290 if (bib_ok (s, pos) && (s[pos]==')' || s[pos]=='}')) {
291 if (N(tpreamble) != 0) t << compound ("bib-preamble", tpreamble);
292 if (N(tstring) != 0) t << compound ("bib-string", tstring);
293 t << A(tentry);
294 return;
297 else {
298 string stype= locase_all (type);
299 if (stype == "string") {
300 tree ts;
301 if (comment) ts= tree (DOCUMENT);
302 else ts= tstring;
303 bib_string (s, pos, ts);
304 if (comment) {
305 if (N(ts) == 1) tstring << compound ("bib-comment", ts[0]);
306 else tstring << compound ("bib-comment", ts);
309 else if (stype == "preamble") {
310 tree tp;
311 if (comment) tp= tree (DOCUMENT);
312 else tp= tpreamble;
313 bib_preamble (s, pos, tp);
314 if (comment) {
315 if (N(tp) == 1) tpreamble << compound ("bib-comment", tp[0]);
316 else tpreamble << compound ("bib-comment", tp);
319 else {
320 tree te;
321 if (comment) te= tree (DOCUMENT);
322 else te= tentry;
323 bib_entry (s, pos, stype, te);
324 if (comment) {
325 if (N(te) == 1) tentry << compound ("bib-comment", te[0]);
326 else tentry << compound ("bib-comment", te);
329 comment= true;
331 break;
335 // cerr << "ENTRIES: " << tentry << "\n";
336 // cerr << "PREAMBLE: " << tpreamble << "\n";
337 // cerr << "STRING: " << tstring << "\n";
338 // if (N(tpreamble) != 0) t << compound ("bib-preamble", tpreamble);
339 // if (N(tstring) != 0) t << compound ("bib-string", tstring);
340 // t << A(tentry);
341 hashmap<string,string> dict=
342 bib_strings_dict (tree (DOCUMENT, compound ("bib-string", tstring)));
343 t << A(bib_subst_vars (tentry, dict));
344 bib_parse_fields (t);
347 tree
348 parse_bib (string s) {
349 int pos= 0;
350 tree r (DOCUMENT);
351 bib_list (s, pos, r);
352 if (N(s) == 0 || N(r) == 0) return tree ();
353 if (pos < 0) {
354 cerr << "TeXmacs] Error: failed to load BibTeX file.\n";
355 return tree ();
357 return r;