git-svn make executable
[texmacs.git] / src / src / Data / Convert / BibTeX / parsebib.cpp
blob58729fa4106227128a47c1c8f0895e64d017c0bb
/******************************************************************************
* MODULE : parsebib.cpp
* DESCRIPTION: conversion of bibtex strings into logical bibtex trees
* COPYRIGHT : (C) 2010 David MICHEL
*******************************************************************************
* This software falls under the GNU general public license version 3 or later.
* It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE
* in the root directory or <http://www.gnu.org/licenses/gpl-3.0.html>.
******************************************************************************/
12 #include "convert.hpp"
13 #include "analyze.hpp"
14 #include "list.hpp"
15 #include "tree_traverse.hpp"
16 #include "Bibtex/bibtex_functions.hpp"
18 bool
19 bib_ok (string s, int pos) {
20 return 0 <= pos && pos < N(s);
23 void
24 bib_char (string s, int& pos, char c) {
25 if (!bib_ok (s, pos)) return;
26 if (s[pos] == c) pos++;
27 else {
28 cerr << "TeXmacs] Error: invalid BibTeX file.\n";
29 if (c) cerr << "TeXmacs] Invalid char: \'" << s[pos]
30 << "\', expected \'" << c << "\'\n";
31 pos= -1;
35 bool
36 bib_is_in (char c, string cs) {
37 int i= 0;
38 while (i < N(cs) && cs[i] != c) i++;
39 return i != N(cs);
42 void
43 bib_blank (string s, int& pos) {
44 if (!bib_ok (s, pos)) return;
45 string cs= " \t\n\r";
46 while (bib_ok (s, pos) && bib_is_in (s[pos], cs)) pos++;
49 void
50 bib_within (string s, int& pos, char cbegin, char cend, string& content) {
51 if (!bib_ok (s, pos)) return;
52 int depth= 0;
53 bib_char (s, pos, cbegin);
54 while (bib_ok (s, pos) && (s[pos] != cend || depth > 0)) {
55 if (cbegin != cend) {
56 if (s[pos] == cbegin) depth++;
57 else if (s[pos] == cend) depth--;
59 if (s[pos] == '\\' && bib_ok (s, pos+1)) {
60 content << '\\';
61 pos++;
63 content << s[pos];
64 pos++;
66 bib_char (s, pos, cend);
69 void
70 bib_until (string s, int& pos, string cs, string& content) {
71 if (!bib_ok (s, pos)) return;
72 while (bib_ok (s, pos) && !bib_is_in (s[pos], cs)) {
73 content << s[pos];
74 pos++;
78 void
79 bib_comment (string s, int& pos, tree& t) {
80 if (!bib_ok (s, pos)) return;
81 string content;
82 while (bib_ok (s, pos) && s[pos] == '%') {
83 bib_char (s, pos, '%');
84 while (bib_ok (s, pos) && s[pos] != '\n') {
85 content << s[pos];
86 pos++;
88 t << compound ("bib-line", content);
89 content= "";
90 pos++;
94 void bib_atomic_arg (string s, int& pos, string ce, tree& a) {
95 if (!bib_ok (s, pos)) return;
96 string sa;
97 string f, v, j, l;
98 switch (s[pos]) {
99 case '\"': {
100 bib_within (s, pos, '\"', '\"', sa);
101 a= sa;
102 break;
104 case '{': {
105 bib_within (s, pos, '{', '}', sa);
106 a= sa;
107 break;
109 default: {
110 string cs= ", \t\n\r";
111 cs << ce;
112 if (!is_digit (s[pos])) {
113 bib_until (s, pos, cs, sa);
114 a= compound ("bib-var", sa);
116 else {
117 bib_until (s, pos, cs, sa);
118 a= sa;
120 break;
125 void
126 bib_arg (string s, int& pos, string ce, tree& arg) {
127 if (!bib_ok (s, pos)) return;
128 string cs= ",";
129 cs << ce;
130 while (bib_ok (s, pos) && !bib_is_in (s[pos], cs)) {
131 tree a;
132 bib_atomic_arg (s, pos, ce, a);
133 arg << a;
134 bib_blank (s, pos);
135 if (bib_ok (s, pos) && s[pos] == '#') {
136 pos++;
137 bib_blank (s, pos);
139 else break;
143 void
144 bib_fields (string s, int& pos, string ce, string tag, tree& fields) {
145 if (!bib_ok (s, pos)) return;
146 int savpos;
147 bib_blank (s, pos);
148 while (bib_ok (s, pos) && s[pos] == ',') {
149 pos++;
150 bib_blank (s, pos);
152 while (bib_ok (s, pos) && !bib_is_in (s[pos], ce)) {
153 savpos= pos;
154 string param;
155 tree arg (CONCAT);
156 bib_until (s, pos, string ("={( \t\n\r"), param);
157 if (bib_ok (s, pos) && (s[pos]=='{' || s[pos]=='(')) {
158 pos= savpos;
159 return;
161 bib_blank (s, pos);
162 bib_char (s, pos, '=');
163 bib_blank (s, pos);
164 bib_arg (s, pos, ce, arg);
165 if (tag == "bib-field") param= locase_all (param);
166 arg= simplify_correct (arg);
167 fields << compound (tag, param, arg);
168 bib_blank (s, pos);
169 string cend= ce;
170 cend << ",";
171 while (bib_ok (s, pos) && !bib_is_in (s[pos], cend)) pos++;
172 while (bib_ok (s, pos) && s[pos] == ',') {
173 pos++;
174 bib_blank (s, pos);
179 void
180 bib_string (string s, int& pos, tree& t) {
181 if (!bib_ok (s, pos)) return;
182 tree fields= tree (DOCUMENT);
183 string cs= ", \t\n\r";
184 char cend;
185 switch (s[pos]) {
186 case '{': cend= '}'; break;
187 case '(': cend= ')'; break;
188 default: pos= -1; return;
190 pos++;
191 cs << cend;
192 bib_blank (s, pos);
193 string ce;
194 ce << cend;
195 bib_fields (s, pos, ce, string ("bib-assign"), fields);
196 bib_blank (s, pos);
197 bib_char (s, pos, cend);
198 t << A (fields);
201 void
202 bib_preamble (string s, int& pos, tree& t) {
203 if (!bib_ok (s, pos)) return;
204 string cs= ",";
205 char cend;
206 switch (s[pos]) {
207 case '{': cend= '}'; break;
208 case '(': cend= ')'; break;
209 default: pos= -1; return;
211 pos++;
212 cs << cend;
213 bib_blank (s, pos);
214 while (bib_ok (s, pos) && s[pos] == ',') {
215 pos++;
216 bib_blank (s, pos);
218 while (bib_ok (s, pos) && s[pos] != cend) {
219 bib_blank (s, pos);
220 tree arg (CONCAT);
221 bib_arg (s, pos, cs, arg);
222 arg= simplify_correct (arg);
223 t << compound ("bib-latex", arg);
224 bib_blank (s, pos);
225 while (bib_ok (s, pos) && s[pos] == ',') {
226 pos++;
227 bib_blank (s, pos);
230 bib_blank (s, pos);
231 bib_char (s, pos, cend);
234 void
235 bib_entry (string s, int& pos, tree type, tree& t) {
236 if (!bib_ok (s, pos)) return;
237 tree entry;
238 tree fields= tree (DOCUMENT);
239 string cs= ",\t\n\r";
240 char cend;
241 switch (s[pos]) {
242 case '{': cend= '}'; break;
243 case '(': cend= ')'; break;
244 default: pos= -1; return;
246 pos++;
247 cs << cend;
248 bib_blank (s, pos);
249 string tag;
250 bib_until (s, pos, cs, tag);
251 bib_blank (s, pos);
252 string ce;
253 ce << cend;
254 bib_fields (s, pos, ce, string ("bib-field"), fields);
255 bib_blank (s, pos);
256 bib_char (s, pos, cend);
257 entry= compound ("bib-entry");
258 entry << type << tag << fields;
259 t << entry;
262 void
263 bib_list (string s, int& pos, tree& t) {
264 if (!bib_ok (s, pos)) return;
265 tree tentry (DOCUMENT);
266 tree tpreamble (DOCUMENT);
267 tree tstring (DOCUMENT);
268 string type;
269 bool comment= true;
270 int savpos;
271 while (bib_ok (s, pos)) {
272 bib_blank (s, pos);
273 if (!bib_ok (s, pos)) break;
274 switch (s[pos]) {
275 case '%': {
276 tree tc= tree (DOCUMENT);
277 bib_comment (s, pos, tc);
278 tentry << compound ("bib-comment", tc);
279 break;
281 case '@': {
282 pos++;
283 comment= false;
285 default: {
286 bib_blank (s, pos);
287 savpos= pos;
288 type= "";
289 bib_until (s, pos, string ("{(= \t\n\r"), type);
290 bib_blank (s, pos);
291 if (bib_ok (s, pos) && s[pos] == '=') {
292 tree fields (DOCUMENT);
293 pos= savpos;
294 bib_fields (s, pos, string (")}@"), string ("bib-field"), tentry);
295 bib_blank (s, pos);
296 if (bib_ok (s, pos) && (s[pos]==')' || s[pos]=='}')) {
297 if (N(tpreamble) != 0) t << compound ("bib-preamble", tpreamble);
298 if (N(tstring) != 0) t << compound ("bib-string", tstring);
299 t << A(tentry);
300 return;
303 else {
304 string stype= locase_all (type);
305 if (stype == "string") {
306 tree ts;
307 if (comment) ts= tree (DOCUMENT);
308 else ts= tstring;
309 bib_string (s, pos, ts);
310 if (comment) {
311 if (N(ts) == 1) tstring << compound ("bib-comment", ts[0]);
312 else tstring << compound ("bib-comment", ts);
315 else if (stype == "preamble") {
316 tree tp;
317 if (comment) tp= tree (DOCUMENT);
318 else tp= tpreamble;
319 bib_preamble (s, pos, tp);
320 if (comment) {
321 if (N(tp) == 1) tpreamble << compound ("bib-comment", tp[0]);
322 else tpreamble << compound ("bib-comment", tp);
325 else {
326 tree te;
327 if (stype == "comment") comment= true;
328 if (comment) te= tree (DOCUMENT);
329 else te= tentry;
330 bib_entry (s, pos, stype, te);
331 if (comment) {
332 if (N(te) == 1) tentry << compound ("bib-comment", te[0]);
333 else tentry << compound ("bib-comment", te);
336 comment= true;
338 break;
342 // cerr << "ENTRIES: " << tentry << "\n";
343 // cerr << "PREAMBLE: " << tpreamble << "\n";
344 // cerr << "STRING: " << tstring << "\n";
345 // if (N(tpreamble) != 0) t << compound ("bib-preamble", tpreamble);
346 // if (N(tstring) != 0) t << compound ("bib-string", tstring);
347 // t << A(tentry);
348 hashmap<string,string> dict=
349 bib_strings_dict (tree (DOCUMENT, compound ("bib-string", tstring)));
350 t << A(bib_subst_vars (tentry, dict));
351 bib_parse_fields (t);
354 tree
355 parse_bib (string s) {
356 int pos= 0;
357 tree r (DOCUMENT);
358 bib_list (s, pos, r);
359 if (N(s) == 0 || N(r) == 0) return tree ();
360 if (pos < 0) {
361 cerr << "TeXmacs] Error: failed to load BibTeX file.\n";
362 return tree ();
364 return r;