* tmac/s.tmac (pg@top): Don't save PO register.
[s-roff.git] / src / preproc / refer / refer.cpp
blob37cc5806188564c0019dabd80308561b40a6cecd
1 // -*- C++ -*-
2 /* Copyright (C) 1989-1992, 2000, 2001, 2002, 2004
3 Free Software Foundation, Inc.
4 Written by James Clark (jjc@jclark.com)
6 This file is part of groff.
8 groff is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
11 version.
13 groff is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License along
19 with groff; see the file COPYING. If not, write to the Free Software
20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22 #include "refer.h"
23 #include "refid.h"
24 #include "ref.h"
25 #include "token.h"
26 #include "search.h"
27 #include "command.h"
// Version text printed by the -v / --version option in main(); defined
// elsewhere with C linkage.
29 extern "C" const char *Version_string;
// Single-character markers that do_file() embeds in pending_line and
// that label_processing_state later expands into pre_label, post_label,
// sep_label and the actual citation labels.
31 const char PRE_LABEL_MARKER = '\013';
32 const char POST_LABEL_MARKER = '\014';
33 const char LABEL_MARKER = '\015'; // label_type is added on
// Flag bits set by make_reference() when `[' or `]' precede the keywords
// of a citation; do_file() masks them off to recover the label_type.
35 #define FORCE_LEFT_BRACKET 04
36 #define FORCE_RIGHT_BRACKET 010
// Current output stream: stdout, or the temporary file installed by
// divert_to_temporary_file() while references are being accumulated.
38 static FILE *outfp = stdout;
// Configuration variables.  Those that main() sets from command-line
// options are noted; the rest are only given their defaults in this file.
40 string capitalize_fields;	// set by -c
41 string reverse_fields;	// set by -a
42 string abbreviate_fields;
43 string period_before_last_name = ". ";
44 string period_before_initial = ".";
45 string period_before_hyphen = "";
46 string period_before_other = ". ";
47 string sort_fields;	// set by -s
48 int annotation_field = -1;	// set by -B
49 string annotation_macro;	// set by -B
50 string discard_fields = "XYZ";
// Text written for PRE_LABEL_MARKER / POST_LABEL_MARKER and between
// adjacent labels; -S replaces all three.
51 string pre_label = "\\*([.";
52 string post_label = "\\*(.]";
53 string sep_label = ", ";
// Nonzero when references are collected and emitted together by
// output_references() instead of right after each citation (-e, -s).
54 int accumulate = 0;
55 int move_punctuation = 0;	// set by -P
56 int abbreviate_label_ranges = 0;
57 string label_range_indicator;
// -b and -B clear both of these.
58 int label_in_text = 1;
59 int label_in_reference = 1;
60 int date_as_label = 0;
// When nonzero, output_citation_group() orders a group by reference number.
61 int sort_adjacent_labels = 0;
62 // Join exactly two authors with this.
63 string join_authors_exactly_two = " and ";
64 // When there are more than two authors join the last two with this.
65 string join_authors_last_two = ", and ";
66 // Otherwise join authors with this.
67 string join_authors_default = ", ";
68 string separate_label_second_parts = ", ";
69 // Use this string to represent that there are other authors.
70 string et_al = " et al";
71 // Use et al only if it can replace at least this many authors.
72 int et_al_min_elide = 2;
73 // Use et al only if the total number of authors is at least this.
74 int et_al_min_total = 3;
// Set by -C; relaxes the delimiter check after `.lf', `.R1' and `.R2'
// in do_file().
77 int compatible_flag = 0;
79 int short_label_flag = 0;
// Cleared by -R; when zero do_file() does not treat `.R1'/`.R2' specially.
81 static int recognize_R1_R2 = 1;
// Databases searched by find_reference(); -p adds files, -n clears
// search_default so the default database is never added.
83 search_list database_list;
84 int search_default = 1;
85 static int default_database_loaded = 0;
// Growable array of cited references, in citation order (store_citation()).
87 static reference **citation = 0;
88 static int ncitations = 0;
89 static int citation_max = 0;
// Open-addressing hash table of accumulated references (store_reference()).
91 static reference **reference_hash_table = 0;
92 static int hash_table_size;
93 static int nreferences = 0;
// When nonzero, output_pending_line() emits a `.lf' request to resynchronise
// line numbers.
95 static int need_syncing = 0;
// The most recent ordinary input line, held back so that a following
// citation can be attached to it.
96 string pending_line;
// `.lf' lines seen since pending_line was stored; written after it.
97 string pending_lf_lines;
// Forward declarations of functions defined later in this file.
99 static void output_pending_line();
100 static unsigned immediately_handle_reference(const string &);
101 static void immediately_output_references();
102 static unsigned store_reference(const string &);
103 static void divert_to_temporary_file();
104 static reference *make_reference(const string &, unsigned *);
105 static void usage(FILE *stream);
106 static void do_file(const char *);
107 static void split_punct(string &line, string &punct);
108 static void output_citation_group(reference **v, int n, label_type, FILE *fp);
109 static void possibly_load_default_database();
111 int main(int argc, char **argv)
113 program_name = argv[0];
114 static char stderr_buf[BUFSIZ];
115 setbuf(stderr, stderr_buf);
116 outfp = stdout;
117 int finished_options = 0;
118 int bib_flag = 0;
119 int done_spec = 0;
121 for (--argc, ++argv;
122 !finished_options && argc > 0 && argv[0][0] == '-'
123 && argv[0][1] != '\0';
124 argv++, argc--) {
125 const char *opt = argv[0] + 1;
126 while (opt != 0 && *opt != '\0') {
127 switch (*opt) {
128 case 'C':
129 compatible_flag = 1;
130 opt++;
131 break;
132 case 'B':
133 bib_flag = 1;
134 label_in_reference = 0;
135 label_in_text = 0;
136 ++opt;
137 if (*opt == '\0') {
138 annotation_field = 'X';
139 annotation_macro = "AP";
141 else if (csalnum(opt[0]) && opt[1] == '.' && opt[2] != '\0') {
142 annotation_field = opt[0];
143 annotation_macro = opt + 2;
145 opt = 0;
146 break;
147 case 'P':
148 move_punctuation = 1;
149 opt++;
150 break;
151 case 'R':
152 recognize_R1_R2 = 0;
153 opt++;
154 break;
155 case 'S':
156 // Not a very useful spec.
157 set_label_spec("(A.n|Q)', '(D.y|D)");
158 done_spec = 1;
159 pre_label = " (";
160 post_label = ")";
161 sep_label = "; ";
162 opt++;
163 break;
164 case 'V':
165 verify_flag = 1;
166 opt++;
167 break;
168 case 'f':
170 const char *num = 0;
171 if (*++opt == '\0') {
172 if (argc > 1) {
173 num = *++argv;
174 --argc;
176 else {
177 error("option `f' requires an argument");
178 usage(stderr);
179 exit(1);
182 else {
183 num = opt;
184 opt = 0;
186 const char *ptr;
187 for (ptr = num; *ptr; ptr++)
188 if (!csdigit(*ptr)) {
189 error("bad character `%1' in argument to -f option", *ptr);
190 break;
192 if (*ptr == '\0') {
193 string spec;
194 spec = '%';
195 spec += num;
196 spec += '\0';
197 set_label_spec(spec.contents());
198 done_spec = 1;
200 break;
202 case 'b':
203 label_in_text = 0;
204 label_in_reference = 0;
205 opt++;
206 break;
207 case 'e':
208 accumulate = 1;
209 opt++;
210 break;
211 case 'c':
212 capitalize_fields = ++opt;
213 opt = 0;
214 break;
215 case 'k':
217 char buf[5];
218 if (csalpha(*++opt))
219 buf[0] = *opt++;
220 else {
221 if (*opt != '\0')
222 error("bad field name `%1'", *opt++);
223 buf[0] = 'L';
225 buf[1] = '~';
226 buf[2] = '%';
227 buf[3] = 'a';
228 buf[4] = '\0';
229 set_label_spec(buf);
230 done_spec = 1;
232 break;
233 case 'a':
235 const char *ptr;
236 for (ptr = ++opt; *ptr; ptr++)
237 if (!csdigit(*ptr)) {
238 error("argument to `a' option not a number");
239 break;
241 if (*ptr == '\0') {
242 reverse_fields = 'A';
243 reverse_fields += opt;
245 opt = 0;
247 break;
248 case 'i':
249 linear_ignore_fields = ++opt;
250 opt = 0;
251 break;
252 case 'l':
254 char buf[INT_DIGITS*2 + 11]; // A.n+2D.y-3%a
255 strcpy(buf, "A.n");
256 if (*++opt != '\0' && *opt != ',') {
257 char *ptr;
258 long n = strtol(opt, &ptr, 10);
259 if (n == 0 && ptr == opt) {
260 error("bad integer `%1' in `l' option", opt);
261 opt = 0;
262 break;
264 if (n < 0)
265 n = 0;
266 opt = ptr;
267 sprintf(strchr(buf, '\0'), "+%ld", n);
269 strcat(buf, "D.y");
270 if (*opt == ',')
271 opt++;
272 if (*opt != '\0') {
273 char *ptr;
274 long n = strtol(opt, &ptr, 10);
275 if (n == 0 && ptr == opt) {
276 error("bad integer `%1' in `l' option", opt);
277 opt = 0;
278 break;
280 if (n < 0)
281 n = 0;
282 sprintf(strchr(buf, '\0'), "-%ld", n);
283 opt = ptr;
284 if (*opt != '\0')
285 error("argument to `l' option not of form `m,n'");
287 strcat(buf, "%a");
288 if (!set_label_spec(buf))
289 assert(0);
290 done_spec = 1;
292 break;
293 case 'n':
294 search_default = 0;
295 opt++;
296 break;
297 case 'p':
299 const char *filename = 0;
300 if (*++opt == '\0') {
301 if (argc > 1) {
302 filename = *++argv;
303 argc--;
305 else {
306 error("option `p' requires an argument");
307 usage(stderr);
308 exit(1);
311 else {
312 filename = opt;
313 opt = 0;
315 database_list.add_file(filename);
317 break;
318 case 's':
319 if (*++opt == '\0')
320 sort_fields = "AD";
321 else {
322 sort_fields = opt;
323 opt = 0;
325 accumulate = 1;
326 break;
327 case 't':
329 char *ptr;
330 long n = strtol(opt, &ptr, 10);
331 if (n == 0 && ptr == opt) {
332 error("bad integer `%1' in `t' option", opt);
333 opt = 0;
334 break;
336 if (n < 1)
337 n = 1;
338 linear_truncate_len = int(n);
339 opt = ptr;
340 break;
342 case '-':
343 if (opt[1] == '\0') {
344 finished_options = 1;
345 opt++;
346 break;
348 if (strcmp(opt,"-version")==0) {
349 case 'v':
350 printf("GNU refer (groff) version %s\n", Version_string);
351 exit(0);
352 break;
354 if (strcmp(opt,"-help")==0) {
355 usage(stdout);
356 exit(0);
357 break;
359 // fall through
360 default:
361 error("unrecognized option `%1'", *opt);
362 usage(stderr);
363 exit(1);
364 break;
368 if (!done_spec)
369 set_label_spec("%1");
370 if (argc <= 0) {
371 if (bib_flag)
372 do_bib("-");
373 else
374 do_file("-");
376 else {
377 for (int i = 0; i < argc; i++) {
378 if (bib_flag)
379 do_bib(argv[i]);
380 else
381 do_file(argv[i]);
384 if (accumulate)
385 output_references();
386 if (fflush(stdout) < 0)
387 fatal("output error");
388 return 0;
391 static void usage(FILE *stream)
393 fprintf(stream,
394 "usage: %s [-benvCPRS] [-aN] [-cXYZ] [-fN] [-iXYZ] [-kX] [-lM,N] [-p file]\n"
395 " [-sXYZ] [-tN] [-BL.M] [files ...]\n",
396 program_name);
399 static void possibly_load_default_database()
401 if (search_default && !default_database_loaded) {
402 char *filename = getenv("REFER");
403 if (filename)
404 database_list.add_file(filename);
405 else
406 database_list.add_file(DEFAULT_INDEX, 1);
407 default_database_loaded = 1;
411 static int is_list(const string &str)
413 const char *start = str.contents();
414 const char *end = start + str.length();
415 while (end > start && csspace(end[-1]))
416 end--;
417 while (start < end && csspace(*start))
418 start++;
419 return end - start == 6 && memcmp(start, "$LIST$", 6) == 0;
// Process one input file ("-" means standard input), copying it to outfp.
//
// Ordinary lines are held in pending_line for one iteration so that a
// following citation can be attached to them.  Three kinds of line get
// special treatment:
//   `.['  starts a citation that runs up to a line starting with `.]';
//   `.lf' lines are queued in pending_lf_lines and passed to
//         interpret_lf_args();
//   `.R1' (unless disabled) starts a block of commands that runs up to
//         `.R2' and is handed to process_commands().
// A file that cannot be opened is reported with error() and skipped.
422 static void do_file(const char *filename)
424 FILE *fp;
425 if (strcmp(filename, "-") == 0) {
426 fp = stdin;
428 else {
429 errno = 0;
430 fp = fopen(filename, "r");
431 if (fp == 0) {
432 error("can't open `%1': %2", filename, strerror(errno));
433 return;
436 current_filename = filename;
437 fprintf(outfp, ".lf 1 %s\n", filename);
438 string line;
439 current_lineno = 0;
// One iteration per input line.
440 for (;;) {
441 line.clear();
// Read one line; a final line without a newline has one appended.
442 for (;;) {
443 int c = getc(fp);
444 if (c == EOF) {
445 if (line.length() > 0)
446 line += '\n';
447 break;
449 if (invalid_input_char(c))
450 error("invalid input character code %1", c);
451 else {
452 line += c;
453 if (c == '\n')
454 break;
// An empty line here can only mean end of file.
457 int len = line.length();
458 if (len == 0)
459 break;
460 current_lineno++;
461 if (len >= 2 && line[0] == '.' && line[1] == '[') {
// Citation.  `pre' is the text after `.[' on the opening line (without
// its newline), `post' the text after `.]' on the closing line, and
// `str' everything in between.
462 int start_lineno = current_lineno;
463 int start_of_line = 1;
464 string str;
465 string post;
466 string pre(line.contents() + 2, line.length() - 3);
467 for (;;) {
468 int c = getc(fp);
469 if (c == EOF) {
470 error_with_file_and_line(current_filename, start_lineno,
471 "missing `.]' line");
472 break;
474 if (start_of_line)
475 current_lineno++;
476 if (start_of_line && c == '.') {
// Look one character ahead for `]'; anything else is pushed back.
477 int d = getc(fp);
478 if (d == ']') {
479 while ((d = getc(fp)) != '\n' && d != EOF) {
480 if (invalid_input_char(d))
481 error("invalid input character code %1", d);
482 else
483 post += d;
485 break;
487 if (d != EOF)
488 ungetc(d, fp);
490 if (invalid_input_char(c))
491 error("invalid input character code %1", c);
492 else
493 str += c;
494 start_of_line = (c == '\n');
496 if (is_list(str)) {
// A `$LIST$' citation asks for the accumulated references to be output here.
497 output_pending_line();
498 if (accumulate)
499 output_references();
500 else
501 error("found `$LIST$' but not accumulating references");
503 else {
504 unsigned flags = (accumulate
505 ? store_reference(str)
506 : immediately_handle_reference(str));
507 if (label_in_text) {
// The first in-text label while accumulating switches output to a
// temporary file.
508 if (accumulate && outfp == stdout)
509 divert_to_temporary_file();
510 if (pending_line.length() == 0) {
511 warning("can't attach citation to previous line");
513 else
// Drop the last character of the pending line (its newline) so the
// label can follow the text directly.
514 pending_line.set_length(pending_line.length() - 1);
515 string punct;
516 if (move_punctuation)
517 split_punct(pending_line, punct);
518 int have_text = pre.length() > 0 || post.length() > 0;
// The flag word holds the label type plus the two bracket-forcing bits.
519 label_type lt = label_type(flags & ~(FORCE_LEFT_BRACKET
520 |FORCE_RIGHT_BRACKET));
521 if ((flags & FORCE_LEFT_BRACKET) || !have_text)
522 pending_line += PRE_LABEL_MARKER;
523 pending_line += pre;
524 char lm = LABEL_MARKER + (int)lt;
525 pending_line += lm;
526 pending_line += post;
527 if ((flags & FORCE_RIGHT_BRACKET) || !have_text)
528 pending_line += POST_LABEL_MARKER;
529 pending_line += punct;
530 pending_line += '\n';
533 need_syncing = 1;
535 else if (len >= 4
536 && line[0] == '.' && line[1] == 'l' && line[2] == 'f'
537 && (compatible_flag || line[3] == '\n' || line[3] == ' ')) {
// `.lf' line: queue it for output and let interpret_lf_args() parse it.
538 pending_lf_lines += line;
539 line += '\0';
540 if (interpret_lf_args(line.contents() + 3))
541 current_lineno--;
543 else if (recognize_R1_R2
544 && len >= 4
545 && line[0] == '.' && line[1] == 'R' && line[2] == '1'
546 && (compatible_flag || line[3] == '\n' || line[3] == ' ')) {
// `.R1' block: collect everything up to the `.R2' line into `line'.
547 line.clear();
548 int start_of_line = 1;
549 int start_lineno = current_lineno;
550 for (;;) {
551 int c = getc(fp);
552 if (c != EOF && start_of_line)
553 current_lineno++;
554 if (start_of_line && c == '.') {
// Match `.R2' one character at a time; on a mismatch the characters
// already consumed are appended to `line' and `c' holds the next one.
555 c = getc(fp);
556 if (c == 'R') {
557 c = getc(fp);
558 if (c == '2') {
559 c = getc(fp);
560 if (compatible_flag || c == ' ' || c == '\n' || c == EOF) {
// Discard the remainder of the `.R2' line.
561 while (c != EOF && c != '\n')
562 c = getc(fp);
563 break;
565 else {
566 line += '.';
567 line += 'R';
568 line += '2';
571 else {
572 line += '.';
573 line += 'R';
576 else
577 line += '.';
579 if (c == EOF) {
580 error_with_file_and_line(current_filename, start_lineno,
581 "missing `.R2' line");
582 break;
584 if (invalid_input_char(c))
585 error("invalid input character code %1", int(c));
586 else {
587 line += c;
588 start_of_line = c == '\n';
591 output_pending_line();
592 if (accumulate)
593 output_references();
594 else
595 nreferences = 0;
596 process_commands(line, current_filename, start_lineno + 1);
597 need_syncing = 1;
599 else {
// Ordinary line: flush the previous one and hold this one back.
600 output_pending_line();
601 pending_line = line;
// End of file: flush the last held line without a trailing `.lf'.
604 need_syncing = 0;
605 output_pending_line();
606 if (fp != stdin)
607 fclose(fp);
// State machine that copies a character stream to `fp', replacing the
// label marker characters with pre_label, post_label, sep_label and the
// labels of the references supplied to the constructor.  Runs of adjacent
// citations of the same label type are gathered into one group and
// written by output_citation_group().
610 class label_processing_state {
611 enum {
// NORMAL: nothing is buffered.
612 NORMAL,
// PENDING_LABEL: `count' label markers have been seen but not yet written.
613 PENDING_LABEL,
// PENDING_LABEL_POST: the pending labels were followed by a POST marker.
614 PENDING_LABEL_POST,
// PENDING_LABEL_POST_PRE: ... and then by a PRE marker, so a following
// label of the same type joins the group.
615 PENDING_LABEL_POST_PRE,
// PENDING_POST: a POST marker was seen with no pending label; it becomes
// sep_label if a PRE marker follows, post_label otherwise.
616 PENDING_POST
617 } state;
618 label_type type; // type of pending labels
619 int count; // number of pending labels
620 reference **rptr; // pointer to next reference
621 int rcount; // number of references left
// Stream the expanded text is written to.
622 FILE *fp;
// Resolve buffered state against the next character; returns nonzero
// when that character was consumed.
623 int handle_pending(int c);
624 public:
625 label_processing_state(reference **, int, FILE *);
626 ~label_processing_state();
// Feed one character of input to the state machine.
627 void process(int c);
630 static void output_pending_line()
632 if (label_in_text && !accumulate && ncitations > 0) {
633 label_processing_state state(citation, ncitations, outfp);
634 int len = pending_line.length();
635 for (int i = 0; i < len; i++)
636 state.process((unsigned char)(pending_line[i]));
638 else
639 put_string(pending_line, outfp);
640 pending_line.clear();
641 if (pending_lf_lines.length() > 0) {
642 put_string(pending_lf_lines, outfp);
643 pending_lf_lines.clear();
645 if (!accumulate)
646 immediately_output_references();
647 if (need_syncing) {
648 fprintf(outfp, ".lf %d %s\n", current_lineno, current_filename);
649 need_syncing = 0;
653 static void split_punct(string &line, string &punct)
655 const char *start = line.contents();
656 const char *end = start + line.length();
657 const char *ptr = start;
658 const char *last_token_start = 0;
659 for (;;) {
660 if (ptr >= end)
661 break;
662 last_token_start = ptr;
663 if (*ptr == PRE_LABEL_MARKER || *ptr == POST_LABEL_MARKER
664 || (*ptr >= LABEL_MARKER && *ptr < LABEL_MARKER + N_LABEL_TYPES))
665 ptr++;
666 else if (!get_token(&ptr, end))
667 break;
669 if (last_token_start) {
670 const token_info *ti = lookup_token(last_token_start, end);
671 if (ti->is_punct()) {
672 punct.append(last_token_start, end - last_token_start);
673 line.set_length(last_token_start - start);
678 static void divert_to_temporary_file()
680 outfp = xtmpfile();
683 static void store_citation(reference *ref)
685 if (ncitations >= citation_max) {
686 if (citation == 0)
687 citation = new reference*[citation_max = 100];
688 else {
689 reference **old_citation = citation;
690 citation_max *= 2;
691 citation = new reference *[citation_max];
692 memcpy(citation, old_citation, ncitations*sizeof(reference *));
693 a_delete old_citation;
696 citation[ncitations++] = ref;
699 static unsigned store_reference(const string &str)
701 if (reference_hash_table == 0) {
702 reference_hash_table = new reference *[17];
703 hash_table_size = 17;
704 for (int i = 0; i < hash_table_size; i++)
705 reference_hash_table[i] = 0;
707 unsigned flags;
708 reference *ref = make_reference(str, &flags);
709 ref->compute_hash_code();
710 unsigned h = ref->hash();
711 reference **ptr;
712 for (ptr = reference_hash_table + (h % hash_table_size);
713 *ptr != 0;
714 ((ptr == reference_hash_table)
715 ? (ptr = reference_hash_table + hash_table_size - 1)
716 : --ptr))
717 if (same_reference(**ptr, *ref))
718 break;
719 if (*ptr != 0) {
720 if (ref->is_merged())
721 warning("fields ignored because reference already used");
722 delete ref;
723 ref = *ptr;
725 else {
726 *ptr = ref;
727 ref->set_number(nreferences);
728 nreferences++;
729 ref->pre_compute_label();
730 ref->compute_sort_key();
731 if (nreferences*2 >= hash_table_size) {
732 // Rehash it.
733 reference **old_table = reference_hash_table;
734 int old_size = hash_table_size;
735 hash_table_size = next_size(hash_table_size);
736 reference_hash_table = new reference*[hash_table_size];
737 int i;
738 for (i = 0; i < hash_table_size; i++)
739 reference_hash_table[i] = 0;
740 for (i = 0; i < old_size; i++)
741 if (old_table[i]) {
742 reference **p;
743 for (p = (reference_hash_table
744 + (old_table[i]->hash() % hash_table_size));
746 ((p == reference_hash_table)
747 ? (p = reference_hash_table + hash_table_size - 1)
748 : --p))
750 *p = old_table[i];
752 a_delete old_table;
755 if (label_in_text)
756 store_citation(ref);
757 return flags;
760 unsigned immediately_handle_reference(const string &str)
762 unsigned flags;
763 reference *ref = make_reference(str, &flags);
764 ref->set_number(nreferences);
765 if (label_in_text || label_in_reference) {
766 ref->pre_compute_label();
767 ref->immediate_compute_label();
769 nreferences++;
770 store_citation(ref);
771 return flags;
774 static void immediately_output_references()
776 for (int i = 0; i < ncitations; i++) {
777 reference *ref = citation[i];
778 if (label_in_reference) {
779 fputs(".ds [F ", outfp);
780 const string &label = ref->get_label(NORMAL_LABEL);
781 if (label.length() > 0
782 && (label[0] == ' ' || label[0] == '\\' || label[0] == '"'))
783 putc('"', outfp);
784 put_string(label, outfp);
785 putc('\n', outfp);
787 ref->output(outfp);
788 delete ref;
790 ncitations = 0;
793 static void output_citation_group(reference **v, int n, label_type type,
794 FILE *fp)
796 if (sort_adjacent_labels) {
797 // Do an insertion sort. Usually n will be very small.
798 for (int i = 1; i < n; i++) {
799 int num = v[i]->get_number();
800 reference *temp = v[i];
801 int j;
802 for (j = i - 1; j >= 0 && v[j]->get_number() > num; j--)
803 v[j + 1] = v[j];
804 v[j + 1] = temp;
807 // This messes up if !accumulate.
808 if (accumulate && n > 1) {
809 // remove duplicates
810 int j = 1;
811 for (int i = 1; i < n; i++)
812 if (v[i]->get_label(type) != v[i - 1]->get_label(type))
813 v[j++] = v[i];
814 n = j;
816 string merged_label;
817 for (int i = 0; i < n; i++) {
818 int nmerged = v[i]->merge_labels(v + i + 1, n - i - 1, type, merged_label);
819 if (nmerged > 0) {
820 put_string(merged_label, fp);
821 i += nmerged;
823 else
824 put_string(v[i]->get_label(type), fp);
825 if (i < n - 1)
826 put_string(sep_label, fp);
831 label_processing_state::label_processing_state(reference **p, int n, FILE *f)
832 : state(NORMAL), count(0), rptr(p), rcount(n), fp(f)
836 label_processing_state::~label_processing_state()
838 int handled = handle_pending(EOF);
839 assert(!handled);
840 assert(rcount == 0);
843 int label_processing_state::handle_pending(int c)
845 switch (state) {
846 case NORMAL:
847 break;
848 case PENDING_LABEL:
849 if (c == POST_LABEL_MARKER) {
850 state = PENDING_LABEL_POST;
851 return 1;
853 else {
854 output_citation_group(rptr, count, type, fp);
855 rptr += count ;
856 rcount -= count;
857 state = NORMAL;
859 break;
860 case PENDING_LABEL_POST:
861 if (c == PRE_LABEL_MARKER) {
862 state = PENDING_LABEL_POST_PRE;
863 return 1;
865 else {
866 output_citation_group(rptr, count, type, fp);
867 rptr += count;
868 rcount -= count;
869 put_string(post_label, fp);
870 state = NORMAL;
872 break;
873 case PENDING_LABEL_POST_PRE:
874 if (c >= LABEL_MARKER
875 && c < LABEL_MARKER + N_LABEL_TYPES
876 && c - LABEL_MARKER == type) {
877 count += 1;
878 state = PENDING_LABEL;
879 return 1;
881 else {
882 output_citation_group(rptr, count, type, fp);
883 rptr += count;
884 rcount -= count;
885 put_string(sep_label, fp);
886 state = NORMAL;
888 break;
889 case PENDING_POST:
890 if (c == PRE_LABEL_MARKER) {
891 put_string(sep_label, fp);
892 state = NORMAL;
893 return 1;
895 else {
896 put_string(post_label, fp);
897 state = NORMAL;
899 break;
901 return 0;
904 void label_processing_state::process(int c)
906 if (handle_pending(c))
907 return;
908 assert(state == NORMAL);
909 switch (c) {
910 case PRE_LABEL_MARKER:
911 put_string(pre_label, fp);
912 state = NORMAL;
913 break;
914 case POST_LABEL_MARKER:
915 state = PENDING_POST;
916 break;
917 case LABEL_MARKER:
918 case LABEL_MARKER + 1:
919 count = 1;
920 state = PENDING_LABEL;
921 type = label_type(c - LABEL_MARKER);
922 break;
923 default:
924 state = NORMAL;
925 putc(c, fp);
926 break;
930 extern "C" {
932 int rcompare(const void *p1, const void *p2)
934 return compare_reference(**(reference **)p1, **(reference **)p2);
939 void output_references()
941 assert(accumulate);
942 if (!hash_table_size) {
943 error("nothing to reference (probably `bibliography' before `sort')");
944 accumulate = 0;
945 nreferences = 0;
946 return;
948 if (nreferences > 0) {
949 int j = 0;
950 int i;
951 for (i = 0; i < hash_table_size; i++)
952 if (reference_hash_table[i] != 0)
953 reference_hash_table[j++] = reference_hash_table[i];
954 assert(j == nreferences);
955 for (; j < hash_table_size; j++)
956 reference_hash_table[j] = 0;
957 qsort(reference_hash_table, nreferences, sizeof(reference*), rcompare);
958 for (i = 0; i < nreferences; i++)
959 reference_hash_table[i]->set_number(i);
960 compute_labels(reference_hash_table, nreferences);
962 if (outfp != stdout) {
963 rewind(outfp);
965 label_processing_state state(citation, ncitations, stdout);
966 int c;
967 while ((c = getc(outfp)) != EOF)
968 state.process(c);
970 ncitations = 0;
971 fclose(outfp);
972 outfp = stdout;
974 if (nreferences > 0) {
975 fputs(".]<\n", outfp);
976 for (int i = 0; i < nreferences; i++) {
977 if (sort_fields.length() > 0)
978 reference_hash_table[i]->print_sort_key_comment(outfp);
979 if (label_in_reference) {
980 fputs(".ds [F ", outfp);
981 const string &label = reference_hash_table[i]->get_label(NORMAL_LABEL);
982 if (label.length() > 0
983 && (label[0] == ' ' || label[0] == '\\' || label[0] == '"'))
984 putc('"', outfp);
985 put_string(label, outfp);
986 putc('\n', outfp);
988 reference_hash_table[i]->output(outfp);
989 delete reference_hash_table[i];
990 reference_hash_table[i] = 0;
992 fputs(".]>\n", outfp);
993 nreferences = 0;
995 clear_labels();
998 static reference *find_reference(const char *query, int query_len)
1000 // This is so that error messages look better.
1001 while (query_len > 0 && csspace(query[query_len - 1]))
1002 query_len--;
1003 string str;
1004 for (int i = 0; i < query_len; i++)
1005 str += query[i] == '\n' ? ' ' : query[i];
1006 str += '\0';
1007 possibly_load_default_database();
1008 search_list_iterator iter(&database_list, str.contents());
1009 reference_id rid;
1010 const char *start;
1011 int len;
1012 if (!iter.next(&start, &len, &rid)) {
1013 error("no matches for `%1'", str.contents());
1014 return 0;
1016 const char *end = start + len;
1017 while (start < end) {
1018 if (*start == '%')
1019 break;
1020 while (start < end && *start++ != '\n')
1023 if (start >= end) {
1024 error("found a reference for `%1' but it didn't contain any fields",
1025 str.contents());
1026 return 0;
1028 reference *result = new reference(start, end - start, &rid);
1029 if (iter.next(&start, &len, &rid))
1030 warning("multiple matches for `%1'", str.contents());
1031 return result;
1034 static reference *make_reference(const string &str, unsigned *flagsp)
1036 const char *start = str.contents();
1037 const char *end = start + str.length();
1038 const char *ptr = start;
1039 while (ptr < end) {
1040 if (*ptr == '%')
1041 break;
1042 while (ptr < end && *ptr++ != '\n')
1045 *flagsp = 0;
1046 for (; start < ptr; start++) {
1047 if (*start == '#')
1048 *flagsp = (SHORT_LABEL | (*flagsp & (FORCE_RIGHT_BRACKET
1049 | FORCE_LEFT_BRACKET)));
1050 else if (*start == '[')
1051 *flagsp |= FORCE_LEFT_BRACKET;
1052 else if (*start == ']')
1053 *flagsp |= FORCE_RIGHT_BRACKET;
1054 else if (!csspace(*start))
1055 break;
1057 if (start >= end) {
1058 error("empty reference");
1059 return new reference;
1061 reference *database_ref = 0;
1062 if (start < ptr)
1063 database_ref = find_reference(start, ptr - start);
1064 reference *inline_ref = 0;
1065 if (ptr < end)
1066 inline_ref = new reference(ptr, end - ptr);
1067 if (inline_ref) {
1068 if (database_ref) {
1069 database_ref->merge(*inline_ref);
1070 delete inline_ref;
1071 return database_ref;
1073 else
1074 return inline_ref;
1076 else if (database_ref)
1077 return database_ref;
1078 else
1079 return new reference;
1082 static void do_ref(const string &str)
1084 if (accumulate)
1085 (void)store_reference(str);
1086 else {
1087 (void)immediately_handle_reference(str);
1088 immediately_output_references();
1092 static void trim_blanks(string &str)
1094 const char *start = str.contents();
1095 const char *end = start + str.length();
1096 while (end > start && end[-1] != '\n' && csspace(end[-1]))
1097 --end;
1098 str.set_length(end - start);
1101 void do_bib(const char *filename)
1103 FILE *fp;
1104 if (strcmp(filename, "-") == 0)
1105 fp = stdin;
1106 else {
1107 errno = 0;
1108 fp = fopen(filename, "r");
1109 if (fp == 0) {
1110 error("can't open `%1': %2", filename, strerror(errno));
1111 return;
1113 current_filename = filename;
1115 enum {
1116 START, MIDDLE, BODY, BODY_START, BODY_BLANK, BODY_DOT
1117 } state = START;
1118 string body;
1119 for (;;) {
1120 int c = getc(fp);
1121 if (c == EOF)
1122 break;
1123 if (invalid_input_char(c)) {
1124 error("invalid input character code %1", c);
1125 continue;
1127 switch (state) {
1128 case START:
1129 if (c == '%') {
1130 body = c;
1131 state = BODY;
1133 else if (c != '\n')
1134 state = MIDDLE;
1135 break;
1136 case MIDDLE:
1137 if (c == '\n')
1138 state = START;
1139 break;
1140 case BODY:
1141 body += c;
1142 if (c == '\n')
1143 state = BODY_START;
1144 break;
1145 case BODY_START:
1146 if (c == '\n') {
1147 do_ref(body);
1148 state = START;
1150 else if (c == '.')
1151 state = BODY_DOT;
1152 else if (csspace(c)) {
1153 state = BODY_BLANK;
1154 body += c;
1156 else {
1157 body += c;
1158 state = BODY;
1160 break;
1161 case BODY_BLANK:
1162 if (c == '\n') {
1163 trim_blanks(body);
1164 do_ref(body);
1165 state = START;
1167 else if (csspace(c))
1168 body += c;
1169 else {
1170 body += c;
1171 state = BODY;
1173 break;
1174 case BODY_DOT:
1175 if (c == ']') {
1176 do_ref(body);
1177 state = MIDDLE;
1179 else {
1180 body += '.';
1181 body += c;
1182 state = c == '\n' ? BODY_START : BODY;
1184 break;
1185 default:
1186 assert(0);
1188 if (c == '\n')
1189 current_lineno++;
1191 switch (state) {
1192 case START:
1193 case MIDDLE:
1194 break;
1195 case BODY:
1196 body += '\n';
1197 do_ref(body);
1198 break;
1199 case BODY_DOT:
1200 case BODY_START:
1201 do_ref(body);
1202 break;
1203 case BODY_BLANK:
1204 trim_blanks(body);
1205 do_ref(body);
1206 break;
1208 fclose(fp);
// from the Dragon Book
//
// Hash the first `len' characters of `s'.  Each character is folded in
// by shifting the running value left four bits and adding the character;
// whenever bits 28-31 become nonzero they are XORed into the value
// shifted right by 24 bits and then cleared.
unsigned hash_string(const char *s, int len)
{
  unsigned h = 0;
  for (int i = 0; i < len; i++) {
    h = (h << 4) + s[i];
    const unsigned top = h & 0xf0000000;
    if (top != 0) {
      h ^= top >> 24;
      h ^= top;
    }
  }
  return h;
}
// Return the smallest entry of a fixed table of sizes that is strictly
// greater than `n'.  Asserts if `n' is not below the largest entry.
int next_size(int n)
{
  static const int table_sizes[] = {
    101, 503, 1009, 2003, 3001, 4001, 5003, 10007, 20011, 40009,
    80021, 160001, 500009, 1000003, 2000003, 4000037, 8000009,
    16000057, 32000011, 64000031, 128000003, 0
  };
  int i = 0;
  // The trailing 0 is a sentinel marking the end of the table.
  while (table_sizes[i] != 0 && table_sizes[i] <= n)
    i++;
  assert(table_sizes[i] != 0);
  return table_sizes[i];
}