* tmac/doc-syms (doc-str-St--susv3): New string.
[s-roff.git] / src / preproc / refer / refer.cpp
blobb593d2c96f928ee310d00f80befd7128794324d8
1 // -*- C++ -*-
2 /* Copyright (C) 1989-1992, 2000, 2001, 2002, 2004, 2006
3 Free Software Foundation, Inc.
4 Written by James Clark (jjc@jclark.com)
6 This file is part of groff.
8 groff is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
11 version.
13 groff is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License along
19 with groff; see the file COPYING. If not, write to the Free Software
20 Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
22 #include "refer.h"
23 #include "refid.h"
24 #include "ref.h"
25 #include "token.h"
26 #include "search.h"
27 #include "command.h"
// Version text defined elsewhere with C linkage; main() prints it for
// -v / --version.
29 extern "C" const char *Version_string;
// Control characters that do_file() embeds in pending_line to mark where a
// citation goes; label_processing_state::process() later replaces them
// with pre_label, the label text, and post_label/sep_label.
31 const char PRE_LABEL_MARKER = '\013';
32 const char POST_LABEL_MARKER = '\014';
33 const char LABEL_MARKER = '\015'; // label_type is added on
// Bits OR'ed into the flags produced by make_reference() when a `[' or `]'
// precedes the reference text; do_file() masks them off to obtain the
// label_type.
35 #define FORCE_LEFT_BRACKET 04
36 #define FORCE_RIGHT_BRACKET 010
// Stream that processed text is written to: stdout, or the temporary file
// installed by divert_to_temporary_file() until output_references()
// switches back to stdout.
38 static FILE *outfp = stdout;
// Configuration variables.  Several are set from command-line options in
// main(); NOTE(review): the rest are presumably set by the commands handled
// through process_commands() -- confirm against command.cpp.
40 string capitalize_fields;
41 string reverse_fields;
42 string abbreviate_fields;
43 string period_before_last_name = ". ";
44 string period_before_initial = ".";
45 string period_before_hyphen = "";
46 string period_before_other = ". ";
47 string sort_fields;
48 int annotation_field = -1;
49 string annotation_macro;
50 string discard_fields = "XYZ";
// Text emitted for PRE_LABEL_MARKER, for POST_LABEL_MARKER, and between
// adjacent labels, respectively (see label_processing_state).
51 string pre_label = "\\*([.";
52 string post_label = "\\*(.]";
53 string sep_label = ", ";
54 int have_bibliography = 0;
// Non-zero: references are stored and emitted together by
// output_references(); zero: each reference is emitted right after the
// line that cites it.
55 int accumulate = 0;
56 int move_punctuation = 0;
57 int abbreviate_label_ranges = 0;
58 string label_range_indicator;
59 int label_in_text = 1;
60 int label_in_reference = 1;
61 int date_as_label = 0;
62 int sort_adjacent_labels = 0;
63 // Join exactly two authors with this.
64 string join_authors_exactly_two = " and ";
65 // When there are more than two authors join the last two with this.
66 string join_authors_last_two = ", and ";
67 // Otherwise join authors with this.
68 string join_authors_default = ", ";
69 string separate_label_second_parts = ", ";
70 // Use this string to represent that there are other authors.
71 string et_al = " et al";
72 // Use et al only if it can replace at least this many authors.
73 int et_al_min_elide = 2;
74 // Use et al only if the total number of authors is at least this.
75 int et_al_min_total = 3;
// Set by -C; relaxes the delimiter check after `.lf', `.R1' and `.R2'.
78 int compatible_flag = 0;
80 int short_label_flag = 0;
// Cleared by -R; when zero, `.R1'/`.R2' lines are passed through as text.
82 static int recognize_R1_R2 = 1;
// Databases searched by find_reference().  The default database is added
// lazily by possibly_load_default_database() unless search_default is 0.
84 search_list database_list;
85 int search_default = 1;
86 static int default_database_loaded = 0;
// Growable array of the references cited since the last flush, in citation
// order (see store_citation()).
88 static reference **citation = 0;
89 static int ncitations = 0;
90 static int citation_max = 0;
// Open-addressing hash table of accumulated references (see
// store_reference()); nreferences counts the stored references.
92 static reference **reference_hash_table = 0;
93 static int hash_table_size;
94 static int nreferences = 0;
// When non-zero, output_pending_line() emits a `.lf' request so that troff's
// idea of the input position matches ours again.
96 static int need_syncing = 0;
// The most recent ordinary input line, held back so that a following
// citation can be attached to it, and the `.lf' lines seen after it.
97 string pending_line;
98 string pending_lf_lines;
// Forward declarations of helpers defined later in this file.
100 static void output_pending_line();
101 static unsigned immediately_handle_reference(const string &);
102 static void immediately_output_references();
103 static unsigned store_reference(const string &);
104 static void divert_to_temporary_file();
105 static reference *make_reference(const string &, unsigned *);
106 static void usage(FILE *stream);
107 static void do_file(const char *);
108 static void split_punct(string &line, string &punct);
109 static void output_citation_group(reference **v, int n, label_type, FILE *fp);
110 static void possibly_load_default_database();
112 int main(int argc, char **argv)
114 program_name = argv[0];
115 static char stderr_buf[BUFSIZ];
116 setbuf(stderr, stderr_buf);
117 outfp = stdout;
118 int finished_options = 0;
119 int bib_flag = 0;
120 int done_spec = 0;
122 for (--argc, ++argv;
123 !finished_options && argc > 0 && argv[0][0] == '-'
124 && argv[0][1] != '\0';
125 argv++, argc--) {
126 const char *opt = argv[0] + 1;
127 while (opt != 0 && *opt != '\0') {
128 switch (*opt) {
129 case 'C':
130 compatible_flag = 1;
131 opt++;
132 break;
133 case 'B':
134 bib_flag = 1;
135 label_in_reference = 0;
136 label_in_text = 0;
137 ++opt;
138 if (*opt == '\0') {
139 annotation_field = 'X';
140 annotation_macro = "AP";
142 else if (csalnum(opt[0]) && opt[1] == '.' && opt[2] != '\0') {
143 annotation_field = opt[0];
144 annotation_macro = opt + 2;
146 opt = 0;
147 break;
148 case 'P':
149 move_punctuation = 1;
150 opt++;
151 break;
152 case 'R':
153 recognize_R1_R2 = 0;
154 opt++;
155 break;
156 case 'S':
157 // Not a very useful spec.
158 set_label_spec("(A.n|Q)', '(D.y|D)");
159 done_spec = 1;
160 pre_label = " (";
161 post_label = ")";
162 sep_label = "; ";
163 opt++;
164 break;
165 case 'V':
166 verify_flag = 1;
167 opt++;
168 break;
169 case 'f':
171 const char *num = 0;
172 if (*++opt == '\0') {
173 if (argc > 1) {
174 num = *++argv;
175 --argc;
177 else {
178 error("option `f' requires an argument");
179 usage(stderr);
180 exit(1);
183 else {
184 num = opt;
185 opt = 0;
187 const char *ptr;
188 for (ptr = num; *ptr; ptr++)
189 if (!csdigit(*ptr)) {
190 error("bad character `%1' in argument to -f option", *ptr);
191 break;
193 if (*ptr == '\0') {
194 string spec;
195 spec = '%';
196 spec += num;
197 spec += '\0';
198 set_label_spec(spec.contents());
199 done_spec = 1;
201 break;
203 case 'b':
204 label_in_text = 0;
205 label_in_reference = 0;
206 opt++;
207 break;
208 case 'e':
209 accumulate = 1;
210 opt++;
211 break;
212 case 'c':
213 capitalize_fields = ++opt;
214 opt = 0;
215 break;
216 case 'k':
218 char buf[5];
219 if (csalpha(*++opt))
220 buf[0] = *opt++;
221 else {
222 if (*opt != '\0')
223 error("bad field name `%1'", *opt++);
224 buf[0] = 'L';
226 buf[1] = '~';
227 buf[2] = '%';
228 buf[3] = 'a';
229 buf[4] = '\0';
230 set_label_spec(buf);
231 done_spec = 1;
233 break;
234 case 'a':
236 const char *ptr;
237 for (ptr = ++opt; *ptr; ptr++)
238 if (!csdigit(*ptr)) {
239 error("argument to `a' option not a number");
240 break;
242 if (*ptr == '\0') {
243 reverse_fields = 'A';
244 reverse_fields += opt;
246 opt = 0;
248 break;
249 case 'i':
250 linear_ignore_fields = ++opt;
251 opt = 0;
252 break;
253 case 'l':
255 char buf[INT_DIGITS*2 + 11]; // A.n+2D.y-3%a
256 strcpy(buf, "A.n");
257 if (*++opt != '\0' && *opt != ',') {
258 char *ptr;
259 long n = strtol(opt, &ptr, 10);
260 if (n == 0 && ptr == opt) {
261 error("bad integer `%1' in `l' option", opt);
262 opt = 0;
263 break;
265 if (n < 0)
266 n = 0;
267 opt = ptr;
268 sprintf(strchr(buf, '\0'), "+%ld", n);
270 strcat(buf, "D.y");
271 if (*opt == ',')
272 opt++;
273 if (*opt != '\0') {
274 char *ptr;
275 long n = strtol(opt, &ptr, 10);
276 if (n == 0 && ptr == opt) {
277 error("bad integer `%1' in `l' option", opt);
278 opt = 0;
279 break;
281 if (n < 0)
282 n = 0;
283 sprintf(strchr(buf, '\0'), "-%ld", n);
284 opt = ptr;
285 if (*opt != '\0')
286 error("argument to `l' option not of form `m,n'");
288 strcat(buf, "%a");
289 if (!set_label_spec(buf))
290 assert(0);
291 done_spec = 1;
293 break;
294 case 'n':
295 search_default = 0;
296 opt++;
297 break;
298 case 'p':
300 const char *filename = 0;
301 if (*++opt == '\0') {
302 if (argc > 1) {
303 filename = *++argv;
304 argc--;
306 else {
307 error("option `p' requires an argument");
308 usage(stderr);
309 exit(1);
312 else {
313 filename = opt;
314 opt = 0;
316 database_list.add_file(filename);
318 break;
319 case 's':
320 if (*++opt == '\0')
321 sort_fields = "AD";
322 else {
323 sort_fields = opt;
324 opt = 0;
326 accumulate = 1;
327 break;
328 case 't':
330 char *ptr;
331 long n = strtol(opt, &ptr, 10);
332 if (n == 0 && ptr == opt) {
333 error("bad integer `%1' in `t' option", opt);
334 opt = 0;
335 break;
337 if (n < 1)
338 n = 1;
339 linear_truncate_len = int(n);
340 opt = ptr;
341 break;
343 case '-':
344 if (opt[1] == '\0') {
345 finished_options = 1;
346 opt++;
347 break;
349 if (strcmp(opt,"-version")==0) {
350 case 'v':
351 printf("GNU refer (groff) version %s\n", Version_string);
352 exit(0);
353 break;
355 if (strcmp(opt,"-help")==0) {
356 usage(stdout);
357 exit(0);
358 break;
360 // fall through
361 default:
362 error("unrecognized option `%1'", *opt);
363 usage(stderr);
364 exit(1);
365 break;
369 if (!done_spec)
370 set_label_spec("%1");
371 if (argc <= 0) {
372 if (bib_flag)
373 do_bib("-");
374 else
375 do_file("-");
377 else {
378 for (int i = 0; i < argc; i++) {
379 if (bib_flag)
380 do_bib(argv[i]);
381 else
382 do_file(argv[i]);
385 if (accumulate)
386 output_references();
387 if (fflush(stdout) < 0)
388 fatal("output error");
389 return 0;
392 static void usage(FILE *stream)
394 fprintf(stream,
395 "usage: %s [-benvCPRS] [-aN] [-cXYZ] [-fN] [-iXYZ] [-kX] [-lM,N] [-p file]\n"
396 " [-sXYZ] [-tN] [-BL.M] [files ...]\n",
397 program_name);
400 static void possibly_load_default_database()
402 if (search_default && !default_database_loaded) {
403 char *filename = getenv("REFER");
404 if (filename)
405 database_list.add_file(filename);
406 else
407 database_list.add_file(DEFAULT_INDEX, 1);
408 default_database_loaded = 1;
412 static int is_list(const string &str)
414 const char *start = str.contents();
415 const char *end = start + str.length();
416 while (end > start && csspace(end[-1]))
417 end--;
418 while (start < end && csspace(*start))
419 start++;
420 return end - start == 6 && memcmp(start, "$LIST$", 6) == 0;
// Process one input file, or standard input when `filename' is "-".
//
// The file is read line by line.  Ordinary lines are held in pending_line
// for one iteration so that a citation on the following lines can be
// attached to them.  Three kinds of lines are interpreted:
//   .[ ... .]   a citation; its body is looked up / stored as a reference
//   .lf         a line-number request, queued in pending_lf_lines
//   .R1 ... .R2 a command section (only while recognize_R1_R2 is set),
//               passed to process_commands()
// An unopenable file is reported with error() and skipped.
423 static void do_file(const char *filename)
425 FILE *fp;
426 if (strcmp(filename, "-") == 0) {
427 fp = stdin;
429 else {
430 errno = 0;
431 fp = fopen(filename, "r");
432 if (fp == 0) {
433 error("can't open `%1': %2", filename, strerror(errno));
434 return;
437 current_filename = filename;
// Tell the formatter which file the following text comes from.
438 fprintf(outfp, ".lf 1 %s\n", filename);
439 string line;
440 current_lineno = 0;
441 for (;;) {
// Read one line, newline included, into `line'.  Invalid characters are
// reported and dropped; an unterminated last line gets a newline added.
442 line.clear();
443 for (;;) {
444 int c = getc(fp);
445 if (c == EOF) {
446 if (line.length() > 0)
447 line += '\n';
448 break;
450 if (invalid_input_char(c))
451 error("invalid input character code %1", c);
452 else {
453 line += c;
454 if (c == '\n')
455 break;
// An empty line here can only mean end of file.
458 int len = line.length();
459 if (len == 0)
460 break;
461 current_lineno++;
// Start of a citation.
462 if (len >= 2 && line[0] == '.' && line[1] == '[') {
463 int start_lineno = current_lineno;
464 int start_of_line = 1;
465 string str;
466 string post;
// `pre' is the text following `.[' on this line, without the newline.
467 string pre(line.contents() + 2, line.length() - 3);
// Collect the citation body into `str' until a line starting with `.]';
// whatever follows `.]' on that line becomes `post'.
468 for (;;) {
469 int c = getc(fp);
470 if (c == EOF) {
471 error_with_file_and_line(current_filename, start_lineno,
472 "missing `.]' line");
473 break;
475 if (start_of_line)
476 current_lineno++;
477 if (start_of_line && c == '.') {
478 int d = getc(fp);
479 if (d == ']') {
480 while ((d = getc(fp)) != '\n' && d != EOF) {
481 if (invalid_input_char(d))
482 error("invalid input character code %1", d);
483 else
484 post += d;
486 break;
// Not `.]': push the look-ahead character back and treat `.' as text.
488 if (d != EOF)
489 ungetc(d, fp);
491 if (invalid_input_char(c))
492 error("invalid input character code %1", c);
493 else
494 str += c;
495 start_of_line = (c == '\n');
// A body consisting only of $LIST$ asks for the accumulated references.
497 if (is_list(str)) {
498 output_pending_line();
499 if (accumulate)
500 output_references();
501 else
502 error("found `$LIST$' but not accumulating references");
504 else {
505 unsigned flags = (accumulate
506 ? store_reference(str)
507 : immediately_handle_reference(str));
508 if (label_in_text) {
// When accumulating, the text is diverted to a temporary file and copied
// to stdout later by output_references().
509 if (accumulate && outfp == stdout)
510 divert_to_temporary_file();
511 if (pending_line.length() == 0) {
512 warning("can't attach citation to previous line");
514 else
// Drop the trailing newline so the label markers can be appended.
515 pending_line.set_length(pending_line.length() - 1);
516 string punct;
517 if (move_punctuation)
518 split_punct(pending_line, punct);
519 int have_text = pre.length() > 0 || post.length() > 0;
// The flags carry the label type plus the two FORCE_*_BRACKET bits.
520 label_type lt = label_type(flags & ~(FORCE_LEFT_BRACKET
521 |FORCE_RIGHT_BRACKET));
// Append: [pre marker] pre-text, label marker, post-text [post marker],
// then any punctuation split off above, then the newline again.
522 if ((flags & FORCE_LEFT_BRACKET) || !have_text)
523 pending_line += PRE_LABEL_MARKER;
524 pending_line += pre;
525 char lm = LABEL_MARKER + (int)lt;
526 pending_line += lm;
527 pending_line += post;
528 if ((flags & FORCE_RIGHT_BRACKET) || !have_text)
529 pending_line += POST_LABEL_MARKER;
530 pending_line += punct;
531 pending_line += '\n';
534 need_syncing = 1;
// A `.lf' request: queue it for output after the pending line.
536 else if (len >= 4
537 && line[0] == '.' && line[1] == 'l' && line[2] == 'f'
538 && (compatible_flag || line[3] == '\n' || line[3] == ' ')) {
539 pending_lf_lines += line;
540 line += '\0';
// NOTE(review): interpret_lf_args() is defined elsewhere; presumably it
// updates the current line number, and the decrement compensates for the
// increment made above for the `.lf' line itself -- confirm.
541 if (interpret_lf_args(line.contents() + 3))
542 current_lineno--;
// Start of a command section.
544 else if (recognize_R1_R2
545 && len >= 4
546 && line[0] == '.' && line[1] == 'R' && line[2] == '1'
547 && (compatible_flag || line[3] == '\n' || line[3] == ' ')) {
// Collect everything up to the `.R2' line into `line'.
548 line.clear();
549 int start_of_line = 1;
550 int start_lineno = current_lineno;
551 for (;;) {
552 int c = getc(fp);
553 if (c != EOF && start_of_line)
554 current_lineno++;
// Look for `.R2' at the start of a line; on a partial match the characters
// already consumed are put back into `line'.
555 if (start_of_line && c == '.') {
556 c = getc(fp);
557 if (c == 'R') {
558 c = getc(fp);
559 if (c == '2') {
560 c = getc(fp);
561 if (compatible_flag || c == ' ' || c == '\n' || c == EOF) {
// Discard the rest of the `.R2' line.
562 while (c != EOF && c != '\n')
563 c = getc(fp);
564 break;
566 else {
567 line += '.';
568 line += 'R';
569 line += '2';
572 else {
573 line += '.';
574 line += 'R';
577 else
578 line += '.';
580 if (c == EOF) {
581 error_with_file_and_line(current_filename, start_lineno,
582 "missing `.R2' line");
583 break;
585 if (invalid_input_char(c))
586 error("invalid input character code %1", int(c));
587 else {
588 line += c;
589 start_of_line = c == '\n';
// Flush what has been read so far, then run the collected commands.
592 output_pending_line();
593 if (accumulate)
594 output_references();
595 else
596 nreferences = 0;
597 process_commands(line, current_filename, start_lineno + 1);
598 need_syncing = 1;
// An ordinary line: emit the previous one and hold this one back.
600 else {
601 output_pending_line();
602 pending_line = line;
// End of file: flush the last held line without a trailing `.lf'.
605 need_syncing = 0;
606 output_pending_line();
607 if (fp != stdin)
608 fclose(fp);
611 class label_processing_state {
612 enum {
613 NORMAL,
614 PENDING_LABEL,
615 PENDING_LABEL_POST,
616 PENDING_LABEL_POST_PRE,
617 PENDING_POST
618 } state;
619 label_type type; // type of pending labels
620 int count; // number of pending labels
621 reference **rptr; // pointer to next reference
622 int rcount; // number of references left
623 FILE *fp;
624 int handle_pending(int c);
625 public:
626 label_processing_state(reference **, int, FILE *);
627 ~label_processing_state();
628 void process(int c);
631 static void output_pending_line()
633 if (label_in_text && !accumulate && ncitations > 0) {
634 label_processing_state state(citation, ncitations, outfp);
635 int len = pending_line.length();
636 for (int i = 0; i < len; i++)
637 state.process((unsigned char)(pending_line[i]));
639 else
640 put_string(pending_line, outfp);
641 pending_line.clear();
642 if (pending_lf_lines.length() > 0) {
643 put_string(pending_lf_lines, outfp);
644 pending_lf_lines.clear();
646 if (!accumulate)
647 immediately_output_references();
648 if (need_syncing) {
649 fprintf(outfp, ".lf %d %s\n", current_lineno, current_filename);
650 need_syncing = 0;
654 static void split_punct(string &line, string &punct)
656 const char *start = line.contents();
657 const char *end = start + line.length();
658 const char *ptr = start;
659 const char *last_token_start = 0;
660 for (;;) {
661 if (ptr >= end)
662 break;
663 last_token_start = ptr;
664 if (*ptr == PRE_LABEL_MARKER || *ptr == POST_LABEL_MARKER
665 || (*ptr >= LABEL_MARKER && *ptr < LABEL_MARKER + N_LABEL_TYPES))
666 ptr++;
667 else if (!get_token(&ptr, end))
668 break;
670 if (last_token_start) {
671 const token_info *ti = lookup_token(last_token_start, end);
672 if (ti->is_punct()) {
673 punct.append(last_token_start, end - last_token_start);
674 line.set_length(last_token_start - start);
679 static void divert_to_temporary_file()
681 outfp = xtmpfile();
684 static void store_citation(reference *ref)
686 if (ncitations >= citation_max) {
687 if (citation == 0)
688 citation = new reference*[citation_max = 100];
689 else {
690 reference **old_citation = citation;
691 citation_max *= 2;
692 citation = new reference *[citation_max];
693 memcpy(citation, old_citation, ncitations*sizeof(reference *));
694 a_delete old_citation;
697 citation[ncitations++] = ref;
700 static unsigned store_reference(const string &str)
702 if (reference_hash_table == 0) {
703 reference_hash_table = new reference *[17];
704 hash_table_size = 17;
705 for (int i = 0; i < hash_table_size; i++)
706 reference_hash_table[i] = 0;
708 unsigned flags;
709 reference *ref = make_reference(str, &flags);
710 ref->compute_hash_code();
711 unsigned h = ref->hash();
712 reference **ptr;
713 for (ptr = reference_hash_table + (h % hash_table_size);
714 *ptr != 0;
715 ((ptr == reference_hash_table)
716 ? (ptr = reference_hash_table + hash_table_size - 1)
717 : --ptr))
718 if (same_reference(**ptr, *ref))
719 break;
720 if (*ptr != 0) {
721 if (ref->is_merged())
722 warning("fields ignored because reference already used");
723 delete ref;
724 ref = *ptr;
726 else {
727 *ptr = ref;
728 ref->set_number(nreferences);
729 nreferences++;
730 ref->pre_compute_label();
731 ref->compute_sort_key();
732 if (nreferences*2 >= hash_table_size) {
733 // Rehash it.
734 reference **old_table = reference_hash_table;
735 int old_size = hash_table_size;
736 hash_table_size = next_size(hash_table_size);
737 reference_hash_table = new reference*[hash_table_size];
738 int i;
739 for (i = 0; i < hash_table_size; i++)
740 reference_hash_table[i] = 0;
741 for (i = 0; i < old_size; i++)
742 if (old_table[i]) {
743 reference **p;
744 for (p = (reference_hash_table
745 + (old_table[i]->hash() % hash_table_size));
747 ((p == reference_hash_table)
748 ? (p = reference_hash_table + hash_table_size - 1)
749 : --p))
751 *p = old_table[i];
753 a_delete old_table;
756 if (label_in_text)
757 store_citation(ref);
758 return flags;
761 unsigned immediately_handle_reference(const string &str)
763 unsigned flags;
764 reference *ref = make_reference(str, &flags);
765 ref->set_number(nreferences);
766 if (label_in_text || label_in_reference) {
767 ref->pre_compute_label();
768 ref->immediate_compute_label();
770 nreferences++;
771 store_citation(ref);
772 return flags;
775 static void immediately_output_references()
777 for (int i = 0; i < ncitations; i++) {
778 reference *ref = citation[i];
779 if (label_in_reference) {
780 fputs(".ds [F ", outfp);
781 const string &label = ref->get_label(NORMAL_LABEL);
782 if (label.length() > 0
783 && (label[0] == ' ' || label[0] == '\\' || label[0] == '"'))
784 putc('"', outfp);
785 put_string(label, outfp);
786 putc('\n', outfp);
788 ref->output(outfp);
789 delete ref;
791 ncitations = 0;
794 static void output_citation_group(reference **v, int n, label_type type,
795 FILE *fp)
797 if (sort_adjacent_labels) {
798 // Do an insertion sort. Usually n will be very small.
799 for (int i = 1; i < n; i++) {
800 int num = v[i]->get_number();
801 reference *temp = v[i];
802 int j;
803 for (j = i - 1; j >= 0 && v[j]->get_number() > num; j--)
804 v[j + 1] = v[j];
805 v[j + 1] = temp;
808 // This messes up if !accumulate.
809 if (accumulate && n > 1) {
810 // remove duplicates
811 int j = 1;
812 for (int i = 1; i < n; i++)
813 if (v[i]->get_label(type) != v[i - 1]->get_label(type))
814 v[j++] = v[i];
815 n = j;
817 string merged_label;
818 for (int i = 0; i < n; i++) {
819 int nmerged = v[i]->merge_labels(v + i + 1, n - i - 1, type, merged_label);
820 if (nmerged > 0) {
821 put_string(merged_label, fp);
822 i += nmerged;
824 else
825 put_string(v[i]->get_label(type), fp);
826 if (i < n - 1)
827 put_string(sep_label, fp);
832 label_processing_state::label_processing_state(reference **p, int n, FILE *f)
833 : state(NORMAL), count(0), rptr(p), rcount(n), fp(f)
837 label_processing_state::~label_processing_state()
839 int handled = handle_pending(EOF);
840 assert(!handled);
841 assert(rcount == 0);
844 int label_processing_state::handle_pending(int c)
846 switch (state) {
847 case NORMAL:
848 break;
849 case PENDING_LABEL:
850 if (c == POST_LABEL_MARKER) {
851 state = PENDING_LABEL_POST;
852 return 1;
854 else {
855 output_citation_group(rptr, count, type, fp);
856 rptr += count ;
857 rcount -= count;
858 state = NORMAL;
860 break;
861 case PENDING_LABEL_POST:
862 if (c == PRE_LABEL_MARKER) {
863 state = PENDING_LABEL_POST_PRE;
864 return 1;
866 else {
867 output_citation_group(rptr, count, type, fp);
868 rptr += count;
869 rcount -= count;
870 put_string(post_label, fp);
871 state = NORMAL;
873 break;
874 case PENDING_LABEL_POST_PRE:
875 if (c >= LABEL_MARKER
876 && c < LABEL_MARKER + N_LABEL_TYPES
877 && c - LABEL_MARKER == type) {
878 count += 1;
879 state = PENDING_LABEL;
880 return 1;
882 else {
883 output_citation_group(rptr, count, type, fp);
884 rptr += count;
885 rcount -= count;
886 put_string(sep_label, fp);
887 state = NORMAL;
889 break;
890 case PENDING_POST:
891 if (c == PRE_LABEL_MARKER) {
892 put_string(sep_label, fp);
893 state = NORMAL;
894 return 1;
896 else {
897 put_string(post_label, fp);
898 state = NORMAL;
900 break;
902 return 0;
905 void label_processing_state::process(int c)
907 if (handle_pending(c))
908 return;
909 assert(state == NORMAL);
910 switch (c) {
911 case PRE_LABEL_MARKER:
912 put_string(pre_label, fp);
913 state = NORMAL;
914 break;
915 case POST_LABEL_MARKER:
916 state = PENDING_POST;
917 break;
918 case LABEL_MARKER:
919 case LABEL_MARKER + 1:
920 count = 1;
921 state = PENDING_LABEL;
922 type = label_type(c - LABEL_MARKER);
923 break;
924 default:
925 state = NORMAL;
926 putc(c, fp);
927 break;
931 extern "C" {
933 int rcompare(const void *p1, const void *p2)
935 return compare_reference(**(reference **)p1, **(reference **)p2);
940 void output_references()
942 assert(accumulate);
943 if (!hash_table_size) {
944 if (have_bibliography)
945 error("nothing to reference (probably `bibliography' before `sort')");
946 accumulate = 0;
947 nreferences = 0;
948 return;
950 if (nreferences > 0) {
951 int j = 0;
952 int i;
953 for (i = 0; i < hash_table_size; i++)
954 if (reference_hash_table[i] != 0)
955 reference_hash_table[j++] = reference_hash_table[i];
956 assert(j == nreferences);
957 for (; j < hash_table_size; j++)
958 reference_hash_table[j] = 0;
959 qsort(reference_hash_table, nreferences, sizeof(reference*), rcompare);
960 for (i = 0; i < nreferences; i++)
961 reference_hash_table[i]->set_number(i);
962 compute_labels(reference_hash_table, nreferences);
964 if (outfp != stdout) {
965 rewind(outfp);
967 label_processing_state state(citation, ncitations, stdout);
968 int c;
969 while ((c = getc(outfp)) != EOF)
970 state.process(c);
972 ncitations = 0;
973 fclose(outfp);
974 outfp = stdout;
976 if (nreferences > 0) {
977 fputs(".]<\n", outfp);
978 for (int i = 0; i < nreferences; i++) {
979 if (sort_fields.length() > 0)
980 reference_hash_table[i]->print_sort_key_comment(outfp);
981 if (label_in_reference) {
982 fputs(".ds [F ", outfp);
983 const string &label = reference_hash_table[i]->get_label(NORMAL_LABEL);
984 if (label.length() > 0
985 && (label[0] == ' ' || label[0] == '\\' || label[0] == '"'))
986 putc('"', outfp);
987 put_string(label, outfp);
988 putc('\n', outfp);
990 reference_hash_table[i]->output(outfp);
991 delete reference_hash_table[i];
992 reference_hash_table[i] = 0;
994 fputs(".]>\n", outfp);
995 nreferences = 0;
997 clear_labels();
1000 static reference *find_reference(const char *query, int query_len)
1002 // This is so that error messages look better.
1003 while (query_len > 0 && csspace(query[query_len - 1]))
1004 query_len--;
1005 string str;
1006 for (int i = 0; i < query_len; i++)
1007 str += query[i] == '\n' ? ' ' : query[i];
1008 str += '\0';
1009 possibly_load_default_database();
1010 search_list_iterator iter(&database_list, str.contents());
1011 reference_id rid;
1012 const char *start;
1013 int len;
1014 if (!iter.next(&start, &len, &rid)) {
1015 error("no matches for `%1'", str.contents());
1016 return 0;
1018 const char *end = start + len;
1019 while (start < end) {
1020 if (*start == '%')
1021 break;
1022 while (start < end && *start++ != '\n')
1025 if (start >= end) {
1026 error("found a reference for `%1' but it didn't contain any fields",
1027 str.contents());
1028 return 0;
1030 reference *result = new reference(start, end - start, &rid);
1031 if (iter.next(&start, &len, &rid))
1032 warning("multiple matches for `%1'", str.contents());
1033 return result;
1036 static reference *make_reference(const string &str, unsigned *flagsp)
1038 const char *start = str.contents();
1039 const char *end = start + str.length();
1040 const char *ptr = start;
1041 while (ptr < end) {
1042 if (*ptr == '%')
1043 break;
1044 while (ptr < end && *ptr++ != '\n')
1047 *flagsp = 0;
1048 for (; start < ptr; start++) {
1049 if (*start == '#')
1050 *flagsp = (SHORT_LABEL | (*flagsp & (FORCE_RIGHT_BRACKET
1051 | FORCE_LEFT_BRACKET)));
1052 else if (*start == '[')
1053 *flagsp |= FORCE_LEFT_BRACKET;
1054 else if (*start == ']')
1055 *flagsp |= FORCE_RIGHT_BRACKET;
1056 else if (!csspace(*start))
1057 break;
1059 if (start >= end) {
1060 error("empty reference");
1061 return new reference;
1063 reference *database_ref = 0;
1064 if (start < ptr)
1065 database_ref = find_reference(start, ptr - start);
1066 reference *inline_ref = 0;
1067 if (ptr < end)
1068 inline_ref = new reference(ptr, end - ptr);
1069 if (inline_ref) {
1070 if (database_ref) {
1071 database_ref->merge(*inline_ref);
1072 delete inline_ref;
1073 return database_ref;
1075 else
1076 return inline_ref;
1078 else if (database_ref)
1079 return database_ref;
1080 else
1081 return new reference;
1084 static void do_ref(const string &str)
1086 if (accumulate)
1087 (void)store_reference(str);
1088 else {
1089 (void)immediately_handle_reference(str);
1090 immediately_output_references();
1094 static void trim_blanks(string &str)
1096 const char *start = str.contents();
1097 const char *end = start + str.length();
1098 while (end > start && end[-1] != '\n' && csspace(end[-1]))
1099 --end;
1100 str.set_length(end - start);
1103 void do_bib(const char *filename)
1105 FILE *fp;
1106 if (strcmp(filename, "-") == 0)
1107 fp = stdin;
1108 else {
1109 errno = 0;
1110 fp = fopen(filename, "r");
1111 if (fp == 0) {
1112 error("can't open `%1': %2", filename, strerror(errno));
1113 return;
1115 current_filename = filename;
1117 enum {
1118 START, MIDDLE, BODY, BODY_START, BODY_BLANK, BODY_DOT
1119 } state = START;
1120 string body;
1121 for (;;) {
1122 int c = getc(fp);
1123 if (c == EOF)
1124 break;
1125 if (invalid_input_char(c)) {
1126 error("invalid input character code %1", c);
1127 continue;
1129 switch (state) {
1130 case START:
1131 if (c == '%') {
1132 body = c;
1133 state = BODY;
1135 else if (c != '\n')
1136 state = MIDDLE;
1137 break;
1138 case MIDDLE:
1139 if (c == '\n')
1140 state = START;
1141 break;
1142 case BODY:
1143 body += c;
1144 if (c == '\n')
1145 state = BODY_START;
1146 break;
1147 case BODY_START:
1148 if (c == '\n') {
1149 do_ref(body);
1150 state = START;
1152 else if (c == '.')
1153 state = BODY_DOT;
1154 else if (csspace(c)) {
1155 state = BODY_BLANK;
1156 body += c;
1158 else {
1159 body += c;
1160 state = BODY;
1162 break;
1163 case BODY_BLANK:
1164 if (c == '\n') {
1165 trim_blanks(body);
1166 do_ref(body);
1167 state = START;
1169 else if (csspace(c))
1170 body += c;
1171 else {
1172 body += c;
1173 state = BODY;
1175 break;
1176 case BODY_DOT:
1177 if (c == ']') {
1178 do_ref(body);
1179 state = MIDDLE;
1181 else {
1182 body += '.';
1183 body += c;
1184 state = c == '\n' ? BODY_START : BODY;
1186 break;
1187 default:
1188 assert(0);
1190 if (c == '\n')
1191 current_lineno++;
1193 switch (state) {
1194 case START:
1195 case MIDDLE:
1196 break;
1197 case BODY:
1198 body += '\n';
1199 do_ref(body);
1200 break;
1201 case BODY_DOT:
1202 case BODY_START:
1203 do_ref(body);
1204 break;
1205 case BODY_BLANK:
1206 trim_blanks(body);
1207 do_ref(body);
1208 break;
1210 fclose(fp);
// from the Dragon Book

// Hash the first `len' characters of `s'.  Each character is added after
// shifting the running value left by four bits; whenever the top four
// bits become non-zero they are folded back into the low bits and cleared.
unsigned hash_string(const char *s, int len)
{
  unsigned hash = 0;
  for (int i = 0; i < len; i++) {
    hash = (hash << 4) + s[i];
    unsigned top = hash & 0xf0000000;
    if (top != 0) {
      hash ^= top >> 24;
      hash ^= top;
    }
  }
  return hash;
}
// Return the smallest entry of the built-in table of hash table sizes
// that is strictly greater than `n'.  Running off the end of the table
// trips an assertion.
int next_size(int n)
{
  static const int table_sizes[] = {
    101, 503, 1009, 2003, 3001, 4001, 5003, 10007, 20011, 40009,
    80021, 160001, 500009, 1000003, 2000003, 4000037, 8000009,
    16000057, 32000011, 64000031, 128000003, 0
  };
  // The table ends with a 0 sentinel.
  int idx = 0;
  while (table_sizes[idx] != 0 && table_sizes[idx] <= n)
    idx++;
  assert(table_sizes[idx] != 0);
  return table_sizes[idx];
}