Sync-to-go: update copyright for 2015
[s-roff.git] / src / pre-refer / refer.cpp
blob14204388041ca19182dca15b99a08cba4498f925
/*@
 * Copyright (c) 2014 - 2015 Steffen (Daode) Nurpmeso <sdaoden@users.sf.net>.
 *
 * Copyright (C) 1989 - 1992, 2000 - 2002, 2004, 2006
 *    Free Software Foundation, Inc.
 *      Written by James Clark (jjc@jclark.com)
 *
 * This is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2, or (at your option) any later
 * version.
 *
 * This is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with groff; see the file COPYING.  If not, write to the Free Software
 * Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA.
 */
23 #include "config.h"
24 #include "refer-config.h"
26 #include "file_case.h"
27 #include "refid.h"
28 #include "search.h"
30 #include "command.h"
31 #include "ref.h"
32 #include "refer.h"
33 #include "token.h"
35 const char PRE_LABEL_MARKER = '\013';
36 const char POST_LABEL_MARKER = '\014';
37 const char LABEL_MARKER = '\015'; // label_type is added on
39 #define FORCE_LEFT_BRACKET 04
40 #define FORCE_RIGHT_BRACKET 010
42 static FILE *outfp = stdout;
44 string capitalize_fields;
45 string reverse_fields;
46 string abbreviate_fields;
47 string period_before_last_name = ". ";
48 string period_before_initial = ".";
49 string period_before_hyphen = "";
50 string period_before_other = ". ";
51 string sort_fields;
52 int annotation_field = -1;
53 string annotation_macro;
54 string discard_fields = "XYZ";
55 string pre_label = "\\*([.";
56 string post_label = "\\*(.]";
57 string sep_label = ", ";
58 int have_bibliography = 0;
59 int accumulate = 0;
60 int move_punctuation = 0;
61 int abbreviate_label_ranges = 0;
62 string label_range_indicator;
63 int label_in_text = 1;
64 int label_in_reference = 1;
65 int date_as_label = 0;
66 int sort_adjacent_labels = 0;
67 // Join exactly two authors with this.
68 string join_authors_exactly_two = " and ";
69 // When there are more than two authors join the last two with this.
70 string join_authors_last_two = ", and ";
71 // Otherwise join authors with this.
72 string join_authors_default = ", ";
73 string separate_label_second_parts = ", ";
74 // Use this string to represent that there are other authors.
75 string et_al = " et al";
76 // Use et al only if it can replace at least this many authors.
77 int et_al_min_elide = 2;
78 // Use et al only if the total number of authors is at least this.
79 int et_al_min_total = 3;
81 int compatible_flag = 0;
83 int short_label_flag = 0;
85 static int recognize_R1_R2 = 1;
87 search_list database_list;
88 int search_default = 1;
89 static int default_database_loaded = 0;
91 static reference **citation = 0;
92 static int ncitations = 0;
93 static int citation_max = 0;
95 static reference **reference_hash_table = 0;
96 static int hash_table_size;
97 static int nreferences = 0;
99 static int need_syncing = 0;
100 string pending_line;
101 string pending_lf_lines;
103 static void output_pending_line();
104 static unsigned immediately_handle_reference(const string &);
105 static void immediately_output_references();
106 static unsigned store_reference(const string &);
107 static void divert_to_temporary_file();
108 static reference *make_reference(const string &, unsigned *);
109 static void usage(FILE *stream);
110 static void do_file(const char *);
111 static void split_punct(string &line, string &punct);
112 static void output_citation_group(reference **v, int n, label_type, FILE *fp);
113 static void possibly_load_default_database();
115 int main(int argc, char **argv)
117 program_name = argv[0];
118 static char stderr_buf[BUFSIZ];
119 setbuf(stderr, stderr_buf);
120 outfp = stdout;
121 int finished_options = 0;
122 int bib_flag = 0;
123 int done_spec = 0;
125 for (--argc, ++argv;
126 !finished_options && argc > 0 && argv[0][0] == '-'
127 && argv[0][1] != '\0';
128 argv++, argc--) {
129 const char *opt = argv[0] + 1;
130 while (opt != 0 && *opt != '\0') {
131 switch (*opt) {
132 case 'C':
133 compatible_flag = 1;
134 opt++;
135 break;
136 case 'B':
137 bib_flag = 1;
138 label_in_reference = 0;
139 label_in_text = 0;
140 ++opt;
141 if (*opt == '\0') {
142 annotation_field = 'X';
143 annotation_macro = "AP";
145 else if (csalnum(opt[0]) && opt[1] == '.' && opt[2] != '\0') {
146 annotation_field = opt[0];
147 annotation_macro = opt + 2;
149 opt = 0;
150 break;
151 case 'P':
152 move_punctuation = 1;
153 opt++;
154 break;
155 case 'R':
156 recognize_R1_R2 = 0;
157 opt++;
158 break;
159 case 'S':
160 // Not a very useful spec.
161 set_label_spec("(A.n|Q)', '(D.y|D)");
162 done_spec = 1;
163 pre_label = " (";
164 post_label = ")";
165 sep_label = "; ";
166 opt++;
167 break;
168 case 'V':
169 verify_flag = 1;
170 opt++;
171 break;
172 case 'f':
174 const char *num = 0;
175 if (*++opt == '\0') {
176 if (argc > 1) {
177 num = *++argv;
178 --argc;
180 else {
181 error("option `f' requires an argument");
182 usage(stderr);
183 exit(1);
186 else {
187 num = opt;
188 opt = 0;
190 const char *ptr;
191 for (ptr = num; *ptr; ptr++)
192 if (!csdigit(*ptr)) {
193 error("bad character `%1' in argument to -f option", *ptr);
194 break;
196 if (*ptr == '\0') {
197 string spec;
198 spec = '%';
199 spec += num;
200 spec += '\0';
201 set_label_spec(spec.contents());
202 done_spec = 1;
204 break;
206 case 'b':
207 label_in_text = 0;
208 label_in_reference = 0;
209 opt++;
210 break;
211 case 'e':
212 accumulate = 1;
213 opt++;
214 break;
215 case 'c':
216 capitalize_fields = ++opt;
217 opt = 0;
218 break;
219 case 'k':
221 char buf[5];
222 if (csalpha(*++opt))
223 buf[0] = *opt++;
224 else {
225 if (*opt != '\0')
226 error("bad field name `%1'", *opt++);
227 buf[0] = 'L';
229 buf[1] = '~';
230 buf[2] = '%';
231 buf[3] = 'a';
232 buf[4] = '\0';
233 set_label_spec(buf);
234 done_spec = 1;
236 break;
237 case 'a':
239 const char *ptr;
240 for (ptr = ++opt; *ptr; ptr++)
241 if (!csdigit(*ptr)) {
242 error("argument to `a' option not a number");
243 break;
245 if (*ptr == '\0') {
246 reverse_fields = 'A';
247 reverse_fields += opt;
249 opt = 0;
251 break;
252 case 'i':
253 linear_ignore_fields = ++opt;
254 opt = 0;
255 break;
256 case 'l':
258 char buf[INT_DIGITS*2 + 11]; // A.n+2D.y-3%a
259 strcpy(buf, "A.n");
260 if (*++opt != '\0' && *opt != ',') {
261 char *ptr;
262 long n = strtol(opt, &ptr, 10);
263 if (n == 0 && ptr == opt) {
264 error("bad integer `%1' in `l' option", opt);
265 opt = 0;
266 break;
268 if (n < 0)
269 n = 0;
270 opt = ptr;
271 sprintf(strchr(buf, '\0'), "+%ld", n);
273 strcat(buf, "D.y");
274 if (*opt == ',')
275 opt++;
276 if (*opt != '\0') {
277 char *ptr;
278 long n = strtol(opt, &ptr, 10);
279 if (n == 0 && ptr == opt) {
280 error("bad integer `%1' in `l' option", opt);
281 opt = 0;
282 break;
284 if (n < 0)
285 n = 0;
286 sprintf(strchr(buf, '\0'), "-%ld", n);
287 opt = ptr;
288 if (*opt != '\0')
289 error("argument to `l' option not of form `m,n'");
291 strcat(buf, "%a");
292 if (!set_label_spec(buf))
293 assert(0);
294 done_spec = 1;
296 break;
297 case 'n':
298 search_default = 0;
299 opt++;
300 break;
301 case 'p':
303 const char *filename = 0;
304 if (*++opt == '\0') {
305 if (argc > 1) {
306 filename = *++argv;
307 argc--;
309 else {
310 error("option `p' requires an argument");
311 usage(stderr);
312 exit(1);
315 else {
316 filename = opt;
317 opt = 0;
319 database_list.add_file(filename);
321 break;
322 case 's':
323 if (*++opt == '\0')
324 sort_fields = "AD";
325 else {
326 sort_fields = opt;
327 opt = 0;
329 accumulate = 1;
330 break;
331 case 't':
333 char *ptr;
334 long n = strtol(opt, &ptr, 10);
335 if (n == 0 && ptr == opt) {
336 error("bad integer `%1' in `t' option", opt);
337 opt = 0;
338 break;
340 if (n < 1)
341 n = 1;
342 linear_truncate_len = int(n);
343 opt = ptr;
344 break;
346 case '-':
347 if (opt[1] == '\0') {
348 finished_options = 1;
349 opt++;
350 break;
352 if (strcmp(opt,"-version")==0) {
353 case 'v':
354 puts(L_P_REFER " (" T_ROFF ") v " VERSION);
355 exit(0);
356 break;
358 if (strcmp(opt,"-help")==0) {
359 usage(stdout);
360 exit(0);
361 break;
363 // fall through
364 default:
365 error("unrecognized option `%1'", *opt);
366 usage(stderr);
367 exit(1);
368 break;
372 if (!done_spec)
373 set_label_spec("%1");
374 if (argc <= 0) {
375 if (bib_flag)
376 do_bib("-");
377 else
378 do_file("-");
380 else {
381 for (int i = 0; i < argc; i++) {
382 if (bib_flag)
383 do_bib(argv[i]);
384 else
385 do_file(argv[i]);
388 if (accumulate)
389 output_references();
390 if (fflush(stdout) < 0)
391 fatal("output error");
392 return 0;
395 static void usage(FILE *stream)
397 fprintf(stream,
398 "Synopsis: %s [-benvCPRS] [-aN] [-cXYZ] [-fN] [-iXYZ] [-kX] [-lM,N] [-p file]\n"
399 " [-sXYZ] [-tN] [-BL.M] [files ...]\n",
400 program_name);
403 static void possibly_load_default_database()
405 if (search_default && !default_database_loaded) {
406 char *filename = getenv("REFER");
407 if (filename)
408 database_list.add_file(filename);
409 else
410 database_list.add_file(DEFAULT_INDEX, 1);
411 default_database_loaded = 1;
415 static int is_list(const string &str)
417 const char *start = str.contents();
418 const char *end = start + str.length();
419 while (end > start && csspace(end[-1]))
420 end--;
421 while (start < end && csspace(*start))
422 start++;
423 return end - start == 6 && memcmp(start, "$LIST$", 6) == 0;
426 static void do_file(const char *filename)
428 file_case *fcp;
429 if ((fcp = file_case::muxer(filename)) == NULL) {
430 assert(strcmp(filename, "-"));
431 error("can't open `%1': %2", filename, strerror(errno));
432 return;
435 current_filename = filename;
436 fprintf(outfp, ".lf 1 %s\n", filename);
437 string line;
438 current_lineno = 0;
439 for (;;) {
440 line.clear();
441 for (;;) {
442 int c = fcp->get_c();
443 if (c == EOF) {
444 if (line.length() > 0)
445 line += '\n';
446 break;
448 if (invalid_input_char(c))
449 error("invalid input character code %1", c);
450 else {
451 line += c;
452 if (c == '\n')
453 break;
456 int len = line.length();
457 if (len == 0)
458 break;
459 current_lineno++;
460 if (len >= 2 && line[0] == '.' && line[1] == '[') {
461 int start_lineno = current_lineno;
462 int start_of_line = 1;
463 string str;
464 string post;
465 string pre(line.contents() + 2, line.length() - 3);
466 for (;;) {
467 int c = fcp->get_c();
468 if (c == EOF) {
469 error_with_file_and_line(current_filename, start_lineno,
470 "missing `.]' line");
471 break;
473 if (start_of_line)
474 current_lineno++;
475 if (start_of_line && c == '.') {
476 int d = fcp->get_c();
477 if (d == ']') {
478 while ((d = fcp->get_c()) != '\n' && d != EOF) {
479 if (invalid_input_char(d))
480 error("invalid input character code %1", d);
481 else
482 post += d;
484 break;
486 if (d != EOF)
487 fcp->unget_c(d);
489 if (invalid_input_char(c))
490 error("invalid input character code %1", c);
491 else
492 str += c;
493 start_of_line = (c == '\n');
495 if (is_list(str)) {
496 output_pending_line();
497 if (accumulate)
498 output_references();
499 else
500 error("found `$LIST$' but not accumulating references");
502 else {
503 unsigned flags = (accumulate
504 ? store_reference(str)
505 : immediately_handle_reference(str));
506 if (label_in_text) {
507 if (accumulate && outfp == stdout)
508 divert_to_temporary_file();
509 if (pending_line.length() == 0) {
510 warning("can't attach citation to previous line");
512 else
513 pending_line.set_length(pending_line.length() - 1);
514 string punct;
515 if (move_punctuation)
516 split_punct(pending_line, punct);
517 int have_text = pre.length() > 0 || post.length() > 0;
518 label_type lt = label_type(flags & ~(FORCE_LEFT_BRACKET
519 |FORCE_RIGHT_BRACKET));
520 if ((flags & FORCE_LEFT_BRACKET) || !have_text)
521 pending_line += PRE_LABEL_MARKER;
522 pending_line += pre;
523 char lm = LABEL_MARKER + (int)lt;
524 pending_line += lm;
525 pending_line += post;
526 if ((flags & FORCE_RIGHT_BRACKET) || !have_text)
527 pending_line += POST_LABEL_MARKER;
528 pending_line += punct;
529 pending_line += '\n';
532 need_syncing = 1;
534 else if (len >= 4
535 && line[0] == '.' && line[1] == 'l' && line[2] == 'f'
536 && (compatible_flag || line[3] == '\n' || line[3] == ' ')) {
537 pending_lf_lines += line;
538 line += '\0';
539 if (interpret_lf_args(line.contents() + 3))
540 current_lineno--;
542 else if (recognize_R1_R2
543 && len >= 4
544 && line[0] == '.' && line[1] == 'R' && line[2] == '1'
545 && (compatible_flag || line[3] == '\n' || line[3] == ' ')) {
546 line.clear();
547 int start_of_line = 1;
548 int start_lineno = current_lineno;
549 for (;;) {
550 int c = fcp->get_c();
551 if (c != EOF && start_of_line)
552 current_lineno++;
553 if (start_of_line && c == '.') {
554 c = fcp->get_c();
555 if (c == 'R') {
556 c = fcp->get_c();
557 if (c == '2') {
558 c = fcp->get_c();
559 if (compatible_flag || c == ' ' || c == '\n' || c == EOF) {
560 while (c != EOF && c != '\n')
561 c = fcp->get_c();
562 break;
563 } else {
564 line += '.';
565 line += 'R';
566 line += '2';
569 else {
570 line += '.';
571 line += 'R';
574 else
575 line += '.';
577 if (c == EOF) {
578 error_with_file_and_line(current_filename, start_lineno,
579 "missing `.R2' line");
580 break;
582 if (invalid_input_char(c))
583 error("invalid input character code %1", int(c));
584 else {
585 line += c;
586 start_of_line = c == '\n';
589 output_pending_line();
590 if (accumulate)
591 output_references();
592 else
593 nreferences = 0;
594 process_commands(line, current_filename, start_lineno + 1);
595 need_syncing = 1;
597 else {
598 output_pending_line();
599 pending_line = line;
602 need_syncing = 0;
603 output_pending_line();
605 delete fcp;
608 class label_processing_state {
609 enum {
610 NORMAL,
611 PENDING_LABEL,
612 PENDING_LABEL_POST,
613 PENDING_LABEL_POST_PRE,
614 PENDING_POST
615 } state;
616 label_type type; // type of pending labels
617 int count; // number of pending labels
618 reference **rptr; // pointer to next reference
619 int rcount; // number of references left
620 FILE *fp;
621 int handle_pending(int c);
622 public:
623 label_processing_state(reference **, int, FILE *);
624 ~label_processing_state();
625 void process(int c);
628 static void output_pending_line()
630 if (label_in_text && !accumulate && ncitations > 0) {
631 label_processing_state state(citation, ncitations, outfp);
632 int len = pending_line.length();
633 for (int i = 0; i < len; i++)
634 state.process((unsigned char)(pending_line[i]));
636 else
637 put_string(pending_line, outfp);
638 pending_line.clear();
639 if (pending_lf_lines.length() > 0) {
640 put_string(pending_lf_lines, outfp);
641 pending_lf_lines.clear();
643 if (!accumulate)
644 immediately_output_references();
645 if (need_syncing) {
646 fprintf(outfp, ".lf %d %s\n", current_lineno, current_filename);
647 need_syncing = 0;
651 static void split_punct(string &line, string &punct)
653 const char *start = line.contents();
654 const char *end = start + line.length();
655 const char *ptr = start;
656 const char *last_token_start = 0;
657 for (;;) {
658 if (ptr >= end)
659 break;
660 last_token_start = ptr;
661 if (*ptr == PRE_LABEL_MARKER || *ptr == POST_LABEL_MARKER
662 || (*ptr >= LABEL_MARKER && *ptr < LABEL_MARKER + N_LABEL_TYPES))
663 ptr++;
664 else if (!get_token(&ptr, end))
665 break;
667 if (last_token_start) {
668 const token_info *ti = lookup_token(last_token_start, end);
669 if (ti->is_punct()) {
670 punct.append(last_token_start, end - last_token_start);
671 line.set_length(last_token_start - start);
676 static void divert_to_temporary_file()
678 outfp = xtmpfile();
681 static void store_citation(reference *ref)
683 if (ncitations >= citation_max) {
684 if (citation == 0)
685 citation = new reference*[citation_max = 100];
686 else {
687 reference **old_citation = citation;
688 citation_max *= 2;
689 citation = new reference *[citation_max];
690 memcpy(citation, old_citation, ncitations*sizeof(reference *));
691 a_delete old_citation;
694 citation[ncitations++] = ref;
697 static unsigned store_reference(const string &str)
699 if (reference_hash_table == 0) {
700 reference_hash_table = new reference *[17];
701 hash_table_size = 17;
702 for (int i = 0; i < hash_table_size; i++)
703 reference_hash_table[i] = 0;
705 unsigned flags;
706 reference *ref = make_reference(str, &flags);
707 ref->compute_hash_code();
708 unsigned h = ref->hash();
709 reference **ptr;
710 for (ptr = reference_hash_table + (h % hash_table_size);
711 *ptr != 0;
712 ((ptr == reference_hash_table)
713 ? (ptr = reference_hash_table + hash_table_size - 1)
714 : --ptr))
715 if (same_reference(**ptr, *ref))
716 break;
717 if (*ptr != 0) {
718 if (ref->is_merged())
719 warning("fields ignored because reference already used");
720 delete ref;
721 ref = *ptr;
723 else {
724 *ptr = ref;
725 ref->set_number(nreferences);
726 nreferences++;
727 ref->pre_compute_label();
728 ref->compute_sort_key();
729 if (nreferences*2 >= hash_table_size) {
730 // Rehash it.
731 reference **old_table = reference_hash_table;
732 int old_size = hash_table_size;
733 hash_table_size = next_size(hash_table_size);
734 reference_hash_table = new reference*[hash_table_size];
735 int i;
736 for (i = 0; i < hash_table_size; i++)
737 reference_hash_table[i] = 0;
738 for (i = 0; i < old_size; i++)
739 if (old_table[i]) {
740 reference **p;
741 for (p = (reference_hash_table
742 + (old_table[i]->hash() % hash_table_size));
744 ((p == reference_hash_table)
745 ? (p = reference_hash_table + hash_table_size - 1)
746 : --p))
748 *p = old_table[i];
750 a_delete old_table;
753 if (label_in_text)
754 store_citation(ref);
755 return flags;
758 unsigned immediately_handle_reference(const string &str)
760 unsigned flags;
761 reference *ref = make_reference(str, &flags);
762 ref->set_number(nreferences);
763 if (label_in_text || label_in_reference) {
764 ref->pre_compute_label();
765 ref->immediate_compute_label();
767 nreferences++;
768 store_citation(ref);
769 return flags;
772 static void immediately_output_references()
774 for (int i = 0; i < ncitations; i++) {
775 reference *ref = citation[i];
776 if (label_in_reference) {
777 fputs(".ds [F ", outfp);
778 const string &label = ref->get_label(NORMAL_LABEL);
779 if (label.length() > 0
780 && (label[0] == ' ' || label[0] == '\\' || label[0] == '"'))
781 putc('"', outfp);
782 put_string(label, outfp);
783 putc('\n', outfp);
785 ref->output(outfp);
786 delete ref;
788 ncitations = 0;
791 static void output_citation_group(reference **v, int n, label_type type,
792 FILE *fp)
794 if (sort_adjacent_labels) {
795 // Do an insertion sort. Usually n will be very small.
796 for (int i = 1; i < n; i++) {
797 int num = v[i]->get_number();
798 reference *temp = v[i];
799 int j;
800 for (j = i - 1; j >= 0 && v[j]->get_number() > num; j--)
801 v[j + 1] = v[j];
802 v[j + 1] = temp;
805 // This messes up if !accumulate.
806 if (accumulate && n > 1) {
807 // remove duplicates
808 int j = 1;
809 for (int i = 1; i < n; i++)
810 if (v[i]->get_label(type) != v[i - 1]->get_label(type))
811 v[j++] = v[i];
812 n = j;
814 string merged_label;
815 for (int i = 0; i < n; i++) {
816 int nmerged = v[i]->merge_labels(v + i + 1, n - i - 1, type, merged_label);
817 if (nmerged > 0) {
818 put_string(merged_label, fp);
819 i += nmerged;
821 else
822 put_string(v[i]->get_label(type), fp);
823 if (i < n - 1)
824 put_string(sep_label, fp);
828 label_processing_state::label_processing_state(reference **p, int n, FILE *f)
829 : state(NORMAL), count(0), rptr(p), rcount(n), fp(f)
833 label_processing_state::~label_processing_state()
835 int handled = handle_pending(EOF);
836 assert(!handled);
837 assert(rcount == 0);
840 int label_processing_state::handle_pending(int c)
842 switch (state) {
843 case NORMAL:
844 break;
845 case PENDING_LABEL:
846 if (c == POST_LABEL_MARKER) {
847 state = PENDING_LABEL_POST;
848 return 1;
850 else {
851 output_citation_group(rptr, count, type, fp);
852 rptr += count ;
853 rcount -= count;
854 state = NORMAL;
856 break;
857 case PENDING_LABEL_POST:
858 if (c == PRE_LABEL_MARKER) {
859 state = PENDING_LABEL_POST_PRE;
860 return 1;
862 else {
863 output_citation_group(rptr, count, type, fp);
864 rptr += count;
865 rcount -= count;
866 put_string(post_label, fp);
867 state = NORMAL;
869 break;
870 case PENDING_LABEL_POST_PRE:
871 if (c >= LABEL_MARKER
872 && c < LABEL_MARKER + N_LABEL_TYPES
873 && c - LABEL_MARKER == type) {
874 count += 1;
875 state = PENDING_LABEL;
876 return 1;
878 else {
879 output_citation_group(rptr, count, type, fp);
880 rptr += count;
881 rcount -= count;
882 put_string(sep_label, fp);
883 state = NORMAL;
885 break;
886 case PENDING_POST:
887 if (c == PRE_LABEL_MARKER) {
888 put_string(sep_label, fp);
889 state = NORMAL;
890 return 1;
892 else {
893 put_string(post_label, fp);
894 state = NORMAL;
896 break;
898 return 0;
901 void label_processing_state::process(int c)
903 if (handle_pending(c))
904 return;
905 assert(state == NORMAL);
906 switch (c) {
907 case PRE_LABEL_MARKER:
908 put_string(pre_label, fp);
909 state = NORMAL;
910 break;
911 case POST_LABEL_MARKER:
912 state = PENDING_POST;
913 break;
914 case LABEL_MARKER:
915 case LABEL_MARKER + 1:
916 count = 1;
917 state = PENDING_LABEL;
918 type = label_type(c - LABEL_MARKER);
919 break;
920 default:
921 state = NORMAL;
922 putc(c, fp);
923 break;
927 static int rcompare(const void *p1, const void *p2)
929 return compare_reference(**(reference **)p1, **(reference **)p2);
932 void output_references()
934 assert(accumulate);
935 if (!hash_table_size) {
936 if (have_bibliography)
937 error("nothing to reference (probably `bibliography' before `sort')");
938 accumulate = 0;
939 nreferences = 0;
940 return;
942 if (nreferences > 0) {
943 int j = 0;
944 int i;
945 for (i = 0; i < hash_table_size; i++)
946 if (reference_hash_table[i] != 0)
947 reference_hash_table[j++] = reference_hash_table[i];
948 assert(j == nreferences);
949 for (; j < hash_table_size; j++)
950 reference_hash_table[j] = 0;
951 qsort(reference_hash_table, nreferences, sizeof(reference*), &rcompare);
952 for (i = 0; i < nreferences; i++)
953 reference_hash_table[i]->set_number(i);
954 compute_labels(reference_hash_table, nreferences);
956 if (outfp != stdout) {
957 rewind(outfp);
959 label_processing_state state(citation, ncitations, stdout);
960 int c;
961 while ((c = getc(outfp)) != EOF)
962 state.process(c);
964 ncitations = 0;
965 fclose(outfp);
966 outfp = stdout;
968 if (nreferences > 0) {
969 fputs(".]<\n", outfp);
970 for (int i = 0; i < nreferences; i++) {
971 if (sort_fields.length() > 0)
972 reference_hash_table[i]->print_sort_key_comment(outfp);
973 if (label_in_reference) {
974 fputs(".ds [F ", outfp);
975 const string &label = reference_hash_table[i]->get_label(NORMAL_LABEL);
976 if (label.length() > 0
977 && (label[0] == ' ' || label[0] == '\\' || label[0] == '"'))
978 putc('"', outfp);
979 put_string(label, outfp);
980 putc('\n', outfp);
982 reference_hash_table[i]->output(outfp);
983 delete reference_hash_table[i];
984 reference_hash_table[i] = 0;
986 fputs(".]>\n", outfp);
987 nreferences = 0;
989 clear_labels();
992 static reference *find_reference(const char *query, int query_len)
994 // This is so that error messages look better.
995 while (query_len > 0 && csspace(query[query_len - 1]))
996 query_len--;
997 string str;
998 for (int i = 0; i < query_len; i++)
999 str += query[i] == '\n' ? ' ' : query[i];
1000 str += '\0';
1001 possibly_load_default_database();
1002 search_list_iterator iter(&database_list, str.contents());
1003 reference_id rid;
1004 const char *start;
1005 int len;
1006 if (!iter.next(&start, &len, &rid)) {
1007 error("no matches for `%1'", str.contents());
1008 return 0;
1010 const char *end = start + len;
1011 while (start < end) {
1012 if (*start == '%')
1013 break;
1014 while (start < end && *start++ != '\n')
1017 if (start >= end) {
1018 error("found a reference for `%1' but it didn't contain any fields",
1019 str.contents());
1020 return 0;
1022 reference *result = new reference(start, end - start, &rid);
1023 if (iter.next(&start, &len, &rid))
1024 warning("multiple matches for `%1'", str.contents());
1025 return result;
1028 static reference *make_reference(const string &str, unsigned *flagsp)
1030 const char *start = str.contents();
1031 const char *end = start + str.length();
1032 const char *ptr = start;
1033 while (ptr < end) {
1034 if (*ptr == '%')
1035 break;
1036 while (ptr < end && *ptr++ != '\n')
1039 *flagsp = 0;
1040 for (; start < ptr; start++) {
1041 if (*start == '#')
1042 *flagsp = (SHORT_LABEL | (*flagsp & (FORCE_RIGHT_BRACKET
1043 | FORCE_LEFT_BRACKET)));
1044 else if (*start == '[')
1045 *flagsp |= FORCE_LEFT_BRACKET;
1046 else if (*start == ']')
1047 *flagsp |= FORCE_RIGHT_BRACKET;
1048 else if (!csspace(*start))
1049 break;
1051 if (start >= end) {
1052 error("empty reference");
1053 return new reference;
1055 reference *database_ref = 0;
1056 if (start < ptr)
1057 database_ref = find_reference(start, ptr - start);
1058 reference *inline_ref = 0;
1059 if (ptr < end)
1060 inline_ref = new reference(ptr, end - ptr);
1061 if (inline_ref) {
1062 if (database_ref) {
1063 database_ref->merge(*inline_ref);
1064 delete inline_ref;
1065 return database_ref;
1067 else
1068 return inline_ref;
1070 else if (database_ref)
1071 return database_ref;
1072 else
1073 return new reference;
1076 static void do_ref(const string &str)
1078 if (accumulate)
1079 (void)store_reference(str);
1080 else {
1081 (void)immediately_handle_reference(str);
1082 immediately_output_references();
1086 static void trim_blanks(string &str)
1088 const char *start = str.contents();
1089 const char *end = start + str.length();
1090 while (end > start && end[-1] != '\n' && csspace(end[-1]))
1091 --end;
1092 str.set_length(end - start);
1095 void do_bib(const char *filename)
1097 FILE *fp;
1098 if (strcmp(filename, "-") == 0)
1099 fp = stdin;
1100 else {
1101 errno = 0;
1102 fp = fopen(filename, "r");
1103 if (fp == 0) {
1104 error("can't open `%1': %2", filename, strerror(errno));
1105 return;
1107 current_filename = filename;
1109 enum {
1110 START, MIDDLE, BODY, BODY_START, BODY_BLANK, BODY_DOT
1111 } state = START;
1112 string body;
1113 for (;;) {
1114 int c = getc(fp);
1115 if (c == EOF)
1116 break;
1117 if (invalid_input_char(c)) {
1118 error("invalid input character code %1", c);
1119 continue;
1121 switch (state) {
1122 case START:
1123 if (c == '%') {
1124 body = c;
1125 state = BODY;
1127 else if (c != '\n')
1128 state = MIDDLE;
1129 break;
1130 case MIDDLE:
1131 if (c == '\n')
1132 state = START;
1133 break;
1134 case BODY:
1135 body += c;
1136 if (c == '\n')
1137 state = BODY_START;
1138 break;
1139 case BODY_START:
1140 if (c == '\n') {
1141 do_ref(body);
1142 state = START;
1144 else if (c == '.')
1145 state = BODY_DOT;
1146 else if (csspace(c)) {
1147 state = BODY_BLANK;
1148 body += c;
1150 else {
1151 body += c;
1152 state = BODY;
1154 break;
1155 case BODY_BLANK:
1156 if (c == '\n') {
1157 trim_blanks(body);
1158 do_ref(body);
1159 state = START;
1161 else if (csspace(c))
1162 body += c;
1163 else {
1164 body += c;
1165 state = BODY;
1167 break;
1168 case BODY_DOT:
1169 if (c == ']') {
1170 do_ref(body);
1171 state = MIDDLE;
1173 else {
1174 body += '.';
1175 body += c;
1176 state = c == '\n' ? BODY_START : BODY;
1178 break;
1179 default:
1180 assert(0);
1182 if (c == '\n')
1183 current_lineno++;
1185 switch (state) {
1186 case START:
1187 case MIDDLE:
1188 break;
1189 case BODY:
1190 body += '\n';
1191 do_ref(body);
1192 break;
1193 case BODY_DOT:
1194 case BODY_START:
1195 do_ref(body);
1196 break;
1197 case BODY_BLANK:
1198 trim_blanks(body);
1199 do_ref(body);
1200 break;
1202 fclose(fp);
1205 // from the Dragon Book
// Hash the `len' bytes starting at `s': shift in four bits per byte and
// fold the top four bits back into the low bits whenever they become
// nonzero.
unsigned hash_string(const char *s, int len) // FIXME Torek's hash (lib-roff!)
{
  unsigned hash = 0;
  for (const char *stop = s + len; s < stop; ++s) {
    hash = (hash << 4) + *s;
    unsigned top = hash & 0xf0000000;
    if (top != 0) {
      hash ^= top >> 24;
      hash ^= top;
    }
  }
  return hash;
}
// Return the first entry of a fixed table of sizes that is greater than
// `n'; asserts if `n' is not below the largest entry.
int next_size(int n) // FIXME PRIME LOOKUP -> lib-roff!!
{
  static const int table_sizes[] = {
    101, 503, 1009, 2003, 3001, 4001, 5003, 10007, 20011, 40009,
    80021, 160001, 500009, 1000003, 2000003, 4000037, 8000009,
    16000057, 32000011, 64000031, 128000003, 0
  };

  int idx = 0;
  // The trailing 0 terminates the table.
  while (table_sizes[idx] <= n && table_sizes[idx] != 0)
    idx++;
  assert(table_sizes[idx] != 0);
  return table_sizes[idx];
}
1237 // s-it2-mode