2 * Copyright (c) 2014 - 2015 Steffen (Daode) Nurpmeso <sdaoden@users.sf.net>.
4 * Copyright (C) 1989 - 1992, 2000 - 2002, 2004, 2006
5 * Free Software Foundation, Inc.
6 * Written by James Clark (jjc@jclark.com)
8 * This is free software; you can redistribute it and/or modify it under
9 * the terms of the GNU General Public License as published by the Free
10 * Software Foundation; either version 2, or (at your option) any later
13 * This is distributed in the hope that it will be useful, but WITHOUT ANY
14 * WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 * You should have received a copy of the GNU General Public License along
19 * with groff; see the file COPYING. If not, write to the Free Software
20 * Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA.
24 #include "refer-config.h"
26 #include "file_case.h"
35 const char PRE_LABEL_MARKER
= '\013';
36 const char POST_LABEL_MARKER
= '\014';
37 const char LABEL_MARKER
= '\015'; // label_type is added on
39 #define FORCE_LEFT_BRACKET 04
40 #define FORCE_RIGHT_BRACKET 010
42 static FILE *outfp
= stdout
;
44 string capitalize_fields
;
45 string reverse_fields
;
46 string abbreviate_fields
;
47 string period_before_last_name
= ". ";
48 string period_before_initial
= ".";
49 string period_before_hyphen
= "";
50 string period_before_other
= ". ";
52 int annotation_field
= -1;
53 string annotation_macro
;
54 string discard_fields
= "XYZ";
55 string pre_label
= "\\*([.";
56 string post_label
= "\\*(.]";
57 string sep_label
= ", ";
58 int have_bibliography
= 0;
60 int move_punctuation
= 0;
61 int abbreviate_label_ranges
= 0;
62 string label_range_indicator
;
63 int label_in_text
= 1;
64 int label_in_reference
= 1;
65 int date_as_label
= 0;
66 int sort_adjacent_labels
= 0;
67 // Join exactly two authors with this.
68 string join_authors_exactly_two
= " and ";
69 // When there are more than two authors join the last two with this.
70 string join_authors_last_two
= ", and ";
71 // Otherwise join authors with this.
72 string join_authors_default
= ", ";
73 string separate_label_second_parts
= ", ";
74 // Use this string to represent that there are other authors.
75 string et_al
= " et al";
76 // Use et al only if it can replace at least this many authors.
77 int et_al_min_elide
= 2;
78 // Use et al only if the total number of authors is at least this.
79 int et_al_min_total
= 3;
81 int compatible_flag
= 0;
83 int short_label_flag
= 0;
85 static int recognize_R1_R2
= 1;
87 search_list database_list
;
88 int search_default
= 1;
89 static int default_database_loaded
= 0;
91 static reference
**citation
= 0;
92 static int ncitations
= 0;
93 static int citation_max
= 0;
95 static reference
**reference_hash_table
= 0;
96 static int hash_table_size
;
97 static int nreferences
= 0;
99 static int need_syncing
= 0;
101 string pending_lf_lines
;
103 static void output_pending_line();
104 static unsigned immediately_handle_reference(const string
&);
105 static void immediately_output_references();
106 static unsigned store_reference(const string
&);
107 static void divert_to_temporary_file();
108 static reference
*make_reference(const string
&, unsigned *);
109 static void usage(FILE *stream
);
110 static void do_file(const char *);
111 static void split_punct(string
&line
, string
&punct
);
112 static void output_citation_group(reference
**v
, int n
, label_type
, FILE *fp
);
113 static void possibly_load_default_database();
115 int main(int argc
, char **argv
)
117 program_name
= argv
[0];
118 static char stderr_buf
[BUFSIZ
];
119 setbuf(stderr
, stderr_buf
);
121 int finished_options
= 0;
126 !finished_options
&& argc
> 0 && argv
[0][0] == '-'
127 && argv
[0][1] != '\0';
129 const char *opt
= argv
[0] + 1;
130 while (opt
!= 0 && *opt
!= '\0') {
138 label_in_reference
= 0;
142 annotation_field
= 'X';
143 annotation_macro
= "AP";
145 else if (csalnum(opt
[0]) && opt
[1] == '.' && opt
[2] != '\0') {
146 annotation_field
= opt
[0];
147 annotation_macro
= opt
+ 2;
152 move_punctuation
= 1;
160 // Not a very useful spec.
161 set_label_spec("(A.n|Q)', '(D.y|D)");
175 if (*++opt
== '\0') {
181 error("option `f' requires an argument");
191 for (ptr
= num
; *ptr
; ptr
++)
192 if (!csdigit(*ptr
)) {
193 error("bad character `%1' in argument to -f option", *ptr
);
201 set_label_spec(spec
.contents());
208 label_in_reference
= 0;
216 capitalize_fields
= ++opt
;
226 error("bad field name `%1'", *opt
++);
240 for (ptr
= ++opt
; *ptr
; ptr
++)
241 if (!csdigit(*ptr
)) {
242 error("argument to `a' option not a number");
246 reverse_fields
= 'A';
247 reverse_fields
+= opt
;
253 linear_ignore_fields
= ++opt
;
258 char buf
[INT_DIGITS
*2 + 11]; // A.n+2D.y-3%a
260 if (*++opt
!= '\0' && *opt
!= ',') {
262 long n
= strtol(opt
, &ptr
, 10);
263 if (n
== 0 && ptr
== opt
) {
264 error("bad integer `%1' in `l' option", opt
);
271 sprintf(strchr(buf
, '\0'), "+%ld", n
);
278 long n
= strtol(opt
, &ptr
, 10);
279 if (n
== 0 && ptr
== opt
) {
280 error("bad integer `%1' in `l' option", opt
);
286 sprintf(strchr(buf
, '\0'), "-%ld", n
);
289 error("argument to `l' option not of form `m,n'");
292 if (!set_label_spec(buf
))
303 const char *filename
= 0;
304 if (*++opt
== '\0') {
310 error("option `p' requires an argument");
319 database_list
.add_file(filename
);
334 long n
= strtol(opt
, &ptr
, 10);
335 if (n
== 0 && ptr
== opt
) {
336 error("bad integer `%1' in `t' option", opt
);
342 linear_truncate_len
= int(n
);
347 if (opt
[1] == '\0') {
348 finished_options
= 1;
352 if (strcmp(opt
,"-version")==0) {
354 puts(L_P_REFER
" (" T_ROFF
") v " VERSION
);
358 if (strcmp(opt
,"-help")==0) {
365 error("unrecognized option `%1'", *opt
);
373 set_label_spec("%1");
381 for (int i
= 0; i
< argc
; i
++) {
390 if (fflush(stdout
) < 0)
391 fatal("output error");
395 static void usage(FILE *stream
)
398 "Synopsis: %s [-benvCPRS] [-aN] [-cXYZ] [-fN] [-iXYZ] [-kX] [-lM,N] [-p file]\n"
399 " [-sXYZ] [-tN] [-BL.M] [files ...]\n",
403 static void possibly_load_default_database()
405 if (search_default
&& !default_database_loaded
) {
406 char *filename
= getenv("REFER");
408 database_list
.add_file(filename
);
410 database_list
.add_file(DEFAULT_INDEX
, 1);
411 default_database_loaded
= 1;
415 static int is_list(const string
&str
)
417 const char *start
= str
.contents();
418 const char *end
= start
+ str
.length();
419 while (end
> start
&& csspace(end
[-1]))
421 while (start
< end
&& csspace(*start
))
423 return end
- start
== 6 && memcmp(start
, "$LIST$", 6) == 0;
426 static void do_file(const char *filename
)
429 if ((fcp
= file_case::muxer(filename
)) == NULL
) {
430 assert(strcmp(filename
, "-"));
431 error("can't open `%1': %2", filename
, strerror(errno
));
435 current_filename
= filename
;
436 fprintf(outfp
, ".lf 1 %s\n", filename
);
442 int c
= fcp
->get_c();
444 if (line
.length() > 0)
448 if (invalid_input_char(c
))
449 error("invalid input character code %1", c
);
456 int len
= line
.length();
460 if (len
>= 2 && line
[0] == '.' && line
[1] == '[') {
461 int start_lineno
= current_lineno
;
462 int start_of_line
= 1;
465 string
pre(line
.contents() + 2, line
.length() - 3);
467 int c
= fcp
->get_c();
469 error_with_file_and_line(current_filename
, start_lineno
,
470 "missing `.]' line");
475 if (start_of_line
&& c
== '.') {
476 int d
= fcp
->get_c();
478 while ((d
= fcp
->get_c()) != '\n' && d
!= EOF
) {
479 if (invalid_input_char(d
))
480 error("invalid input character code %1", d
);
489 if (invalid_input_char(c
))
490 error("invalid input character code %1", c
);
493 start_of_line
= (c
== '\n');
496 output_pending_line();
500 error("found `$LIST$' but not accumulating references");
503 unsigned flags
= (accumulate
504 ? store_reference(str
)
505 : immediately_handle_reference(str
));
507 if (accumulate
&& outfp
== stdout
)
508 divert_to_temporary_file();
509 if (pending_line
.length() == 0) {
510 warning("can't attach citation to previous line");
513 pending_line
.set_length(pending_line
.length() - 1);
515 if (move_punctuation
)
516 split_punct(pending_line
, punct
);
517 int have_text
= pre
.length() > 0 || post
.length() > 0;
518 label_type lt
= label_type(flags
& ~(FORCE_LEFT_BRACKET
519 |FORCE_RIGHT_BRACKET
));
520 if ((flags
& FORCE_LEFT_BRACKET
) || !have_text
)
521 pending_line
+= PRE_LABEL_MARKER
;
523 char lm
= LABEL_MARKER
+ (int)lt
;
525 pending_line
+= post
;
526 if ((flags
& FORCE_RIGHT_BRACKET
) || !have_text
)
527 pending_line
+= POST_LABEL_MARKER
;
528 pending_line
+= punct
;
529 pending_line
+= '\n';
535 && line
[0] == '.' && line
[1] == 'l' && line
[2] == 'f'
536 && (compatible_flag
|| line
[3] == '\n' || line
[3] == ' ')) {
537 pending_lf_lines
+= line
;
539 if (interpret_lf_args(line
.contents() + 3))
542 else if (recognize_R1_R2
544 && line
[0] == '.' && line
[1] == 'R' && line
[2] == '1'
545 && (compatible_flag
|| line
[3] == '\n' || line
[3] == ' ')) {
547 int start_of_line
= 1;
548 int start_lineno
= current_lineno
;
550 int c
= fcp
->get_c();
551 if (c
!= EOF
&& start_of_line
)
553 if (start_of_line
&& c
== '.') {
559 if (compatible_flag
|| c
== ' ' || c
== '\n' || c
== EOF
) {
560 while (c
!= EOF
&& c
!= '\n')
578 error_with_file_and_line(current_filename
, start_lineno
,
579 "missing `.R2' line");
582 if (invalid_input_char(c
))
583 error("invalid input character code %1", int(c
));
586 start_of_line
= c
== '\n';
589 output_pending_line();
594 process_commands(line
, current_filename
, start_lineno
+ 1);
598 output_pending_line();
603 output_pending_line();
608 class label_processing_state
{
613 PENDING_LABEL_POST_PRE
,
616 label_type type
; // type of pending labels
617 int count
; // number of pending labels
618 reference
**rptr
; // pointer to next reference
619 int rcount
; // number of references left
621 int handle_pending(int c
);
623 label_processing_state(reference
**, int, FILE *);
624 ~label_processing_state();
628 static void output_pending_line()
630 if (label_in_text
&& !accumulate
&& ncitations
> 0) {
631 label_processing_state
state(citation
, ncitations
, outfp
);
632 int len
= pending_line
.length();
633 for (int i
= 0; i
< len
; i
++)
634 state
.process((unsigned char)(pending_line
[i
]));
637 put_string(pending_line
, outfp
);
638 pending_line
.clear();
639 if (pending_lf_lines
.length() > 0) {
640 put_string(pending_lf_lines
, outfp
);
641 pending_lf_lines
.clear();
644 immediately_output_references();
646 fprintf(outfp
, ".lf %d %s\n", current_lineno
, current_filename
);
651 static void split_punct(string
&line
, string
&punct
)
653 const char *start
= line
.contents();
654 const char *end
= start
+ line
.length();
655 const char *ptr
= start
;
656 const char *last_token_start
= 0;
660 last_token_start
= ptr
;
661 if (*ptr
== PRE_LABEL_MARKER
|| *ptr
== POST_LABEL_MARKER
662 || (*ptr
>= LABEL_MARKER
&& *ptr
< LABEL_MARKER
+ N_LABEL_TYPES
))
664 else if (!get_token(&ptr
, end
))
667 if (last_token_start
) {
668 const token_info
*ti
= lookup_token(last_token_start
, end
);
669 if (ti
->is_punct()) {
670 punct
.append(last_token_start
, end
- last_token_start
);
671 line
.set_length(last_token_start
- start
);
676 static void divert_to_temporary_file()
681 static void store_citation(reference
*ref
)
683 if (ncitations
>= citation_max
) {
685 citation
= new reference
*[citation_max
= 100];
687 reference
**old_citation
= citation
;
689 citation
= new reference
*[citation_max
];
690 memcpy(citation
, old_citation
, ncitations
*sizeof(reference
*));
691 a_delete old_citation
;
694 citation
[ncitations
++] = ref
;
697 static unsigned store_reference(const string
&str
)
699 if (reference_hash_table
== 0) {
700 reference_hash_table
= new reference
*[17];
701 hash_table_size
= 17;
702 for (int i
= 0; i
< hash_table_size
; i
++)
703 reference_hash_table
[i
] = 0;
706 reference
*ref
= make_reference(str
, &flags
);
707 ref
->compute_hash_code();
708 unsigned h
= ref
->hash();
710 for (ptr
= reference_hash_table
+ (h
% hash_table_size
);
712 ((ptr
== reference_hash_table
)
713 ? (ptr
= reference_hash_table
+ hash_table_size
- 1)
715 if (same_reference(**ptr
, *ref
))
718 if (ref
->is_merged())
719 warning("fields ignored because reference already used");
725 ref
->set_number(nreferences
);
727 ref
->pre_compute_label();
728 ref
->compute_sort_key();
729 if (nreferences
*2 >= hash_table_size
) {
731 reference
**old_table
= reference_hash_table
;
732 int old_size
= hash_table_size
;
733 hash_table_size
= next_size(hash_table_size
);
734 reference_hash_table
= new reference
*[hash_table_size
];
736 for (i
= 0; i
< hash_table_size
; i
++)
737 reference_hash_table
[i
] = 0;
738 for (i
= 0; i
< old_size
; i
++)
741 for (p
= (reference_hash_table
742 + (old_table
[i
]->hash() % hash_table_size
));
744 ((p
== reference_hash_table
)
745 ? (p
= reference_hash_table
+ hash_table_size
- 1)
758 unsigned immediately_handle_reference(const string
&str
)
761 reference
*ref
= make_reference(str
, &flags
);
762 ref
->set_number(nreferences
);
763 if (label_in_text
|| label_in_reference
) {
764 ref
->pre_compute_label();
765 ref
->immediate_compute_label();
772 static void immediately_output_references()
774 for (int i
= 0; i
< ncitations
; i
++) {
775 reference
*ref
= citation
[i
];
776 if (label_in_reference
) {
777 fputs(".ds [F ", outfp
);
778 const string
&label
= ref
->get_label(NORMAL_LABEL
);
779 if (label
.length() > 0
780 && (label
[0] == ' ' || label
[0] == '\\' || label
[0] == '"'))
782 put_string(label
, outfp
);
791 static void output_citation_group(reference
**v
, int n
, label_type type
,
794 if (sort_adjacent_labels
) {
795 // Do an insertion sort. Usually n will be very small.
796 for (int i
= 1; i
< n
; i
++) {
797 int num
= v
[i
]->get_number();
798 reference
*temp
= v
[i
];
800 for (j
= i
- 1; j
>= 0 && v
[j
]->get_number() > num
; j
--)
805 // This messes up if !accumulate.
806 if (accumulate
&& n
> 1) {
809 for (int i
= 1; i
< n
; i
++)
810 if (v
[i
]->get_label(type
) != v
[i
- 1]->get_label(type
))
815 for (int i
= 0; i
< n
; i
++) {
816 int nmerged
= v
[i
]->merge_labels(v
+ i
+ 1, n
- i
- 1, type
, merged_label
);
818 put_string(merged_label
, fp
);
822 put_string(v
[i
]->get_label(type
), fp
);
824 put_string(sep_label
, fp
);
828 label_processing_state::label_processing_state(reference
**p
, int n
, FILE *f
)
829 : state(NORMAL
), count(0), rptr(p
), rcount(n
), fp(f
)
833 label_processing_state::~label_processing_state()
835 int handled
= handle_pending(EOF
);
840 int label_processing_state::handle_pending(int c
)
846 if (c
== POST_LABEL_MARKER
) {
847 state
= PENDING_LABEL_POST
;
851 output_citation_group(rptr
, count
, type
, fp
);
857 case PENDING_LABEL_POST
:
858 if (c
== PRE_LABEL_MARKER
) {
859 state
= PENDING_LABEL_POST_PRE
;
863 output_citation_group(rptr
, count
, type
, fp
);
866 put_string(post_label
, fp
);
870 case PENDING_LABEL_POST_PRE
:
871 if (c
>= LABEL_MARKER
872 && c
< LABEL_MARKER
+ N_LABEL_TYPES
873 && c
- LABEL_MARKER
== type
) {
875 state
= PENDING_LABEL
;
879 output_citation_group(rptr
, count
, type
, fp
);
882 put_string(sep_label
, fp
);
887 if (c
== PRE_LABEL_MARKER
) {
888 put_string(sep_label
, fp
);
893 put_string(post_label
, fp
);
901 void label_processing_state::process(int c
)
903 if (handle_pending(c
))
905 assert(state
== NORMAL
);
907 case PRE_LABEL_MARKER
:
908 put_string(pre_label
, fp
);
911 case POST_LABEL_MARKER
:
912 state
= PENDING_POST
;
915 case LABEL_MARKER
+ 1:
917 state
= PENDING_LABEL
;
918 type
= label_type(c
- LABEL_MARKER
);
927 static int rcompare(const void *p1
, const void *p2
)
929 return compare_reference(**(reference
**)p1
, **(reference
**)p2
);
932 void output_references()
935 if (!hash_table_size
) {
936 if (have_bibliography
)
937 error("nothing to reference (probably `bibliography' before `sort')");
942 if (nreferences
> 0) {
945 for (i
= 0; i
< hash_table_size
; i
++)
946 if (reference_hash_table
[i
] != 0)
947 reference_hash_table
[j
++] = reference_hash_table
[i
];
948 assert(j
== nreferences
);
949 for (; j
< hash_table_size
; j
++)
950 reference_hash_table
[j
] = 0;
951 qsort(reference_hash_table
, nreferences
, sizeof(reference
*), &rcompare
);
952 for (i
= 0; i
< nreferences
; i
++)
953 reference_hash_table
[i
]->set_number(i
);
954 compute_labels(reference_hash_table
, nreferences
);
956 if (outfp
!= stdout
) {
959 label_processing_state
state(citation
, ncitations
, stdout
);
961 while ((c
= getc(outfp
)) != EOF
)
968 if (nreferences
> 0) {
969 fputs(".]<\n", outfp
);
970 for (int i
= 0; i
< nreferences
; i
++) {
971 if (sort_fields
.length() > 0)
972 reference_hash_table
[i
]->print_sort_key_comment(outfp
);
973 if (label_in_reference
) {
974 fputs(".ds [F ", outfp
);
975 const string
&label
= reference_hash_table
[i
]->get_label(NORMAL_LABEL
);
976 if (label
.length() > 0
977 && (label
[0] == ' ' || label
[0] == '\\' || label
[0] == '"'))
979 put_string(label
, outfp
);
982 reference_hash_table
[i
]->output(outfp
);
983 delete reference_hash_table
[i
];
984 reference_hash_table
[i
] = 0;
986 fputs(".]>\n", outfp
);
992 static reference
*find_reference(const char *query
, int query_len
)
994 // This is so that error messages look better.
995 while (query_len
> 0 && csspace(query
[query_len
- 1]))
998 for (int i
= 0; i
< query_len
; i
++)
999 str
+= query
[i
] == '\n' ? ' ' : query
[i
];
1001 possibly_load_default_database();
1002 search_list_iterator
iter(&database_list
, str
.contents());
1006 if (!iter
.next(&start
, &len
, &rid
)) {
1007 error("no matches for `%1'", str
.contents());
1010 const char *end
= start
+ len
;
1011 while (start
< end
) {
1014 while (start
< end
&& *start
++ != '\n')
1018 error("found a reference for `%1' but it didn't contain any fields",
1022 reference
*result
= new reference(start
, end
- start
, &rid
);
1023 if (iter
.next(&start
, &len
, &rid
))
1024 warning("multiple matches for `%1'", str
.contents());
1028 static reference
*make_reference(const string
&str
, unsigned *flagsp
)
1030 const char *start
= str
.contents();
1031 const char *end
= start
+ str
.length();
1032 const char *ptr
= start
;
1036 while (ptr
< end
&& *ptr
++ != '\n')
1040 for (; start
< ptr
; start
++) {
1042 *flagsp
= (SHORT_LABEL
| (*flagsp
& (FORCE_RIGHT_BRACKET
1043 | FORCE_LEFT_BRACKET
)));
1044 else if (*start
== '[')
1045 *flagsp
|= FORCE_LEFT_BRACKET
;
1046 else if (*start
== ']')
1047 *flagsp
|= FORCE_RIGHT_BRACKET
;
1048 else if (!csspace(*start
))
1052 error("empty reference");
1053 return new reference
;
1055 reference
*database_ref
= 0;
1057 database_ref
= find_reference(start
, ptr
- start
);
1058 reference
*inline_ref
= 0;
1060 inline_ref
= new reference(ptr
, end
- ptr
);
1063 database_ref
->merge(*inline_ref
);
1065 return database_ref
;
1070 else if (database_ref
)
1071 return database_ref
;
1073 return new reference
;
1076 static void do_ref(const string
&str
)
1079 (void)store_reference(str
);
1081 (void)immediately_handle_reference(str
);
1082 immediately_output_references();
1086 static void trim_blanks(string
&str
)
1088 const char *start
= str
.contents();
1089 const char *end
= start
+ str
.length();
1090 while (end
> start
&& end
[-1] != '\n' && csspace(end
[-1]))
1092 str
.set_length(end
- start
);
1095 void do_bib(const char *filename
)
1098 if (strcmp(filename
, "-") == 0)
1102 fp
= fopen(filename
, "r");
1104 error("can't open `%1': %2", filename
, strerror(errno
));
1107 current_filename
= filename
;
1110 START
, MIDDLE
, BODY
, BODY_START
, BODY_BLANK
, BODY_DOT
1117 if (invalid_input_char(c
)) {
1118 error("invalid input character code %1", c
);
1146 else if (csspace(c
)) {
1161 else if (csspace(c
))
1176 state
= c
== '\n' ? BODY_START
: BODY
;
1205 // from the Dragon Book
1207 unsigned hash_string(const char *s
, int len
) // FIXME Torek's hash (lib-roff!)
1209 const char *end
= s
+ len
;
1214 if ((g
= h
& 0xf0000000) != 0) {
1222 int next_size(int n
) // FIXME PRIME LOOKUP -> lib-roff!!
1224 static const int table_sizes
[] = {
1225 101, 503, 1009, 2003, 3001, 4001, 5003, 10007, 20011, 40009,
1226 80021, 160001, 500009, 1000003, 2000003, 4000037, 8000009,
1227 16000057, 32000011, 64000031, 128000003, 0
1231 for (p
= table_sizes
; *p
<= n
&& *p
!= 0; p
++)