Update to groff 1.19.2.
[dragonfly.git] / contrib / groff-1.19 / src / preproc / refer / label.y
blobd76f95ef3c33929bec85cdb420e2257a3e80d8bf
1 /* -*- C++ -*-
2 Copyright (C) 1989, 1990, 1991, 1992, 2000, 2004
3 Free Software Foundation, Inc.
4 Written by James Clark (jjc@jclark.com)
6 This file is part of groff.
8 groff is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
11 version.
13 groff is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License along
19 with groff; see the file COPYING. If not, write to the Free Software
20 Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
24 #include "refer.h"
25 #include "refid.h"
26 #include "ref.h"
27 #include "token.h"
29 int yylex();
30 void yyerror(const char *);
31 int yyparse();
33 static const char *format_serial(char c, int n);
35 struct label_info {
36 int start;
37 int length;
38 int count;
39 int total;
40 label_info(const string &);
43 label_info *lookup_label(const string &label);
45 struct expression {
46 enum {
47 // Does the tentative label depend on the reference?
48 CONTAINS_VARIABLE = 01,
49 CONTAINS_STAR = 02,
50 CONTAINS_FORMAT = 04,
51 CONTAINS_AT = 010
53 virtual ~expression() { }
54 virtual void evaluate(int, const reference &, string &,
55 substring_position &) = 0;
56 virtual unsigned analyze() { return 0; }
59 class at_expr : public expression {
60 public:
61 at_expr() { }
62 void evaluate(int, const reference &, string &, substring_position &);
63 unsigned analyze() { return CONTAINS_VARIABLE|CONTAINS_AT; }
66 class format_expr : public expression {
67 char type;
68 int width;
69 int first_number;
70 public:
71 format_expr(char c, int w = 0, int f = 1)
72 : type(c), width(w), first_number(f) { }
73 void evaluate(int, const reference &, string &, substring_position &);
74 unsigned analyze() { return CONTAINS_FORMAT; }
77 class field_expr : public expression {
78 int number;
79 char name;
80 public:
81 field_expr(char nm, int num) : number(num), name(nm) { }
82 void evaluate(int, const reference &, string &, substring_position &);
83 unsigned analyze() { return CONTAINS_VARIABLE; }
86 class literal_expr : public expression {
87 string s;
88 public:
89 literal_expr(const char *ptr, int len) : s(ptr, len) { }
90 void evaluate(int, const reference &, string &, substring_position &);
93 class unary_expr : public expression {
94 protected:
95 expression *expr;
96 public:
97 unary_expr(expression *e) : expr(e) { }
98 ~unary_expr() { delete expr; }
99 void evaluate(int, const reference &, string &, substring_position &) = 0;
100 unsigned analyze() { return expr ? expr->analyze() : 0; }
103 // This caches the analysis of an expression.
105 class analyzed_expr : public unary_expr {
106 unsigned flags;
107 public:
108 analyzed_expr(expression *);
109 void evaluate(int, const reference &, string &, substring_position &);
110 unsigned analyze() { return flags; }
113 class star_expr : public unary_expr {
114 public:
115 star_expr(expression *e) : unary_expr(e) { }
116 void evaluate(int, const reference &, string &, substring_position &);
117 unsigned analyze() {
118 return ((expr ? (expr->analyze() & ~CONTAINS_VARIABLE) : 0)
119 | CONTAINS_STAR);
123 typedef void map_func(const char *, const char *, string &);
125 class map_expr : public unary_expr {
126 map_func *func;
127 public:
128 map_expr(expression *e, map_func *f) : unary_expr(e), func(f) { }
129 void evaluate(int, const reference &, string &, substring_position &);
132 typedef const char *extractor_func(const char *, const char *, const char **);
134 class extractor_expr : public unary_expr {
135 int part;
136 extractor_func *func;
137 public:
138 enum { BEFORE = +1, MATCH = 0, AFTER = -1 };
139 extractor_expr(expression *e, extractor_func *f, int pt)
140 : unary_expr(e), part(pt), func(f) { }
141 void evaluate(int, const reference &, string &, substring_position &);
144 class truncate_expr : public unary_expr {
145 int n;
146 public:
147 truncate_expr(expression *e, int i) : unary_expr(e), n(i) { }
148 void evaluate(int, const reference &, string &, substring_position &);
151 class separator_expr : public unary_expr {
152 public:
153 separator_expr(expression *e) : unary_expr(e) { }
154 void evaluate(int, const reference &, string &, substring_position &);
157 class binary_expr : public expression {
158 protected:
159 expression *expr1;
160 expression *expr2;
161 public:
162 binary_expr(expression *e1, expression *e2) : expr1(e1), expr2(e2) { }
163 ~binary_expr() { delete expr1; delete expr2; }
164 void evaluate(int, const reference &, string &, substring_position &) = 0;
165 unsigned analyze() {
166 return (expr1 ? expr1->analyze() : 0) | (expr2 ? expr2->analyze() : 0);
170 class alternative_expr : public binary_expr {
171 public:
172 alternative_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
173 void evaluate(int, const reference &, string &, substring_position &);
176 class list_expr : public binary_expr {
177 public:
178 list_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
179 void evaluate(int, const reference &, string &, substring_position &);
182 class substitute_expr : public binary_expr {
183 public:
184 substitute_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
185 void evaluate(int, const reference &, string &, substring_position &);
188 class ternary_expr : public expression {
189 protected:
190 expression *expr1;
191 expression *expr2;
192 expression *expr3;
193 public:
194 ternary_expr(expression *e1, expression *e2, expression *e3)
195 : expr1(e1), expr2(e2), expr3(e3) { }
196 ~ternary_expr() { delete expr1; delete expr2; delete expr3; }
197 void evaluate(int, const reference &, string &, substring_position &) = 0;
198 unsigned analyze() {
199 return ((expr1 ? expr1->analyze() : 0)
200 | (expr2 ? expr2->analyze() : 0)
201 | (expr3 ? expr3->analyze() : 0));
205 class conditional_expr : public ternary_expr {
206 public:
207 conditional_expr(expression *e1, expression *e2, expression *e3)
208 : ternary_expr(e1, e2, e3) { }
209 void evaluate(int, const reference &, string &, substring_position &);
212 static expression *parsed_label = 0;
213 static expression *parsed_date_label = 0;
214 static expression *parsed_short_label = 0;
216 static expression *parse_result;
218 string literals;
222 %union {
223 int num;
224 expression *expr;
225 struct { int ndigits; int val; } dig;
226 struct { int start; int len; } str;
229 /* uppercase or lowercase letter */
230 %token <num> TOKEN_LETTER
231 /* literal characters */
232 %token <str> TOKEN_LITERAL
233 /* digit */
234 %token <num> TOKEN_DIGIT
236 %type <expr> conditional
237 %type <expr> alternative
238 %type <expr> list
239 %type <expr> string
240 %type <expr> substitute
241 %type <expr> optional_conditional
242 %type <num> number
243 %type <dig> digits
244 %type <num> optional_number
245 %type <num> flag
249 expr:
250 optional_conditional
251 { parse_result = ($1 ? new analyzed_expr($1) : 0); }
254 conditional:
255 alternative
256 { $$ = $1; }
257 | alternative '?' optional_conditional ':' conditional
258 { $$ = new conditional_expr($1, $3, $5); }
261 optional_conditional:
262 /* empty */
263 { $$ = 0; }
264 | conditional
265 { $$ = $1; }
268 alternative:
269 list
270 { $$ = $1; }
271 | alternative '|' list
272 { $$ = new alternative_expr($1, $3); }
273 | alternative '&' list
274 { $$ = new conditional_expr($1, $3, 0); }
277 list:
278 substitute
279 { $$ = $1; }
280 | list substitute
281 { $$ = new list_expr($1, $2); }
284 substitute:
285 string
286 { $$ = $1; }
287 | substitute '~' string
288 { $$ = new substitute_expr($1, $3); }
291 string:
293 { $$ = new at_expr; }
294 | TOKEN_LITERAL
296 $$ = new literal_expr(literals.contents() + $1.start,
297 $1.len);
299 | TOKEN_LETTER
300 { $$ = new field_expr($1, 0); }
301 | TOKEN_LETTER number
302 { $$ = new field_expr($1, $2 - 1); }
303 | '%' TOKEN_LETTER
305 switch ($2) {
306 case 'I':
307 case 'i':
308 case 'A':
309 case 'a':
310 $$ = new format_expr($2);
311 break;
312 default:
313 command_error("unrecognized format `%1'", char($2));
314 $$ = new format_expr('a');
315 break;
319 | '%' digits
321 $$ = new format_expr('0', $2.ndigits, $2.val);
323 | string '.' flag TOKEN_LETTER optional_number
325 switch ($4) {
326 case 'l':
327 $$ = new map_expr($1, lowercase);
328 break;
329 case 'u':
330 $$ = new map_expr($1, uppercase);
331 break;
332 case 'c':
333 $$ = new map_expr($1, capitalize);
334 break;
335 case 'r':
336 $$ = new map_expr($1, reverse_name);
337 break;
338 case 'a':
339 $$ = new map_expr($1, abbreviate_name);
340 break;
341 case 'y':
342 $$ = new extractor_expr($1, find_year, $3);
343 break;
344 case 'n':
345 $$ = new extractor_expr($1, find_last_name, $3);
346 break;
347 default:
348 $$ = $1;
349 command_error("unknown function `%1'", char($4));
350 break;
354 | string '+' number
355 { $$ = new truncate_expr($1, $3); }
356 | string '-' number
357 { $$ = new truncate_expr($1, -$3); }
358 | string '*'
359 { $$ = new star_expr($1); }
360 | '(' optional_conditional ')'
361 { $$ = $2; }
362 | '<' optional_conditional '>'
363 { $$ = new separator_expr($2); }
366 optional_number:
367 /* empty */
368 { $$ = -1; }
369 | number
370 { $$ = $1; }
373 number:
374 TOKEN_DIGIT
375 { $$ = $1; }
376 | number TOKEN_DIGIT
377 { $$ = $1*10 + $2; }
380 digits:
381 TOKEN_DIGIT
382 { $$.ndigits = 1; $$.val = $1; }
383 | digits TOKEN_DIGIT
384 { $$.ndigits = $1.ndigits + 1; $$.val = $1.val*10 + $2; }
388 flag:
389 /* empty */
390 { $$ = 0; }
391 | '+'
392 { $$ = 1; }
393 | '-'
394 { $$ = -1; }
399 /* bison defines const to be empty unless __STDC__ is defined, which it
400 isn't under cfront */
402 #ifdef const
403 #undef const
404 #endif
406 const char *spec_ptr;
407 const char *spec_end;
408 const char *spec_cur;
410 static char uppercase_array[] = {
411 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
412 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
413 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
414 'Y', 'Z',
417 static char lowercase_array[] = {
418 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h',
419 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p',
420 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
421 'y', 'z',
424 int yylex()
426 while (spec_ptr < spec_end && csspace(*spec_ptr))
427 spec_ptr++;
428 spec_cur = spec_ptr;
429 if (spec_ptr >= spec_end)
430 return 0;
431 unsigned char c = *spec_ptr++;
432 if (csalpha(c)) {
433 yylval.num = c;
434 return TOKEN_LETTER;
436 if (csdigit(c)) {
437 yylval.num = c - '0';
438 return TOKEN_DIGIT;
440 if (c == '\'') {
441 yylval.str.start = literals.length();
442 for (; spec_ptr < spec_end; spec_ptr++) {
443 if (*spec_ptr == '\'') {
444 if (++spec_ptr < spec_end && *spec_ptr == '\'')
445 literals += '\'';
446 else {
447 yylval.str.len = literals.length() - yylval.str.start;
448 return TOKEN_LITERAL;
451 else
452 literals += *spec_ptr;
454 yylval.str.len = literals.length() - yylval.str.start;
455 return TOKEN_LITERAL;
457 return c;
460 int set_label_spec(const char *label_spec)
462 spec_cur = spec_ptr = label_spec;
463 spec_end = strchr(label_spec, '\0');
464 literals.clear();
465 if (yyparse())
466 return 0;
467 delete parsed_label;
468 parsed_label = parse_result;
469 return 1;
472 int set_date_label_spec(const char *label_spec)
474 spec_cur = spec_ptr = label_spec;
475 spec_end = strchr(label_spec, '\0');
476 literals.clear();
477 if (yyparse())
478 return 0;
479 delete parsed_date_label;
480 parsed_date_label = parse_result;
481 return 1;
484 int set_short_label_spec(const char *label_spec)
486 spec_cur = spec_ptr = label_spec;
487 spec_end = strchr(label_spec, '\0');
488 literals.clear();
489 if (yyparse())
490 return 0;
491 delete parsed_short_label;
492 parsed_short_label = parse_result;
493 return 1;
496 void yyerror(const char *message)
498 if (spec_cur < spec_end)
499 command_error("label specification %1 before `%2'", message, spec_cur);
500 else
501 command_error("label specification %1 at end of string",
502 message, spec_cur);
505 void at_expr::evaluate(int tentative, const reference &ref,
506 string &result, substring_position &)
508 if (tentative)
509 ref.canonicalize_authors(result);
510 else {
511 const char *end, *start = ref.get_authors(&end);
512 if (start)
513 result.append(start, end - start);
517 void format_expr::evaluate(int tentative, const reference &ref,
518 string &result, substring_position &)
520 if (tentative)
521 return;
522 const label_info *lp = ref.get_label_ptr();
523 int num = lp == 0 ? ref.get_number() : lp->count;
524 if (type != '0')
525 result += format_serial(type, num + 1);
526 else {
527 const char *ptr = i_to_a(num + first_number);
528 int pad = width - strlen(ptr);
529 while (--pad >= 0)
530 result += '0';
531 result += ptr;
535 static const char *format_serial(char c, int n)
537 assert(n > 0);
538 static char buf[128]; // more than enough.
539 switch (c) {
540 case 'i':
541 case 'I':
543 char *p = buf;
544 // troff uses z and w to represent 10000 and 5000 in Roman
545 // numerals; I can find no historical basis for this usage
546 const char *s = c == 'i' ? "zwmdclxvi" : "ZWMDCLXVI";
547 if (n >= 40000)
548 return i_to_a(n);
549 while (n >= 10000) {
550 *p++ = s[0];
551 n -= 10000;
553 for (int i = 1000; i > 0; i /= 10, s += 2) {
554 int m = n/i;
555 n -= m*i;
556 switch (m) {
557 case 3:
558 *p++ = s[2];
559 /* falls through */
560 case 2:
561 *p++ = s[2];
562 /* falls through */
563 case 1:
564 *p++ = s[2];
565 break;
566 case 4:
567 *p++ = s[2];
568 *p++ = s[1];
569 break;
570 case 8:
571 *p++ = s[1];
572 *p++ = s[2];
573 *p++ = s[2];
574 *p++ = s[2];
575 break;
576 case 7:
577 *p++ = s[1];
578 *p++ = s[2];
579 *p++ = s[2];
580 break;
581 case 6:
582 *p++ = s[1];
583 *p++ = s[2];
584 break;
585 case 5:
586 *p++ = s[1];
587 break;
588 case 9:
589 *p++ = s[2];
590 *p++ = s[0];
593 *p = 0;
594 break;
596 case 'a':
597 case 'A':
599 char *p = buf;
600 // this is derived from troff/reg.c
601 while (n > 0) {
602 int d = n % 26;
603 if (d == 0)
604 d = 26;
605 n -= d;
606 n /= 26;
607 *p++ = c == 'a' ? lowercase_array[d - 1] :
608 uppercase_array[d - 1];
610 *p-- = 0;
611 // Reverse it.
612 char *q = buf;
613 while (q < p) {
614 char temp = *q;
615 *q = *p;
616 *p = temp;
617 --p;
618 ++q;
620 break;
622 default:
623 assert(0);
625 return buf;
628 void field_expr::evaluate(int, const reference &ref,
629 string &result, substring_position &)
631 const char *end;
632 const char *start = ref.get_field(name, &end);
633 if (start) {
634 start = nth_field(number, start, &end);
635 if (start)
636 result.append(start, end - start);
640 void literal_expr::evaluate(int, const reference &,
641 string &result, substring_position &)
643 result += s;
646 analyzed_expr::analyzed_expr(expression *e)
647 : unary_expr(e), flags(e ? e->analyze() : 0)
651 void analyzed_expr::evaluate(int tentative, const reference &ref,
652 string &result, substring_position &pos)
654 if (expr)
655 expr->evaluate(tentative, ref, result, pos);
658 void star_expr::evaluate(int tentative, const reference &ref,
659 string &result, substring_position &pos)
661 const label_info *lp = ref.get_label_ptr();
662 if (!tentative
663 && (lp == 0 || lp->total > 1)
664 && expr)
665 expr->evaluate(tentative, ref, result, pos);
668 void separator_expr::evaluate(int tentative, const reference &ref,
669 string &result, substring_position &pos)
671 int start_length = result.length();
672 int is_first = pos.start < 0;
673 if (expr)
674 expr->evaluate(tentative, ref, result, pos);
675 if (is_first) {
676 pos.start = start_length;
677 pos.length = result.length() - start_length;
681 void map_expr::evaluate(int tentative, const reference &ref,
682 string &result, substring_position &)
684 if (expr) {
685 string temp;
686 substring_position temp_pos;
687 expr->evaluate(tentative, ref, temp, temp_pos);
688 (*func)(temp.contents(), temp.contents() + temp.length(), result);
692 void extractor_expr::evaluate(int tentative, const reference &ref,
693 string &result, substring_position &)
695 if (expr) {
696 string temp;
697 substring_position temp_pos;
698 expr->evaluate(tentative, ref, temp, temp_pos);
699 const char *end, *start = (*func)(temp.contents(),
700 temp.contents() + temp.length(),
701 &end);
702 switch (part) {
703 case BEFORE:
704 if (start)
705 result.append(temp.contents(), start - temp.contents());
706 else
707 result += temp;
708 break;
709 case MATCH:
710 if (start)
711 result.append(start, end - start);
712 break;
713 case AFTER:
714 if (start)
715 result.append(end, temp.contents() + temp.length() - end);
716 break;
717 default:
718 assert(0);
723 static void first_part(int len, const char *ptr, const char *end,
724 string &result)
726 for (;;) {
727 const char *token_start = ptr;
728 if (!get_token(&ptr, end))
729 break;
730 const token_info *ti = lookup_token(token_start, ptr);
731 int counts = ti->sortify_non_empty(token_start, ptr);
732 if (counts && --len < 0)
733 break;
734 if (counts || ti->is_accent())
735 result.append(token_start, ptr - token_start);
739 static void last_part(int len, const char *ptr, const char *end,
740 string &result)
742 const char *start = ptr;
743 int count = 0;
744 for (;;) {
745 const char *token_start = ptr;
746 if (!get_token(&ptr, end))
747 break;
748 const token_info *ti = lookup_token(token_start, ptr);
749 if (ti->sortify_non_empty(token_start, ptr))
750 count++;
752 ptr = start;
753 int skip = count - len;
754 if (skip > 0) {
755 for (;;) {
756 const char *token_start = ptr;
757 if (!get_token(&ptr, end))
758 assert(0);
759 const token_info *ti = lookup_token(token_start, ptr);
760 if (ti->sortify_non_empty(token_start, ptr) && --skip < 0) {
761 ptr = token_start;
762 break;
766 first_part(len, ptr, end, result);
769 void truncate_expr::evaluate(int tentative, const reference &ref,
770 string &result, substring_position &)
772 if (expr) {
773 string temp;
774 substring_position temp_pos;
775 expr->evaluate(tentative, ref, temp, temp_pos);
776 const char *start = temp.contents();
777 const char *end = start + temp.length();
778 if (n > 0)
779 first_part(n, start, end, result);
780 else if (n < 0)
781 last_part(-n, start, end, result);
785 void alternative_expr::evaluate(int tentative, const reference &ref,
786 string &result, substring_position &pos)
788 int start_length = result.length();
789 if (expr1)
790 expr1->evaluate(tentative, ref, result, pos);
791 if (result.length() == start_length && expr2)
792 expr2->evaluate(tentative, ref, result, pos);
795 void list_expr::evaluate(int tentative, const reference &ref,
796 string &result, substring_position &pos)
798 if (expr1)
799 expr1->evaluate(tentative, ref, result, pos);
800 if (expr2)
801 expr2->evaluate(tentative, ref, result, pos);
804 void substitute_expr::evaluate(int tentative, const reference &ref,
805 string &result, substring_position &pos)
807 int start_length = result.length();
808 if (expr1)
809 expr1->evaluate(tentative, ref, result, pos);
810 if (result.length() > start_length && result[result.length() - 1] == '-') {
811 // ought to see if pos covers the -
812 result.set_length(result.length() - 1);
813 if (expr2)
814 expr2->evaluate(tentative, ref, result, pos);
818 void conditional_expr::evaluate(int tentative, const reference &ref,
819 string &result, substring_position &pos)
821 string temp;
822 substring_position temp_pos;
823 if (expr1)
824 expr1->evaluate(tentative, ref, temp, temp_pos);
825 if (temp.length() > 0) {
826 if (expr2)
827 expr2->evaluate(tentative, ref, result, pos);
829 else {
830 if (expr3)
831 expr3->evaluate(tentative, ref, result, pos);
835 void reference::pre_compute_label()
837 if (parsed_label != 0
838 && (parsed_label->analyze() & expression::CONTAINS_VARIABLE)) {
839 label.clear();
840 substring_position temp_pos;
841 parsed_label->evaluate(1, *this, label, temp_pos);
842 label_ptr = lookup_label(label);
846 void reference::compute_label()
848 label.clear();
849 if (parsed_label)
850 parsed_label->evaluate(0, *this, label, separator_pos);
851 if (short_label_flag && parsed_short_label)
852 parsed_short_label->evaluate(0, *this, short_label, short_separator_pos);
853 if (date_as_label) {
854 string new_date;
855 if (parsed_date_label) {
856 substring_position temp_pos;
857 parsed_date_label->evaluate(0, *this, new_date, temp_pos);
859 set_date(new_date);
861 if (label_ptr)
862 label_ptr->count += 1;
865 void reference::immediate_compute_label()
867 if (label_ptr)
868 label_ptr->total = 2; // force use of disambiguator
869 compute_label();
872 int reference::merge_labels(reference **v, int n, label_type type,
873 string &result)
875 if (abbreviate_label_ranges)
876 return merge_labels_by_number(v, n, type, result);
877 else
878 return merge_labels_by_parts(v, n, type, result);
881 int reference::merge_labels_by_number(reference **v, int n, label_type type,
882 string &result)
884 if (n <= 1)
885 return 0;
886 int num = get_number();
887 // Only merge three or more labels.
888 if (v[0]->get_number() != num + 1
889 || v[1]->get_number() != num + 2)
890 return 0;
891 int i;
892 for (i = 2; i < n; i++)
893 if (v[i]->get_number() != num + i + 1)
894 break;
895 result = get_label(type);
896 result += label_range_indicator;
897 result += v[i - 1]->get_label(type);
898 return i;
901 const substring_position &reference::get_separator_pos(label_type type) const
903 if (type == SHORT_LABEL && short_label_flag)
904 return short_separator_pos;
905 else
906 return separator_pos;
909 const string &reference::get_label(label_type type) const
911 if (type == SHORT_LABEL && short_label_flag)
912 return short_label;
913 else
914 return label;
917 int reference::merge_labels_by_parts(reference **v, int n, label_type type,
918 string &result)
920 if (n <= 0)
921 return 0;
922 const string &lb = get_label(type);
923 const substring_position &sp = get_separator_pos(type);
924 if (sp.start < 0
925 || sp.start != v[0]->get_separator_pos(type).start
926 || memcmp(lb.contents(), v[0]->get_label(type).contents(),
927 sp.start) != 0)
928 return 0;
929 result = lb;
930 int i = 0;
931 do {
932 result += separate_label_second_parts;
933 const substring_position &s = v[i]->get_separator_pos(type);
934 int sep_end_pos = s.start + s.length;
935 result.append(v[i]->get_label(type).contents() + sep_end_pos,
936 v[i]->get_label(type).length() - sep_end_pos);
937 } while (++i < n
938 && sp.start == v[i]->get_separator_pos(type).start
939 && memcmp(lb.contents(), v[i]->get_label(type).contents(),
940 sp.start) == 0);
941 return i;
944 string label_pool;
946 label_info::label_info(const string &s)
947 : start(label_pool.length()), length(s.length()), count(0), total(1)
949 label_pool += s;
952 static label_info **label_table = 0;
953 static int label_table_size = 0;
954 static int label_table_used = 0;
956 label_info *lookup_label(const string &label)
958 if (label_table == 0) {
959 label_table = new label_info *[17];
960 label_table_size = 17;
961 for (int i = 0; i < 17; i++)
962 label_table[i] = 0;
964 unsigned h = hash_string(label.contents(), label.length()) % label_table_size;
965 label_info **ptr;
966 for (ptr = label_table + h;
967 *ptr != 0;
968 (ptr == label_table)
969 ? (ptr = label_table + label_table_size - 1)
970 : ptr--)
971 if ((*ptr)->length == label.length()
972 && memcmp(label_pool.contents() + (*ptr)->start, label.contents(),
973 label.length()) == 0) {
974 (*ptr)->total += 1;
975 return *ptr;
977 label_info *result = *ptr = new label_info(label);
978 if (++label_table_used * 2 > label_table_size) {
979 // Rehash the table.
980 label_info **old_table = label_table;
981 int old_size = label_table_size;
982 label_table_size = next_size(label_table_size);
983 label_table = new label_info *[label_table_size];
984 int i;
985 for (i = 0; i < label_table_size; i++)
986 label_table[i] = 0;
987 for (i = 0; i < old_size; i++)
988 if (old_table[i]) {
989 h = hash_string(label_pool.contents() + old_table[i]->start,
990 old_table[i]->length);
991 label_info **p;
992 for (p = label_table + (h % label_table_size);
993 *p != 0;
994 (p == label_table)
995 ? (p = label_table + label_table_size - 1)
996 : --p)
998 *p = old_table[i];
1000 a_delete old_table;
1002 return result;
1005 void clear_labels()
1007 for (int i = 0; i < label_table_size; i++) {
1008 delete label_table[i];
1009 label_table[i] = 0;
1011 label_table_used = 0;
1012 label_pool.clear();
1015 static void consider_authors(reference **start, reference **end, int i);
1017 void compute_labels(reference **v, int n)
1019 if (parsed_label
1020 && (parsed_label->analyze() & expression::CONTAINS_AT)
1021 && sort_fields.length() >= 2
1022 && sort_fields[0] == 'A'
1023 && sort_fields[1] == '+')
1024 consider_authors(v, v + n, 0);
1025 for (int i = 0; i < n; i++)
1026 v[i]->compute_label();
1030 /* A reference with a list of authors <A0,A1,...,AN> _needs_ author i
1031 where 0 <= i <= N if there exists a reference with a list of authors
1032 <B0,B1,...,BM> such that <A0,A1,...,AN> != <B0,B1,...,BM> and M >= i
1033 and Aj = Bj for 0 <= j < i. In this case if we can't say ``A0,
1034 A1,...,A(i-1) et al'' because this would match both <A0,A1,...,AN> and
1035 <B0,B1,...,BM>. If a reference needs author i we only have to call
1036 need_author(j) for some j >= i such that the reference also needs
1037 author j. */
1039 /* This function handles 2 tasks:
1040 determine which authors are needed (cannot be elided with et al.);
1041 determine which authors can have only last names in the labels.
1043 References >= start and < end have the same first i author names.
1044 Also they're sorted by A+. */
1046 static void consider_authors(reference **start, reference **end, int i)
1048 if (start >= end)
1049 return;
1050 reference **p = start;
1051 if (i >= (*p)->get_nauthors()) {
1052 for (++p; p < end && i >= (*p)->get_nauthors(); p++)
1054 if (p < end && i > 0) {
1055 // If we have an author list <A B C> and an author list <A B C D>,
1056 // then both lists need C.
1057 for (reference **q = start; q < end; q++)
1058 (*q)->need_author(i - 1);
1060 start = p;
1062 while (p < end) {
1063 reference **last_name_start = p;
1064 reference **name_start = p;
1065 for (++p;
1066 p < end && i < (*p)->get_nauthors()
1067 && same_author_last_name(**last_name_start, **p, i);
1068 p++) {
1069 if (!same_author_name(**name_start, **p, i)) {
1070 consider_authors(name_start, p, i + 1);
1071 name_start = p;
1074 consider_authors(name_start, p, i + 1);
1075 if (last_name_start == name_start) {
1076 for (reference **q = last_name_start; q < p; q++)
1077 (*q)->set_last_name_unambiguous(i);
1079 // If we have an author list <A B C D> and <A B C E>, then the lists
1080 // need author D and E respectively.
1081 if (name_start > start || p < end) {
1082 for (reference **q = last_name_start; q < p; q++)
1083 (*q)->need_author(i);
1088 int same_author_last_name(const reference &r1, const reference &r2, int n)
1090 const char *ae1;
1091 const char *as1 = r1.get_sort_field(0, n, 0, &ae1);
1092 const char *ae2;
1093 const char *as2 = r2.get_sort_field(0, n, 0, &ae2);
1094 if (!as1 && !as2) return 1; // they are the same
1095 if (!as1 || !as2) return 0;
1096 return ae1 - as1 == ae2 - as2 && memcmp(as1, as2, ae1 - as1) == 0;
1099 int same_author_name(const reference &r1, const reference &r2, int n)
1101 const char *ae1;
1102 const char *as1 = r1.get_sort_field(0, n, -1, &ae1);
1103 const char *ae2;
1104 const char *as2 = r2.get_sort_field(0, n, -1, &ae2);
1105 if (!as1 && !as2) return 1; // they are the same
1106 if (!as1 || !as2) return 0;
1107 return ae1 - as1 == ae2 - as2 && memcmp(as1, as2, ae1 - as1) == 0;
1111 void int_set::set(int i)
1113 assert(i >= 0);
1114 int bytei = i >> 3;
1115 if (bytei >= v.length()) {
1116 int old_length = v.length();
1117 v.set_length(bytei + 1);
1118 for (int j = old_length; j <= bytei; j++)
1119 v[j] = 0;
1121 v[bytei] |= 1 << (i & 7);
1124 int int_set::get(int i) const
1126 assert(i >= 0);
1127 int bytei = i >> 3;
1128 return bytei >= v.length() ? 0 : (v[bytei] & (1 << (i & 7))) != 0;
1131 void reference::set_last_name_unambiguous(int i)
1133 last_name_unambiguous.set(i);
1136 void reference::need_author(int n)
1138 if (n > last_needed_author)
1139 last_needed_author = n;
1142 const char *reference::get_authors(const char **end) const
1144 if (!computed_authors) {
1145 ((reference *)this)->computed_authors = 1;
1146 string &result = ((reference *)this)->authors;
1147 int na = get_nauthors();
1148 result.clear();
1149 for (int i = 0; i < na; i++) {
1150 if (last_name_unambiguous.get(i)) {
1151 const char *e, *start = get_author_last_name(i, &e);
1152 assert(start != 0);
1153 result.append(start, e - start);
1155 else {
1156 const char *e, *start = get_author(i, &e);
1157 assert(start != 0);
1158 result.append(start, e - start);
1160 if (i == last_needed_author
1161 && et_al.length() > 0
1162 && et_al_min_elide > 0
1163 && last_needed_author + et_al_min_elide < na
1164 && na >= et_al_min_total) {
1165 result += et_al;
1166 break;
1168 if (i < na - 1) {
1169 if (na == 2)
1170 result += join_authors_exactly_two;
1171 else if (i < na - 2)
1172 result += join_authors_default;
1173 else
1174 result += join_authors_last_two;
1178 const char *start = authors.contents();
1179 *end = start + authors.length();
1180 return start;
1183 int reference::get_nauthors() const
1185 if (nauthors < 0) {
1186 const char *dummy;
1187 int na;
1188 for (na = 0; get_author(na, &dummy) != 0; na++)
1190 ((reference *)this)->nauthors = na;
1192 return nauthors;