Removed some obsolete cruft which has been replaced by gnulib
[findutils.git] / find / tree.c
blob9616f0415b1a99222c820b7c27945e1ad67b4e2a
1 /* tree.c -- helper functions to build and evaluate the expression tree.
2 Copyright (C) 1990, 91, 92, 93, 94, 2000, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
17 USA.
20 #include "defs.h"
21 #include "../gnulib/lib/xalloc.h"
23 #include <assert.h>
24 #include <stdlib.h>
26 #if ENABLE_NLS
27 # include <libintl.h>
28 # define _(Text) gettext (Text)
29 #else
30 # define _(Text) Text
31 #endif
32 #ifdef gettext_noop
33 # define N_(String) gettext_noop (String)
34 #else
35 /* See locate.c for explanation as to why not use (String) */
36 # define N_(String) String
37 #endif
41 /* All predicates for each path to process. */
42 static struct predicate *predicates = NULL;
44 /* The root of the evaluation tree. */
45 static struct predicate *eval_tree = NULL;
47 /* The last predicate allocated. */
48 static struct predicate *last_pred = NULL;
51 static struct predicate *scan_rest PARAMS((struct predicate **input,
52 struct predicate *head,
53 short int prev_prec));
54 static void merge_pred PARAMS((struct predicate *beg_list, struct predicate *end_list, struct predicate **last_p));
55 static struct predicate *set_new_parent PARAMS((struct predicate *curr, enum predicate_precedence high_prec, struct predicate **prevp));
56 static const char *cost_name PARAMS((enum EvaluationCost cost));
59 /* Return a pointer to a tree that represents the
60 expression prior to non-unary operator *INPUT.
61 Set *INPUT to point at the next input predicate node.
63 Only accepts the following:
65 <primary>
66 expression [operators of higher precedence]
67 <uni_op><primary>
68 (arbitrary expression)
69 <uni_op>(arbitrary expression)
71 In other words, you can not start out with a bi_op or close_paren.
73 If the following operator (if any) is of a higher precedence than
74 PREV_PREC, the expression just nabbed is part of a following
75 expression, which really is the expression that should be handed to
76 our caller, so get_expr recurses. */
78 struct predicate *
79 get_expr (struct predicate **input,
80 short int prev_prec,
81 const struct predicate* prev_pred)
83 struct predicate *next = NULL;
84 struct predicate *this_pred = (*input);
86 if (*input == NULL)
87 error (1, 0, _("invalid expression"));
89 switch ((*input)->p_type)
91 case NO_TYPE:
92 error (1, 0, _("invalid expression"));
93 break;
95 case BI_OP:
96 /* e.g. "find . -a" */
97 error (1, 0, _("invalid expression; you have used a binary operator '%s' with nothing before it."), this_pred->p_name);
98 break;
100 case CLOSE_PAREN:
101 if ((UNI_OP == prev_pred->p_type
102 || BI_OP == prev_pred->p_type)
103 && !this_pred->artificial)
105 /* e.g. "find \( -not \)" or "find \( -true -a \" */
106 error(1, 0, _("expected an expression between '%s' and ')'"),
107 prev_pred->p_name);
109 else if ( (*input)->artificial )
111 /* We have reached the end of the user-supplied predicates
112 * unexpectedly.
114 /* e.g. "find . -true -a" */
115 error (1, 0, _("expected an expression after '%s'"), prev_pred->p_name);
117 else
119 error (1, 0, _("invalid expression; you have too many ')'"));
121 break;
123 case PRIMARY_TYPE:
124 next = *input;
125 *input = (*input)->pred_next;
126 break;
128 case UNI_OP:
129 next = *input;
130 *input = (*input)->pred_next;
131 next->pred_right = get_expr (input, NEGATE_PREC, next);
132 break;
134 case OPEN_PAREN:
135 if ( (NULL == (*input)->pred_next) || (*input)->pred_next->artificial )
137 /* user typed something like "find . (", and so the ) we are
138 * looking at is from the artificial "( ) -print" that we
139 * add.
141 error (1, 0, _("invalid expression; expected to find a ')' but didn't see one. Perhaps you need an extra predicate after '%s'"), this_pred->p_name);
143 prev_pred = (*input);
144 *input = (*input)->pred_next;
145 if ( (*input)->p_type == CLOSE_PAREN )
147 error (1, 0, _("invalid expression; empty parentheses are not allowed."));
149 next = get_expr (input, NO_PREC, prev_pred);
150 if ((*input == NULL)
151 || ((*input)->p_type != CLOSE_PAREN))
152 error (1, 0, _("invalid expression; I was expecting to find a ')' somewhere but did not see one."));
153 *input = (*input)->pred_next; /* move over close */
154 break;
156 default:
157 error (1, 0, _("oops -- invalid expression type!"));
158 break;
161 /* We now have the first expression and are positioned to check
162 out the next operator. If NULL, all done. Otherwise, if
163 PREV_PREC < the current node precedence, we must continue;
164 the expression we just nabbed is more tightly bound to the
165 following expression than to the previous one. */
166 if (*input == NULL)
167 return (next);
168 if ((int) (*input)->p_prec > (int) prev_prec)
170 next = scan_rest (input, next, prev_prec);
171 if (next == NULL)
172 error (1, 0, _("invalid expression"));
174 return (next);
177 /* Scan across the remainder of a predicate input list starting
178 at *INPUT, building the rest of the expression tree to return.
179 Stop at the first close parenthesis or the end of the input list.
180 Assumes that get_expr has been called to nab the first element
181 of the expression tree.
183 *INPUT points to the current input predicate list element.
184 It is updated as we move along the list to point to the
185 terminating input element.
186 HEAD points to the predicate element that was obtained
187 by the call to get_expr.
188 PREV_PREC is the precedence of the previous predicate element. */
190 static struct predicate *
191 scan_rest (struct predicate **input,
192 struct predicate *head,
193 short int prev_prec)
195 struct predicate *tree; /* The new tree we are building. */
197 if ((*input == NULL) || ((*input)->p_type == CLOSE_PAREN))
198 return (NULL);
199 tree = head;
200 while ((*input != NULL) && ((int) (*input)->p_prec > (int) prev_prec))
202 switch ((*input)->p_type)
204 case NO_TYPE:
205 case PRIMARY_TYPE:
206 case UNI_OP:
207 case OPEN_PAREN:
208 /* I'm not sure how we get here, so it is not obvious what
209 * sort of mistakes might give rise to this condition.
211 error (1, 0, _("invalid expression"));
212 break;
214 case BI_OP:
216 struct predicate *prev = (*input);
217 (*input)->pred_left = tree;
218 tree = *input;
219 *input = (*input)->pred_next;
220 tree->pred_right = get_expr (input, tree->p_prec, prev);
221 break;
224 case CLOSE_PAREN:
225 return tree;
227 default:
228 error (1, 0,
229 _("oops -- invalid expression type (%d)!"),
230 (int)(*input)->p_type);
231 break;
234 return tree;
237 /* Returns true if the specified predicate is reorderable. */
238 static boolean
239 predicate_is_cost_free(const struct predicate *p)
241 if (pred_is(p, pred_name) ||
242 pred_is(p, pred_path) ||
243 pred_is(p, pred_iname) ||
244 pred_is(p, pred_ipath))
246 /* Traditionally (at least 4.1.7 through 4.2.x) GNU find always
247 * optimised these cases.
249 return true;
251 else if (options.optimisation_level > 0)
253 if (pred_is(p, pred_and) ||
254 pred_is(p, pred_negate) ||
255 pred_is(p, pred_comma) ||
256 pred_is(p, pred_or))
257 return false;
258 else
259 return NeedsNothing == p->p_cost;
261 else
263 return false;
267 /* Prints a predicate */
268 void print_predicate(FILE *fp, const struct predicate *p)
270 fprintf (fp, "%s%s%s",
271 p->p_name,
272 p->arg_text ? " " : "",
273 p->arg_text ? p->arg_text : "");
277 struct predlist
279 struct predicate *head;
280 struct predicate *tail;
283 static void
284 predlist_init(struct predlist *p)
286 p->head = p->tail = NULL;
289 static void
290 predlist_insert(struct predlist *list,
291 struct predicate *curr,
292 struct predicate **pprev)
294 struct predicate **insertpos = &(list->head);
296 *pprev = curr->pred_left;
297 if (options.optimisation_level > 2)
299 /* Insert the new node in the list after any other entries which
300 * are more selective.
302 if (0)
303 while ( (*insertpos) && ((*insertpos)->est_success_rate < curr->est_success_rate) )
305 insertpos = &((*insertpos)->pred_left);
308 curr->pred_left = (*insertpos);
309 (*insertpos) = curr;
310 if (NULL == list->tail)
311 list->tail = list->head;
314 static int
315 pred_cost_compare(const struct predicate *p1, const struct predicate *p2, boolean wantfailure)
317 if (p1->p_cost == p2->p_cost)
319 if (p1->est_success_rate == p2->est_success_rate)
320 return 0;
321 else if (wantfailure)
322 return p1->est_success_rate < p2->est_success_rate ? -1 : 1;
323 else
324 return p1->est_success_rate < p2->est_success_rate ? 1 : -1;
326 else
328 return p1->p_cost < p2->p_cost ? -1 : 1;
333 static void
334 predlist_merge_sort(struct predlist *list,
335 struct predicate **last)
337 struct predlist new_list;
338 struct predicate *p, *q;
340 if (NULL == list->head)
341 return; /* nothing to do */
343 if (options.debug_options & DebugTreeOpt)
345 fprintf(stderr, "%s:\n", "predlist before merge sort");
346 print_tree(stderr, list->head, 2);
349 calculate_derived_rates(list->head);
350 predlist_init(&new_list);
351 while (list->head)
353 /* remove head of source list */
354 q = list->head;
355 list->head = list->head->pred_left;
356 q->pred_left = NULL;
358 /* insert it into the new list */
359 for (p=new_list.head; p; p=p->pred_left)
361 /* If these operations are OR operations, we want to get a
362 * successful test as soon as possible, to take advantage of
363 * the short-circuit evaluation. If they're AND, we want to
364 * get an unsuccessful result early for the same reason.
365 * Therefore we invert the sense of the comparison for the
366 * OR case. We only want to invert the sense of the success
367 * rate comparison, not the operation cost comparison. Hence we
368 * pass a flag into pred_cost_compare().
370 boolean wantfailure = (OR_PREC != p->p_prec);
371 if (pred_cost_compare(p->pred_right, q->pred_right, wantfailure) >= 0)
372 break;
374 if (p)
376 /* insert into existing list */
377 q->pred_left = p->pred_left;
378 if (NULL == q->pred_left)
379 new_list.tail = q;
380 p->pred_left = q;
382 else
384 q->pred_left = new_list.head; /* prepend */
385 new_list.head = q;
386 if (NULL == new_list.tail)
387 new_list.tail = q; /* first item in new list */
390 if (options.debug_options & DebugTreeOpt)
392 fprintf(stderr, "%s:\n", "predlist after merge sort");
393 print_tree(stderr, new_list.head, 2);
396 calculate_derived_rates(new_list.head);
397 merge_pred(new_list.head, new_list.tail, last);
398 predlist_init(list);
401 static void
402 merge_lists(struct predlist lists[], int nlists,
403 struct predlist *name_list,
404 struct predlist *regex_list,
405 struct predicate **last)
407 int i;
408 static void (*mergefn)(struct predlist *, struct predicate**);
410 mergefn = predlist_merge_sort;
412 mergefn(name_list, last);
413 mergefn(regex_list, last);
415 for (i=0; i<nlists; i++)
416 mergefn(&lists[i], last);
421 static boolean
422 subtree_has_side_effects(const struct predicate *p)
424 if (p)
426 return p->side_effects
427 || subtree_has_side_effects(p->pred_left)
428 || subtree_has_side_effects(p->pred_right);
430 else
433 return false;
437 static int
438 worst_cost (const struct predicate *p)
440 if (p)
442 unsigned int cost_r, cost_l, worst;
443 cost_l = worst_cost(p->pred_left);
444 cost_r = worst_cost(p->pred_right);
445 worst = (cost_l > cost_r) ? cost_l : cost_r;
446 if (worst < p->p_cost)
447 worst = p->p_cost;
448 return worst;
450 else
452 return 0;
458 static void
459 perform_arm_swap(struct predicate *p)
461 struct predicate *tmp = p->pred_left->pred_right;
462 p->pred_left->pred_right = p->pred_right;
463 p->pred_right = tmp;
466 /* Consider swapping p->pred_left->pred_right with p->pred_right,
467 * if that yields a faster evaluation. Normally the left predicate is
468 * evaluated first.
470 * If the operation is an OR, we want the left predicate to be the one that
471 * succeeds most often. If it is an AND, we want it to be the predicate that
472 * fails most often.
474 * We don't consider swapping arms of an operator where their cost is
475 * different or where they have side effects.
477 * A viable test case for this is
478 * ./find -D opt -O3 . \! -type f -o -type d
479 * Here, the ! -type f should be evaluated first,
480 * as we assume that 95% of inodes are vanilla files.
482 static boolean
483 consider_arm_swap(struct predicate *p)
485 int left_cost, right_cost;
486 const char *reason = NULL;
487 struct predicate **pl, **pr;
489 if (BI_OP != p->p_type)
490 reason = "Not a binary operation";
492 if (!reason)
494 if (NULL == p->pred_left || NULL == p->pred_right)
495 reason = "Doesn't have two arms";
499 if (!reason)
501 if (NULL == p->pred_left->pred_right)
502 reason = "Left arm has no child on RHS";
504 pr = &p->pred_right;
505 pl = &p->pred_left->pred_right;
507 if (!reason)
509 if (subtree_has_side_effects(*pl))
510 reason = "Left subtree has side-effects";
512 if (!reason)
514 if (subtree_has_side_effects(*pr))
515 reason = "Right subtree has side-effects";
518 if (!reason)
520 left_cost = worst_cost(*pl);
521 right_cost = worst_cost(*pr);
523 if (left_cost < right_cost)
525 reason = "efficient as-is";
528 if (!reason)
530 boolean want_swap;
532 if (left_cost == right_cost)
534 /* it's a candidate */
535 float succ_rate_l = (*pl)->est_success_rate;
536 float succ_rate_r = (*pr)->est_success_rate;
538 if (options.debug_options & DebugTreeOpt)
540 fprintf(stderr, "Success rates: l=%f, r=%f\n", succ_rate_l, succ_rate_r);
543 if (pred_is(p, pred_or))
545 want_swap = succ_rate_r < succ_rate_l;
546 if (!want_swap)
547 reason = "Operation is OR and right success rate >= left";
549 else if (pred_is(p, pred_and))
551 want_swap = succ_rate_r > succ_rate_l;
552 if (!want_swap)
553 reason = "Operation is AND and right success rate <= left";
555 else
557 want_swap = false;
558 reason = "Not AND or OR";
561 else
563 want_swap = true;
566 if (want_swap)
568 if (options.debug_options & DebugTreeOpt)
570 fprintf(stderr, "Performing arm swap on:\n");
571 print_tree (stderr, p, 0);
573 perform_arm_swap(p);
574 return true;
579 if (options.debug_options & DebugTreeOpt)
581 fprintf(stderr, "Not an arm swap candidate (%s):\n", reason);
582 print_tree (stderr, p, 0);
584 return false;
587 static boolean
588 do_arm_swaps(struct predicate *p)
590 if (p)
592 boolean swapped;
595 swapped = false;
596 if (consider_arm_swap(p)
597 || do_arm_swaps(p->pred_left)
598 || do_arm_swaps(p->pred_right))
600 swapped = true;
602 } while (swapped);
603 return swapped;
605 else
607 return false;
613 /* Optimize the ordering of the predicates in the tree. Rearrange
614 them to minimize work. Strategies:
615 * Evaluate predicates that don't need inode information first;
616 the predicates are divided into 1 or more groups separated by
617 predicates (if any) which have "side effects", such as printing.
618 The grouping implements the partial ordering on predicates which
619 those with side effects impose.
621 * Place -name, -iname, -path, -ipath, -regex and -iregex at the front
622 of a group, with -name, -iname, -path and -ipath ahead of
623 -regex and -iregex. Predicates which are moved to the front
624 of a group by definition do not have side effects. Both
625 -regex and -iregex both use pred_regex.
627 If higher optimisation levels have been selected, reordering also
628 occurs according to the p_cost member of each predicate (which
629 reflects the performance cost of the test). The ordering also
630 bears in mind whether these operations are more likely to succeed
631 or fail. When evauating a chain of OR conditions, we prefer
632 tests likely to succeed at the front of the list. For AND, we
633 prefer tests likely to fail at the front of the list.
635 This routine "normalizes" the predicate tree by ensuring that
636 all expression predicates have AND (or OR or COMMA) parent nodes
637 which are linked along the left edge of the expression tree.
638 This makes manipulation of subtrees easier.
640 EVAL_TREEP points to the root pointer of the predicate tree
641 to be rearranged. opt_expr may return a new root pointer there.
642 Return true if the tree contains side effects, false if not. */
644 static boolean
645 opt_expr (struct predicate **eval_treep)
647 struct predlist regex_list={NULL,NULL}, name_list={NULL,NULL};
648 struct predlist cbo_list[NumEvaluationCosts];
649 int i;
650 struct predicate *curr;
651 struct predicate **prevp; /* Address of `curr' node. */
652 struct predicate **last_sidep; /* Last predicate with side effects. */
653 PRED_FUNC pred_func;
654 enum predicate_type p_type;
655 boolean has_side_effects = false; /* Return value. */
656 enum predicate_precedence prev_prec, /* precedence of last BI_OP in branch */
657 biop_prec; /* topmost BI_OP precedence in branch */
659 if (eval_treep == NULL || *eval_treep == NULL)
660 return (false);
662 for (i=0; i<NumEvaluationCosts; i++)
663 predlist_init(&cbo_list[i]);
665 /* Set up to normalize tree as a left-linked list of ANDs or ORs.
666 Set `curr' to the leftmost node, `prevp' to its address, and
667 `pred_func' to the predicate type of its parent. */
668 prevp = eval_treep;
669 prev_prec = AND_PREC;
670 curr = *prevp;
671 while (curr->pred_left != NULL)
673 prevp = &curr->pred_left;
674 prev_prec = curr->p_prec; /* must be a BI_OP */
675 curr = curr->pred_left;
678 /* Link in the appropriate BI_OP for the last expression, if needed. */
679 if (curr->p_type != BI_OP)
680 set_new_parent (curr, prev_prec, prevp);
682 if (options.debug_options & (DebugExpressionTree|DebugTreeOpt))
684 /* Normalized tree. */
685 fprintf (stderr, "Normalized Eval Tree:\n");
686 print_tree (stderr, *eval_treep, 0);
689 /* Rearrange the predicates. */
690 prevp = eval_treep;
691 biop_prec = NO_PREC; /* not COMMA_PREC */
692 if ((*prevp) && (*prevp)->p_type == BI_OP)
693 biop_prec = (*prevp)->p_prec;
694 while ((curr = *prevp) != NULL)
696 /* If there is a BI_OP of different precedence from the first
697 in the pred_left chain, create a new parent of the
698 original precedence, link the new parent to the left of the
699 previous and link CURR to the right of the new parent.
700 This preserves the precedence of expressions in the tree
701 in case we rearrange them. */
702 if (curr->p_type == BI_OP)
704 if (curr->p_prec != biop_prec)
705 curr = set_new_parent(curr, biop_prec, prevp);
708 /* See which predicate type we have. */
709 p_type = curr->pred_right->p_type;
710 pred_func = curr->pred_right->pred_func;
713 switch (p_type)
715 case NO_TYPE:
716 case PRIMARY_TYPE:
717 /* Don't rearrange the arguments of the comma operator, it is
718 not commutative. */
719 if (biop_prec == COMMA_PREC)
720 break;
722 /* If this predicate has no side effects, consider reordering it. */
723 if (!curr->pred_right->side_effects)
725 boolean reorder;
727 /* If it's one of our special primaries, move it to the
728 front of the list for that primary. */
729 if (predicate_is_cost_free(curr->pred_right))
731 if (options.debug_options & DebugTreeOpt)
733 fprintf(stderr, "-O%d: promoting cheap predicate ",
734 (int)options.optimisation_level);
735 print_predicate(stderr, curr->pred_right);
736 fprintf(stderr, " into name_list\n");
738 predlist_insert(&name_list, curr, prevp);
739 continue;
742 if (pred_func == pred_regex)
744 predlist_insert(&regex_list, curr, prevp);
745 continue;
748 reorder = ((options.optimisation_level > 1)
749 && (NeedsType == curr->pred_right->p_cost)
750 && !curr->pred_right->need_stat) ||
751 (options.optimisation_level > 2);
753 if (reorder)
755 if (options.debug_options & DebugTreeOpt)
757 fprintf(stderr, "-O%d: categorising predicate ",
758 (int)options.optimisation_level);
759 print_predicate(stderr, curr->pred_right);
760 fprintf(stderr, " by cost (%s)\n",
761 cost_name(curr->pred_right->p_cost));
763 predlist_insert(&cbo_list[curr->pred_right->p_cost], curr, prevp);
764 continue;
768 break;
770 case UNI_OP:
771 /* For NOT, check the expression trees below the NOT. */
772 curr->pred_right->side_effects
773 = opt_expr (&curr->pred_right->pred_right);
774 break;
776 case BI_OP:
777 /* For nested AND or OR, recurse (AND/OR form layers on the left of
778 the tree), and continue scanning this level of AND or OR. */
779 curr->pred_right->side_effects = opt_expr (&curr->pred_right);
780 break;
782 /* At this point, get_expr and scan_rest have already removed
783 all of the user's parentheses. */
785 default:
786 error (1, 0, _("oops -- invalid expression type!"));
787 break;
790 if (curr->pred_right->side_effects == true)
792 last_sidep = prevp;
794 /* Incorporate lists and reset list pointers for this group. */
795 merge_lists(cbo_list, NumEvaluationCosts, &name_list, &regex_list, last_sidep);
796 has_side_effects = true;
799 prevp = &curr->pred_left;
802 /* Do final list merges. */
803 last_sidep = prevp;
804 merge_lists(cbo_list, NumEvaluationCosts, &name_list, &regex_list, last_sidep);
805 return has_side_effects;
808 static float
809 constrain_rate(float rate)
811 if (rate > 1.0f)
812 return 1.0;
813 else if (rate < 0.0)
814 return 0.0;
815 else
816 return rate;
819 /* Link in a new parent BI_OP node for CURR, at *PREVP, with precedence
820 HIGH_PREC. */
822 static struct predicate *
823 set_new_parent (struct predicate *curr, enum predicate_precedence high_prec, struct predicate **prevp)
825 struct predicate *new_parent;
827 new_parent = (struct predicate *) xmalloc (sizeof (struct predicate));
828 new_parent->p_type = BI_OP;
829 new_parent->p_prec = high_prec;
830 new_parent->need_stat = false;
831 new_parent->need_type = false;
832 new_parent->p_cost = NeedsNothing;
834 switch (high_prec)
836 case COMMA_PREC:
837 new_parent->pred_func = pred_comma;
838 new_parent->p_name = ",";
839 new_parent->est_success_rate = 1.0;
840 break;
841 case OR_PREC:
842 new_parent->pred_func = pred_or;
843 new_parent->p_name = "-o";
844 new_parent->est_success_rate = constrain_rate(curr->est_success_rate);
845 break;
846 case AND_PREC:
847 new_parent->pred_func = pred_and;
848 new_parent->p_name = "-a";
849 new_parent->est_success_rate = constrain_rate(curr->est_success_rate);
850 break;
851 default:
852 ; /* empty */
855 new_parent->side_effects = false;
856 new_parent->no_default_print = false;
857 new_parent->args.str = NULL;
858 new_parent->pred_next = NULL;
860 /* Link in new_parent.
861 Pushes rest of left branch down 1 level to new_parent->pred_right. */
862 new_parent->pred_left = NULL;
863 new_parent->pred_right = curr;
864 *prevp = new_parent;
866 return new_parent;
869 /* Merge the predicate list that starts at BEG_LIST and ends at END_LIST
870 into the tree at LAST_P. */
872 static void
873 merge_pred (struct predicate *beg_list, struct predicate *end_list, struct predicate **last_p)
875 end_list->pred_left = *last_p;
876 *last_p = beg_list;
879 /* Find the first node in expression tree TREE that requires
880 a stat call and mark the operator above it as needing a stat
881 before calling the node. Since the expression precedences
882 are represented in the tree, some preds that need stat may not
883 get executed (because the expression value is determined earlier.)
884 So every expression needing stat must be marked as such, not just
885 the earliest, to be sure to obtain the stat. This still guarantees
886 that a stat is made as late as possible. Return true if the top node
887 in TREE requires a stat, false if not. */
890 struct pred_cost_lookup
892 PRED_FUNC fn;
893 enum EvaluationCost cost;
895 static struct pred_cost_lookup costlookup[] =
897 { pred_amin , NeedsStatInfo },
898 { pred_and , NeedsNothing, },
899 { pred_anewer , NeedsStatInfo, },
900 { pred_atime , NeedsStatInfo, },
901 { pred_closeparen, NeedsNothing },
902 { pred_cmin , NeedsStatInfo, },
903 { pred_cnewer , NeedsStatInfo, },
904 { pred_comma , NeedsNothing, },
905 { pred_ctime , NeedsStatInfo, },
906 { pred_delete , NeedsSyncDiskHit },
907 { pred_empty , NeedsStatInfo },
908 { pred_exec , NeedsEventualExec },
909 { pred_execdir , NeedsEventualExec },
910 { pred_executable, NeedsAccessInfo },
911 { pred_false , NeedsNothing },
912 { pred_fprint , NeedsNothing },
913 { pred_fprint0 , NeedsNothing },
914 { pred_fprintf , NeedsNothing },
915 { pred_fstype , NeedsStatInfo }, /* true for amortised cost */
916 { pred_gid , NeedsStatInfo },
917 { pred_group , NeedsStatInfo },
918 { pred_ilname , NeedsLinkName },
919 { pred_iname , NeedsNothing },
920 { pred_inum , NeedsStatInfo },
921 { pred_ipath , NeedsNothing },
922 { pred_links , NeedsStatInfo },
923 { pred_lname , NeedsLinkName },
924 { pred_ls , NeedsStatInfo },
925 { pred_mmin , NeedsStatInfo },
926 { pred_mtime , NeedsStatInfo },
927 { pred_name , NeedsNothing },
928 { pred_negate , NeedsNothing, },
929 { pred_newer , NeedsStatInfo, },
930 { pred_newerXY , NeedsStatInfo, },
931 { pred_nogroup , NeedsStatInfo }, /* true for amortised cost if caching is on */
932 { pred_nouser , NeedsStatInfo }, /* true for amortised cost if caching is on */
933 { pred_ok , NeedsUserInteraction },
934 { pred_okdir , NeedsUserInteraction },
935 { pred_openparen , NeedsNothing },
936 { pred_or , NeedsNothing, },
937 { pred_path , NeedsNothing },
938 { pred_perm , NeedsStatInfo },
939 { pred_print , NeedsNothing },
940 { pred_print0 , NeedsNothing },
941 { pred_prune , NeedsNothing },
942 { pred_quit , NeedsNothing },
943 { pred_readable , NeedsAccessInfo },
944 { pred_regex , NeedsNothing },
945 { pred_samefile , NeedsStatInfo },
946 { pred_size , NeedsStatInfo },
947 { pred_true , NeedsNothing },
948 { pred_type , NeedsType },
949 { pred_uid , NeedsStatInfo },
950 { pred_used , NeedsStatInfo },
951 { pred_user , NeedsStatInfo },
952 { pred_writable , NeedsAccessInfo },
953 { pred_xtype , NeedsType } /* roughly correct unless most files are symlinks */
955 static int pred_table_sorted = 0;
957 static boolean
958 check_sorted(void *base, size_t members, size_t membersize,
959 int (*cmpfn)(const void*, const void*))
961 const char *p = base;
962 size_t i;
963 for (i=1u; i<members; ++i)
965 int result = cmpfn(p+i*membersize, p+(i-1)*membersize);
966 if (result < 0)
967 return false;
968 result = cmpfn(p+(i-1)*membersize, p+i*membersize);
969 assert(result <= 0);
971 for (i=1u; i<members; ++i)
973 const struct pred_cost_lookup *pl1 = (const struct pred_cost_lookup*)(p+(i-1)*membersize);
974 const struct pred_cost_lookup *pl2 = (const struct pred_cost_lookup*)(p+(i-0)*membersize);
975 assert(pl1->fn <= pl2->fn);
977 return true;
981 static int
982 cost_table_comparison(const void *p1, const void *p2)
984 const struct pred_cost_lookup *pc1 = p1;
985 const struct pred_cost_lookup *pc2 = p2;
988 if (pc1->fn == pc2->fn)
989 return 0;
990 else if (pc1->fn > pc2->fn)
991 return 1;
992 else
993 return -1;
996 static enum EvaluationCost
997 get_pred_cost(const struct predicate *p)
999 enum EvaluationCost data_requirement_cost = NeedsNothing;
1000 enum EvaluationCost inherent_cost = NeedsUnknown;
1002 if (p->need_stat)
1004 data_requirement_cost = NeedsStatInfo;
1006 else if (p->need_type)
1008 data_requirement_cost = NeedsType;
1010 else
1012 data_requirement_cost = NeedsNothing;
1015 if (pred_is(p, pred_exec) || pred_is(p, pred_execdir))
1017 if (p->args.exec_vec.multiple)
1018 inherent_cost = NeedsEventualExec;
1019 else
1020 inherent_cost = NeedsImmediateExec;
1022 else if (pred_is(p, pred_fprintf))
1024 /* the parser calculated the cost for us. */
1025 inherent_cost = p->p_cost;
1027 else
1029 struct pred_cost_lookup key;
1030 void *entry;
1032 if (!pred_table_sorted)
1034 qsort(costlookup,
1035 sizeof(costlookup)/sizeof(costlookup[0]),
1036 sizeof(costlookup[0]),
1037 cost_table_comparison);
1039 if (!check_sorted(costlookup,
1040 sizeof(costlookup)/sizeof(costlookup[0]),
1041 sizeof(costlookup[0]),
1042 cost_table_comparison))
1044 error(1, 0, "Failed to sort the costlookup array.");
1046 pred_table_sorted = 1;
1048 key.fn = p->pred_func;
1049 entry = bsearch(&key, costlookup,
1050 sizeof(costlookup)/sizeof(costlookup[0]),
1051 sizeof(costlookup[0]),
1052 cost_table_comparison);
1053 if (entry)
1055 inherent_cost = ((const struct pred_cost_lookup*)entry)->cost;
1057 else
1059 error(0, 0, "warning: no cost entry for predicate %s", p->p_name);
1060 inherent_cost = NeedsUnknown;
1064 if (inherent_cost > data_requirement_cost)
1065 return inherent_cost;
1066 else
1067 return data_requirement_cost;
1070 static void
1071 estimate_costs (struct predicate *tree)
1073 if (tree)
1075 estimate_costs(tree->pred_right);
1076 estimate_costs(tree->pred_left);
1078 tree->p_cost = get_pred_cost(tree);
1082 struct predicate*
1083 get_eval_tree(void)
1085 return eval_tree;
1088 static float
1089 getrate(const struct predicate *p)
1091 if (p)
1092 return p->est_success_rate;
1093 else
1094 return 1.0f;
1098 float
1099 calculate_derived_rates(struct predicate *p)
1101 assert(NULL != p);
1103 if (p->pred_right)
1104 calculate_derived_rates(p->pred_right);
1105 if (p->pred_left)
1106 calculate_derived_rates(p->pred_left);
1108 assert(p->p_type != CLOSE_PAREN);
1109 assert(p->p_type != OPEN_PAREN);
1111 switch (p->p_type)
1113 case NO_TYPE:
1114 assert(NULL == p->pred_right);
1115 assert(NULL == p->pred_left);
1116 return p->est_success_rate;
1118 case PRIMARY_TYPE:
1119 assert(NULL == p->pred_right);
1120 assert(NULL == p->pred_left);
1121 return p->est_success_rate;
1123 case UNI_OP:
1124 /* Unary operators must have exactly one operand */
1125 assert(pred_is(p, pred_negate));
1126 assert(NULL == p->pred_left);
1127 p->est_success_rate = (1.0 - p->pred_right->est_success_rate);
1128 return p->est_success_rate;
1130 case BI_OP:
1132 float rate;
1133 /* Binary operators must have two operands */
1134 if (pred_is(p, pred_and))
1136 rate = getrate(p->pred_right) * getrate(p->pred_left);
1138 else if (pred_is(p, pred_comma))
1140 rate = 1.0f;
1142 else if (pred_is(p, pred_or))
1144 rate = getrate(p->pred_right) + getrate(p->pred_left);
1146 else
1148 /* only and, or and comma are BI_OP. */
1149 assert(0);
1150 rate = 0.0f;
1152 p->est_success_rate = constrain_rate(rate);
1154 return p->est_success_rate;
1156 case OPEN_PAREN:
1157 case CLOSE_PAREN:
1158 p->est_success_rate = 1.0;
1159 return p->est_success_rate;
1163 /* opt_expr() rearranges predicates such that each left subtree is
1164 * rooted at a logical predicate (e.g. and or or). check_normalization()
1165 * asserts that this property still holds.
1168 static void check_normalization(struct predicate *p, boolean at_root)
1170 if (at_root)
1172 assert(BI_OP == p->p_type);
1175 if (p->pred_left)
1177 assert(BI_OP == p->pred_left->p_type);
1178 check_normalization(p->pred_left, false);
1180 if (p->pred_right)
1182 check_normalization(p->pred_right, false);
1186 struct predicate*
1187 build_expression_tree(int argc, char *argv[], int end_of_leading_options)
1189 const struct parser_table *parse_entry; /* Pointer to the parsing table entry for this expression. */
1190 char *predicate_name; /* Name of predicate being parsed. */
1191 struct predicate *cur_pred;
1192 const struct parser_table *entry_close, *entry_print, *entry_open;
1193 int i, oldi;
1195 predicates = NULL;
1197 /* Find where in ARGV the predicates begin by skipping the list of
1198 * start points.
1200 for (i = end_of_leading_options; i < argc && !looks_like_expression(argv[i], true); i++)
1202 /* Do nothing. */ ;
1205 /* Enclose the expression in `( ... )' so a default -print will
1206 apply to the whole expression. */
1207 entry_open = find_parser("(");
1208 entry_close = find_parser(")");
1209 entry_print = find_parser("print");
1210 assert(entry_open != NULL);
1211 assert(entry_close != NULL);
1212 assert(entry_print != NULL);
1214 parse_openparen (entry_open, argv, &argc);
1215 last_pred->p_name = "(";
1216 predicates->artificial = true;
1217 parse_begin_user_args(argv, argc, last_pred, predicates);
1218 pred_sanity_check(last_pred);
1220 /* Build the input order list. */
1221 while (i < argc )
1223 if (!looks_like_expression(argv[i], false))
1225 error (0, 0, _("paths must precede expression: %s"), argv[i]);
1226 usage(stderr, 1, NULL);
1229 predicate_name = argv[i];
1230 parse_entry = find_parser (predicate_name);
1231 if (parse_entry == NULL)
1233 /* Command line option not recognized */
1234 error (1, 0, _("unknown predicate `%s'"), predicate_name);
1237 /* We have recognised a test of the form -foo. Eat that,
1238 * unless it is a predicate like -newerXY.
1240 if (parse_entry->type != ARG_SPECIAL_PARSE)
1242 i++;
1244 oldi = i;
1245 if (!(*(parse_entry->parser_func)) (parse_entry, argv, &i))
1247 if (argv[i])
1249 if ( (ARG_SPECIAL_PARSE == parse_entry->type) && (i == oldi) )
1251 /* The special parse function spat out the
1252 * predicate. It must be invalid, or not tasty.
1254 error (1, 0, _("invalid predicate `%s'"),
1255 argv[i], predicate_name);
1257 else
1259 error (1, 0, _("invalid argument `%s' to `%s'"),
1260 argv[i], predicate_name);
1263 else
1265 /* Command line option requires an argument */
1266 error (1, 0, _("missing argument to `%s'"), predicate_name);
1269 else
1271 last_pred->p_name = predicate_name;
1273 /* If the parser consumed an argument, save it. */
1274 if (i != oldi)
1275 last_pred->arg_text = argv[oldi];
1276 else
1277 last_pred->arg_text = NULL;
1279 pred_sanity_check(last_pred);
1280 pred_sanity_check(predicates); /* XXX: expensive */
1282 parse_end_user_args(argv, argc, last_pred, predicates);
1283 if (predicates->pred_next == NULL)
1285 /* No predicates that do something other than set a global variable
1286 were given; remove the unneeded initial `(' and add `-print'. */
1287 cur_pred = predicates;
1288 predicates = last_pred = predicates->pred_next;
1289 free ((char *) cur_pred);
1290 parse_print (entry_print, argv, &argc);
1291 last_pred->p_name = "-print";
1292 pred_sanity_check(last_pred);
1293 pred_sanity_check(predicates); /* XXX: expensive */
1295 else if (!default_prints (predicates->pred_next))
1297 /* One or more predicates that produce output were given;
1298 remove the unneeded initial `('. */
1299 cur_pred = predicates;
1300 predicates = predicates->pred_next;
1301 pred_sanity_check(predicates); /* XXX: expensive */
1302 free ((char *) cur_pred);
1304 else
1306 /* `( user-supplied-expression ) -print'. */
1307 parse_closeparen (entry_close, argv, &argc);
1308 last_pred->p_name = ")";
1309 last_pred->artificial = true;
1310 pred_sanity_check(last_pred);
1311 parse_print (entry_print, argv, &argc);
1312 last_pred->p_name = "-print";
1313 last_pred->artificial = true;
1314 pred_sanity_check(last_pred);
1315 pred_sanity_check(predicates); /* XXX: expensive */
1318 if (options.debug_options & (DebugExpressionTree|DebugTreeOpt))
1320 fprintf (stderr, "Predicate List:\n");
1321 print_list (stderr, predicates);
1324 /* do a sanity check */
1325 pred_sanity_check(predicates);
1327 /* Done parsing the predicates. Build the evaluation tree. */
1328 cur_pred = predicates;
1329 eval_tree = get_expr (&cur_pred, NO_PREC, NULL);
1330 calculate_derived_rates(eval_tree);
1332 /* Check if we have any left-over predicates (this fixes
1333 * Debian bug #185202).
1335 if (cur_pred != NULL)
1337 /* cur_pred->p_name is often NULL here */
1338 if (pred_is(cur_pred, pred_closeparen))
1340 /* e.g. "find \( -true \) \)" */
1341 error (1, 0, _("you have too many ')'"), cur_pred->p_name);
1343 else
1345 if (cur_pred->p_name)
1346 error (1, 0, _("unexpected extra predicate '%s'"), cur_pred->p_name);
1347 else
1348 error (1, 0, _("unexpected extra predicate"));
1352 if (options.debug_options & (DebugExpressionTree|DebugTreeOpt))
1354 fprintf (stderr, "Eval Tree:\n");
1355 print_tree (stderr, eval_tree, 0);
1358 estimate_costs(eval_tree);
1360 /* Rearrange the eval tree in optimal-predicate order. */
1361 opt_expr (&eval_tree);
1363 /* Check that the tree is in normalised order (opt_expr does this) */
1364 check_normalization(eval_tree, true);
1366 do_arm_swaps(eval_tree);
1368 /* Check that the tree is still in normalised order */
1369 check_normalization(eval_tree, true);
1371 if (options.debug_options & (DebugExpressionTree|DebugTreeOpt))
1373 fprintf (stderr, "Optimized Eval Tree:\n");
1374 print_tree (stderr, eval_tree, 0);
1375 fprintf (stderr, "Optimized command line:\n");
1376 print_optlist(stderr, eval_tree);
1377 fprintf(stderr, "\n");
1380 return eval_tree;
1383 /* Initialise the performance data for a predicate.
1385 static void
1386 init_pred_perf(struct predicate *pred)
1388 struct predicate_performance_info *p = &pred->perf;
1389 p->visits = p->successes = 0;
1393 /* Return a pointer to a new predicate structure, which has been
1394 linked in as the last one in the predicates list.
1396 Set `predicates' to point to the start of the predicates list.
1397 Set `last_pred' to point to the new last predicate in the list.
1399 Set all cells in the new structure to the default values. */
1401 struct predicate *
1402 get_new_pred (const struct parser_table *entry)
1404 register struct predicate *new_pred;
1405 (void) entry;
1407 /* Options should not be turned into predicates. */
1408 assert(entry->type != ARG_OPTION);
1409 assert(entry->type != ARG_POSITIONAL_OPTION);
1411 if (predicates == NULL)
1413 predicates = (struct predicate *)
1414 xmalloc (sizeof (struct predicate));
1415 last_pred = predicates;
1417 else
1419 new_pred = (struct predicate *) xmalloc (sizeof (struct predicate));
1420 last_pred->pred_next = new_pred;
1421 last_pred = new_pred;
1423 last_pred->parser_entry = entry;
1424 last_pred->pred_func = NULL;
1425 last_pred->p_name = NULL;
1426 last_pred->p_type = NO_TYPE;
1427 last_pred->p_prec = NO_PREC;
1428 last_pred->side_effects = false;
1429 last_pred->no_default_print = false;
1430 last_pred->need_stat = true;
1431 last_pred->need_type = true;
1432 last_pred->args.str = NULL;
1433 last_pred->pred_next = NULL;
1434 last_pred->pred_left = NULL;
1435 last_pred->pred_right = NULL;
1436 last_pred->literal_control_chars = options.literal_control_chars;
1437 last_pred->artificial = false;
1438 last_pred->est_success_rate = 1.0;
1439 init_pred_perf(last_pred);
1440 return last_pred;
1443 /* Return a pointer to a new predicate, with operator check.
1444 Like get_new_pred, but it checks to make sure that the previous
1445 predicate is an operator. If it isn't, the AND operator is inserted. */
1447 struct predicate *
1448 get_new_pred_chk_op (const struct parser_table *entry)
1450 struct predicate *new_pred;
1451 static const struct parser_table *entry_and = NULL;
1453 /* Locate the entry in the parser table for the "and" operator */
1454 if (NULL == entry_and)
1455 entry_and = find_parser("and");
1457 /* Check that it's actually there. If not, that is a bug.*/
1458 assert(entry_and != NULL);
1460 if (last_pred)
1461 switch (last_pred->p_type)
1463 case NO_TYPE:
1464 error (1, 0, _("oops -- invalid default insertion of and!"));
1465 break;
1467 case PRIMARY_TYPE:
1468 case CLOSE_PAREN:
1469 /* We need to interpose the and operator. */
1470 new_pred = get_new_pred (entry_and);
1471 new_pred->pred_func = pred_and;
1472 new_pred->p_name = "-a";
1473 new_pred->p_type = BI_OP;
1474 new_pred->p_prec = AND_PREC;
1475 new_pred->need_stat = false;
1476 new_pred->need_type = false;
1477 new_pred->args.str = NULL;
1478 new_pred->side_effects = false;
1479 new_pred->no_default_print = false;
1480 break;
1482 default:
1483 break;
1486 new_pred = get_new_pred (entry);
1487 new_pred->parser_entry = entry;
1488 return new_pred;
1491 struct cost_assoc
1493 enum EvaluationCost cost;
1494 char *name;
1496 struct cost_assoc cost_table[] =
1498 { NeedsNothing, "Nothing" },
1499 { NeedsType, "Type" },
1500 { NeedsStatInfo, "StatInfo" },
1501 { NeedsLinkName, "LinkName" },
1502 { NeedsAccessInfo, "AccessInfo" },
1503 { NeedsSyncDiskHit, "SyncDiskHit" },
1504 { NeedsEventualExec, "EventualExec" },
1505 { NeedsImmediateExec, "ImmediateExec" },
1506 { NeedsUserInteraction, "UserInteraction" },
1507 { NeedsUnknown, "Unknown" }
1510 struct prec_assoc
1512 short prec;
1513 char *prec_name;
1516 static struct prec_assoc prec_table[] =
1518 {NO_PREC, "no"},
1519 {COMMA_PREC, "comma"},
1520 {OR_PREC, "or"},
1521 {AND_PREC, "and"},
1522 {NEGATE_PREC, "negate"},
1523 {MAX_PREC, "max"},
1524 {-1, "unknown "}
1527 struct op_assoc
1529 short type;
1530 char *type_name;
1533 static struct op_assoc type_table[] =
1535 {NO_TYPE, "no"},
1536 {PRIMARY_TYPE, "primary"},
1537 {UNI_OP, "uni_op"},
1538 {BI_OP, "bi_op"},
1539 {OPEN_PAREN, "open_paren "},
1540 {CLOSE_PAREN, "close_paren "},
1541 {-1, "unknown"}
1544 static const char *
1545 cost_name (enum EvaluationCost cost)
1547 unsigned int i;
1548 unsigned int n = sizeof(cost_table)/sizeof(cost_table[0]);
1550 for (i = 0; i<n; ++i)
1551 if (cost_table[i].cost == cost)
1552 return cost_table[i].name;
1553 return "unknown";
1557 static char *
1558 type_name (type)
1559 short type;
1561 int i;
1563 for (i = 0; type_table[i].type != (short) -1; i++)
1564 if (type_table[i].type == type)
1565 break;
1566 return (type_table[i].type_name);
1569 static char *
1570 prec_name (prec)
1571 short prec;
1573 int i;
1575 for (i = 0; prec_table[i].prec != (short) -1; i++)
1576 if (prec_table[i].prec == prec)
1577 break;
1578 return (prec_table[i].prec_name);
1582 /* Walk the expression tree NODE to stdout.
1583 INDENT is the number of levels to indent the left margin. */
1585 void
1586 print_tree (FILE *fp, struct predicate *node, int indent)
1588 int i;
1590 if (node == NULL)
1591 return;
1592 for (i = 0; i < indent; i++)
1593 fprintf (fp, " ");
1594 fprintf (fp, "pred=[");
1595 print_predicate(fp, node);
1596 fprintf (fp, "] type=%s prec=%s",
1597 type_name (node->p_type), prec_name (node->p_prec));
1598 fprintf (fp, " cost=%s rate=%#03.2g %sside effects ",
1599 cost_name(node->p_cost),
1600 node->est_success_rate,
1601 (node->side_effects ? "" : "no "));
1603 if (node->need_stat || node->need_type)
1605 int comma = 0;
1607 fprintf (fp, "Needs ");
1608 if (node->need_stat)
1610 fprintf (fp, "stat");
1611 comma = 1;
1613 if (node->need_type)
1615 fprintf (fp, "%stype", comma ? "," : "");
1618 fprintf (fp, "\n");
1621 for (i = 0; i < indent; i++)
1622 fprintf (fp, " ");
1623 if (NULL == node->pred_left && NULL == node->pred_right)
1625 fprintf (fp, "no children.\n");
1627 else
1629 if (node->pred_left)
1631 fprintf (fp, "left:\n");
1632 print_tree (fp, node->pred_left, indent + 1);
1634 else
1636 fprintf (fp, "no left.\n");
1639 for (i = 0; i < indent; i++)
1640 fprintf (fp, " ");
1641 if (node->pred_right)
1643 fprintf (fp, "right:\n");
1644 print_tree (fp, node->pred_right, indent + 1);
1646 else
1648 fprintf (fp, "no right.\n");