Fix Savannah bug #20263 (compilation err on DEC OSF/1. Include <sys/stat.h> in files...
[findutils.git] / find / tree.c
blob66414fe4cb581876d690792fcebb79a5d9243e0f
1 /* tree.c -- helper functions to build and evaluate the expression tree.
2 Copyright (C) 1990, 91, 92, 93, 94, 2000, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
17 USA.
20 #include <config.h>
21 #include "defs.h"
23 #include <assert.h>
24 #include <stdlib.h>
26 #include "xalloc.h"
27 #include "error.h"
30 #if ENABLE_NLS
31 # include <libintl.h>
32 # define _(Text) gettext (Text)
33 #else
34 # define _(Text) Text
35 #endif
36 #ifdef gettext_noop
37 # define N_(String) gettext_noop (String)
38 #else
39 /* See locate.c for explanation as to why not use (String) */
40 # define N_(String) String
41 #endif
45 /* All predicates for each path to process. */
46 static struct predicate *predicates = NULL;
48 /* The root of the evaluation tree. */
49 static struct predicate *eval_tree = NULL;
51 /* The last predicate allocated. */
52 static struct predicate *last_pred = NULL;
55 static struct predicate *scan_rest PARAMS((struct predicate **input,
56 struct predicate *head,
57 short int prev_prec));
58 static void merge_pred PARAMS((struct predicate *beg_list, struct predicate *end_list, struct predicate **last_p));
59 static struct predicate *set_new_parent PARAMS((struct predicate *curr, enum predicate_precedence high_prec, struct predicate **prevp));
60 static const char *cost_name PARAMS((enum EvaluationCost cost));
63 /* Return a pointer to a tree that represents the
64 expression prior to non-unary operator *INPUT.
65 Set *INPUT to point at the next input predicate node.
67 Only accepts the following:
69 <primary>
70 expression [operators of higher precedence]
71 <uni_op><primary>
72 (arbitrary expression)
73 <uni_op>(arbitrary expression)
75 In other words, you can not start out with a bi_op or close_paren.
77 If the following operator (if any) is of a higher precedence than
78 PREV_PREC, the expression just nabbed is part of a following
79 expression, which really is the expression that should be handed to
80 our caller, so get_expr recurses. */
82 struct predicate *
83 get_expr (struct predicate **input,
84 short int prev_prec,
85 const struct predicate* prev_pred)
87 struct predicate *next = NULL;
88 struct predicate *this_pred = (*input);
90 if (*input == NULL)
91 error (1, 0, _("invalid expression"));
93 switch ((*input)->p_type)
95 case NO_TYPE:
96 error (1, 0, _("invalid expression"));
97 break;
99 case BI_OP:
100 /* e.g. "find . -a" */
101 error (1, 0, _("invalid expression; you have used a binary operator '%s' with nothing before it."), this_pred->p_name);
102 break;
104 case CLOSE_PAREN:
105 if ((UNI_OP == prev_pred->p_type
106 || BI_OP == prev_pred->p_type)
107 && !this_pred->artificial)
109 /* e.g. "find \( -not \)" or "find \( -true -a \" */
110 error(1, 0, _("expected an expression between '%s' and ')'"),
111 prev_pred->p_name);
113 else if ( (*input)->artificial )
115 /* We have reached the end of the user-supplied predicates
116 * unexpectedly.
118 /* e.g. "find . -true -a" */
119 error (1, 0, _("expected an expression after '%s'"), prev_pred->p_name);
121 else
123 error (1, 0, _("invalid expression; you have too many ')'"));
125 break;
127 case PRIMARY_TYPE:
128 next = *input;
129 *input = (*input)->pred_next;
130 break;
132 case UNI_OP:
133 next = *input;
134 *input = (*input)->pred_next;
135 next->pred_right = get_expr (input, NEGATE_PREC, next);
136 break;
138 case OPEN_PAREN:
139 if ( (NULL == (*input)->pred_next) || (*input)->pred_next->artificial )
141 /* user typed something like "find . (", and so the ) we are
142 * looking at is from the artificial "( ) -print" that we
143 * add.
145 error (1, 0, _("invalid expression; expected to find a ')' but didn't see one. Perhaps you need an extra predicate after '%s'"), this_pred->p_name);
147 prev_pred = (*input);
148 *input = (*input)->pred_next;
149 if ( (*input)->p_type == CLOSE_PAREN )
151 error (1, 0, _("invalid expression; empty parentheses are not allowed."));
153 next = get_expr (input, NO_PREC, prev_pred);
154 if ((*input == NULL)
155 || ((*input)->p_type != CLOSE_PAREN))
156 error (1, 0, _("invalid expression; I was expecting to find a ')' somewhere but did not see one."));
157 *input = (*input)->pred_next; /* move over close */
158 break;
160 default:
161 error (1, 0, _("oops -- invalid expression type!"));
162 break;
165 /* We now have the first expression and are positioned to check
166 out the next operator. If NULL, all done. Otherwise, if
167 PREV_PREC < the current node precedence, we must continue;
168 the expression we just nabbed is more tightly bound to the
169 following expression than to the previous one. */
170 if (*input == NULL)
171 return (next);
172 if ((int) (*input)->p_prec > (int) prev_prec)
174 next = scan_rest (input, next, prev_prec);
175 if (next == NULL)
176 error (1, 0, _("invalid expression"));
178 return (next);
181 /* Scan across the remainder of a predicate input list starting
182 at *INPUT, building the rest of the expression tree to return.
183 Stop at the first close parenthesis or the end of the input list.
184 Assumes that get_expr has been called to nab the first element
185 of the expression tree.
187 *INPUT points to the current input predicate list element.
188 It is updated as we move along the list to point to the
189 terminating input element.
190 HEAD points to the predicate element that was obtained
191 by the call to get_expr.
192 PREV_PREC is the precedence of the previous predicate element. */
194 static struct predicate *
195 scan_rest (struct predicate **input,
196 struct predicate *head,
197 short int prev_prec)
199 struct predicate *tree; /* The new tree we are building. */
201 if ((*input == NULL) || ((*input)->p_type == CLOSE_PAREN))
202 return (NULL);
203 tree = head;
204 while ((*input != NULL) && ((int) (*input)->p_prec > (int) prev_prec))
206 switch ((*input)->p_type)
208 case NO_TYPE:
209 case PRIMARY_TYPE:
210 case UNI_OP:
211 case OPEN_PAREN:
212 /* I'm not sure how we get here, so it is not obvious what
213 * sort of mistakes might give rise to this condition.
215 error (1, 0, _("invalid expression"));
216 break;
218 case BI_OP:
220 struct predicate *prev = (*input);
221 (*input)->pred_left = tree;
222 tree = *input;
223 *input = (*input)->pred_next;
224 tree->pred_right = get_expr (input, tree->p_prec, prev);
225 break;
228 case CLOSE_PAREN:
229 return tree;
231 default:
232 error (1, 0,
233 _("oops -- invalid expression type (%d)!"),
234 (int)(*input)->p_type);
235 break;
238 return tree;
241 /* Returns true if the specified predicate is reorderable. */
242 static boolean
243 predicate_is_cost_free(const struct predicate *p)
245 if (pred_is(p, pred_name) ||
246 pred_is(p, pred_path) ||
247 pred_is(p, pred_iname) ||
248 pred_is(p, pred_ipath))
250 /* Traditionally (at least 4.1.7 through 4.2.x) GNU find always
251 * optimised these cases.
253 return true;
255 else if (options.optimisation_level > 0)
257 if (pred_is(p, pred_and) ||
258 pred_is(p, pred_negate) ||
259 pred_is(p, pred_comma) ||
260 pred_is(p, pred_or))
261 return false;
262 else
263 return NeedsNothing == p->p_cost;
265 else
267 return false;
271 /* Prints a predicate */
272 void print_predicate(FILE *fp, const struct predicate *p)
274 fprintf (fp, "%s%s%s",
275 p->p_name,
276 p->arg_text ? " " : "",
277 p->arg_text ? p->arg_text : "");
281 struct predlist
283 struct predicate *head;
284 struct predicate *tail;
287 static void
288 predlist_init(struct predlist *p)
290 p->head = p->tail = NULL;
293 static void
294 predlist_insert(struct predlist *list,
295 struct predicate *curr,
296 struct predicate **pprev)
298 struct predicate **insertpos = &(list->head);
300 *pprev = curr->pred_left;
301 if (options.optimisation_level > 2)
303 /* Insert the new node in the list after any other entries which
304 * are more selective.
306 if (0)
307 while ( (*insertpos) && ((*insertpos)->est_success_rate < curr->est_success_rate) )
309 insertpos = &((*insertpos)->pred_left);
312 curr->pred_left = (*insertpos);
313 (*insertpos) = curr;
314 if (NULL == list->tail)
315 list->tail = list->head;
318 static int
319 pred_cost_compare(const struct predicate *p1, const struct predicate *p2, boolean wantfailure)
321 if (p1->p_cost == p2->p_cost)
323 if (p1->est_success_rate == p2->est_success_rate)
324 return 0;
325 else if (wantfailure)
326 return p1->est_success_rate < p2->est_success_rate ? -1 : 1;
327 else
328 return p1->est_success_rate < p2->est_success_rate ? 1 : -1;
330 else
332 return p1->p_cost < p2->p_cost ? -1 : 1;
337 static void
338 predlist_merge_sort(struct predlist *list,
339 struct predicate **last)
341 struct predlist new_list;
342 struct predicate *p, *q;
344 if (NULL == list->head)
345 return; /* nothing to do */
347 if (options.debug_options & DebugTreeOpt)
349 fprintf(stderr, "%s:\n", "predlist before merge sort");
350 print_tree(stderr, list->head, 2);
353 calculate_derived_rates(list->head);
354 predlist_init(&new_list);
355 while (list->head)
357 /* remove head of source list */
358 q = list->head;
359 list->head = list->head->pred_left;
360 q->pred_left = NULL;
362 /* insert it into the new list */
363 for (p=new_list.head; p; p=p->pred_left)
365 /* If these operations are OR operations, we want to get a
366 * successful test as soon as possible, to take advantage of
367 * the short-circuit evaluation. If they're AND, we want to
368 * get an unsuccessful result early for the same reason.
369 * Therefore we invert the sense of the comparison for the
370 * OR case. We only want to invert the sense of the success
371 * rate comparison, not the operation cost comparison. Hence we
372 * pass a flag into pred_cost_compare().
374 boolean wantfailure = (OR_PREC != p->p_prec);
375 if (pred_cost_compare(p->pred_right, q->pred_right, wantfailure) >= 0)
376 break;
378 if (p)
380 /* insert into existing list */
381 q->pred_left = p->pred_left;
382 if (NULL == q->pred_left)
383 new_list.tail = q;
384 p->pred_left = q;
386 else
388 q->pred_left = new_list.head; /* prepend */
389 new_list.head = q;
390 if (NULL == new_list.tail)
391 new_list.tail = q; /* first item in new list */
394 if (options.debug_options & DebugTreeOpt)
396 fprintf(stderr, "%s:\n", "predlist after merge sort");
397 print_tree(stderr, new_list.head, 2);
400 calculate_derived_rates(new_list.head);
401 merge_pred(new_list.head, new_list.tail, last);
402 predlist_init(list);
405 static void
406 merge_lists(struct predlist lists[], int nlists,
407 struct predlist *name_list,
408 struct predlist *regex_list,
409 struct predicate **last)
411 int i;
412 static void (*mergefn)(struct predlist *, struct predicate**);
414 mergefn = predlist_merge_sort;
416 mergefn(name_list, last);
417 mergefn(regex_list, last);
419 for (i=0; i<nlists; i++)
420 mergefn(&lists[i], last);
425 static boolean
426 subtree_has_side_effects(const struct predicate *p)
428 if (p)
430 return p->side_effects
431 || subtree_has_side_effects(p->pred_left)
432 || subtree_has_side_effects(p->pred_right);
434 else
437 return false;
441 static int
442 worst_cost (const struct predicate *p)
444 if (p)
446 unsigned int cost_r, cost_l, worst;
447 cost_l = worst_cost(p->pred_left);
448 cost_r = worst_cost(p->pred_right);
449 worst = (cost_l > cost_r) ? cost_l : cost_r;
450 if (worst < p->p_cost)
451 worst = p->p_cost;
452 return worst;
454 else
456 return 0;
462 static void
463 perform_arm_swap(struct predicate *p)
465 struct predicate *tmp = p->pred_left->pred_right;
466 p->pred_left->pred_right = p->pred_right;
467 p->pred_right = tmp;
470 /* Consider swapping p->pred_left->pred_right with p->pred_right,
471 * if that yields a faster evaluation. Normally the left predicate is
472 * evaluated first.
474 * If the operation is an OR, we want the left predicate to be the one that
475 * succeeds most often. If it is an AND, we want it to be the predicate that
476 * fails most often.
478 * We don't consider swapping arms of an operator where their cost is
479 * different or where they have side effects.
481 * A viable test case for this is
482 * ./find -D opt -O3 . \! -type f -o -type d
483 * Here, the ! -type f should be evaluated first,
484 * as we assume that 95% of inodes are vanilla files.
486 static boolean
487 consider_arm_swap(struct predicate *p)
489 int left_cost, right_cost;
490 const char *reason = NULL;
491 struct predicate **pl, **pr;
493 if (BI_OP != p->p_type)
494 reason = "Not a binary operation";
496 if (!reason)
498 if (NULL == p->pred_left || NULL == p->pred_right)
499 reason = "Doesn't have two arms";
503 if (!reason)
505 if (NULL == p->pred_left->pred_right)
506 reason = "Left arm has no child on RHS";
508 pr = &p->pred_right;
509 pl = &p->pred_left->pred_right;
511 if (!reason)
513 if (subtree_has_side_effects(*pl))
514 reason = "Left subtree has side-effects";
516 if (!reason)
518 if (subtree_has_side_effects(*pr))
519 reason = "Right subtree has side-effects";
522 if (!reason)
524 left_cost = worst_cost(*pl);
525 right_cost = worst_cost(*pr);
527 if (left_cost < right_cost)
529 reason = "efficient as-is";
532 if (!reason)
534 boolean want_swap;
536 if (left_cost == right_cost)
538 /* it's a candidate */
539 float succ_rate_l = (*pl)->est_success_rate;
540 float succ_rate_r = (*pr)->est_success_rate;
542 if (options.debug_options & DebugTreeOpt)
544 fprintf(stderr, "Success rates: l=%f, r=%f\n", succ_rate_l, succ_rate_r);
547 if (pred_is(p, pred_or))
549 want_swap = succ_rate_r < succ_rate_l;
550 if (!want_swap)
551 reason = "Operation is OR and right success rate >= left";
553 else if (pred_is(p, pred_and))
555 want_swap = succ_rate_r > succ_rate_l;
556 if (!want_swap)
557 reason = "Operation is AND and right success rate <= left";
559 else
561 want_swap = false;
562 reason = "Not AND or OR";
565 else
567 want_swap = true;
570 if (want_swap)
572 if (options.debug_options & DebugTreeOpt)
574 fprintf(stderr, "Performing arm swap on:\n");
575 print_tree (stderr, p, 0);
577 perform_arm_swap(p);
578 return true;
583 if (options.debug_options & DebugTreeOpt)
585 fprintf(stderr, "Not an arm swap candidate (%s):\n", reason);
586 print_tree (stderr, p, 0);
588 return false;
591 static boolean
592 do_arm_swaps(struct predicate *p)
594 if (p)
596 boolean swapped;
599 swapped = false;
600 if (consider_arm_swap(p)
601 || do_arm_swaps(p->pred_left)
602 || do_arm_swaps(p->pred_right))
604 swapped = true;
606 } while (swapped);
607 return swapped;
609 else
611 return false;
617 /* Optimize the ordering of the predicates in the tree. Rearrange
618 them to minimize work. Strategies:
619 * Evaluate predicates that don't need inode information first;
620 the predicates are divided into 1 or more groups separated by
621 predicates (if any) which have "side effects", such as printing.
622 The grouping implements the partial ordering on predicates which
623 those with side effects impose.
625 * Place -name, -iname, -path, -ipath, -regex and -iregex at the front
626 of a group, with -name, -iname, -path and -ipath ahead of
627 -regex and -iregex. Predicates which are moved to the front
628 of a group by definition do not have side effects. Both
629 -regex and -iregex both use pred_regex.
631 If higher optimisation levels have been selected, reordering also
632 occurs according to the p_cost member of each predicate (which
633 reflects the performance cost of the test). The ordering also
634 bears in mind whether these operations are more likely to succeed
635 or fail. When evauating a chain of OR conditions, we prefer
636 tests likely to succeed at the front of the list. For AND, we
637 prefer tests likely to fail at the front of the list.
639 This routine "normalizes" the predicate tree by ensuring that
640 all expression predicates have AND (or OR or COMMA) parent nodes
641 which are linked along the left edge of the expression tree.
642 This makes manipulation of subtrees easier.
644 EVAL_TREEP points to the root pointer of the predicate tree
645 to be rearranged. opt_expr may return a new root pointer there.
646 Return true if the tree contains side effects, false if not. */
648 static boolean
649 opt_expr (struct predicate **eval_treep)
651 struct predlist regex_list={NULL,NULL}, name_list={NULL,NULL};
652 struct predlist cbo_list[NumEvaluationCosts];
653 int i;
654 struct predicate *curr;
655 struct predicate **prevp; /* Address of `curr' node. */
656 struct predicate **last_sidep; /* Last predicate with side effects. */
657 PRED_FUNC pred_func;
658 enum predicate_type p_type;
659 boolean has_side_effects = false; /* Return value. */
660 enum predicate_precedence prev_prec, /* precedence of last BI_OP in branch */
661 biop_prec; /* topmost BI_OP precedence in branch */
663 if (eval_treep == NULL || *eval_treep == NULL)
664 return (false);
666 for (i=0; i<NumEvaluationCosts; i++)
667 predlist_init(&cbo_list[i]);
669 /* Set up to normalize tree as a left-linked list of ANDs or ORs.
670 Set `curr' to the leftmost node, `prevp' to its address, and
671 `pred_func' to the predicate type of its parent. */
672 prevp = eval_treep;
673 prev_prec = AND_PREC;
674 curr = *prevp;
675 while (curr->pred_left != NULL)
677 prevp = &curr->pred_left;
678 prev_prec = curr->p_prec; /* must be a BI_OP */
679 curr = curr->pred_left;
682 /* Link in the appropriate BI_OP for the last expression, if needed. */
683 if (curr->p_type != BI_OP)
684 set_new_parent (curr, prev_prec, prevp);
686 if (options.debug_options & (DebugExpressionTree|DebugTreeOpt))
688 /* Normalized tree. */
689 fprintf (stderr, "Normalized Eval Tree:\n");
690 print_tree (stderr, *eval_treep, 0);
693 /* Rearrange the predicates. */
694 prevp = eval_treep;
695 biop_prec = NO_PREC; /* not COMMA_PREC */
696 if ((*prevp) && (*prevp)->p_type == BI_OP)
697 biop_prec = (*prevp)->p_prec;
698 while ((curr = *prevp) != NULL)
700 /* If there is a BI_OP of different precedence from the first
701 in the pred_left chain, create a new parent of the
702 original precedence, link the new parent to the left of the
703 previous and link CURR to the right of the new parent.
704 This preserves the precedence of expressions in the tree
705 in case we rearrange them. */
706 if (curr->p_type == BI_OP)
708 if (curr->p_prec != biop_prec)
709 curr = set_new_parent(curr, biop_prec, prevp);
712 /* See which predicate type we have. */
713 p_type = curr->pred_right->p_type;
714 pred_func = curr->pred_right->pred_func;
717 switch (p_type)
719 case NO_TYPE:
720 case PRIMARY_TYPE:
721 /* Don't rearrange the arguments of the comma operator, it is
722 not commutative. */
723 if (biop_prec == COMMA_PREC)
724 break;
726 /* If this predicate has no side effects, consider reordering it. */
727 if (!curr->pred_right->side_effects)
729 boolean reorder;
731 /* If it's one of our special primaries, move it to the
732 front of the list for that primary. */
733 if (predicate_is_cost_free(curr->pred_right))
735 if (options.debug_options & DebugTreeOpt)
737 fprintf(stderr, "-O%d: promoting cheap predicate ",
738 (int)options.optimisation_level);
739 print_predicate(stderr, curr->pred_right);
740 fprintf(stderr, " into name_list\n");
742 predlist_insert(&name_list, curr, prevp);
743 continue;
746 if (pred_func == pred_regex)
748 predlist_insert(&regex_list, curr, prevp);
749 continue;
752 reorder = ((options.optimisation_level > 1)
753 && (NeedsType == curr->pred_right->p_cost)
754 && !curr->pred_right->need_stat) ||
755 (options.optimisation_level > 2);
757 if (reorder)
759 if (options.debug_options & DebugTreeOpt)
761 fprintf(stderr, "-O%d: categorising predicate ",
762 (int)options.optimisation_level);
763 print_predicate(stderr, curr->pred_right);
764 fprintf(stderr, " by cost (%s)\n",
765 cost_name(curr->pred_right->p_cost));
767 predlist_insert(&cbo_list[curr->pred_right->p_cost], curr, prevp);
768 continue;
772 break;
774 case UNI_OP:
775 /* For NOT, check the expression trees below the NOT. */
776 curr->pred_right->side_effects
777 = opt_expr (&curr->pred_right->pred_right);
778 break;
780 case BI_OP:
781 /* For nested AND or OR, recurse (AND/OR form layers on the left of
782 the tree), and continue scanning this level of AND or OR. */
783 curr->pred_right->side_effects = opt_expr (&curr->pred_right);
784 break;
786 /* At this point, get_expr and scan_rest have already removed
787 all of the user's parentheses. */
789 default:
790 error (1, 0, _("oops -- invalid expression type!"));
791 break;
794 if (curr->pred_right->side_effects == true)
796 last_sidep = prevp;
798 /* Incorporate lists and reset list pointers for this group. */
799 merge_lists(cbo_list, NumEvaluationCosts, &name_list, &regex_list, last_sidep);
800 has_side_effects = true;
803 prevp = &curr->pred_left;
806 /* Do final list merges. */
807 last_sidep = prevp;
808 merge_lists(cbo_list, NumEvaluationCosts, &name_list, &regex_list, last_sidep);
809 return has_side_effects;
812 static float
813 constrain_rate(float rate)
815 if (rate > 1.0f)
816 return 1.0;
817 else if (rate < 0.0)
818 return 0.0;
819 else
820 return rate;
823 /* Link in a new parent BI_OP node for CURR, at *PREVP, with precedence
824 HIGH_PREC. */
826 static struct predicate *
827 set_new_parent (struct predicate *curr, enum predicate_precedence high_prec, struct predicate **prevp)
829 struct predicate *new_parent;
831 new_parent = xmalloc (sizeof (struct predicate));
832 new_parent->p_type = BI_OP;
833 new_parent->p_prec = high_prec;
834 new_parent->need_stat = false;
835 new_parent->need_type = false;
836 new_parent->p_cost = NeedsNothing;
838 switch (high_prec)
840 case COMMA_PREC:
841 new_parent->pred_func = pred_comma;
842 new_parent->p_name = ",";
843 new_parent->est_success_rate = 1.0;
844 break;
845 case OR_PREC:
846 new_parent->pred_func = pred_or;
847 new_parent->p_name = "-o";
848 new_parent->est_success_rate = constrain_rate(curr->est_success_rate);
849 break;
850 case AND_PREC:
851 new_parent->pred_func = pred_and;
852 new_parent->p_name = "-a";
853 new_parent->est_success_rate = constrain_rate(curr->est_success_rate);
854 break;
855 default:
856 ; /* empty */
859 new_parent->side_effects = false;
860 new_parent->no_default_print = false;
861 new_parent->args.str = NULL;
862 new_parent->pred_next = NULL;
864 /* Link in new_parent.
865 Pushes rest of left branch down 1 level to new_parent->pred_right. */
866 new_parent->pred_left = NULL;
867 new_parent->pred_right = curr;
868 *prevp = new_parent;
870 return new_parent;
873 /* Merge the predicate list that starts at BEG_LIST and ends at END_LIST
874 into the tree at LAST_P. */
876 static void
877 merge_pred (struct predicate *beg_list, struct predicate *end_list, struct predicate **last_p)
879 end_list->pred_left = *last_p;
880 *last_p = beg_list;
883 /* Find the first node in expression tree TREE that requires
884 a stat call and mark the operator above it as needing a stat
885 before calling the node. Since the expression precedences
886 are represented in the tree, some preds that need stat may not
887 get executed (because the expression value is determined earlier.)
888 So every expression needing stat must be marked as such, not just
889 the earliest, to be sure to obtain the stat. This still guarantees
890 that a stat is made as late as possible. Return true if the top node
891 in TREE requires a stat, false if not. */
894 struct pred_cost_lookup
896 PRED_FUNC fn;
897 enum EvaluationCost cost;
899 static struct pred_cost_lookup costlookup[] =
901 { pred_amin , NeedsStatInfo },
902 { pred_and , NeedsNothing, },
903 { pred_anewer , NeedsStatInfo, },
904 { pred_atime , NeedsStatInfo, },
905 { pred_closeparen, NeedsNothing },
906 { pred_cmin , NeedsStatInfo, },
907 { pred_cnewer , NeedsStatInfo, },
908 { pred_comma , NeedsNothing, },
909 { pred_ctime , NeedsStatInfo, },
910 { pred_delete , NeedsSyncDiskHit },
911 { pred_empty , NeedsStatInfo },
912 { pred_exec , NeedsEventualExec },
913 { pred_execdir , NeedsEventualExec },
914 { pred_executable, NeedsAccessInfo },
915 { pred_false , NeedsNothing },
916 { pred_fprint , NeedsNothing },
917 { pred_fprint0 , NeedsNothing },
918 { pred_fprintf , NeedsNothing },
919 { pred_fstype , NeedsStatInfo }, /* true for amortised cost */
920 { pred_gid , NeedsStatInfo },
921 { pred_group , NeedsStatInfo },
922 { pred_ilname , NeedsLinkName },
923 { pred_iname , NeedsNothing },
924 { pred_inum , NeedsStatInfo },
925 { pred_ipath , NeedsNothing },
926 { pred_links , NeedsStatInfo },
927 { pred_lname , NeedsLinkName },
928 { pred_ls , NeedsStatInfo },
929 { pred_fls , NeedsStatInfo },
930 { pred_mmin , NeedsStatInfo },
931 { pred_mtime , NeedsStatInfo },
932 { pred_name , NeedsNothing },
933 { pred_negate , NeedsNothing, },
934 { pred_newer , NeedsStatInfo, },
935 { pred_newerXY , NeedsStatInfo, },
936 { pred_nogroup , NeedsStatInfo }, /* true for amortised cost if caching is on */
937 { pred_nouser , NeedsStatInfo }, /* true for amortised cost if caching is on */
938 { pred_ok , NeedsUserInteraction },
939 { pred_okdir , NeedsUserInteraction },
940 { pred_openparen , NeedsNothing },
941 { pred_or , NeedsNothing, },
942 { pred_path , NeedsNothing },
943 { pred_perm , NeedsStatInfo },
944 { pred_print , NeedsNothing },
945 { pred_print0 , NeedsNothing },
946 { pred_prune , NeedsNothing },
947 { pred_quit , NeedsNothing },
948 { pred_readable , NeedsAccessInfo },
949 { pred_regex , NeedsNothing },
950 { pred_samefile , NeedsStatInfo },
951 { pred_size , NeedsStatInfo },
952 { pred_true , NeedsNothing },
953 { pred_type , NeedsType },
954 { pred_uid , NeedsStatInfo },
955 { pred_used , NeedsStatInfo },
956 { pred_user , NeedsStatInfo },
957 { pred_writable , NeedsAccessInfo },
958 { pred_xtype , NeedsType } /* roughly correct unless most files are symlinks */
960 static int pred_table_sorted = 0;
962 static boolean
963 check_sorted(void *base, size_t members, size_t membersize,
964 int (*cmpfn)(const void*, const void*))
966 const char *p = base;
967 size_t i;
968 for (i=1u; i<members; ++i)
970 int result = cmpfn(p+i*membersize, p+(i-1)*membersize);
971 if (result < 0)
972 return false;
973 result = cmpfn(p+(i-1)*membersize, p+i*membersize);
974 assert (result <= 0);
976 return true;
980 static int
981 cost_table_comparison(const void *p1, const void *p2)
983 const struct pred_cost_lookup *pc1 = p1;
984 const struct pred_cost_lookup *pc2 = p2;
985 const void* pf1 = (const void*)pc1->fn;
986 const void* pf2 = (const void*)pc2->fn;
988 /* We use casts to void* for > comparison because not all C
989 * compilers allow order comparison between functions. For example,
990 * that would fail on DEC Alpha OSF/1 with native cc.
992 if (pc1->fn == pc2->fn)
993 return 0;
994 else if (pf1 > pf2)
995 return 1;
996 else
997 return -1;
1000 static enum EvaluationCost
1001 get_pred_cost(const struct predicate *p)
1003 enum EvaluationCost data_requirement_cost = NeedsNothing;
1004 enum EvaluationCost inherent_cost = NeedsUnknown;
1006 if (p->need_stat)
1008 data_requirement_cost = NeedsStatInfo;
1010 else if (p->need_type)
1012 data_requirement_cost = NeedsType;
1014 else
1016 data_requirement_cost = NeedsNothing;
1019 if (pred_is(p, pred_exec) || pred_is(p, pred_execdir))
1021 if (p->args.exec_vec.multiple)
1022 inherent_cost = NeedsEventualExec;
1023 else
1024 inherent_cost = NeedsImmediateExec;
1026 else if (pred_is(p, pred_fprintf))
1028 /* the parser calculated the cost for us. */
1029 inherent_cost = p->p_cost;
1031 else
1033 struct pred_cost_lookup key;
1034 void *entry;
1036 if (!pred_table_sorted)
1038 qsort(costlookup,
1039 sizeof(costlookup)/sizeof(costlookup[0]),
1040 sizeof(costlookup[0]),
1041 cost_table_comparison);
1043 if (!check_sorted(costlookup,
1044 sizeof(costlookup)/sizeof(costlookup[0]),
1045 sizeof(costlookup[0]),
1046 cost_table_comparison))
1048 error(1, 0, "Failed to sort the costlookup array.");
1050 pred_table_sorted = 1;
1052 key.fn = p->pred_func;
1053 entry = bsearch(&key, costlookup,
1054 sizeof(costlookup)/sizeof(costlookup[0]),
1055 sizeof(costlookup[0]),
1056 cost_table_comparison);
1057 if (entry)
1059 inherent_cost = ((const struct pred_cost_lookup*)entry)->cost;
1061 else
1063 error(0, 0, "warning: no cost entry for predicate %s", p->p_name);
1064 inherent_cost = NeedsUnknown;
1068 if (inherent_cost > data_requirement_cost)
1069 return inherent_cost;
1070 else
1071 return data_requirement_cost;
1074 static void
1075 estimate_costs (struct predicate *tree)
1077 if (tree)
1079 estimate_costs(tree->pred_right);
1080 estimate_costs(tree->pred_left);
1082 tree->p_cost = get_pred_cost(tree);
1086 struct predicate*
1087 get_eval_tree(void)
1089 return eval_tree;
1092 static float
1093 getrate(const struct predicate *p)
1095 if (p)
1096 return p->est_success_rate;
1097 else
1098 return 1.0f;
1102 float
1103 calculate_derived_rates(struct predicate *p)
1105 assert (NULL != p);
1107 if (p->pred_right)
1108 calculate_derived_rates(p->pred_right);
1109 if (p->pred_left)
1110 calculate_derived_rates(p->pred_left);
1112 assert (p->p_type != CLOSE_PAREN);
1113 assert (p->p_type != OPEN_PAREN);
1115 switch (p->p_type)
1117 case NO_TYPE:
1118 assert (NULL == p->pred_right);
1119 assert (NULL == p->pred_left);
1120 return p->est_success_rate;
1122 case PRIMARY_TYPE:
1123 assert (NULL == p->pred_right);
1124 assert (NULL == p->pred_left);
1125 return p->est_success_rate;
1127 case UNI_OP:
1128 /* Unary operators must have exactly one operand */
1129 assert (pred_is(p, pred_negate));
1130 assert (NULL == p->pred_left);
1131 p->est_success_rate = (1.0 - p->pred_right->est_success_rate);
1132 return p->est_success_rate;
1134 case BI_OP:
1136 float rate;
1137 /* Binary operators must have two operands */
1138 if (pred_is(p, pred_and))
1140 rate = getrate(p->pred_right) * getrate(p->pred_left);
1142 else if (pred_is(p, pred_comma))
1144 rate = 1.0f;
1146 else if (pred_is(p, pred_or))
1148 rate = getrate(p->pred_right) + getrate(p->pred_left);
1150 else
1152 /* only and, or and comma are BI_OP. */
1153 assert (0);
1154 abort ();
1156 p->est_success_rate = constrain_rate(rate);
1158 return p->est_success_rate;
1160 case OPEN_PAREN:
1161 case CLOSE_PAREN:
1162 p->est_success_rate = 1.0;
1163 return p->est_success_rate;
1167 /* opt_expr() rearranges predicates such that each left subtree is
1168 * rooted at a logical predicate (e.g. and or or). check_normalization()
1169 * asserts that this property still holds.
1172 static void check_normalization(struct predicate *p, boolean at_root)
1174 if (at_root)
1176 assert (BI_OP == p->p_type);
1179 if (p->pred_left)
1181 assert (BI_OP == p->pred_left->p_type);
1182 check_normalization(p->pred_left, false);
1184 if (p->pred_right)
1186 check_normalization(p->pred_right, false);
1190 struct predicate*
1191 build_expression_tree(int argc, char *argv[], int end_of_leading_options)
1193 const struct parser_table *parse_entry; /* Pointer to the parsing table entry for this expression. */
1194 char *predicate_name; /* Name of predicate being parsed. */
1195 struct predicate *cur_pred;
1196 const struct parser_table *entry_close, *entry_print, *entry_open;
1197 int i, oldi;
1199 predicates = NULL;
1201 /* Find where in ARGV the predicates begin by skipping the list of
1202 * start points.
1204 for (i = end_of_leading_options; i < argc && !looks_like_expression(argv[i], true); i++)
1206 /* Do nothing. */ ;
1209 /* Enclose the expression in `( ... )' so a default -print will
1210 apply to the whole expression. */
1211 entry_open = find_parser("(");
1212 entry_close = find_parser(")");
1213 entry_print = find_parser("print");
1214 assert (entry_open != NULL);
1215 assert (entry_close != NULL);
1216 assert (entry_print != NULL);
1218 parse_openparen (entry_open, argv, &argc);
1219 last_pred->p_name = "(";
1220 predicates->artificial = true;
1221 parse_begin_user_args(argv, argc, last_pred, predicates);
1222 pred_sanity_check(last_pred);
1224 /* Build the input order list. */
1225 while (i < argc )
1227 if (!looks_like_expression(argv[i], false))
1229 error (0, 0, _("paths must precede expression: %s"), argv[i]);
1230 usage(stderr, 1, NULL);
1233 predicate_name = argv[i];
1234 parse_entry = find_parser (predicate_name);
1235 if (parse_entry == NULL)
1237 /* Command line option not recognized */
1238 error (1, 0, _("unknown predicate `%s'"), predicate_name);
1241 /* We have recognised a test of the form -foo. Eat that,
1242 * unless it is a predicate like -newerXY.
1244 if (parse_entry->type != ARG_SPECIAL_PARSE)
1246 i++;
1248 oldi = i;
1249 if (!(*(parse_entry->parser_func)) (parse_entry, argv, &i))
1251 if (argv[i])
1253 if ( (ARG_SPECIAL_PARSE == parse_entry->type) && (i == oldi) )
1255 /* The special parse function spat out the
1256 * predicate. It must be invalid, or not tasty.
1258 error (1, 0, _("invalid predicate `%s'"),
1259 predicate_name);
1261 else
1263 error (1, 0, _("invalid argument `%s' to `%s'"),
1264 argv[i], predicate_name);
1267 else
1269 /* Command line option requires an argument */
1270 error (1, 0, _("missing argument to `%s'"), predicate_name);
1273 else
1275 last_pred->p_name = predicate_name;
1277 /* If the parser consumed an argument, save it. */
1278 if (i != oldi)
1279 last_pred->arg_text = argv[oldi];
1280 else
1281 last_pred->arg_text = NULL;
1283 pred_sanity_check(last_pred);
1284 pred_sanity_check(predicates); /* XXX: expensive */
1286 parse_end_user_args(argv, argc, last_pred, predicates);
1287 if (predicates->pred_next == NULL)
1289 /* No predicates that do something other than set a global variable
1290 were given; remove the unneeded initial `(' and add `-print'. */
1291 cur_pred = predicates;
1292 predicates = last_pred = predicates->pred_next;
1293 free (cur_pred);
1294 parse_print (entry_print, argv, &argc);
1295 last_pred->p_name = "-print";
1296 pred_sanity_check(last_pred);
1297 pred_sanity_check(predicates); /* XXX: expensive */
1299 else if (!default_prints (predicates->pred_next))
1301 /* One or more predicates that produce output were given;
1302 remove the unneeded initial `('. */
1303 cur_pred = predicates;
1304 predicates = predicates->pred_next;
1305 pred_sanity_check(predicates); /* XXX: expensive */
1306 free (cur_pred);
1308 else
1310 /* `( user-supplied-expression ) -print'. */
1311 parse_closeparen (entry_close, argv, &argc);
1312 last_pred->p_name = ")";
1313 last_pred->artificial = true;
1314 pred_sanity_check(last_pred);
1315 parse_print (entry_print, argv, &argc);
1316 last_pred->p_name = "-print";
1317 last_pred->artificial = true;
1318 pred_sanity_check(last_pred);
1319 pred_sanity_check(predicates); /* XXX: expensive */
1322 if (options.debug_options & (DebugExpressionTree|DebugTreeOpt))
1324 fprintf (stderr, "Predicate List:\n");
1325 print_list (stderr, predicates);
1328 /* do a sanity check */
1329 pred_sanity_check(predicates);
1331 /* Done parsing the predicates. Build the evaluation tree. */
1332 cur_pred = predicates;
1333 eval_tree = get_expr (&cur_pred, NO_PREC, NULL);
1334 calculate_derived_rates(eval_tree);
1336 /* Check if we have any left-over predicates (this fixes
1337 * Debian bug #185202).
1339 if (cur_pred != NULL)
1341 /* cur_pred->p_name is often NULL here */
1342 if (pred_is(cur_pred, pred_closeparen))
1344 /* e.g. "find \( -true \) \)" */
1345 error (1, 0, _("you have too many ')'"));
1347 else
1349 if (cur_pred->p_name)
1350 error (1, 0, _("unexpected extra predicate '%s'"), cur_pred->p_name);
1351 else
1352 error (1, 0, _("unexpected extra predicate"));
1356 if (options.debug_options & (DebugExpressionTree|DebugTreeOpt))
1358 fprintf (stderr, "Eval Tree:\n");
1359 print_tree (stderr, eval_tree, 0);
1362 estimate_costs(eval_tree);
1364 /* Rearrange the eval tree in optimal-predicate order. */
1365 opt_expr (&eval_tree);
1367 /* Check that the tree is in normalised order (opt_expr does this) */
1368 check_normalization(eval_tree, true);
1370 do_arm_swaps(eval_tree);
1372 /* Check that the tree is still in normalised order */
1373 check_normalization(eval_tree, true);
1375 if (options.debug_options & (DebugExpressionTree|DebugTreeOpt))
1377 fprintf (stderr, "Optimized Eval Tree:\n");
1378 print_tree (stderr, eval_tree, 0);
1379 fprintf (stderr, "Optimized command line:\n");
1380 print_optlist(stderr, eval_tree);
1381 fprintf(stderr, "\n");
1384 return eval_tree;
1387 /* Initialise the performance data for a predicate.
1389 static void
1390 init_pred_perf(struct predicate *pred)
1392 struct predicate_performance_info *p = &pred->perf;
1393 p->visits = p->successes = 0;
1397 /* Return a pointer to a new predicate structure, which has been
1398 linked in as the last one in the predicates list.
1400 Set `predicates' to point to the start of the predicates list.
1401 Set `last_pred' to point to the new last predicate in the list.
1403 Set all cells in the new structure to the default values. */
1405 struct predicate *
1406 get_new_pred (const struct parser_table *entry)
1408 register struct predicate *new_pred;
1409 (void) entry;
1411 /* Options should not be turned into predicates. */
1412 assert (entry->type != ARG_OPTION);
1413 assert (entry->type != ARG_POSITIONAL_OPTION);
1415 if (predicates == NULL)
1417 predicates = (struct predicate *)
1418 xmalloc (sizeof (struct predicate));
1419 last_pred = predicates;
1421 else
1423 new_pred = xmalloc (sizeof (struct predicate));
1424 last_pred->pred_next = new_pred;
1425 last_pred = new_pred;
1427 last_pred->parser_entry = entry;
1428 last_pred->pred_func = NULL;
1429 last_pred->p_name = NULL;
1430 last_pred->p_type = NO_TYPE;
1431 last_pred->p_prec = NO_PREC;
1432 last_pred->side_effects = false;
1433 last_pred->no_default_print = false;
1434 last_pred->need_stat = true;
1435 last_pred->need_type = true;
1436 last_pred->args.str = NULL;
1437 last_pred->pred_next = NULL;
1438 last_pred->pred_left = NULL;
1439 last_pred->pred_right = NULL;
1440 last_pred->literal_control_chars = options.literal_control_chars;
1441 last_pred->artificial = false;
1442 last_pred->est_success_rate = 1.0;
1443 init_pred_perf(last_pred);
1444 return last_pred;
1447 /* Return a pointer to a new predicate, with operator check.
1448 Like get_new_pred, but it checks to make sure that the previous
1449 predicate is an operator. If it isn't, the AND operator is inserted. */
1451 struct predicate *
1452 get_new_pred_chk_op (const struct parser_table *entry)
1454 struct predicate *new_pred;
1455 static const struct parser_table *entry_and = NULL;
1457 /* Locate the entry in the parser table for the "and" operator */
1458 if (NULL == entry_and)
1459 entry_and = find_parser("and");
1461 /* Check that it's actually there. If not, that is a bug.*/
1462 assert (entry_and != NULL);
1464 if (last_pred)
1465 switch (last_pred->p_type)
1467 case NO_TYPE:
1468 error (1, 0, _("oops -- invalid default insertion of and!"));
1469 break;
1471 case PRIMARY_TYPE:
1472 case CLOSE_PAREN:
1473 /* We need to interpose the and operator. */
1474 new_pred = get_new_pred (entry_and);
1475 new_pred->pred_func = pred_and;
1476 new_pred->p_name = "-a";
1477 new_pred->p_type = BI_OP;
1478 new_pred->p_prec = AND_PREC;
1479 new_pred->need_stat = false;
1480 new_pred->need_type = false;
1481 new_pred->args.str = NULL;
1482 new_pred->side_effects = false;
1483 new_pred->no_default_print = false;
1484 break;
1486 default:
1487 break;
1490 new_pred = get_new_pred (entry);
1491 new_pred->parser_entry = entry;
1492 return new_pred;
1495 struct cost_assoc
1497 enum EvaluationCost cost;
1498 char *name;
1500 struct cost_assoc cost_table[] =
1502 { NeedsNothing, "Nothing" },
1503 { NeedsType, "Type" },
1504 { NeedsStatInfo, "StatInfo" },
1505 { NeedsLinkName, "LinkName" },
1506 { NeedsAccessInfo, "AccessInfo" },
1507 { NeedsSyncDiskHit, "SyncDiskHit" },
1508 { NeedsEventualExec, "EventualExec" },
1509 { NeedsImmediateExec, "ImmediateExec" },
1510 { NeedsUserInteraction, "UserInteraction" },
1511 { NeedsUnknown, "Unknown" }
1514 struct prec_assoc
1516 short prec;
1517 char *prec_name;
1520 static struct prec_assoc prec_table[] =
1522 {NO_PREC, "no"},
1523 {COMMA_PREC, "comma"},
1524 {OR_PREC, "or"},
1525 {AND_PREC, "and"},
1526 {NEGATE_PREC, "negate"},
1527 {MAX_PREC, "max"},
1528 {-1, "unknown "}
1531 struct op_assoc
1533 short type;
1534 char *type_name;
1537 static struct op_assoc type_table[] =
1539 {NO_TYPE, "no"},
1540 {PRIMARY_TYPE, "primary"},
1541 {UNI_OP, "uni_op"},
1542 {BI_OP, "bi_op"},
1543 {OPEN_PAREN, "open_paren "},
1544 {CLOSE_PAREN, "close_paren "},
1545 {-1, "unknown"}
1548 static const char *
1549 cost_name (enum EvaluationCost cost)
1551 unsigned int i;
1552 unsigned int n = sizeof(cost_table)/sizeof(cost_table[0]);
1554 for (i = 0; i<n; ++i)
1555 if (cost_table[i].cost == cost)
1556 return cost_table[i].name;
1557 return "unknown";
1561 static char *
1562 type_name (type)
1563 short type;
1565 int i;
1567 for (i = 0; type_table[i].type != (short) -1; i++)
1568 if (type_table[i].type == type)
1569 break;
1570 return (type_table[i].type_name);
1573 static char *
1574 prec_name (prec)
1575 short prec;
1577 int i;
1579 for (i = 0; prec_table[i].prec != (short) -1; i++)
1580 if (prec_table[i].prec == prec)
1581 break;
1582 return (prec_table[i].prec_name);
1586 /* Walk the expression tree NODE to stdout.
1587 INDENT is the number of levels to indent the left margin. */
1589 void
1590 print_tree (FILE *fp, struct predicate *node, int indent)
1592 int i;
1594 if (node == NULL)
1595 return;
1596 for (i = 0; i < indent; i++)
1597 fprintf (fp, " ");
1598 fprintf (fp, "pred=[");
1599 print_predicate(fp, node);
1600 fprintf (fp, "] type=%s prec=%s",
1601 type_name (node->p_type), prec_name (node->p_prec));
1602 fprintf (fp, " cost=%s rate=%#03.2g %sside effects ",
1603 cost_name(node->p_cost),
1604 node->est_success_rate,
1605 (node->side_effects ? "" : "no "));
1607 if (node->need_stat || node->need_type)
1609 int comma = 0;
1611 fprintf (fp, "Needs ");
1612 if (node->need_stat)
1614 fprintf (fp, "stat");
1615 comma = 1;
1617 if (node->need_type)
1619 fprintf (fp, "%stype", comma ? "," : "");
1622 fprintf (fp, "\n");
1625 for (i = 0; i < indent; i++)
1626 fprintf (fp, " ");
1627 if (NULL == node->pred_left && NULL == node->pred_right)
1629 fprintf (fp, "no children.\n");
1631 else
1633 if (node->pred_left)
1635 fprintf (fp, "left:\n");
1636 print_tree (fp, node->pred_left, indent + 1);
1638 else
1640 fprintf (fp, "no left.\n");
1643 for (i = 0; i < indent; i++)
1644 fprintf (fp, " ");
1645 if (node->pred_right)
1647 fprintf (fp, "right:\n");
1648 print_tree (fp, node->pred_right, indent + 1);
1650 else
1652 fprintf (fp, "no right.\n");