Fix set_append_rel_pathlist() to deal intelligently with cases where
[PostgreSQL.git] / src / backend / optimizer / path / allpaths.c
blobb3759021e347a15db3c0d00794651c8eabea4a04
1 /*-------------------------------------------------------------------------
3 * allpaths.c
4 * Routines to find possible search paths for processing a query
6 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
10 * IDENTIFICATION
11 * $PostgreSQL$
13 *-------------------------------------------------------------------------
16 #include "postgres.h"
18 #include <math.h>
20 #include "nodes/nodeFuncs.h"
21 #ifdef OPTIMIZER_DEBUG
22 #include "nodes/print.h"
23 #endif
24 #include "optimizer/clauses.h"
25 #include "optimizer/cost.h"
26 #include "optimizer/geqo.h"
27 #include "optimizer/pathnode.h"
28 #include "optimizer/paths.h"
29 #include "optimizer/plancat.h"
30 #include "optimizer/planner.h"
31 #include "optimizer/prep.h"
32 #include "optimizer/restrictinfo.h"
33 #include "optimizer/var.h"
34 #include "parser/parse_clause.h"
35 #include "parser/parsetree.h"
36 #include "rewrite/rewriteManip.h"
39 /* These parameters are set by GUC */
40 bool enable_geqo = false; /* just in case GUC doesn't set it */
41 int geqo_threshold;
43 /* Hook for plugins to replace standard_join_search() */
44 join_search_hook_type join_search_hook = NULL;
47 static void set_base_rel_pathlists(PlannerInfo *root);
48 static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
49 Index rti, RangeTblEntry *rte);
50 static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
51 RangeTblEntry *rte);
52 static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
53 Index rti, RangeTblEntry *rte);
54 static void set_dummy_rel_pathlist(RelOptInfo *rel);
55 static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
56 Index rti, RangeTblEntry *rte);
57 static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel,
58 RangeTblEntry *rte);
59 static void set_values_pathlist(PlannerInfo *root, RelOptInfo *rel,
60 RangeTblEntry *rte);
61 static void set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel,
62 RangeTblEntry *rte);
63 static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel,
64 RangeTblEntry *rte);
65 static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
66 static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
67 bool *differentTypes);
68 static bool recurse_pushdown_safe(Node *setOp, Query *topquery,
69 bool *differentTypes);
70 static void compare_tlist_datatypes(List *tlist, List *colTypes,
71 bool *differentTypes);
72 static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
73 bool *differentTypes);
74 static void subquery_push_qual(Query *subquery,
75 RangeTblEntry *rte, Index rti, Node *qual);
76 static void recurse_push_qual(Node *setOp, Query *topquery,
77 RangeTblEntry *rte, Index rti, Node *qual);
81 * make_one_rel
82 * Finds all possible access paths for executing a query, returning a
83 * single rel that represents the join of all base rels in the query.
85 RelOptInfo *
86 make_one_rel(PlannerInfo *root, List *joinlist)
88 RelOptInfo *rel;
91 * Generate access paths for the base rels.
93 set_base_rel_pathlists(root);
96 * Generate access paths for the entire join tree.
98 rel = make_rel_from_joinlist(root, joinlist);
101 * The result should join all and only the query's base rels.
103 #ifdef USE_ASSERT_CHECKING
105 int num_base_rels = 0;
106 Index rti;
108 for (rti = 1; rti < root->simple_rel_array_size; rti++)
110 RelOptInfo *brel = root->simple_rel_array[rti];
112 if (brel == NULL)
113 continue;
115 Assert(brel->relid == rti); /* sanity check on array */
117 /* ignore RTEs that are "other rels" */
118 if (brel->reloptkind != RELOPT_BASEREL)
119 continue;
121 Assert(bms_is_member(rti, rel->relids));
122 num_base_rels++;
125 Assert(bms_num_members(rel->relids) == num_base_rels);
127 #endif
129 return rel;
133 * set_base_rel_pathlists
134 * Finds all paths available for scanning each base-relation entry.
135 * Sequential scan and any available indices are considered.
136 * Each useful path is attached to its relation's 'pathlist' field.
138 static void
139 set_base_rel_pathlists(PlannerInfo *root)
141 Index rti;
143 for (rti = 1; rti < root->simple_rel_array_size; rti++)
145 RelOptInfo *rel = root->simple_rel_array[rti];
147 /* there may be empty slots corresponding to non-baserel RTEs */
148 if (rel == NULL)
149 continue;
151 Assert(rel->relid == rti); /* sanity check on array */
153 /* ignore RTEs that are "other rels" */
154 if (rel->reloptkind != RELOPT_BASEREL)
155 continue;
157 set_rel_pathlist(root, rel, rti, root->simple_rte_array[rti]);
162 * set_rel_pathlist
163 * Build access paths for a base relation
165 static void
166 set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
167 Index rti, RangeTblEntry *rte)
169 if (rte->inh)
171 /* It's an "append relation", process accordingly */
172 set_append_rel_pathlist(root, rel, rti, rte);
174 else if (rel->rtekind == RTE_SUBQUERY)
176 /* Subquery --- generate a separate plan for it */
177 set_subquery_pathlist(root, rel, rti, rte);
179 else if (rel->rtekind == RTE_FUNCTION)
181 /* RangeFunction --- generate a suitable path for it */
182 set_function_pathlist(root, rel, rte);
184 else if (rel->rtekind == RTE_VALUES)
186 /* Values list --- generate a suitable path for it */
187 set_values_pathlist(root, rel, rte);
189 else if (rel->rtekind == RTE_CTE)
191 /* CTE reference --- generate a suitable path for it */
192 if (rte->self_reference)
193 set_worktable_pathlist(root, rel, rte);
194 else
195 set_cte_pathlist(root, rel, rte);
197 else
199 /* Plain relation */
200 Assert(rel->rtekind == RTE_RELATION);
201 set_plain_rel_pathlist(root, rel, rte);
204 #ifdef OPTIMIZER_DEBUG
205 debug_print_rel(root, rel);
206 #endif
210 * set_plain_rel_pathlist
211 * Build access paths for a plain relation (no subquery, no inheritance)
213 static void
214 set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
217 * If we can prove we don't need to scan the rel via constraint exclusion,
218 * set up a single dummy path for it. We only need to check for regular
219 * baserels; if it's an otherrel, CE was already checked in
220 * set_append_rel_pathlist().
222 if (rel->reloptkind == RELOPT_BASEREL &&
223 relation_excluded_by_constraints(root, rel, rte))
225 set_dummy_rel_pathlist(rel);
226 return;
230 * Test any partial indexes of rel for applicability. We must do this
231 * first since partial unique indexes can affect size estimates.
233 check_partial_indexes(root, rel);
235 /* Mark rel with estimated output rows, width, etc */
236 set_baserel_size_estimates(root, rel);
239 * Check to see if we can extract any restriction conditions from join
240 * quals that are OR-of-AND structures. If so, add them to the rel's
241 * restriction list, and redo the above steps.
243 if (create_or_index_quals(root, rel))
245 check_partial_indexes(root, rel);
246 set_baserel_size_estimates(root, rel);
250 * Generate paths and add them to the rel's pathlist.
252 * Note: add_path() will discard any paths that are dominated by another
253 * available path, keeping only those paths that are superior along at
254 * least one dimension of cost or sortedness.
257 /* Consider sequential scan */
258 add_path(rel, create_seqscan_path(root, rel));
260 /* Consider index scans */
261 create_index_paths(root, rel);
263 /* Consider TID scans */
264 create_tidscan_paths(root, rel);
266 /* Now find the cheapest of the paths for this rel */
267 set_cheapest(rel);
271 * set_append_rel_pathlist
272 * Build access paths for an "append relation"
274 * The passed-in rel and RTE represent the entire append relation. The
275 * relation's contents are computed by appending together the output of
276 * the individual member relations. Note that in the inheritance case,
277 * the first member relation is actually the same table as is mentioned in
278 * the parent RTE ... but it has a different RTE and RelOptInfo. This is
279 * a good thing because their outputs are not the same size.
281 static void
282 set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
283 Index rti, RangeTblEntry *rte)
285 int parentRTindex = rti;
286 List *subpaths = NIL;
287 double parent_rows;
288 double parent_size;
289 double *parent_attrsizes;
290 int nattrs;
291 ListCell *l;
294 * Initialize to compute size estimates for whole append relation.
296 * We handle width estimates by weighting the widths of different child
297 * rels proportionally to their number of rows. This is sensible because
298 * the use of width estimates is mainly to compute the total relation
299 * "footprint" if we have to sort or hash it. To do this, we sum the
300 * total equivalent size (in "double" arithmetic) and then divide by the
301 * total rowcount estimate. This is done separately for the total rel
302 * width and each attribute.
304 * Note: if you consider changing this logic, beware that child rels could
305 * have zero rows and/or width, if they were excluded by constraints.
307 parent_rows = 0;
308 parent_size = 0;
309 nattrs = rel->max_attr - rel->min_attr + 1;
310 parent_attrsizes = (double *) palloc0(nattrs * sizeof(double));
313 * Generate access paths for each member relation, and pick the cheapest
314 * path for each one.
316 foreach(l, root->append_rel_list)
318 AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
319 int childRTindex;
320 RangeTblEntry *childRTE;
321 RelOptInfo *childrel;
322 List *childquals;
323 Node *childqual;
324 Path *childpath;
325 ListCell *parentvars;
326 ListCell *childvars;
328 /* append_rel_list contains all append rels; ignore others */
329 if (appinfo->parent_relid != parentRTindex)
330 continue;
332 childRTindex = appinfo->child_relid;
333 childRTE = root->simple_rte_array[childRTindex];
336 * The child rel's RelOptInfo was already created during
337 * add_base_rels_to_query.
339 childrel = find_base_rel(root, childRTindex);
340 Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);
343 * We have to copy the parent's targetlist and quals to the child,
344 * with appropriate substitution of variables. However, only the
345 * baserestrictinfo quals are needed before we can check for
346 * constraint exclusion; so do that first and then check to see if we
347 * can disregard this child.
349 * As of 8.4, the child rel's targetlist might contain non-Var
350 * expressions, which means that substitution into the quals
351 * could produce opportunities for const-simplification, and perhaps
352 * even pseudoconstant quals. To deal with this, we strip the
353 * RestrictInfo nodes, do the substitution, do const-simplification,
354 * and then reconstitute the RestrictInfo layer.
356 childquals = get_all_actual_clauses(rel->baserestrictinfo);
357 childquals = (List *) adjust_appendrel_attrs((Node *) childquals,
358 appinfo);
359 childqual = eval_const_expressions(root, (Node *)
360 make_ands_explicit(childquals));
361 if (childqual && IsA(childqual, Const) &&
362 (((Const *) childqual)->constisnull ||
363 !DatumGetBool(((Const *) childqual)->constvalue)))
366 * Restriction reduces to constant FALSE or constant NULL after
367 * substitution, so this child need not be scanned.
369 set_dummy_rel_pathlist(childrel);
370 continue;
372 childquals = make_ands_implicit((Expr *) childqual);
373 childquals = make_restrictinfos_from_actual_clauses(root,
374 childquals);
375 childrel->baserestrictinfo = childquals;
377 if (relation_excluded_by_constraints(root, childrel, childRTE))
380 * This child need not be scanned, so we can omit it from the
381 * appendrel. Mark it with a dummy cheapest-path though, in case
382 * best_appendrel_indexscan() looks at it later.
384 set_dummy_rel_pathlist(childrel);
385 continue;
388 /* CE failed, so finish copying targetlist and join quals */
389 childrel->joininfo = (List *)
390 adjust_appendrel_attrs((Node *) rel->joininfo,
391 appinfo);
392 childrel->reltargetlist = (List *)
393 adjust_appendrel_attrs((Node *) rel->reltargetlist,
394 appinfo);
397 * We have to make child entries in the EquivalenceClass data
398 * structures as well.
400 if (rel->has_eclass_joins)
402 add_child_rel_equivalences(root, appinfo, rel, childrel);
403 childrel->has_eclass_joins = true;
407 * Note: we could compute appropriate attr_needed data for the child's
408 * variables, by transforming the parent's attr_needed through the
409 * translated_vars mapping. However, currently there's no need
410 * because attr_needed is only examined for base relations not
411 * otherrels. So we just leave the child's attr_needed empty.
415 * Compute the child's access paths, and add the cheapest one to the
416 * Append path we are constructing for the parent.
418 * It's possible that the child is itself an appendrel, in which case
419 * we can "cut out the middleman" and just add its child paths to our
420 * own list. (We don't try to do this earlier because we need to
421 * apply both levels of transformation to the quals.)
423 set_rel_pathlist(root, childrel, childRTindex, childRTE);
425 childpath = childrel->cheapest_total_path;
426 if (IsA(childpath, AppendPath))
427 subpaths = list_concat(subpaths,
428 ((AppendPath *) childpath)->subpaths);
429 else
430 subpaths = lappend(subpaths, childpath);
433 * Accumulate size information from each child.
435 if (childrel->rows > 0)
437 parent_rows += childrel->rows;
438 parent_size += childrel->width * childrel->rows;
440 forboth(parentvars, rel->reltargetlist,
441 childvars, childrel->reltargetlist)
443 Var *parentvar = (Var *) lfirst(parentvars);
444 Var *childvar = (Var *) lfirst(childvars);
447 * Accumulate per-column estimates too. Whole-row Vars and
448 * PlaceHolderVars can be ignored here.
450 if (IsA(parentvar, Var) &&
451 IsA(childvar, Var))
453 int pndx = parentvar->varattno - rel->min_attr;
454 int cndx = childvar->varattno - childrel->min_attr;
456 parent_attrsizes[pndx] += childrel->attr_widths[cndx] * childrel->rows;
463 * Save the finished size estimates.
465 rel->rows = parent_rows;
466 if (parent_rows > 0)
468 int i;
470 rel->width = rint(parent_size / parent_rows);
471 for (i = 0; i < nattrs; i++)
472 rel->attr_widths[i] = rint(parent_attrsizes[i] / parent_rows);
474 else
475 rel->width = 0; /* attr_widths should be zero already */
478 * Set "raw tuples" count equal to "rows" for the appendrel; needed
479 * because some places assume rel->tuples is valid for any baserel.
481 rel->tuples = parent_rows;
483 pfree(parent_attrsizes);
486 * Finally, build Append path and install it as the only access path for
487 * the parent rel. (Note: this is correct even if we have zero or one
488 * live subpath due to constraint exclusion.)
490 add_path(rel, (Path *) create_append_path(rel, subpaths));
492 /* Select cheapest path (pretty easy in this case...) */
493 set_cheapest(rel);
497 * set_dummy_rel_pathlist
498 * Build a dummy path for a relation that's been excluded by constraints
500 * Rather than inventing a special "dummy" path type, we represent this as an
501 * AppendPath with no members (see also IS_DUMMY_PATH macro).
503 static void
504 set_dummy_rel_pathlist(RelOptInfo *rel)
506 /* Set dummy size estimates --- we leave attr_widths[] as zeroes */
507 rel->rows = 0;
508 rel->width = 0;
510 add_path(rel, (Path *) create_append_path(rel, NIL));
512 /* Select cheapest path (pretty easy in this case...) */
513 set_cheapest(rel);
516 /* quick-and-dirty test to see if any joining is needed */
517 static bool
518 has_multiple_baserels(PlannerInfo *root)
520 int num_base_rels = 0;
521 Index rti;
523 for (rti = 1; rti < root->simple_rel_array_size; rti++)
525 RelOptInfo *brel = root->simple_rel_array[rti];
527 if (brel == NULL)
528 continue;
530 /* ignore RTEs that are "other rels" */
531 if (brel->reloptkind == RELOPT_BASEREL)
532 if (++num_base_rels > 1)
533 return true;
535 return false;
539 * set_subquery_pathlist
540 * Build the (single) access path for a subquery RTE
542 static void
543 set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
544 Index rti, RangeTblEntry *rte)
546 Query *parse = root->parse;
547 Query *subquery = rte->subquery;
548 bool *differentTypes;
549 double tuple_fraction;
550 PlannerInfo *subroot;
551 List *pathkeys;
554 * Must copy the Query so that planning doesn't mess up the RTE contents
555 * (really really need to fix the planner to not scribble on its input,
556 * someday).
558 subquery = copyObject(subquery);
560 /* We need a workspace for keeping track of set-op type coercions */
561 differentTypes = (bool *)
562 palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
565 * If there are any restriction clauses that have been attached to the
566 * subquery relation, consider pushing them down to become WHERE or HAVING
567 * quals of the subquery itself. This transformation is useful because it
568 * may allow us to generate a better plan for the subquery than evaluating
569 * all the subquery output rows and then filtering them.
571 * There are several cases where we cannot push down clauses. Restrictions
572 * involving the subquery are checked by subquery_is_pushdown_safe().
573 * Restrictions on individual clauses are checked by
574 * qual_is_pushdown_safe(). Also, we don't want to push down
575 * pseudoconstant clauses; better to have the gating node above the
576 * subquery.
578 * Non-pushed-down clauses will get evaluated as qpquals of the
579 * SubqueryScan node.
581 * XXX Are there any cases where we want to make a policy decision not to
582 * push down a pushable qual, because it'd result in a worse plan?
584 if (rel->baserestrictinfo != NIL &&
585 subquery_is_pushdown_safe(subquery, subquery, differentTypes))
587 /* OK to consider pushing down individual quals */
588 List *upperrestrictlist = NIL;
589 ListCell *l;
591 foreach(l, rel->baserestrictinfo)
593 RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
594 Node *clause = (Node *) rinfo->clause;
596 if (!rinfo->pseudoconstant &&
597 qual_is_pushdown_safe(subquery, rti, clause, differentTypes))
599 /* Push it down */
600 subquery_push_qual(subquery, rte, rti, clause);
602 else
604 /* Keep it in the upper query */
605 upperrestrictlist = lappend(upperrestrictlist, rinfo);
608 rel->baserestrictinfo = upperrestrictlist;
611 pfree(differentTypes);
614 * We can safely pass the outer tuple_fraction down to the subquery if the
615 * outer level has no joining, aggregation, or sorting to do. Otherwise
616 * we'd better tell the subquery to plan for full retrieval. (XXX This
617 * could probably be made more intelligent ...)
619 if (parse->hasAggs ||
620 parse->groupClause ||
621 parse->havingQual ||
622 parse->distinctClause ||
623 parse->sortClause ||
624 has_multiple_baserels(root))
625 tuple_fraction = 0.0; /* default case */
626 else
627 tuple_fraction = root->tuple_fraction;
629 /* Generate the plan for the subquery */
630 rel->subplan = subquery_planner(root->glob, subquery,
631 root,
632 false, tuple_fraction,
633 &subroot);
634 rel->subrtable = subroot->parse->rtable;
636 /* Copy number of output rows from subplan */
637 rel->tuples = rel->subplan->plan_rows;
639 /* Mark rel with estimated output rows, width, etc */
640 set_baserel_size_estimates(root, rel);
642 /* Convert subquery pathkeys to outer representation */
643 pathkeys = convert_subquery_pathkeys(root, rel, subroot->query_pathkeys);
645 /* Generate appropriate path */
646 add_path(rel, create_subqueryscan_path(rel, pathkeys));
648 /* Select cheapest path (pretty easy in this case...) */
649 set_cheapest(rel);
653 * set_function_pathlist
654 * Build the (single) access path for a function RTE
656 static void
657 set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
659 /* Mark rel with estimated output rows, width, etc */
660 set_function_size_estimates(root, rel);
662 /* Generate appropriate path */
663 add_path(rel, create_functionscan_path(root, rel));
665 /* Select cheapest path (pretty easy in this case...) */
666 set_cheapest(rel);
670 * set_values_pathlist
671 * Build the (single) access path for a VALUES RTE
673 static void
674 set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
676 /* Mark rel with estimated output rows, width, etc */
677 set_values_size_estimates(root, rel);
679 /* Generate appropriate path */
680 add_path(rel, create_valuesscan_path(root, rel));
682 /* Select cheapest path (pretty easy in this case...) */
683 set_cheapest(rel);
687 * set_cte_pathlist
688 * Build the (single) access path for a non-self-reference CTE RTE
690 static void
691 set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
693 Plan *cteplan;
694 PlannerInfo *cteroot;
695 Index levelsup;
696 int ndx;
697 ListCell *lc;
698 int plan_id;
701 * Find the referenced CTE, and locate the plan previously made for it.
703 levelsup = rte->ctelevelsup;
704 cteroot = root;
705 while (levelsup-- > 0)
707 cteroot = cteroot->parent_root;
708 if (!cteroot) /* shouldn't happen */
709 elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
713 * Note: cte_plan_ids can be shorter than cteList, if we are still working
714 * on planning the CTEs (ie, this is a side-reference from another CTE).
715 * So we mustn't use forboth here.
717 ndx = 0;
718 foreach(lc, cteroot->parse->cteList)
720 CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc);
722 if (strcmp(cte->ctename, rte->ctename) == 0)
723 break;
724 ndx++;
726 if (lc == NULL) /* shouldn't happen */
727 elog(ERROR, "could not find CTE \"%s\"", rte->ctename);
728 if (ndx >= list_length(cteroot->cte_plan_ids))
729 elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename);
730 plan_id = list_nth_int(cteroot->cte_plan_ids, ndx);
731 Assert(plan_id > 0);
732 cteplan = (Plan *) list_nth(root->glob->subplans, plan_id - 1);
734 /* Mark rel with estimated output rows, width, etc */
735 set_cte_size_estimates(root, rel, cteplan);
737 /* Generate appropriate path */
738 add_path(rel, create_ctescan_path(root, rel));
740 /* Select cheapest path (pretty easy in this case...) */
741 set_cheapest(rel);
745 * set_worktable_pathlist
746 * Build the (single) access path for a self-reference CTE RTE
748 static void
749 set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
751 Plan *cteplan;
752 PlannerInfo *cteroot;
753 Index levelsup;
756 * We need to find the non-recursive term's plan, which is in the plan
757 * level that's processing the recursive UNION, which is one level *below*
758 * where the CTE comes from.
760 levelsup = rte->ctelevelsup;
761 if (levelsup == 0) /* shouldn't happen */
762 elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
763 levelsup--;
764 cteroot = root;
765 while (levelsup-- > 0)
767 cteroot = cteroot->parent_root;
768 if (!cteroot) /* shouldn't happen */
769 elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
771 cteplan = cteroot->non_recursive_plan;
772 if (!cteplan) /* shouldn't happen */
773 elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename);
775 /* Mark rel with estimated output rows, width, etc */
776 set_cte_size_estimates(root, rel, cteplan);
778 /* Generate appropriate path */
779 add_path(rel, create_worktablescan_path(root, rel));
781 /* Select cheapest path (pretty easy in this case...) */
782 set_cheapest(rel);
786 * make_rel_from_joinlist
787 * Build access paths using a "joinlist" to guide the join path search.
789 * See comments for deconstruct_jointree() for definition of the joinlist
790 * data structure.
792 static RelOptInfo *
793 make_rel_from_joinlist(PlannerInfo *root, List *joinlist)
795 int levels_needed;
796 List *initial_rels;
797 ListCell *jl;
800 * Count the number of child joinlist nodes. This is the depth of the
801 * dynamic-programming algorithm we must employ to consider all ways of
802 * joining the child nodes.
804 levels_needed = list_length(joinlist);
806 if (levels_needed <= 0)
807 return NULL; /* nothing to do? */
810 * Construct a list of rels corresponding to the child joinlist nodes.
811 * This may contain both base rels and rels constructed according to
812 * sub-joinlists.
814 initial_rels = NIL;
815 foreach(jl, joinlist)
817 Node *jlnode = (Node *) lfirst(jl);
818 RelOptInfo *thisrel;
820 if (IsA(jlnode, RangeTblRef))
822 int varno = ((RangeTblRef *) jlnode)->rtindex;
824 thisrel = find_base_rel(root, varno);
826 else if (IsA(jlnode, List))
828 /* Recurse to handle subproblem */
829 thisrel = make_rel_from_joinlist(root, (List *) jlnode);
831 else
833 elog(ERROR, "unrecognized joinlist node type: %d",
834 (int) nodeTag(jlnode));
835 thisrel = NULL; /* keep compiler quiet */
838 initial_rels = lappend(initial_rels, thisrel);
841 if (levels_needed == 1)
844 * Single joinlist node, so we're done.
846 return (RelOptInfo *) linitial(initial_rels);
848 else
851 * Consider the different orders in which we could join the rels,
852 * using a plugin, GEQO, or the regular join search code.
854 * We put the initial_rels list into a PlannerInfo field because
855 * has_legal_joinclause() needs to look at it (ugly :-().
857 root->initial_rels = initial_rels;
859 if (join_search_hook)
860 return (*join_search_hook) (root, levels_needed, initial_rels);
861 else if (enable_geqo && levels_needed >= geqo_threshold)
862 return geqo(root, levels_needed, initial_rels);
863 else
864 return standard_join_search(root, levels_needed, initial_rels);
869 * standard_join_search
870 * Find possible joinpaths for a query by successively finding ways
871 * to join component relations into join relations.
873 * 'levels_needed' is the number of iterations needed, ie, the number of
874 * independent jointree items in the query. This is > 1.
876 * 'initial_rels' is a list of RelOptInfo nodes for each independent
877 * jointree item. These are the components to be joined together.
878 * Note that levels_needed == list_length(initial_rels).
880 * Returns the final level of join relations, i.e., the relation that is
881 * the result of joining all the original relations together.
882 * At least one implementation path must be provided for this relation and
883 * all required sub-relations.
885 * To support loadable plugins that modify planner behavior by changing the
886 * join searching algorithm, we provide a hook variable that lets a plugin
887 * replace or supplement this function. Any such hook must return the same
888 * final join relation as the standard code would, but it might have a
889 * different set of implementation paths attached, and only the sub-joinrels
890 * needed for these paths need have been instantiated.
892 * Note to plugin authors: the functions invoked during standard_join_search()
893 * modify root->join_rel_list and root->join_rel_hash. If you want to do more
894 * than one join-order search, you'll probably need to save and restore the
895 * original states of those data structures. See geqo_eval() for an example.
897 RelOptInfo *
898 standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
900 List **joinitems;
901 int lev;
902 RelOptInfo *rel;
905 * We employ a simple "dynamic programming" algorithm: we first find all
906 * ways to build joins of two jointree items, then all ways to build joins
907 * of three items (from two-item joins and single items), then four-item
908 * joins, and so on until we have considered all ways to join all the
909 * items into one rel.
911 * joinitems[j] is a list of all the j-item rels. Initially we set
912 * joinitems[1] to represent all the single-jointree-item relations.
914 joinitems = (List **) palloc0((levels_needed + 1) * sizeof(List *));
916 joinitems[1] = initial_rels;
918 for (lev = 2; lev <= levels_needed; lev++)
920 ListCell *x;
923 * Determine all possible pairs of relations to be joined at this
924 * level, and build paths for making each one from every available
925 * pair of lower-level relations.
927 joinitems[lev] = join_search_one_level(root, lev, joinitems);
930 * Do cleanup work on each just-processed rel.
932 foreach(x, joinitems[lev])
934 rel = (RelOptInfo *) lfirst(x);
936 /* Find and save the cheapest paths for this rel */
937 set_cheapest(rel);
939 #ifdef OPTIMIZER_DEBUG
940 debug_print_rel(root, rel);
941 #endif
946 * We should have a single rel at the final level.
948 if (joinitems[levels_needed] == NIL)
949 elog(ERROR, "failed to build any %d-way joins", levels_needed);
950 Assert(list_length(joinitems[levels_needed]) == 1);
952 rel = (RelOptInfo *) linitial(joinitems[levels_needed]);
954 return rel;
957 /*****************************************************************************
958 * PUSHING QUALS DOWN INTO SUBQUERIES
959 *****************************************************************************/
962 * subquery_is_pushdown_safe - is a subquery safe for pushing down quals?
964 * subquery is the particular component query being checked. topquery
965 * is the top component of a set-operations tree (the same Query if no
966 * set-op is involved).
968 * Conditions checked here:
970 * 1. If the subquery has a LIMIT clause, we must not push down any quals,
971 * since that could change the set of rows returned.
973 * 2. If the subquery contains any window functions, we can't push quals
974 * into it, because that would change the results.
976 * 3. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
977 * quals into it, because that would change the results.
979 * 4. For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
980 * push quals into each component query, but the quals can only reference
981 * subquery columns that suffer no type coercions in the set operation.
982 * Otherwise there are possible semantic gotchas. So, we check the
983 * component queries to see if any of them have different output types;
984 * differentTypes[k] is set true if column k has different type in any
985 * component.
987 static bool
988 subquery_is_pushdown_safe(Query *subquery, Query *topquery,
989 bool *differentTypes)
991 SetOperationStmt *topop;
993 /* Check point 1 */
994 if (subquery->limitOffset != NULL || subquery->limitCount != NULL)
995 return false;
997 /* Check point 2 */
998 if (subquery->hasWindowFuncs)
999 return false;
1001 /* Are we at top level, or looking at a setop component? */
1002 if (subquery == topquery)
1004 /* Top level, so check any component queries */
1005 if (subquery->setOperations != NULL)
1006 if (!recurse_pushdown_safe(subquery->setOperations, topquery,
1007 differentTypes))
1008 return false;
1010 else
1012 /* Setop component must not have more components (too weird) */
1013 if (subquery->setOperations != NULL)
1014 return false;
1015 /* Check whether setop component output types match top level */
1016 topop = (SetOperationStmt *) topquery->setOperations;
1017 Assert(topop && IsA(topop, SetOperationStmt));
1018 compare_tlist_datatypes(subquery->targetList,
1019 topop->colTypes,
1020 differentTypes);
1022 return true;
1026 * Helper routine to recurse through setOperations tree
1028 static bool
1029 recurse_pushdown_safe(Node *setOp, Query *topquery,
1030 bool *differentTypes)
1032 if (IsA(setOp, RangeTblRef))
1034 RangeTblRef *rtr = (RangeTblRef *) setOp;
1035 RangeTblEntry *rte = rt_fetch(rtr->rtindex, topquery->rtable);
1036 Query *subquery = rte->subquery;
1038 Assert(subquery != NULL);
1039 return subquery_is_pushdown_safe(subquery, topquery, differentTypes);
1041 else if (IsA(setOp, SetOperationStmt))
1043 SetOperationStmt *op = (SetOperationStmt *) setOp;
1045 /* EXCEPT is no good */
1046 if (op->op == SETOP_EXCEPT)
1047 return false;
1048 /* Else recurse */
1049 if (!recurse_pushdown_safe(op->larg, topquery, differentTypes))
1050 return false;
1051 if (!recurse_pushdown_safe(op->rarg, topquery, differentTypes))
1052 return false;
1054 else
1056 elog(ERROR, "unrecognized node type: %d",
1057 (int) nodeTag(setOp));
1059 return true;
1063 * Compare tlist's datatypes against the list of set-operation result types.
1064 * For any items that are different, mark the appropriate element of
1065 * differentTypes[] to show that this column will have type conversions.
1067 * We don't have to care about typmods here: the only allowed difference
1068 * between set-op input and output typmods is input is a specific typmod
1069 * and output is -1, and that does not require a coercion.
1071 static void
1072 compare_tlist_datatypes(List *tlist, List *colTypes,
1073 bool *differentTypes)
1075 ListCell *l;
1076 ListCell *colType = list_head(colTypes);
1078 foreach(l, tlist)
1080 TargetEntry *tle = (TargetEntry *) lfirst(l);
1082 if (tle->resjunk)
1083 continue; /* ignore resjunk columns */
1084 if (colType == NULL)
1085 elog(ERROR, "wrong number of tlist entries");
1086 if (exprType((Node *) tle->expr) != lfirst_oid(colType))
1087 differentTypes[tle->resno] = true;
1088 colType = lnext(colType);
1090 if (colType != NULL)
1091 elog(ERROR, "wrong number of tlist entries");
1095 * qual_is_pushdown_safe - is a particular qual safe to push down?
1097 * qual is a restriction clause applying to the given subquery (whose RTE
1098 * has index rti in the parent query).
1100 * Conditions checked here:
1102 * 1. The qual must not contain any subselects (mainly because I'm not sure
1103 * it will work correctly: sublinks will already have been transformed into
1104 * subplans in the qual, but not in the subquery).
1106 * 2. The qual must not refer to the whole-row output of the subquery
1107 * (since there is no easy way to name that within the subquery itself).
1109 * 3. The qual must not refer to any subquery output columns that were
1110 * found to have inconsistent types across a set operation tree by
1111 * subquery_is_pushdown_safe().
1113 * 4. If the subquery uses DISTINCT ON, we must not push down any quals that
1114 * refer to non-DISTINCT output columns, because that could change the set
1115 * of rows returned. (This condition is vacuous for DISTINCT, because then
1116 * there are no non-DISTINCT output columns, so we needn't check. But note
1117 * we are assuming that the qual can't distinguish values that the DISTINCT
1118 * operator sees as equal. This is a bit shaky but we have no way to test
1119 * for the case, and it's unlikely enough that we shouldn't refuse the
1120 * optimization just because it could theoretically happen.)
1122 * 5. We must not push down any quals that refer to subselect outputs that
1123 * return sets, else we'd introduce functions-returning-sets into the
1124 * subquery's WHERE/HAVING quals.
1126 * 6. We must not push down any quals that refer to subselect outputs that
1127 * contain volatile functions, for fear of introducing strange results due
1128 * to multiple evaluation of a volatile function.
1130 static bool
1131 qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
1132 bool *differentTypes)
1134 bool safe = true;
1135 List *vars;
1136 ListCell *vl;
1137 Bitmapset *tested = NULL;
1139 /* Refuse subselects (point 1) */
1140 if (contain_subplans(qual))
1141 return false;
1144 * It would be unsafe to push down window function calls, but at least for
1145 * the moment we could never see any in a qual anyhow.
1147 Assert(!contain_window_function(qual));
1150 * Examine all Vars used in clause; since it's a restriction clause, all
1151 * such Vars must refer to subselect output columns.
1153 vars = pull_var_clause(qual, PVC_INCLUDE_PLACEHOLDERS);
1154 foreach(vl, vars)
1156 Var *var = (Var *) lfirst(vl);
1157 TargetEntry *tle;
1160 * XXX Punt if we find any PlaceHolderVars in the restriction clause.
1161 * It's not clear whether a PHV could safely be pushed down, and even
1162 * less clear whether such a situation could arise in any cases of
1163 * practical interest anyway. So for the moment, just refuse to push
1164 * down.
1166 if (!IsA(var, Var))
1168 safe = false;
1169 break;
1172 Assert(var->varno == rti);
1174 /* Check point 2 */
1175 if (var->varattno == 0)
1177 safe = false;
1178 break;
1182 * We use a bitmapset to avoid testing the same attno more than once.
1183 * (NB: this only works because subquery outputs can't have negative
1184 * attnos.)
1186 if (bms_is_member(var->varattno, tested))
1187 continue;
1188 tested = bms_add_member(tested, var->varattno);
1190 /* Check point 3 */
1191 if (differentTypes[var->varattno])
1193 safe = false;
1194 break;
1197 /* Must find the tlist element referenced by the Var */
1198 tle = get_tle_by_resno(subquery->targetList, var->varattno);
1199 Assert(tle != NULL);
1200 Assert(!tle->resjunk);
1202 /* If subquery uses DISTINCT ON, check point 4 */
1203 if (subquery->hasDistinctOn &&
1204 !targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
1206 /* non-DISTINCT column, so fail */
1207 safe = false;
1208 break;
1211 /* Refuse functions returning sets (point 5) */
1212 if (expression_returns_set((Node *) tle->expr))
1214 safe = false;
1215 break;
1218 /* Refuse volatile functions (point 6) */
1219 if (contain_volatile_functions((Node *) tle->expr))
1221 safe = false;
1222 break;
1226 list_free(vars);
1227 bms_free(tested);
1229 return safe;
1233 * subquery_push_qual - push down a qual that we have determined is safe
1235 static void
1236 subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual)
1238 if (subquery->setOperations != NULL)
1240 /* Recurse to push it separately to each component query */
1241 recurse_push_qual(subquery->setOperations, subquery,
1242 rte, rti, qual);
1244 else
1247 * We need to replace Vars in the qual (which must refer to outputs of
1248 * the subquery) with copies of the subquery's targetlist expressions.
1249 * Note that at this point, any uplevel Vars in the qual should have
1250 * been replaced with Params, so they need no work.
1252 * This step also ensures that when we are pushing into a setop tree,
1253 * each component query gets its own copy of the qual.
1255 qual = ResolveNew(qual, rti, 0, rte,
1256 subquery->targetList,
1257 CMD_SELECT, 0);
1260 * Now attach the qual to the proper place: normally WHERE, but if the
1261 * subquery uses grouping or aggregation, put it in HAVING (since the
1262 * qual really refers to the group-result rows).
1264 if (subquery->hasAggs || subquery->groupClause || subquery->havingQual)
1265 subquery->havingQual = make_and_qual(subquery->havingQual, qual);
1266 else
1267 subquery->jointree->quals =
1268 make_and_qual(subquery->jointree->quals, qual);
1271 * We need not change the subquery's hasAggs or hasSublinks flags,
1272 * since we can't be pushing down any aggregates that weren't there
1273 * before, and we don't push down subselects at all.
1279 * Helper routine to recurse through setOperations tree
1281 static void
1282 recurse_push_qual(Node *setOp, Query *topquery,
1283 RangeTblEntry *rte, Index rti, Node *qual)
1285 if (IsA(setOp, RangeTblRef))
1287 RangeTblRef *rtr = (RangeTblRef *) setOp;
1288 RangeTblEntry *subrte = rt_fetch(rtr->rtindex, topquery->rtable);
1289 Query *subquery = subrte->subquery;
1291 Assert(subquery != NULL);
1292 subquery_push_qual(subquery, rte, rti, qual);
1294 else if (IsA(setOp, SetOperationStmt))
1296 SetOperationStmt *op = (SetOperationStmt *) setOp;
1298 recurse_push_qual(op->larg, topquery, rte, rti, qual);
1299 recurse_push_qual(op->rarg, topquery, rte, rti, qual);
1301 else
1303 elog(ERROR, "unrecognized node type: %d",
1304 (int) nodeTag(setOp));
1308 /*****************************************************************************
1309 * DEBUG SUPPORT
1310 *****************************************************************************/
1312 #ifdef OPTIMIZER_DEBUG
1314 static void
1315 print_relids(Relids relids)
1317 Relids tmprelids;
1318 int x;
1319 bool first = true;
1321 tmprelids = bms_copy(relids);
1322 while ((x = bms_first_member(tmprelids)) >= 0)
1324 if (!first)
1325 printf(" ");
1326 printf("%d", x);
1327 first = false;
1329 bms_free(tmprelids);
1332 static void
1333 print_restrictclauses(PlannerInfo *root, List *clauses)
1335 ListCell *l;
1337 foreach(l, clauses)
1339 RestrictInfo *c = lfirst(l);
1341 print_expr((Node *) c->clause, root->parse->rtable);
1342 if (lnext(l))
1343 printf(", ");
1347 static void
1348 print_path(PlannerInfo *root, Path *path, int indent)
1350 const char *ptype;
1351 bool join = false;
1352 Path *subpath = NULL;
1353 int i;
1355 switch (nodeTag(path))
1357 case T_Path:
1358 ptype = "SeqScan";
1359 break;
1360 case T_IndexPath:
1361 ptype = "IdxScan";
1362 break;
1363 case T_BitmapHeapPath:
1364 ptype = "BitmapHeapScan";
1365 break;
1366 case T_BitmapAndPath:
1367 ptype = "BitmapAndPath";
1368 break;
1369 case T_BitmapOrPath:
1370 ptype = "BitmapOrPath";
1371 break;
1372 case T_TidPath:
1373 ptype = "TidScan";
1374 break;
1375 case T_AppendPath:
1376 ptype = "Append";
1377 break;
1378 case T_ResultPath:
1379 ptype = "Result";
1380 break;
1381 case T_MaterialPath:
1382 ptype = "Material";
1383 subpath = ((MaterialPath *) path)->subpath;
1384 break;
1385 case T_UniquePath:
1386 ptype = "Unique";
1387 subpath = ((UniquePath *) path)->subpath;
1388 break;
1389 case T_NestPath:
1390 ptype = "NestLoop";
1391 join = true;
1392 break;
1393 case T_MergePath:
1394 ptype = "MergeJoin";
1395 join = true;
1396 break;
1397 case T_HashPath:
1398 ptype = "HashJoin";
1399 join = true;
1400 break;
1401 default:
1402 ptype = "???Path";
1403 break;
1406 for (i = 0; i < indent; i++)
1407 printf("\t");
1408 printf("%s", ptype);
1410 if (path->parent)
1412 printf("(");
1413 print_relids(path->parent->relids);
1414 printf(") rows=%.0f", path->parent->rows);
1416 printf(" cost=%.2f..%.2f\n", path->startup_cost, path->total_cost);
1418 if (path->pathkeys)
1420 for (i = 0; i < indent; i++)
1421 printf("\t");
1422 printf(" pathkeys: ");
1423 print_pathkeys(path->pathkeys, root->parse->rtable);
1426 if (join)
1428 JoinPath *jp = (JoinPath *) path;
1430 for (i = 0; i < indent; i++)
1431 printf("\t");
1432 printf(" clauses: ");
1433 print_restrictclauses(root, jp->joinrestrictinfo);
1434 printf("\n");
1436 if (IsA(path, MergePath))
1438 MergePath *mp = (MergePath *) path;
1440 if (mp->outersortkeys || mp->innersortkeys)
1442 for (i = 0; i < indent; i++)
1443 printf("\t");
1444 printf(" sortouter=%d sortinner=%d\n",
1445 ((mp->outersortkeys) ? 1 : 0),
1446 ((mp->innersortkeys) ? 1 : 0));
1450 print_path(root, jp->outerjoinpath, indent + 1);
1451 print_path(root, jp->innerjoinpath, indent + 1);
1454 if (subpath)
1455 print_path(root, subpath, indent + 1);
1458 void
1459 debug_print_rel(PlannerInfo *root, RelOptInfo *rel)
1461 ListCell *l;
1463 printf("RELOPTINFO (");
1464 print_relids(rel->relids);
1465 printf("): rows=%.0f width=%d\n", rel->rows, rel->width);
1467 if (rel->baserestrictinfo)
1469 printf("\tbaserestrictinfo: ");
1470 print_restrictclauses(root, rel->baserestrictinfo);
1471 printf("\n");
1474 if (rel->joininfo)
1476 printf("\tjoininfo: ");
1477 print_restrictclauses(root, rel->joininfo);
1478 printf("\n");
1481 printf("\tpath list:\n");
1482 foreach(l, rel->pathlist)
1483 print_path(root, lfirst(l), 1);
1484 printf("\n\tcheapest startup path:\n");
1485 print_path(root, rel->cheapest_startup_path, 1);
1486 printf("\n\tcheapest total path:\n");
1487 print_path(root, rel->cheapest_total_path, 1);
1488 printf("\n");
1489 fflush(stdout);
1492 #endif /* OPTIMIZER_DEBUG */