Update copyright for 2022
[pgsql.git] / src / backend / optimizer / prep / prepqual.c
blobda01234d3e8baf199a6ab856f016fbaef3b60a6a
1 /*-------------------------------------------------------------------------
3 * prepqual.c
4 * Routines for preprocessing qualification expressions
7 * While the parser will produce flattened (N-argument) AND/OR trees from
8 * simple sequences of AND'ed or OR'ed clauses, there might be an AND clause
9 * directly underneath another AND, or OR underneath OR, if the input was
10 * oddly parenthesized. Also, rule expansion and subquery flattening could
11 * produce such parsetrees. The planner wants to flatten all such cases
12 * to ensure consistent optimization behavior.
14 * Formerly, this module was responsible for doing the initial flattening,
15 * but now we leave it to eval_const_expressions to do that since it has to
16 * make a complete pass over the expression tree anyway. Instead, we just
17 * have to ensure that our manipulations preserve AND/OR flatness.
18 * pull_ands() and pull_ors() are used to maintain flatness of the AND/OR
19 * tree after local transformations that might introduce nested AND/ORs.
22 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
23 * Portions Copyright (c) 1994, Regents of the University of California
26 * IDENTIFICATION
27 * src/backend/optimizer/prep/prepqual.c
29 *-------------------------------------------------------------------------
32 #include "postgres.h"
34 #include "nodes/makefuncs.h"
35 #include "nodes/nodeFuncs.h"
36 #include "optimizer/optimizer.h"
37 #include "optimizer/prep.h"
38 #include "utils/lsyscache.h"
41 static List *pull_ands(List *andlist);
42 static List *pull_ors(List *orlist);
43 static Expr *find_duplicate_ors(Expr *qual, bool is_check);
44 static Expr *process_duplicate_ors(List *orlist);
48 * negate_clause
49 * Negate a Boolean expression.
51 * Input is a clause to be negated (e.g., the argument of a NOT clause).
52 * Returns a new clause equivalent to the negation of the given clause.
54 * Although this can be invoked on its own, it's mainly intended as a helper
55 * for eval_const_expressions(), and that context drives several design
56 * decisions. In particular, if the input is already AND/OR flat, we must
57 * preserve that property. We also don't bother to recurse in situations
58 * where we can assume that lower-level executions of eval_const_expressions
59 * would already have simplified sub-clauses of the input.
61 * The difference between this and a simple make_notclause() is that this
62 * tries to get rid of the NOT node by logical simplification. It's clearly
63 * always a win if the NOT node can be eliminated altogether. However, our
64 * use of DeMorgan's laws could result in having more NOT nodes rather than
65 * fewer. We do that unconditionally anyway, because in WHERE clauses it's
66 * important to expose as much top-level AND/OR structure as possible.
67 * Also, eliminating an intermediate NOT may allow us to flatten two levels
68 * of AND or OR together that we couldn't have otherwise. Finally, one of
69 * the motivations for doing this is to ensure that logically equivalent
70 * expressions will be seen as physically equal(), so we should always apply
71 * the same transformations.
73 Node *
74 negate_clause(Node *node)
76 if (node == NULL) /* should not happen */
77 elog(ERROR, "can't negate an empty subexpression");
78 switch (nodeTag(node))
80 case T_Const:
82 Const *c = (Const *) node;
84 /* NOT NULL is still NULL */
85 if (c->constisnull)
86 return makeBoolConst(false, true);
87 /* otherwise pretty easy */
88 return makeBoolConst(!DatumGetBool(c->constvalue), false);
90 break;
91 case T_OpExpr:
94 * Negate operator if possible: (NOT (< A B)) => (>= A B)
96 OpExpr *opexpr = (OpExpr *) node;
97 Oid negator = get_negator(opexpr->opno);
99 if (negator)
101 OpExpr *newopexpr = makeNode(OpExpr);
103 newopexpr->opno = negator;
104 newopexpr->opfuncid = InvalidOid;
105 newopexpr->opresulttype = opexpr->opresulttype;
106 newopexpr->opretset = opexpr->opretset;
107 newopexpr->opcollid = opexpr->opcollid;
108 newopexpr->inputcollid = opexpr->inputcollid;
109 newopexpr->args = opexpr->args;
110 newopexpr->location = opexpr->location;
111 return (Node *) newopexpr;
114 break;
115 case T_ScalarArrayOpExpr:
118 * Negate a ScalarArrayOpExpr if its operator has a negator;
119 * for example x = ANY (list) becomes x <> ALL (list)
121 ScalarArrayOpExpr *saopexpr = (ScalarArrayOpExpr *) node;
122 Oid negator = get_negator(saopexpr->opno);
124 if (negator)
126 ScalarArrayOpExpr *newopexpr = makeNode(ScalarArrayOpExpr);
128 newopexpr->opno = negator;
129 newopexpr->opfuncid = InvalidOid;
130 newopexpr->hashfuncid = InvalidOid;
131 newopexpr->negfuncid = InvalidOid;
132 newopexpr->useOr = !saopexpr->useOr;
133 newopexpr->inputcollid = saopexpr->inputcollid;
134 newopexpr->args = saopexpr->args;
135 newopexpr->location = saopexpr->location;
136 return (Node *) newopexpr;
139 break;
140 case T_BoolExpr:
142 BoolExpr *expr = (BoolExpr *) node;
144 switch (expr->boolop)
146 /*--------------------
147 * Apply DeMorgan's Laws:
148 * (NOT (AND A B)) => (OR (NOT A) (NOT B))
149 * (NOT (OR A B)) => (AND (NOT A) (NOT B))
150 * i.e., swap AND for OR and negate each subclause.
152 * If the input is already AND/OR flat and has no NOT
153 * directly above AND or OR, this transformation preserves
154 * those properties. For example, if no direct child of
155 * the given AND clause is an AND or a NOT-above-OR, then
156 * the recursive calls of negate_clause() can't return any
157 * OR clauses. So we needn't call pull_ors() before
158 * building a new OR clause. Similarly for the OR case.
159 *--------------------
161 case AND_EXPR:
163 List *nargs = NIL;
164 ListCell *lc;
166 foreach(lc, expr->args)
168 nargs = lappend(nargs,
169 negate_clause(lfirst(lc)));
171 return (Node *) make_orclause(nargs);
173 break;
174 case OR_EXPR:
176 List *nargs = NIL;
177 ListCell *lc;
179 foreach(lc, expr->args)
181 nargs = lappend(nargs,
182 negate_clause(lfirst(lc)));
184 return (Node *) make_andclause(nargs);
186 break;
187 case NOT_EXPR:
190 * NOT underneath NOT: they cancel. We assume the
191 * input is already simplified, so no need to recurse.
193 return (Node *) linitial(expr->args);
194 default:
195 elog(ERROR, "unrecognized boolop: %d",
196 (int) expr->boolop);
197 break;
200 break;
201 case T_NullTest:
203 NullTest *expr = (NullTest *) node;
206 * In the rowtype case, the two flavors of NullTest are *not*
207 * logical inverses, so we can't simplify. But it does work
208 * for scalar datatypes.
210 if (!expr->argisrow)
212 NullTest *newexpr = makeNode(NullTest);
214 newexpr->arg = expr->arg;
215 newexpr->nulltesttype = (expr->nulltesttype == IS_NULL ?
216 IS_NOT_NULL : IS_NULL);
217 newexpr->argisrow = expr->argisrow;
218 newexpr->location = expr->location;
219 return (Node *) newexpr;
222 break;
223 case T_BooleanTest:
225 BooleanTest *expr = (BooleanTest *) node;
226 BooleanTest *newexpr = makeNode(BooleanTest);
228 newexpr->arg = expr->arg;
229 switch (expr->booltesttype)
231 case IS_TRUE:
232 newexpr->booltesttype = IS_NOT_TRUE;
233 break;
234 case IS_NOT_TRUE:
235 newexpr->booltesttype = IS_TRUE;
236 break;
237 case IS_FALSE:
238 newexpr->booltesttype = IS_NOT_FALSE;
239 break;
240 case IS_NOT_FALSE:
241 newexpr->booltesttype = IS_FALSE;
242 break;
243 case IS_UNKNOWN:
244 newexpr->booltesttype = IS_NOT_UNKNOWN;
245 break;
246 case IS_NOT_UNKNOWN:
247 newexpr->booltesttype = IS_UNKNOWN;
248 break;
249 default:
250 elog(ERROR, "unrecognized booltesttype: %d",
251 (int) expr->booltesttype);
252 break;
254 newexpr->location = expr->location;
255 return (Node *) newexpr;
257 break;
258 default:
259 /* else fall through */
260 break;
264 * Otherwise we don't know how to simplify this, so just tack on an
265 * explicit NOT node.
267 return (Node *) make_notclause((Expr *) node);
272 * canonicalize_qual
273 * Convert a qualification expression to the most useful form.
275 * This is primarily intended to be used on top-level WHERE (or JOIN/ON)
276 * clauses. It can also be used on top-level CHECK constraints, for which
277 * pass is_check = true. DO NOT call it on any expression that is not known
278 * to be one or the other, as it might apply inappropriate simplifications.
280 * The name of this routine is a holdover from a time when it would try to
281 * force the expression into canonical AND-of-ORs or OR-of-ANDs form.
282 * Eventually, we recognized that that had more theoretical purity than
283 * actual usefulness, and so now the transformation doesn't involve any
284 * notion of reaching a canonical form.
286 * NOTE: we assume the input has already been through eval_const_expressions
287 * and therefore possesses AND/OR flatness. Formerly this function included
288 * its own flattening logic, but that requires a useless extra pass over the
289 * tree.
291 * Returns the modified qualification.
293 Expr *
294 canonicalize_qual(Expr *qual, bool is_check)
296 Expr *newqual;
298 /* Quick exit for empty qual */
299 if (qual == NULL)
300 return NULL;
302 /* This should not be invoked on quals in implicit-AND format */
303 Assert(!IsA(qual, List));
306 * Pull up redundant subclauses in OR-of-AND trees. We do this only
307 * within the top-level AND/OR structure; there's no point in looking
308 * deeper. Also remove any NULL constants in the top-level structure.
310 newqual = find_duplicate_ors(qual, is_check);
312 return newqual;
317 * pull_ands
318 * Recursively flatten nested AND clauses into a single and-clause list.
320 * Input is the arglist of an AND clause.
321 * Returns the rebuilt arglist (note original list structure is not touched).
323 static List *
324 pull_ands(List *andlist)
326 List *out_list = NIL;
327 ListCell *arg;
329 foreach(arg, andlist)
331 Node *subexpr = (Node *) lfirst(arg);
333 if (is_andclause(subexpr))
334 out_list = list_concat(out_list,
335 pull_ands(((BoolExpr *) subexpr)->args));
336 else
337 out_list = lappend(out_list, subexpr);
339 return out_list;
343 * pull_ors
344 * Recursively flatten nested OR clauses into a single or-clause list.
346 * Input is the arglist of an OR clause.
347 * Returns the rebuilt arglist (note original list structure is not touched).
349 static List *
350 pull_ors(List *orlist)
352 List *out_list = NIL;
353 ListCell *arg;
355 foreach(arg, orlist)
357 Node *subexpr = (Node *) lfirst(arg);
359 if (is_orclause(subexpr))
360 out_list = list_concat(out_list,
361 pull_ors(((BoolExpr *) subexpr)->args));
362 else
363 out_list = lappend(out_list, subexpr);
365 return out_list;
369 /*--------------------
370 * The following code attempts to apply the inverse OR distributive law:
371 * ((A AND B) OR (A AND C)) => (A AND (B OR C))
372 * That is, locate OR clauses in which every subclause contains an
373 * identical term, and pull out the duplicated terms.
375 * This may seem like a fairly useless activity, but it turns out to be
376 * applicable to many machine-generated queries, and there are also queries
377 * in some of the TPC benchmarks that need it. This was in fact almost the
378 * sole useful side-effect of the old prepqual code that tried to force
379 * the query into canonical AND-of-ORs form: the canonical equivalent of
380 * ((A AND B) OR (A AND C))
381 * is
382 * ((A OR A) AND (A OR C) AND (B OR A) AND (B OR C))
383 * which the code was able to simplify to
384 * (A AND (A OR C) AND (B OR A) AND (B OR C))
385 * thus successfully extracting the common condition A --- but at the cost
386 * of cluttering the qual with many redundant clauses.
387 *--------------------
391 * find_duplicate_ors
392 * Given a qualification tree with the NOTs pushed down, search for
393 * OR clauses to which the inverse OR distributive law might apply.
394 * Only the top-level AND/OR structure is searched.
396 * While at it, we remove any NULL constants within the top-level AND/OR
397 * structure, eg in a WHERE clause, "x OR NULL::boolean" is reduced to "x".
398 * In general that would change the result, so eval_const_expressions can't
399 * do it; but at top level of WHERE, we don't need to distinguish between
400 * FALSE and NULL results, so it's valid to treat NULL::boolean the same
401 * as FALSE and then simplify AND/OR accordingly. Conversely, in a top-level
402 * CHECK constraint, we may treat a NULL the same as TRUE.
404 * Returns the modified qualification. AND/OR flatness is preserved.
406 static Expr *
407 find_duplicate_ors(Expr *qual, bool is_check)
409 if (is_orclause(qual))
411 List *orlist = NIL;
412 ListCell *temp;
414 /* Recurse */
415 foreach(temp, ((BoolExpr *) qual)->args)
417 Expr *arg = (Expr *) lfirst(temp);
419 arg = find_duplicate_ors(arg, is_check);
421 /* Get rid of any constant inputs */
422 if (arg && IsA(arg, Const))
424 Const *carg = (Const *) arg;
426 if (is_check)
428 /* Within OR in CHECK, drop constant FALSE */
429 if (!carg->constisnull && !DatumGetBool(carg->constvalue))
430 continue;
431 /* Constant TRUE or NULL, so OR reduces to TRUE */
432 return (Expr *) makeBoolConst(true, false);
434 else
436 /* Within OR in WHERE, drop constant FALSE or NULL */
437 if (carg->constisnull || !DatumGetBool(carg->constvalue))
438 continue;
439 /* Constant TRUE, so OR reduces to TRUE */
440 return arg;
444 orlist = lappend(orlist, arg);
447 /* Flatten any ORs pulled up to just below here */
448 orlist = pull_ors(orlist);
450 /* Now we can look for duplicate ORs */
451 return process_duplicate_ors(orlist);
453 else if (is_andclause(qual))
455 List *andlist = NIL;
456 ListCell *temp;
458 /* Recurse */
459 foreach(temp, ((BoolExpr *) qual)->args)
461 Expr *arg = (Expr *) lfirst(temp);
463 arg = find_duplicate_ors(arg, is_check);
465 /* Get rid of any constant inputs */
466 if (arg && IsA(arg, Const))
468 Const *carg = (Const *) arg;
470 if (is_check)
472 /* Within AND in CHECK, drop constant TRUE or NULL */
473 if (carg->constisnull || DatumGetBool(carg->constvalue))
474 continue;
475 /* Constant FALSE, so AND reduces to FALSE */
476 return arg;
478 else
480 /* Within AND in WHERE, drop constant TRUE */
481 if (!carg->constisnull && DatumGetBool(carg->constvalue))
482 continue;
483 /* Constant FALSE or NULL, so AND reduces to FALSE */
484 return (Expr *) makeBoolConst(false, false);
488 andlist = lappend(andlist, arg);
491 /* Flatten any ANDs introduced just below here */
492 andlist = pull_ands(andlist);
494 /* AND of no inputs reduces to TRUE */
495 if (andlist == NIL)
496 return (Expr *) makeBoolConst(true, false);
498 /* Single-expression AND just reduces to that expression */
499 if (list_length(andlist) == 1)
500 return (Expr *) linitial(andlist);
502 /* Else we still need an AND node */
503 return make_andclause(andlist);
505 else
506 return qual;
510 * process_duplicate_ors
511 * Given a list of exprs which are ORed together, try to apply
512 * the inverse OR distributive law.
514 * Returns the resulting expression (could be an AND clause, an OR
515 * clause, or maybe even a single subexpression).
517 static Expr *
518 process_duplicate_ors(List *orlist)
520 List *reference = NIL;
521 int num_subclauses = 0;
522 List *winners;
523 List *neworlist;
524 ListCell *temp;
526 /* OR of no inputs reduces to FALSE */
527 if (orlist == NIL)
528 return (Expr *) makeBoolConst(false, false);
530 /* Single-expression OR just reduces to that expression */
531 if (list_length(orlist) == 1)
532 return (Expr *) linitial(orlist);
535 * Choose the shortest AND clause as the reference list --- obviously, any
536 * subclause not in this clause isn't in all the clauses. If we find a
537 * clause that's not an AND, we can treat it as a one-element AND clause,
538 * which necessarily wins as shortest.
540 foreach(temp, orlist)
542 Expr *clause = (Expr *) lfirst(temp);
544 if (is_andclause(clause))
546 List *subclauses = ((BoolExpr *) clause)->args;
547 int nclauses = list_length(subclauses);
549 if (reference == NIL || nclauses < num_subclauses)
551 reference = subclauses;
552 num_subclauses = nclauses;
555 else
557 reference = list_make1(clause);
558 break;
563 * Just in case, eliminate any duplicates in the reference list.
565 reference = list_union(NIL, reference);
568 * Check each element of the reference list to see if it's in all the OR
569 * clauses. Build a new list of winning clauses.
571 winners = NIL;
572 foreach(temp, reference)
574 Expr *refclause = (Expr *) lfirst(temp);
575 bool win = true;
576 ListCell *temp2;
578 foreach(temp2, orlist)
580 Expr *clause = (Expr *) lfirst(temp2);
582 if (is_andclause(clause))
584 if (!list_member(((BoolExpr *) clause)->args, refclause))
586 win = false;
587 break;
590 else
592 if (!equal(refclause, clause))
594 win = false;
595 break;
600 if (win)
601 winners = lappend(winners, refclause);
605 * If no winners, we can't transform the OR
607 if (winners == NIL)
608 return make_orclause(orlist);
611 * Generate new OR list consisting of the remaining sub-clauses.
613 * If any clause degenerates to empty, then we have a situation like (A
614 * AND B) OR (A), which can be reduced to just A --- that is, the
615 * additional conditions in other arms of the OR are irrelevant.
617 * Note that because we use list_difference, any multiple occurrences of a
618 * winning clause in an AND sub-clause will be removed automatically.
620 neworlist = NIL;
621 foreach(temp, orlist)
623 Expr *clause = (Expr *) lfirst(temp);
625 if (is_andclause(clause))
627 List *subclauses = ((BoolExpr *) clause)->args;
629 subclauses = list_difference(subclauses, winners);
630 if (subclauses != NIL)
632 if (list_length(subclauses) == 1)
633 neworlist = lappend(neworlist, linitial(subclauses));
634 else
635 neworlist = lappend(neworlist, make_andclause(subclauses));
637 else
639 neworlist = NIL; /* degenerate case, see above */
640 break;
643 else
645 if (!list_member(winners, clause))
646 neworlist = lappend(neworlist, clause);
647 else
649 neworlist = NIL; /* degenerate case, see above */
650 break;
656 * Append reduced OR to the winners list, if it's not degenerate, handling
657 * the special case of one element correctly (can that really happen?).
658 * Also be careful to maintain AND/OR flatness in case we pulled up a
659 * sub-sub-OR-clause.
661 if (neworlist != NIL)
663 if (list_length(neworlist) == 1)
664 winners = lappend(winners, linitial(neworlist));
665 else
666 winners = lappend(winners, make_orclause(pull_ors(neworlist)));
670 * And return the constructed AND clause, again being wary of a single
671 * element and AND/OR flatness.
673 if (list_length(winners) == 1)
674 return (Expr *) linitial(winners);
675 else
676 return make_andclause(pull_ands(winners));