Update copyright for 2022
[pgsql.git] / src / backend / commands / indexcmds.c
blobe5cf1bde13feb58d7d77296bd1eb9df8173057b0
1 /*-------------------------------------------------------------------------
3 * indexcmds.c
4 * POSTGRES define and remove index code.
6 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
10 * IDENTIFICATION
11 * src/backend/commands/indexcmds.c
13 *-------------------------------------------------------------------------
16 #include "postgres.h"
18 #include "access/amapi.h"
19 #include "access/heapam.h"
20 #include "access/htup_details.h"
21 #include "access/reloptions.h"
22 #include "access/sysattr.h"
23 #include "access/tableam.h"
24 #include "access/xact.h"
25 #include "catalog/catalog.h"
26 #include "catalog/index.h"
27 #include "catalog/indexing.h"
28 #include "catalog/pg_am.h"
29 #include "catalog/pg_constraint.h"
30 #include "catalog/pg_inherits.h"
31 #include "catalog/pg_opclass.h"
32 #include "catalog/pg_opfamily.h"
33 #include "catalog/pg_tablespace.h"
34 #include "catalog/pg_type.h"
35 #include "commands/comment.h"
36 #include "commands/dbcommands.h"
37 #include "commands/defrem.h"
38 #include "commands/event_trigger.h"
39 #include "commands/progress.h"
40 #include "commands/tablecmds.h"
41 #include "commands/tablespace.h"
42 #include "mb/pg_wchar.h"
43 #include "miscadmin.h"
44 #include "nodes/makefuncs.h"
45 #include "nodes/nodeFuncs.h"
46 #include "optimizer/optimizer.h"
47 #include "parser/parse_coerce.h"
48 #include "parser/parse_func.h"
49 #include "parser/parse_oper.h"
50 #include "partitioning/partdesc.h"
51 #include "pgstat.h"
52 #include "rewrite/rewriteManip.h"
53 #include "storage/lmgr.h"
54 #include "storage/proc.h"
55 #include "storage/procarray.h"
56 #include "storage/sinvaladt.h"
57 #include "utils/acl.h"
58 #include "utils/builtins.h"
59 #include "utils/fmgroids.h"
60 #include "utils/inval.h"
61 #include "utils/lsyscache.h"
62 #include "utils/memutils.h"
63 #include "utils/partcache.h"
64 #include "utils/pg_rusage.h"
65 #include "utils/regproc.h"
66 #include "utils/snapmgr.h"
67 #include "utils/syscache.h"
70 /* non-export function prototypes */
71 static bool CompareOpclassOptions(Datum *opts1, Datum *opts2, int natts);
72 static void CheckPredicate(Expr *predicate);
73 static void ComputeIndexAttrs(IndexInfo *indexInfo,
74 Oid *typeOidP,
75 Oid *collationOidP,
76 Oid *classOidP,
77 int16 *colOptionP,
78 List *attList,
79 List *exclusionOpNames,
80 Oid relId,
81 const char *accessMethodName, Oid accessMethodId,
82 bool amcanorder,
83 bool isconstraint);
84 static char *ChooseIndexName(const char *tabname, Oid namespaceId,
85 List *colnames, List *exclusionOpNames,
86 bool primary, bool isconstraint);
87 static char *ChooseIndexNameAddition(List *colnames);
88 static List *ChooseIndexColumnNames(List *indexElems);
89 static void ReindexIndex(RangeVar *indexRelation, ReindexParams *params,
90 bool isTopLevel);
91 static void RangeVarCallbackForReindexIndex(const RangeVar *relation,
92 Oid relId, Oid oldRelId, void *arg);
93 static Oid ReindexTable(RangeVar *relation, ReindexParams *params,
94 bool isTopLevel);
95 static void ReindexMultipleTables(const char *objectName,
96 ReindexObjectType objectKind, ReindexParams *params);
97 static void reindex_error_callback(void *args);
98 static void ReindexPartitions(Oid relid, ReindexParams *params,
99 bool isTopLevel);
100 static void ReindexMultipleInternal(List *relids,
101 ReindexParams *params);
102 static bool ReindexRelationConcurrently(Oid relationOid,
103 ReindexParams *params);
104 static void update_relispartition(Oid relationId, bool newval);
105 static inline void set_indexsafe_procflags(void);
108 * callback argument type for RangeVarCallbackForReindexIndex()
110 struct ReindexIndexCallbackState
112 ReindexParams params; /* options from statement */
113 Oid locked_table_oid; /* tracks previously locked table */
/*
 * Argument block passed to reindex_error_callback().
 */
typedef struct ReindexErrorInfo
{
	char	   *relname;
	char	   *relnamespace;
	char		relkind;
} ReindexErrorInfo;
127 * CheckIndexCompatible
128 * Determine whether an existing index definition is compatible with a
129 * prospective index definition, such that the existing index storage
130 * could become the storage of the new index, avoiding a rebuild.
132 * 'heapRelation': the relation the index would apply to.
133 * 'accessMethodName': name of the AM to use.
134 * 'attributeList': a list of IndexElem specifying columns and expressions
135 * to index on.
136 * 'exclusionOpNames': list of names of exclusion-constraint operators,
137 * or NIL if not an exclusion constraint.
139 * This is tailored to the needs of ALTER TABLE ALTER TYPE, which recreates
140 * any indexes that depended on a changing column from their pg_get_indexdef
141 * or pg_get_constraintdef definitions. We omit some of the sanity checks of
142 * DefineIndex. We assume that the old and new indexes have the same number
143 * of columns and that if one has an expression column or predicate, both do.
144 * Errors arising from the attribute list still apply.
146 * Most column type changes that can skip a table rewrite do not invalidate
147 * indexes. We acknowledge this when all operator classes, collations and
148 * exclusion operators match. Though we could further permit intra-opfamily
149 * changes for btree and hash indexes, that adds subtle complexity with no
150 * concrete benefit for core types. Note, that INCLUDE columns aren't
151 * checked by this function, for them it's enough that table rewrite is
152 * skipped.
154 * When a comparison or exclusion operator has a polymorphic input type, the
155 * actual input types must also match. This defends against the possibility
156 * that operators could vary behavior in response to get_fn_expr_argtype().
157 * At present, this hazard is theoretical: check_exclusion_constraint() and
158 * all core index access methods decline to set fn_expr for such calls.
160 * We do not yet implement a test to verify compatibility of expression
161 * columns or predicates, so assume any such index is incompatible.
163 bool
164 CheckIndexCompatible(Oid oldId,
165 const char *accessMethodName,
166 List *attributeList,
167 List *exclusionOpNames)
169 bool isconstraint;
170 Oid *typeObjectId;
171 Oid *collationObjectId;
172 Oid *classObjectId;
173 Oid accessMethodId;
174 Oid relationId;
175 HeapTuple tuple;
176 Form_pg_index indexForm;
177 Form_pg_am accessMethodForm;
178 IndexAmRoutine *amRoutine;
179 bool amcanorder;
180 int16 *coloptions;
181 IndexInfo *indexInfo;
182 int numberOfAttributes;
183 int old_natts;
184 bool isnull;
185 bool ret = true;
186 oidvector *old_indclass;
187 oidvector *old_indcollation;
188 Relation irel;
189 int i;
190 Datum d;
192 /* Caller should already have the relation locked in some way. */
193 relationId = IndexGetRelation(oldId, false);
196 * We can pretend isconstraint = false unconditionally. It only serves to
197 * decide the text of an error message that should never happen for us.
199 isconstraint = false;
201 numberOfAttributes = list_length(attributeList);
202 Assert(numberOfAttributes > 0);
203 Assert(numberOfAttributes <= INDEX_MAX_KEYS);
205 /* look up the access method */
206 tuple = SearchSysCache1(AMNAME, PointerGetDatum(accessMethodName));
207 if (!HeapTupleIsValid(tuple))
208 ereport(ERROR,
209 (errcode(ERRCODE_UNDEFINED_OBJECT),
210 errmsg("access method \"%s\" does not exist",
211 accessMethodName)));
212 accessMethodForm = (Form_pg_am) GETSTRUCT(tuple);
213 accessMethodId = accessMethodForm->oid;
214 amRoutine = GetIndexAmRoutine(accessMethodForm->amhandler);
215 ReleaseSysCache(tuple);
217 amcanorder = amRoutine->amcanorder;
220 * Compute the operator classes, collations, and exclusion operators for
221 * the new index, so we can test whether it's compatible with the existing
222 * one. Note that ComputeIndexAttrs might fail here, but that's OK:
223 * DefineIndex would have called this function with the same arguments
224 * later on, and it would have failed then anyway. Our attributeList
225 * contains only key attributes, thus we're filling ii_NumIndexAttrs and
226 * ii_NumIndexKeyAttrs with same value.
228 indexInfo = makeIndexInfo(numberOfAttributes, numberOfAttributes,
229 accessMethodId, NIL, NIL, false, false, false);
230 typeObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
231 collationObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
232 classObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
233 coloptions = (int16 *) palloc(numberOfAttributes * sizeof(int16));
234 ComputeIndexAttrs(indexInfo,
235 typeObjectId, collationObjectId, classObjectId,
236 coloptions, attributeList,
237 exclusionOpNames, relationId,
238 accessMethodName, accessMethodId,
239 amcanorder, isconstraint);
242 /* Get the soon-obsolete pg_index tuple. */
243 tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(oldId));
244 if (!HeapTupleIsValid(tuple))
245 elog(ERROR, "cache lookup failed for index %u", oldId);
246 indexForm = (Form_pg_index) GETSTRUCT(tuple);
249 * We don't assess expressions or predicates; assume incompatibility.
250 * Also, if the index is invalid for any reason, treat it as incompatible.
252 if (!(heap_attisnull(tuple, Anum_pg_index_indpred, NULL) &&
253 heap_attisnull(tuple, Anum_pg_index_indexprs, NULL) &&
254 indexForm->indisvalid))
256 ReleaseSysCache(tuple);
257 return false;
260 /* Any change in operator class or collation breaks compatibility. */
261 old_natts = indexForm->indnkeyatts;
262 Assert(old_natts == numberOfAttributes);
264 d = SysCacheGetAttr(INDEXRELID, tuple, Anum_pg_index_indcollation, &isnull);
265 Assert(!isnull);
266 old_indcollation = (oidvector *) DatumGetPointer(d);
268 d = SysCacheGetAttr(INDEXRELID, tuple, Anum_pg_index_indclass, &isnull);
269 Assert(!isnull);
270 old_indclass = (oidvector *) DatumGetPointer(d);
272 ret = (memcmp(old_indclass->values, classObjectId,
273 old_natts * sizeof(Oid)) == 0 &&
274 memcmp(old_indcollation->values, collationObjectId,
275 old_natts * sizeof(Oid)) == 0);
277 ReleaseSysCache(tuple);
279 if (!ret)
280 return false;
282 /* For polymorphic opcintype, column type changes break compatibility. */
283 irel = index_open(oldId, AccessShareLock); /* caller probably has a lock */
284 for (i = 0; i < old_natts; i++)
286 if (IsPolymorphicType(get_opclass_input_type(classObjectId[i])) &&
287 TupleDescAttr(irel->rd_att, i)->atttypid != typeObjectId[i])
289 ret = false;
290 break;
294 /* Any change in opclass options break compatibility. */
295 if (ret)
297 Datum *opclassOptions = RelationGetIndexRawAttOptions(irel);
299 ret = CompareOpclassOptions(opclassOptions,
300 indexInfo->ii_OpclassOptions, old_natts);
302 if (opclassOptions)
303 pfree(opclassOptions);
306 /* Any change in exclusion operator selections breaks compatibility. */
307 if (ret && indexInfo->ii_ExclusionOps != NULL)
309 Oid *old_operators,
310 *old_procs;
311 uint16 *old_strats;
313 RelationGetExclusionInfo(irel, &old_operators, &old_procs, &old_strats);
314 ret = memcmp(old_operators, indexInfo->ii_ExclusionOps,
315 old_natts * sizeof(Oid)) == 0;
317 /* Require an exact input type match for polymorphic operators. */
318 if (ret)
320 for (i = 0; i < old_natts && ret; i++)
322 Oid left,
323 right;
325 op_input_types(indexInfo->ii_ExclusionOps[i], &left, &right);
326 if ((IsPolymorphicType(left) || IsPolymorphicType(right)) &&
327 TupleDescAttr(irel->rd_att, i)->atttypid != typeObjectId[i])
329 ret = false;
330 break;
336 index_close(irel, NoLock);
337 return ret;
341 * CompareOpclassOptions
343 * Compare per-column opclass options which are represented by arrays of text[]
344 * datums. Both elements of arrays and array themselves can be NULL.
346 static bool
347 CompareOpclassOptions(Datum *opts1, Datum *opts2, int natts)
349 int i;
351 if (!opts1 && !opts2)
352 return true;
354 for (i = 0; i < natts; i++)
356 Datum opt1 = opts1 ? opts1[i] : (Datum) 0;
357 Datum opt2 = opts2 ? opts2[i] : (Datum) 0;
359 if (opt1 == (Datum) 0)
361 if (opt2 == (Datum) 0)
362 continue;
363 else
364 return false;
366 else if (opt2 == (Datum) 0)
367 return false;
369 /* Compare non-NULL text[] datums. */
370 if (!DatumGetBool(DirectFunctionCall2(array_eq, opt1, opt2)))
371 return false;
374 return true;
378 * WaitForOlderSnapshots
380 * Wait for transactions that might have an older snapshot than the given xmin
381 * limit, because it might not contain tuples deleted just before it has
382 * been taken. Obtain a list of VXIDs of such transactions, and wait for them
383 * individually. This is used when building an index concurrently.
385 * We can exclude any running transactions that have xmin > the xmin given;
386 * their oldest snapshot must be newer than our xmin limit.
387 * We can also exclude any transactions that have xmin = zero, since they
388 * evidently have no live snapshot at all (and any one they might be in
389 * process of taking is certainly newer than ours). Transactions in other
390 * DBs can be ignored too, since they'll never even be able to see the
391 * index being worked on.
393 * We can also exclude autovacuum processes and processes running manual
394 * lazy VACUUMs, because they won't be fazed by missing index entries
395 * either. (Manual ANALYZEs, however, can't be excluded because they
396 * might be within transactions that are going to do arbitrary operations
397 * later.) Processes running CREATE INDEX CONCURRENTLY or REINDEX CONCURRENTLY
398 * on indexes that are neither expressional nor partial are also safe to
399 * ignore, since we know that those processes won't examine any data
400 * outside the table they're indexing.
402 * Also, GetCurrentVirtualXIDs never reports our own vxid, so we need not
403 * check for that.
405 * If a process goes idle-in-transaction with xmin zero, we do not need to
406 * wait for it anymore, per the above argument. We do not have the
407 * infrastructure right now to stop waiting if that happens, but we can at
408 * least avoid the folly of waiting when it is idle at the time we would
409 * begin to wait. We do this by repeatedly rechecking the output of
410 * GetCurrentVirtualXIDs. If, during any iteration, a particular vxid
411 * doesn't show up in the output, we know we can forget about it.
413 void
414 WaitForOlderSnapshots(TransactionId limitXmin, bool progress)
416 int n_old_snapshots;
417 int i;
418 VirtualTransactionId *old_snapshots;
420 old_snapshots = GetCurrentVirtualXIDs(limitXmin, true, false,
421 PROC_IS_AUTOVACUUM | PROC_IN_VACUUM
422 | PROC_IN_SAFE_IC,
423 &n_old_snapshots);
424 if (progress)
425 pgstat_progress_update_param(PROGRESS_WAITFOR_TOTAL, n_old_snapshots);
427 for (i = 0; i < n_old_snapshots; i++)
429 if (!VirtualTransactionIdIsValid(old_snapshots[i]))
430 continue; /* found uninteresting in previous cycle */
432 if (i > 0)
434 /* see if anything's changed ... */
435 VirtualTransactionId *newer_snapshots;
436 int n_newer_snapshots;
437 int j;
438 int k;
440 newer_snapshots = GetCurrentVirtualXIDs(limitXmin,
441 true, false,
442 PROC_IS_AUTOVACUUM | PROC_IN_VACUUM
443 | PROC_IN_SAFE_IC,
444 &n_newer_snapshots);
445 for (j = i; j < n_old_snapshots; j++)
447 if (!VirtualTransactionIdIsValid(old_snapshots[j]))
448 continue; /* found uninteresting in previous cycle */
449 for (k = 0; k < n_newer_snapshots; k++)
451 if (VirtualTransactionIdEquals(old_snapshots[j],
452 newer_snapshots[k]))
453 break;
455 if (k >= n_newer_snapshots) /* not there anymore */
456 SetInvalidVirtualTransactionId(old_snapshots[j]);
458 pfree(newer_snapshots);
461 if (VirtualTransactionIdIsValid(old_snapshots[i]))
463 /* If requested, publish who we're going to wait for. */
464 if (progress)
466 PGPROC *holder = BackendIdGetProc(old_snapshots[i].backendId);
468 if (holder)
469 pgstat_progress_update_param(PROGRESS_WAITFOR_CURRENT_PID,
470 holder->pid);
472 VirtualXactLock(old_snapshots[i], true);
475 if (progress)
476 pgstat_progress_update_param(PROGRESS_WAITFOR_DONE, i + 1);
482 * DefineIndex
483 * Creates a new index.
485 * 'relationId': the OID of the heap relation on which the index is to be
486 * created
487 * 'stmt': IndexStmt describing the properties of the new index.
488 * 'indexRelationId': normally InvalidOid, but during bootstrap can be
489 * nonzero to specify a preselected OID for the index.
490 * 'parentIndexId': the OID of the parent index; InvalidOid if not the child
491 * of a partitioned index.
492 * 'parentConstraintId': the OID of the parent constraint; InvalidOid if not
493 * the child of a constraint (only used when recursing)
494 * 'is_alter_table': this is due to an ALTER rather than a CREATE operation.
495 * 'check_rights': check for CREATE rights in namespace and tablespace. (This
496 * should be true except when ALTER is deleting/recreating an index.)
497 * 'check_not_in_use': check for table not already in use in current session.
498 * This should be true unless caller is holding the table open, in which
499 * case the caller had better have checked it earlier.
500 * 'skip_build': make the catalog entries but don't create the index files
501 * 'quiet': suppress the NOTICE chatter ordinarily provided for constraints.
503 * Returns the object address of the created index.
505 ObjectAddress
506 DefineIndex(Oid relationId,
507 IndexStmt *stmt,
508 Oid indexRelationId,
509 Oid parentIndexId,
510 Oid parentConstraintId,
511 bool is_alter_table,
512 bool check_rights,
513 bool check_not_in_use,
514 bool skip_build,
515 bool quiet)
517 bool concurrent;
518 char *indexRelationName;
519 char *accessMethodName;
520 Oid *typeObjectId;
521 Oid *collationObjectId;
522 Oid *classObjectId;
523 Oid accessMethodId;
524 Oid namespaceId;
525 Oid tablespaceId;
526 Oid createdConstraintId = InvalidOid;
527 List *indexColNames;
528 List *allIndexParams;
529 Relation rel;
530 HeapTuple tuple;
531 Form_pg_am accessMethodForm;
532 IndexAmRoutine *amRoutine;
533 bool amcanorder;
534 amoptions_function amoptions;
535 bool partitioned;
536 bool safe_index;
537 Datum reloptions;
538 int16 *coloptions;
539 IndexInfo *indexInfo;
540 bits16 flags;
541 bits16 constr_flags;
542 int numberOfAttributes;
543 int numberOfKeyAttributes;
544 TransactionId limitXmin;
545 ObjectAddress address;
546 LockRelId heaprelid;
547 LOCKTAG heaplocktag;
548 LOCKMODE lockmode;
549 Snapshot snapshot;
550 int save_nestlevel = -1;
551 int i;
554 * Some callers need us to run with an empty default_tablespace; this is a
555 * necessary hack to be able to reproduce catalog state accurately when
556 * recreating indexes after table-rewriting ALTER TABLE.
558 if (stmt->reset_default_tblspc)
560 save_nestlevel = NewGUCNestLevel();
561 (void) set_config_option("default_tablespace", "",
562 PGC_USERSET, PGC_S_SESSION,
563 GUC_ACTION_SAVE, true, 0, false);
567 * Force non-concurrent build on temporary relations, even if CONCURRENTLY
568 * was requested. Other backends can't access a temporary relation, so
569 * there's no harm in grabbing a stronger lock, and a non-concurrent build
570 * is more efficient. Do this before any use of the concurrent option is
571 * done.
573 if (stmt->concurrent && get_rel_persistence(relationId) != RELPERSISTENCE_TEMP)
574 concurrent = true;
575 else
576 concurrent = false;
579 * Start progress report. If we're building a partition, this was already
580 * done.
582 if (!OidIsValid(parentIndexId))
584 pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
585 relationId);
586 pgstat_progress_update_param(PROGRESS_CREATEIDX_COMMAND,
587 concurrent ?
588 PROGRESS_CREATEIDX_COMMAND_CREATE_CONCURRENTLY :
589 PROGRESS_CREATEIDX_COMMAND_CREATE);
593 * No index OID to report yet
595 pgstat_progress_update_param(PROGRESS_CREATEIDX_INDEX_OID,
596 InvalidOid);
599 * count key attributes in index
601 numberOfKeyAttributes = list_length(stmt->indexParams);
604 * Calculate the new list of index columns including both key columns and
605 * INCLUDE columns. Later we can determine which of these are key
606 * columns, and which are just part of the INCLUDE list by checking the
607 * list position. A list item in a position less than ii_NumIndexKeyAttrs
608 * is part of the key columns, and anything equal to and over is part of
609 * the INCLUDE columns.
611 allIndexParams = list_concat_copy(stmt->indexParams,
612 stmt->indexIncludingParams);
613 numberOfAttributes = list_length(allIndexParams);
615 if (numberOfKeyAttributes <= 0)
616 ereport(ERROR,
617 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
618 errmsg("must specify at least one column")));
619 if (numberOfAttributes > INDEX_MAX_KEYS)
620 ereport(ERROR,
621 (errcode(ERRCODE_TOO_MANY_COLUMNS),
622 errmsg("cannot use more than %d columns in an index",
623 INDEX_MAX_KEYS)));
626 * Only SELECT ... FOR UPDATE/SHARE are allowed while doing a standard
627 * index build; but for concurrent builds we allow INSERT/UPDATE/DELETE
628 * (but not VACUUM).
630 * NB: Caller is responsible for making sure that relationId refers to the
631 * relation on which the index should be built; except in bootstrap mode,
632 * this will typically require the caller to have already locked the
633 * relation. To avoid lock upgrade hazards, that lock should be at least
634 * as strong as the one we take here.
636 * NB: If the lock strength here ever changes, code that is run by
637 * parallel workers under the control of certain particular ambuild
638 * functions will need to be updated, too.
640 lockmode = concurrent ? ShareUpdateExclusiveLock : ShareLock;
641 rel = table_open(relationId, lockmode);
643 namespaceId = RelationGetNamespace(rel);
645 /* Ensure that it makes sense to index this kind of relation */
646 switch (rel->rd_rel->relkind)
648 case RELKIND_RELATION:
649 case RELKIND_MATVIEW:
650 case RELKIND_PARTITIONED_TABLE:
651 /* OK */
652 break;
653 default:
654 ereport(ERROR,
655 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
656 errmsg("cannot create index on relation \"%s\"",
657 RelationGetRelationName(rel)),
658 errdetail_relkind_not_supported(rel->rd_rel->relkind)));
659 break;
663 * Establish behavior for partitioned tables, and verify sanity of
664 * parameters.
666 * We do not build an actual index in this case; we only create a few
667 * catalog entries. The actual indexes are built by recursing for each
668 * partition.
670 partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
671 if (partitioned)
674 * Note: we check 'stmt->concurrent' rather than 'concurrent', so that
675 * the error is thrown also for temporary tables. Seems better to be
676 * consistent, even though we could do it on temporary table because
677 * we're not actually doing it concurrently.
679 if (stmt->concurrent)
680 ereport(ERROR,
681 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
682 errmsg("cannot create index on partitioned table \"%s\" concurrently",
683 RelationGetRelationName(rel))));
684 if (stmt->excludeOpNames)
685 ereport(ERROR,
686 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
687 errmsg("cannot create exclusion constraints on partitioned table \"%s\"",
688 RelationGetRelationName(rel))));
692 * Don't try to CREATE INDEX on temp tables of other backends.
694 if (RELATION_IS_OTHER_TEMP(rel))
695 ereport(ERROR,
696 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
697 errmsg("cannot create indexes on temporary tables of other sessions")));
700 * Unless our caller vouches for having checked this already, insist that
701 * the table not be in use by our own session, either. Otherwise we might
702 * fail to make entries in the new index (for instance, if an INSERT or
703 * UPDATE is in progress and has already made its list of target indexes).
705 if (check_not_in_use)
706 CheckTableNotInUse(rel, "CREATE INDEX");
709 * Verify we (still) have CREATE rights in the rel's namespace.
710 * (Presumably we did when the rel was created, but maybe not anymore.)
711 * Skip check if caller doesn't want it. Also skip check if
712 * bootstrapping, since permissions machinery may not be working yet.
714 if (check_rights && !IsBootstrapProcessingMode())
716 AclResult aclresult;
718 aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(),
719 ACL_CREATE);
720 if (aclresult != ACLCHECK_OK)
721 aclcheck_error(aclresult, OBJECT_SCHEMA,
722 get_namespace_name(namespaceId));
726 * Select tablespace to use. If not specified, use default tablespace
727 * (which may in turn default to database's default).
729 if (stmt->tableSpace)
731 tablespaceId = get_tablespace_oid(stmt->tableSpace, false);
732 if (partitioned && tablespaceId == MyDatabaseTableSpace)
733 ereport(ERROR,
734 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
735 errmsg("cannot specify default tablespace for partitioned relations")));
737 else
739 tablespaceId = GetDefaultTablespace(rel->rd_rel->relpersistence,
740 partitioned);
741 /* note InvalidOid is OK in this case */
744 /* Check tablespace permissions */
745 if (check_rights &&
746 OidIsValid(tablespaceId) && tablespaceId != MyDatabaseTableSpace)
748 AclResult aclresult;
750 aclresult = pg_tablespace_aclcheck(tablespaceId, GetUserId(),
751 ACL_CREATE);
752 if (aclresult != ACLCHECK_OK)
753 aclcheck_error(aclresult, OBJECT_TABLESPACE,
754 get_tablespace_name(tablespaceId));
758 * Force shared indexes into the pg_global tablespace. This is a bit of a
759 * hack but seems simpler than marking them in the BKI commands. On the
760 * other hand, if it's not shared, don't allow it to be placed there.
762 if (rel->rd_rel->relisshared)
763 tablespaceId = GLOBALTABLESPACE_OID;
764 else if (tablespaceId == GLOBALTABLESPACE_OID)
765 ereport(ERROR,
766 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
767 errmsg("only shared relations can be placed in pg_global tablespace")));
770 * Choose the index column names.
772 indexColNames = ChooseIndexColumnNames(allIndexParams);
775 * Select name for index if caller didn't specify
777 indexRelationName = stmt->idxname;
778 if (indexRelationName == NULL)
779 indexRelationName = ChooseIndexName(RelationGetRelationName(rel),
780 namespaceId,
781 indexColNames,
782 stmt->excludeOpNames,
783 stmt->primary,
784 stmt->isconstraint);
787 * look up the access method, verify it can handle the requested features
789 accessMethodName = stmt->accessMethod;
790 tuple = SearchSysCache1(AMNAME, PointerGetDatum(accessMethodName));
791 if (!HeapTupleIsValid(tuple))
794 * Hack to provide more-or-less-transparent updating of old RTREE
795 * indexes to GiST: if RTREE is requested and not found, use GIST.
797 if (strcmp(accessMethodName, "rtree") == 0)
799 ereport(NOTICE,
800 (errmsg("substituting access method \"gist\" for obsolete method \"rtree\"")));
801 accessMethodName = "gist";
802 tuple = SearchSysCache1(AMNAME, PointerGetDatum(accessMethodName));
805 if (!HeapTupleIsValid(tuple))
806 ereport(ERROR,
807 (errcode(ERRCODE_UNDEFINED_OBJECT),
808 errmsg("access method \"%s\" does not exist",
809 accessMethodName)));
811 accessMethodForm = (Form_pg_am) GETSTRUCT(tuple);
812 accessMethodId = accessMethodForm->oid;
813 amRoutine = GetIndexAmRoutine(accessMethodForm->amhandler);
815 pgstat_progress_update_param(PROGRESS_CREATEIDX_ACCESS_METHOD_OID,
816 accessMethodId);
818 if (stmt->unique && !amRoutine->amcanunique)
819 ereport(ERROR,
820 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
821 errmsg("access method \"%s\" does not support unique indexes",
822 accessMethodName)));
823 if (stmt->indexIncludingParams != NIL && !amRoutine->amcaninclude)
824 ereport(ERROR,
825 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
826 errmsg("access method \"%s\" does not support included columns",
827 accessMethodName)));
828 if (numberOfKeyAttributes > 1 && !amRoutine->amcanmulticol)
829 ereport(ERROR,
830 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
831 errmsg("access method \"%s\" does not support multicolumn indexes",
832 accessMethodName)));
833 if (stmt->excludeOpNames && amRoutine->amgettuple == NULL)
834 ereport(ERROR,
835 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
836 errmsg("access method \"%s\" does not support exclusion constraints",
837 accessMethodName)));
839 amcanorder = amRoutine->amcanorder;
840 amoptions = amRoutine->amoptions;
842 pfree(amRoutine);
843 ReleaseSysCache(tuple);
846 * Validate predicate, if given
848 if (stmt->whereClause)
849 CheckPredicate((Expr *) stmt->whereClause);
852 * Parse AM-specific options, convert to text array form, validate.
854 reloptions = transformRelOptions((Datum) 0, stmt->options,
855 NULL, NULL, false, false);
857 (void) index_reloptions(amoptions, reloptions, true);
860 * Prepare arguments for index_create, primarily an IndexInfo structure.
861 * Note that predicates must be in implicit-AND format. In a concurrent
862 * build, mark it not-ready-for-inserts.
864 indexInfo = makeIndexInfo(numberOfAttributes,
865 numberOfKeyAttributes,
866 accessMethodId,
867 NIL, /* expressions, NIL for now */
868 make_ands_implicit((Expr *) stmt->whereClause),
869 stmt->unique,
870 !concurrent,
871 concurrent);
873 typeObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
874 collationObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
875 classObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
876 coloptions = (int16 *) palloc(numberOfAttributes * sizeof(int16));
877 ComputeIndexAttrs(indexInfo,
878 typeObjectId, collationObjectId, classObjectId,
879 coloptions, allIndexParams,
880 stmt->excludeOpNames, relationId,
881 accessMethodName, accessMethodId,
882 amcanorder, stmt->isconstraint);
885 * Extra checks when creating a PRIMARY KEY index.
887 if (stmt->primary)
888 index_check_primary_key(rel, indexInfo, is_alter_table, stmt);
891 * If this table is partitioned and we're creating a unique index or a
892 * primary key, make sure that the partition key is a subset of the
893 * index's columns. Otherwise it would be possible to violate uniqueness
894 * by putting values that ought to be unique in different partitions.
896 * We could lift this limitation if we had global indexes, but those have
897 * their own problems, so this is a useful feature combination.
899 if (partitioned && (stmt->unique || stmt->primary))
901 PartitionKey key = RelationGetPartitionKey(rel);
902 const char *constraint_type;
903 int i;
905 if (stmt->primary)
906 constraint_type = "PRIMARY KEY";
907 else if (stmt->unique)
908 constraint_type = "UNIQUE";
909 else if (stmt->excludeOpNames != NIL)
910 constraint_type = "EXCLUDE";
911 else
913 elog(ERROR, "unknown constraint type");
914 constraint_type = NULL; /* keep compiler quiet */
918 * Verify that all the columns in the partition key appear in the
919 * unique key definition, with the same notion of equality.
921 for (i = 0; i < key->partnatts; i++)
923 bool found = false;
924 int eq_strategy;
925 Oid ptkey_eqop;
926 int j;
929 * Identify the equality operator associated with this partkey
930 * column. For list and range partitioning, partkeys use btree
931 * operator classes; hash partitioning uses hash operator classes.
932 * (Keep this in sync with ComputePartitionAttrs!)
934 if (key->strategy == PARTITION_STRATEGY_HASH)
935 eq_strategy = HTEqualStrategyNumber;
936 else
937 eq_strategy = BTEqualStrategyNumber;
939 ptkey_eqop = get_opfamily_member(key->partopfamily[i],
940 key->partopcintype[i],
941 key->partopcintype[i],
942 eq_strategy);
943 if (!OidIsValid(ptkey_eqop))
944 elog(ERROR, "missing operator %d(%u,%u) in partition opfamily %u",
945 eq_strategy, key->partopcintype[i], key->partopcintype[i],
946 key->partopfamily[i]);
949 * We'll need to be able to identify the equality operators
950 * associated with index columns, too. We know what to do with
951 * btree opclasses; if there are ever any other index types that
952 * support unique indexes, this logic will need extension.
954 if (accessMethodId == BTREE_AM_OID)
955 eq_strategy = BTEqualStrategyNumber;
956 else
957 ereport(ERROR,
958 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
959 errmsg("cannot match partition key to an index using access method \"%s\"",
960 accessMethodName)));
963 * It may be possible to support UNIQUE constraints when partition
964 * keys are expressions, but is it worth it? Give up for now.
966 if (key->partattrs[i] == 0)
967 ereport(ERROR,
968 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
969 errmsg("unsupported %s constraint with partition key definition",
970 constraint_type),
971 errdetail("%s constraints cannot be used when partition keys include expressions.",
972 constraint_type)));
974 /* Search the index column(s) for a match */
975 for (j = 0; j < indexInfo->ii_NumIndexKeyAttrs; j++)
977 if (key->partattrs[i] == indexInfo->ii_IndexAttrNumbers[j])
979 /* Matched the column, now what about the equality op? */
980 Oid idx_opfamily;
981 Oid idx_opcintype;
983 if (get_opclass_opfamily_and_input_type(classObjectId[j],
984 &idx_opfamily,
985 &idx_opcintype))
987 Oid idx_eqop;
989 idx_eqop = get_opfamily_member(idx_opfamily,
990 idx_opcintype,
991 idx_opcintype,
992 eq_strategy);
993 if (ptkey_eqop == idx_eqop)
995 found = true;
996 break;
1002 if (!found)
1004 Form_pg_attribute att;
1006 att = TupleDescAttr(RelationGetDescr(rel),
1007 key->partattrs[i] - 1);
1008 ereport(ERROR,
1009 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1010 errmsg("unique constraint on partitioned table must include all partitioning columns"),
1011 errdetail("%s constraint on table \"%s\" lacks column \"%s\" which is part of the partition key.",
1012 constraint_type, RelationGetRelationName(rel),
1013 NameStr(att->attname))));
1020 * We disallow indexes on system columns. They would not necessarily get
1021 * updated correctly, and they don't seem useful anyway.
1023 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
1025 AttrNumber attno = indexInfo->ii_IndexAttrNumbers[i];
1027 if (attno < 0)
1028 ereport(ERROR,
1029 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1030 errmsg("index creation on system columns is not supported")));
1034 * Also check for system columns used in expressions or predicates.
1036 if (indexInfo->ii_Expressions || indexInfo->ii_Predicate)
1038 Bitmapset *indexattrs = NULL;
1040 pull_varattnos((Node *) indexInfo->ii_Expressions, 1, &indexattrs);
1041 pull_varattnos((Node *) indexInfo->ii_Predicate, 1, &indexattrs);
1043 for (i = FirstLowInvalidHeapAttributeNumber + 1; i < 0; i++)
1045 if (bms_is_member(i - FirstLowInvalidHeapAttributeNumber,
1046 indexattrs))
1047 ereport(ERROR,
1048 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1049 errmsg("index creation on system columns is not supported")));
1053 /* Is index safe for others to ignore? See set_indexsafe_procflags() */
1054 safe_index = indexInfo->ii_Expressions == NIL &&
1055 indexInfo->ii_Predicate == NIL;
1058 * Report index creation if appropriate (delay this till after most of the
1059 * error checks)
1061 if (stmt->isconstraint && !quiet)
1063 const char *constraint_type;
1065 if (stmt->primary)
1066 constraint_type = "PRIMARY KEY";
1067 else if (stmt->unique)
1068 constraint_type = "UNIQUE";
1069 else if (stmt->excludeOpNames != NIL)
1070 constraint_type = "EXCLUDE";
1071 else
1073 elog(ERROR, "unknown constraint type");
1074 constraint_type = NULL; /* keep compiler quiet */
1077 ereport(DEBUG1,
1078 (errmsg_internal("%s %s will create implicit index \"%s\" for table \"%s\"",
1079 is_alter_table ? "ALTER TABLE / ADD" : "CREATE TABLE /",
1080 constraint_type,
1081 indexRelationName, RelationGetRelationName(rel))));
1085 * A valid stmt->oldNode implies that we already have a built form of the
1086 * index. The caller should also decline any index build.
1088 Assert(!OidIsValid(stmt->oldNode) || (skip_build && !concurrent));
1091 * Make the catalog entries for the index, including constraints. This
1092 * step also actually builds the index, except if caller requested not to
1093 * or in concurrent mode, in which case it'll be done later, or doing a
1094 * partitioned index (because those don't have storage).
1096 flags = constr_flags = 0;
1097 if (stmt->isconstraint)
1098 flags |= INDEX_CREATE_ADD_CONSTRAINT;
1099 if (skip_build || concurrent || partitioned)
1100 flags |= INDEX_CREATE_SKIP_BUILD;
1101 if (stmt->if_not_exists)
1102 flags |= INDEX_CREATE_IF_NOT_EXISTS;
1103 if (concurrent)
1104 flags |= INDEX_CREATE_CONCURRENT;
1105 if (partitioned)
1106 flags |= INDEX_CREATE_PARTITIONED;
1107 if (stmt->primary)
1108 flags |= INDEX_CREATE_IS_PRIMARY;
1111 * If the table is partitioned, and recursion was declined but partitions
1112 * exist, mark the index as invalid.
1114 if (partitioned && stmt->relation && !stmt->relation->inh)
1116 PartitionDesc pd = RelationGetPartitionDesc(rel, true);
1118 if (pd->nparts != 0)
1119 flags |= INDEX_CREATE_INVALID;
1122 if (stmt->deferrable)
1123 constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE;
1124 if (stmt->initdeferred)
1125 constr_flags |= INDEX_CONSTR_CREATE_INIT_DEFERRED;
1127 indexRelationId =
1128 index_create(rel, indexRelationName, indexRelationId, parentIndexId,
1129 parentConstraintId,
1130 stmt->oldNode, indexInfo, indexColNames,
1131 accessMethodId, tablespaceId,
1132 collationObjectId, classObjectId,
1133 coloptions, reloptions,
1134 flags, constr_flags,
1135 allowSystemTableMods, !check_rights,
1136 &createdConstraintId);
1138 ObjectAddressSet(address, RelationRelationId, indexRelationId);
1141 * Revert to original default_tablespace. Must do this before any return
1142 * from this function, but after index_create, so this is a good time.
1144 if (save_nestlevel >= 0)
1145 AtEOXact_GUC(true, save_nestlevel);
1147 if (!OidIsValid(indexRelationId))
1149 table_close(rel, NoLock);
1151 /* If this is the top-level index, we're done */
1152 if (!OidIsValid(parentIndexId))
1153 pgstat_progress_end_command();
1155 return address;
1158 /* Add any requested comment */
1159 if (stmt->idxcomment != NULL)
1160 CreateComments(indexRelationId, RelationRelationId, 0,
1161 stmt->idxcomment);
1163 if (partitioned)
1165 PartitionDesc partdesc;
1168 * Unless caller specified to skip this step (via ONLY), process each
1169 * partition to make sure they all contain a corresponding index.
1171 * If we're called internally (no stmt->relation), recurse always.
1173 partdesc = RelationGetPartitionDesc(rel, true);
1174 if ((!stmt->relation || stmt->relation->inh) && partdesc->nparts > 0)
1176 int nparts = partdesc->nparts;
1177 Oid *part_oids = palloc(sizeof(Oid) * nparts);
1178 bool invalidate_parent = false;
1179 TupleDesc parentDesc;
1180 Oid *opfamOids;
1182 pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_TOTAL,
1183 nparts);
1185 memcpy(part_oids, partdesc->oids, sizeof(Oid) * nparts);
1187 parentDesc = RelationGetDescr(rel);
1188 opfamOids = palloc(sizeof(Oid) * numberOfKeyAttributes);
1189 for (i = 0; i < numberOfKeyAttributes; i++)
1190 opfamOids[i] = get_opclass_family(classObjectId[i]);
1193 * For each partition, scan all existing indexes; if one matches
1194 * our index definition and is not already attached to some other
1195 * parent index, attach it to the one we just created.
1197 * If none matches, build a new index by calling ourselves
1198 * recursively with the same options (except for the index name).
1200 for (i = 0; i < nparts; i++)
1202 Oid childRelid = part_oids[i];
1203 Relation childrel;
1204 List *childidxs;
1205 ListCell *cell;
1206 AttrMap *attmap;
1207 bool found = false;
1209 childrel = table_open(childRelid, lockmode);
1212 * Don't try to create indexes on foreign tables, though. Skip
1213 * those if a regular index, or fail if trying to create a
1214 * constraint index.
1216 if (childrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
1218 if (stmt->unique || stmt->primary)
1219 ereport(ERROR,
1220 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1221 errmsg("cannot create unique index on partitioned table \"%s\"",
1222 RelationGetRelationName(rel)),
1223 errdetail("Table \"%s\" contains partitions that are foreign tables.",
1224 RelationGetRelationName(rel))));
1226 table_close(childrel, lockmode);
1227 continue;
1230 childidxs = RelationGetIndexList(childrel);
1231 attmap =
1232 build_attrmap_by_name(RelationGetDescr(childrel),
1233 parentDesc);
1235 foreach(cell, childidxs)
1237 Oid cldidxid = lfirst_oid(cell);
1238 Relation cldidx;
1239 IndexInfo *cldIdxInfo;
1241 /* this index is already partition of another one */
1242 if (has_superclass(cldidxid))
1243 continue;
1245 cldidx = index_open(cldidxid, lockmode);
1246 cldIdxInfo = BuildIndexInfo(cldidx);
1247 if (CompareIndexInfo(cldIdxInfo, indexInfo,
1248 cldidx->rd_indcollation,
1249 collationObjectId,
1250 cldidx->rd_opfamily,
1251 opfamOids,
1252 attmap))
1254 Oid cldConstrOid = InvalidOid;
1257 * Found a match.
1259 * If this index is being created in the parent
1260 * because of a constraint, then the child needs to
1261 * have a constraint also, so look for one. If there
1262 * is no such constraint, this index is no good, so
1263 * keep looking.
1265 if (createdConstraintId != InvalidOid)
1267 cldConstrOid =
1268 get_relation_idx_constraint_oid(childRelid,
1269 cldidxid);
1270 if (cldConstrOid == InvalidOid)
1272 index_close(cldidx, lockmode);
1273 continue;
1277 /* Attach index to parent and we're done. */
1278 IndexSetParentIndex(cldidx, indexRelationId);
1279 if (createdConstraintId != InvalidOid)
1280 ConstraintSetParentConstraint(cldConstrOid,
1281 createdConstraintId,
1282 childRelid);
1284 if (!cldidx->rd_index->indisvalid)
1285 invalidate_parent = true;
1287 found = true;
1288 /* keep lock till commit */
1289 index_close(cldidx, NoLock);
1290 break;
1293 index_close(cldidx, lockmode);
1296 list_free(childidxs);
1297 table_close(childrel, NoLock);
1300 * If no matching index was found, create our own.
1302 if (!found)
1304 IndexStmt *childStmt = copyObject(stmt);
1305 bool found_whole_row;
1306 ListCell *lc;
1309 * We can't use the same index name for the child index,
1310 * so clear idxname to let the recursive invocation choose
1311 * a new name. Likewise, the existing target relation
1312 * field is wrong, and if indexOid or oldNode are set,
1313 * they mustn't be applied to the child either.
1315 childStmt->idxname = NULL;
1316 childStmt->relation = NULL;
1317 childStmt->indexOid = InvalidOid;
1318 childStmt->oldNode = InvalidOid;
1319 childStmt->oldCreateSubid = InvalidSubTransactionId;
1320 childStmt->oldFirstRelfilenodeSubid = InvalidSubTransactionId;
1323 * Adjust any Vars (both in expressions and in the index's
1324 * WHERE clause) to match the partition's column numbering
1325 * in case it's different from the parent's.
1327 foreach(lc, childStmt->indexParams)
1329 IndexElem *ielem = lfirst(lc);
1332 * If the index parameter is an expression, we must
1333 * translate it to contain child Vars.
1335 if (ielem->expr)
1337 ielem->expr =
1338 map_variable_attnos((Node *) ielem->expr,
1339 1, 0, attmap,
1340 InvalidOid,
1341 &found_whole_row);
1342 if (found_whole_row)
1343 elog(ERROR, "cannot convert whole-row table reference");
1346 childStmt->whereClause =
1347 map_variable_attnos(stmt->whereClause, 1, 0,
1348 attmap,
1349 InvalidOid, &found_whole_row);
1350 if (found_whole_row)
1351 elog(ERROR, "cannot convert whole-row table reference");
1353 DefineIndex(childRelid, childStmt,
1354 InvalidOid, /* no predefined OID */
1355 indexRelationId, /* this is our child */
1356 createdConstraintId,
1357 is_alter_table, check_rights, check_not_in_use,
1358 skip_build, quiet);
1361 pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
1362 i + 1);
1363 free_attrmap(attmap);
1367 * The pg_index row we inserted for this index was marked
1368 * indisvalid=true. But if we attached an existing index that is
1369 * invalid, this is incorrect, so update our row to invalid too.
1371 if (invalidate_parent)
1373 Relation pg_index = table_open(IndexRelationId, RowExclusiveLock);
1374 HeapTuple tup,
1375 newtup;
1377 tup = SearchSysCache1(INDEXRELID,
1378 ObjectIdGetDatum(indexRelationId));
1379 if (!HeapTupleIsValid(tup))
1380 elog(ERROR, "cache lookup failed for index %u",
1381 indexRelationId);
1382 newtup = heap_copytuple(tup);
1383 ((Form_pg_index) GETSTRUCT(newtup))->indisvalid = false;
1384 CatalogTupleUpdate(pg_index, &tup->t_self, newtup);
1385 ReleaseSysCache(tup);
1386 table_close(pg_index, RowExclusiveLock);
1387 heap_freetuple(newtup);
1392 * Indexes on partitioned tables are not themselves built, so we're
1393 * done here.
1395 table_close(rel, NoLock);
1396 if (!OidIsValid(parentIndexId))
1397 pgstat_progress_end_command();
1398 return address;
1401 if (!concurrent)
1403 /* Close the heap and we're done, in the non-concurrent case */
1404 table_close(rel, NoLock);
1406 /* If this is the top-level index, we're done. */
1407 if (!OidIsValid(parentIndexId))
1408 pgstat_progress_end_command();
1410 return address;
1413 /* save lockrelid and locktag for below, then close rel */
1414 heaprelid = rel->rd_lockInfo.lockRelId;
1415 SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
1416 table_close(rel, NoLock);
1419 * For a concurrent build, it's important to make the catalog entries
1420 * visible to other transactions before we start to build the index. That
1421 * will prevent them from making incompatible HOT updates. The new index
1422 * will be marked not indisready and not indisvalid, so that no one else
1423 * tries to either insert into it or use it for queries.
1425 * We must commit our current transaction so that the index becomes
1426 * visible; then start another. Note that all the data structures we just
1427 * built are lost in the commit. The only data we keep past here are the
1428 * relation IDs.
1430 * Before committing, get a session-level lock on the table, to ensure
1431 * that neither it nor the index can be dropped before we finish. This
1432 * cannot block, even if someone else is waiting for access, because we
1433 * already have the same lock within our transaction.
1435 * Note: we don't currently bother with a session lock on the index,
1436 * because there are no operations that could change its state while we
1437 * hold lock on the parent table. This might need to change later.
1439 LockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
1441 PopActiveSnapshot();
1442 CommitTransactionCommand();
1443 StartTransactionCommand();
1445 /* Tell concurrent index builds to ignore us, if index qualifies */
1446 if (safe_index)
1447 set_indexsafe_procflags();
1450 * The index is now visible, so we can report the OID. While on it,
1451 * include the report for the beginning of phase 2.
1454 const int progress_cols[] = {
1455 PROGRESS_CREATEIDX_INDEX_OID,
1456 PROGRESS_CREATEIDX_PHASE
1458 const int64 progress_vals[] = {
1459 indexRelationId,
1460 PROGRESS_CREATEIDX_PHASE_WAIT_1
1463 pgstat_progress_update_multi_param(2, progress_cols, progress_vals);
1467 * Phase 2 of concurrent index build (see comments for validate_index()
1468 * for an overview of how this works)
1470 * Now we must wait until no running transaction could have the table open
1471 * with the old list of indexes. Use ShareLock to consider running
1472 * transactions that hold locks that permit writing to the table. Note we
1473 * do not need to worry about xacts that open the table for writing after
1474 * this point; they will see the new index when they open it.
1476 * Note: the reason we use actual lock acquisition here, rather than just
1477 * checking the ProcArray and sleeping, is that deadlock is possible if
1478 * one of the transactions in question is blocked trying to acquire an
1479 * exclusive lock on our table. The lock code will detect deadlock and
1480 * error out properly.
1482 WaitForLockers(heaplocktag, ShareLock, true);
1485 * At this moment we are sure that there are no transactions with the
1486 * table open for write that don't have this new index in their list of
1487 * indexes. We have waited out all the existing transactions and any new
1488 * transaction will have the new index in its list, but the index is still
1489 * marked as "not-ready-for-inserts". The index is consulted while
1490 * deciding HOT-safety though. This arrangement ensures that no new HOT
1491 * chains can be created where the new tuple and the old tuple in the
1492 * chain have different index keys.
1494 * We now take a new snapshot, and build the index using all tuples that
1495 * are visible in this snapshot. We can be sure that any HOT updates to
1496 * these tuples will be compatible with the index, since any updates made
1497 * by transactions that didn't know about the index are now committed or
1498 * rolled back. Thus, each visible tuple is either the end of its
1499 * HOT-chain or the extension of the chain is HOT-safe for this index.
1502 /* Set ActiveSnapshot since functions in the indexes may need it */
1503 PushActiveSnapshot(GetTransactionSnapshot());
1505 /* Perform concurrent build of index */
1506 index_concurrently_build(relationId, indexRelationId);
1508 /* we can do away with our snapshot */
1509 PopActiveSnapshot();
1512 * Commit this transaction to make the indisready update visible.
1514 CommitTransactionCommand();
1515 StartTransactionCommand();
1517 /* Tell concurrent index builds to ignore us, if index qualifies */
1518 if (safe_index)
1519 set_indexsafe_procflags();
1522 * Phase 3 of concurrent index build
1524 * We once again wait until no transaction can have the table open with
1525 * the index marked as read-only for updates.
1527 pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
1528 PROGRESS_CREATEIDX_PHASE_WAIT_2);
1529 WaitForLockers(heaplocktag, ShareLock, true);
1532 * Now take the "reference snapshot" that will be used by validate_index()
1533 * to filter candidate tuples. Beware! There might still be snapshots in
1534 * use that treat some transaction as in-progress that our reference
1535 * snapshot treats as committed. If such a recently-committed transaction
1536 * deleted tuples in the table, we will not include them in the index; yet
1537 * those transactions which see the deleting one as still-in-progress will
1538 * expect such tuples to be there once we mark the index as valid.
1540 * We solve this by waiting for all endangered transactions to exit before
1541 * we mark the index as valid.
1543 * We also set ActiveSnapshot to this snap, since functions in indexes may
1544 * need a snapshot.
1546 snapshot = RegisterSnapshot(GetTransactionSnapshot());
1547 PushActiveSnapshot(snapshot);
1550 * Scan the index and the heap, insert any missing index entries.
1552 validate_index(relationId, indexRelationId, snapshot);
1555 * Drop the reference snapshot. We must do this before waiting out other
1556 * snapshot holders, else we will deadlock against other processes also
1557 * doing CREATE INDEX CONCURRENTLY, which would see our snapshot as one
1558 * they must wait for. But first, save the snapshot's xmin to use as
1559 * limitXmin for GetCurrentVirtualXIDs().
1561 limitXmin = snapshot->xmin;
1563 PopActiveSnapshot();
1564 UnregisterSnapshot(snapshot);
1567 * The snapshot subsystem could still contain registered snapshots that
1568 * are holding back our process's advertised xmin; in particular, if
1569 * default_transaction_isolation = serializable, there is a transaction
1570 * snapshot that is still active. The CatalogSnapshot is likewise a
1571 * hazard. To ensure no deadlocks, we must commit and start yet another
1572 * transaction, and do our wait before any snapshot has been taken in it.
1574 CommitTransactionCommand();
1575 StartTransactionCommand();
1577 /* Tell concurrent index builds to ignore us, if index qualifies */
1578 if (safe_index)
1579 set_indexsafe_procflags();
1581 /* We should now definitely not be advertising any xmin. */
1582 Assert(MyProc->xmin == InvalidTransactionId);
1585 * The index is now valid in the sense that it contains all currently
1586 * interesting tuples. But since it might not contain tuples deleted just
1587 * before the reference snap was taken, we have to wait out any
1588 * transactions that might have older snapshots.
1590 pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
1591 PROGRESS_CREATEIDX_PHASE_WAIT_3);
1592 WaitForOlderSnapshots(limitXmin, true);
1595 * Index can now be marked valid -- update its pg_index entry
1597 index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
1600 * The pg_index update will cause backends (including this one) to update
1601 * relcache entries for the index itself, but we should also send a
1602 * relcache inval on the parent table to force replanning of cached plans.
1603 * Otherwise existing sessions might fail to use the new index where it
1604 * would be useful. (Note that our earlier commits did not create reasons
1605 * to replan; so relcache flush on the index itself was sufficient.)
1607 CacheInvalidateRelcacheByRelid(heaprelid.relId);
1610 * Last thing to do is release the session-level lock on the parent table.
1612 UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
1614 pgstat_progress_end_command();
1616 return address;
1621 * CheckMutability
1622 * Test whether given expression is mutable
1624 static bool
1625 CheckMutability(Expr *expr)
1628 * First run the expression through the planner. This has a couple of
1629 * important consequences. First, function default arguments will get
1630 * inserted, which may affect volatility (consider "default now()").
1631 * Second, inline-able functions will get inlined, which may allow us to
1632 * conclude that the function is really less volatile than it's marked. As
1633 * an example, polymorphic functions must be marked with the most volatile
1634 * behavior that they have for any input type, but once we inline the
1635 * function we may be able to conclude that it's not so volatile for the
1636 * particular input type we're dealing with.
1638 * We assume here that expression_planner() won't scribble on its input.
1640 expr = expression_planner(expr);
1642 /* Now we can search for non-immutable functions */
1643 return contain_mutable_functions((Node *) expr);
1648 * CheckPredicate
1649 * Checks that the given partial-index predicate is valid.
1651 * This used to also constrain the form of the predicate to forms that
1652 * indxpath.c could do something with. However, that seems overly
1653 * restrictive. One useful application of partial indexes is to apply
1654 * a UNIQUE constraint across a subset of a table, and in that scenario
1655 * any evaluable predicate will work. So accept any predicate here
1656 * (except ones requiring a plan), and let indxpath.c fend for itself.
1658 static void
1659 CheckPredicate(Expr *predicate)
1662 * transformExpr() should have already rejected subqueries, aggregates,
1663 * and window functions, based on the EXPR_KIND_ for a predicate.
1667 * A predicate using mutable functions is probably wrong, for the same
1668 * reasons that we don't allow an index expression to use one.
1670 if (CheckMutability(predicate))
1671 ereport(ERROR,
1672 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1673 errmsg("functions in index predicate must be marked IMMUTABLE")));
1677 * Compute per-index-column information, including indexed column numbers
1678 * or index expressions, opclasses and their options. Note, all output vectors
1679 * should be allocated for all columns, including "including" ones.
1681 static void
1682 ComputeIndexAttrs(IndexInfo *indexInfo,
1683 Oid *typeOidP,
1684 Oid *collationOidP,
1685 Oid *classOidP,
1686 int16 *colOptionP,
1687 List *attList, /* list of IndexElem's */
1688 List *exclusionOpNames,
1689 Oid relId,
1690 const char *accessMethodName,
1691 Oid accessMethodId,
1692 bool amcanorder,
1693 bool isconstraint)
1695 ListCell *nextExclOp;
1696 ListCell *lc;
1697 int attn;
1698 int nkeycols = indexInfo->ii_NumIndexKeyAttrs;
1700 /* Allocate space for exclusion operator info, if needed */
1701 if (exclusionOpNames)
1703 Assert(list_length(exclusionOpNames) == nkeycols);
1704 indexInfo->ii_ExclusionOps = (Oid *) palloc(sizeof(Oid) * nkeycols);
1705 indexInfo->ii_ExclusionProcs = (Oid *) palloc(sizeof(Oid) * nkeycols);
1706 indexInfo->ii_ExclusionStrats = (uint16 *) palloc(sizeof(uint16) * nkeycols);
1707 nextExclOp = list_head(exclusionOpNames);
1709 else
1710 nextExclOp = NULL;
1713 * process attributeList
1715 attn = 0;
1716 foreach(lc, attList)
1718 IndexElem *attribute = (IndexElem *) lfirst(lc);
1719 Oid atttype;
1720 Oid attcollation;
1723 * Process the column-or-expression to be indexed.
1725 if (attribute->name != NULL)
1727 /* Simple index attribute */
1728 HeapTuple atttuple;
1729 Form_pg_attribute attform;
1731 Assert(attribute->expr == NULL);
1732 atttuple = SearchSysCacheAttName(relId, attribute->name);
1733 if (!HeapTupleIsValid(atttuple))
1735 /* difference in error message spellings is historical */
1736 if (isconstraint)
1737 ereport(ERROR,
1738 (errcode(ERRCODE_UNDEFINED_COLUMN),
1739 errmsg("column \"%s\" named in key does not exist",
1740 attribute->name)));
1741 else
1742 ereport(ERROR,
1743 (errcode(ERRCODE_UNDEFINED_COLUMN),
1744 errmsg("column \"%s\" does not exist",
1745 attribute->name)));
1747 attform = (Form_pg_attribute) GETSTRUCT(atttuple);
1748 indexInfo->ii_IndexAttrNumbers[attn] = attform->attnum;
1749 atttype = attform->atttypid;
1750 attcollation = attform->attcollation;
1751 ReleaseSysCache(atttuple);
1753 else
1755 /* Index expression */
1756 Node *expr = attribute->expr;
1758 Assert(expr != NULL);
1760 if (attn >= nkeycols)
1761 ereport(ERROR,
1762 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1763 errmsg("expressions are not supported in included columns")));
1764 atttype = exprType(expr);
1765 attcollation = exprCollation(expr);
1768 * Strip any top-level COLLATE clause. This ensures that we treat
1769 * "x COLLATE y" and "(x COLLATE y)" alike.
1771 while (IsA(expr, CollateExpr))
1772 expr = (Node *) ((CollateExpr *) expr)->arg;
1774 if (IsA(expr, Var) &&
1775 ((Var *) expr)->varattno != InvalidAttrNumber)
1778 * User wrote "(column)" or "(column COLLATE something)".
1779 * Treat it like simple attribute anyway.
1781 indexInfo->ii_IndexAttrNumbers[attn] = ((Var *) expr)->varattno;
1783 else
1785 indexInfo->ii_IndexAttrNumbers[attn] = 0; /* marks expression */
1786 indexInfo->ii_Expressions = lappend(indexInfo->ii_Expressions,
1787 expr);
1790 * transformExpr() should have already rejected subqueries,
1791 * aggregates, and window functions, based on the EXPR_KIND_
1792 * for an index expression.
1796 * An expression using mutable functions is probably wrong,
1797 * since if you aren't going to get the same result for the
1798 * same data every time, it's not clear what the index entries
1799 * mean at all.
1801 if (CheckMutability((Expr *) expr))
1802 ereport(ERROR,
1803 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1804 errmsg("functions in index expression must be marked IMMUTABLE")));
1808 typeOidP[attn] = atttype;
1811 * Included columns have no collation, no opclass and no ordering
1812 * options.
1814 if (attn >= nkeycols)
1816 if (attribute->collation)
1817 ereport(ERROR,
1818 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1819 errmsg("including column does not support a collation")));
1820 if (attribute->opclass)
1821 ereport(ERROR,
1822 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1823 errmsg("including column does not support an operator class")));
1824 if (attribute->ordering != SORTBY_DEFAULT)
1825 ereport(ERROR,
1826 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1827 errmsg("including column does not support ASC/DESC options")));
1828 if (attribute->nulls_ordering != SORTBY_NULLS_DEFAULT)
1829 ereport(ERROR,
1830 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1831 errmsg("including column does not support NULLS FIRST/LAST options")));
1833 classOidP[attn] = InvalidOid;
1834 colOptionP[attn] = 0;
1835 collationOidP[attn] = InvalidOid;
1836 attn++;
1838 continue;
1842 * Apply collation override if any
1844 if (attribute->collation)
1845 attcollation = get_collation_oid(attribute->collation, false);
1848 * Check we have a collation iff it's a collatable type. The only
1849 * expected failures here are (1) COLLATE applied to a noncollatable
1850 * type, or (2) index expression had an unresolved collation. But we
1851 * might as well code this to be a complete consistency check.
1853 if (type_is_collatable(atttype))
1855 if (!OidIsValid(attcollation))
1856 ereport(ERROR,
1857 (errcode(ERRCODE_INDETERMINATE_COLLATION),
1858 errmsg("could not determine which collation to use for index expression"),
1859 errhint("Use the COLLATE clause to set the collation explicitly.")));
1861 else
1863 if (OidIsValid(attcollation))
1864 ereport(ERROR,
1865 (errcode(ERRCODE_DATATYPE_MISMATCH),
1866 errmsg("collations are not supported by type %s",
1867 format_type_be(atttype))));
1870 collationOidP[attn] = attcollation;
1873 * Identify the opclass to use.
1875 classOidP[attn] = ResolveOpClass(attribute->opclass,
1876 atttype,
1877 accessMethodName,
1878 accessMethodId);
1881 * Identify the exclusion operator, if any.
1883 if (nextExclOp)
1885 List *opname = (List *) lfirst(nextExclOp);
1886 Oid opid;
1887 Oid opfamily;
1888 int strat;
1891 * Find the operator --- it must accept the column datatype
1892 * without runtime coercion (but binary compatibility is OK)
1894 opid = compatible_oper_opid(opname, atttype, atttype, false);
1897 * Only allow commutative operators to be used in exclusion
1898 * constraints. If X conflicts with Y, but Y does not conflict
1899 * with X, bad things will happen.
1901 if (get_commutator(opid) != opid)
1902 ereport(ERROR,
1903 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1904 errmsg("operator %s is not commutative",
1905 format_operator(opid)),
1906 errdetail("Only commutative operators can be used in exclusion constraints.")));
1909 * Operator must be a member of the right opfamily, too
1911 opfamily = get_opclass_family(classOidP[attn]);
1912 strat = get_op_opfamily_strategy(opid, opfamily);
1913 if (strat == 0)
1915 HeapTuple opftuple;
1916 Form_pg_opfamily opfform;
1919 * attribute->opclass might not explicitly name the opfamily,
1920 * so fetch the name of the selected opfamily for use in the
1921 * error message.
1923 opftuple = SearchSysCache1(OPFAMILYOID,
1924 ObjectIdGetDatum(opfamily));
1925 if (!HeapTupleIsValid(opftuple))
1926 elog(ERROR, "cache lookup failed for opfamily %u",
1927 opfamily);
1928 opfform = (Form_pg_opfamily) GETSTRUCT(opftuple);
1930 ereport(ERROR,
1931 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1932 errmsg("operator %s is not a member of operator family \"%s\"",
1933 format_operator(opid),
1934 NameStr(opfform->opfname)),
1935 errdetail("The exclusion operator must be related to the index operator class for the constraint.")));
1938 indexInfo->ii_ExclusionOps[attn] = opid;
1939 indexInfo->ii_ExclusionProcs[attn] = get_opcode(opid);
1940 indexInfo->ii_ExclusionStrats[attn] = strat;
1941 nextExclOp = lnext(exclusionOpNames, nextExclOp);
1945 * Set up the per-column options (indoption field). For now, this is
1946 * zero for any un-ordered index, while ordered indexes have DESC and
1947 * NULLS FIRST/LAST options.
1949 colOptionP[attn] = 0;
1950 if (amcanorder)
1952 /* default ordering is ASC */
1953 if (attribute->ordering == SORTBY_DESC)
1954 colOptionP[attn] |= INDOPTION_DESC;
1955 /* default null ordering is LAST for ASC, FIRST for DESC */
1956 if (attribute->nulls_ordering == SORTBY_NULLS_DEFAULT)
1958 if (attribute->ordering == SORTBY_DESC)
1959 colOptionP[attn] |= INDOPTION_NULLS_FIRST;
1961 else if (attribute->nulls_ordering == SORTBY_NULLS_FIRST)
1962 colOptionP[attn] |= INDOPTION_NULLS_FIRST;
1964 else
1966 /* index AM does not support ordering */
1967 if (attribute->ordering != SORTBY_DEFAULT)
1968 ereport(ERROR,
1969 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1970 errmsg("access method \"%s\" does not support ASC/DESC options",
1971 accessMethodName)));
1972 if (attribute->nulls_ordering != SORTBY_NULLS_DEFAULT)
1973 ereport(ERROR,
1974 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1975 errmsg("access method \"%s\" does not support NULLS FIRST/LAST options",
1976 accessMethodName)));
1979 /* Set up the per-column opclass options (attoptions field). */
1980 if (attribute->opclassopts)
1982 Assert(attn < nkeycols);
1984 if (!indexInfo->ii_OpclassOptions)
1985 indexInfo->ii_OpclassOptions =
1986 palloc0(sizeof(Datum) * indexInfo->ii_NumIndexAttrs);
1988 indexInfo->ii_OpclassOptions[attn] =
1989 transformRelOptions((Datum) 0, attribute->opclassopts,
1990 NULL, NULL, false, false);
1993 attn++;
1998 * Resolve possibly-defaulted operator class specification
2000 * Note: This is used to resolve operator class specifications in index and
2001 * partition key definitions.
2004 ResolveOpClass(List *opclass, Oid attrType,
2005 const char *accessMethodName, Oid accessMethodId)
2007 char *schemaname;
2008 char *opcname;
2009 HeapTuple tuple;
2010 Form_pg_opclass opform;
2011 Oid opClassId,
2012 opInputType;
2014 if (opclass == NIL)
2016 /* no operator class specified, so find the default */
2017 opClassId = GetDefaultOpClass(attrType, accessMethodId);
2018 if (!OidIsValid(opClassId))
2019 ereport(ERROR,
2020 (errcode(ERRCODE_UNDEFINED_OBJECT),
2021 errmsg("data type %s has no default operator class for access method \"%s\"",
2022 format_type_be(attrType), accessMethodName),
2023 errhint("You must specify an operator class for the index or define a default operator class for the data type.")));
2024 return opClassId;
2028 * Specific opclass name given, so look up the opclass.
2031 /* deconstruct the name list */
2032 DeconstructQualifiedName(opclass, &schemaname, &opcname);
2034 if (schemaname)
2036 /* Look in specific schema only */
2037 Oid namespaceId;
2039 namespaceId = LookupExplicitNamespace(schemaname, false);
2040 tuple = SearchSysCache3(CLAAMNAMENSP,
2041 ObjectIdGetDatum(accessMethodId),
2042 PointerGetDatum(opcname),
2043 ObjectIdGetDatum(namespaceId));
2045 else
2047 /* Unqualified opclass name, so search the search path */
2048 opClassId = OpclassnameGetOpcid(accessMethodId, opcname);
2049 if (!OidIsValid(opClassId))
2050 ereport(ERROR,
2051 (errcode(ERRCODE_UNDEFINED_OBJECT),
2052 errmsg("operator class \"%s\" does not exist for access method \"%s\"",
2053 opcname, accessMethodName)));
2054 tuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(opClassId));
2057 if (!HeapTupleIsValid(tuple))
2058 ereport(ERROR,
2059 (errcode(ERRCODE_UNDEFINED_OBJECT),
2060 errmsg("operator class \"%s\" does not exist for access method \"%s\"",
2061 NameListToString(opclass), accessMethodName)));
2064 * Verify that the index operator class accepts this datatype. Note we
2065 * will accept binary compatibility.
2067 opform = (Form_pg_opclass) GETSTRUCT(tuple);
2068 opClassId = opform->oid;
2069 opInputType = opform->opcintype;
2071 if (!IsBinaryCoercible(attrType, opInputType))
2072 ereport(ERROR,
2073 (errcode(ERRCODE_DATATYPE_MISMATCH),
2074 errmsg("operator class \"%s\" does not accept data type %s",
2075 NameListToString(opclass), format_type_be(attrType))));
2077 ReleaseSysCache(tuple);
2079 return opClassId;
2083 * GetDefaultOpClass
2085 * Given the OIDs of a datatype and an access method, find the default
2086 * operator class, if any. Returns InvalidOid if there is none.
2089 GetDefaultOpClass(Oid type_id, Oid am_id)
2091 Oid result = InvalidOid;
2092 int nexact = 0;
2093 int ncompatible = 0;
2094 int ncompatiblepreferred = 0;
2095 Relation rel;
2096 ScanKeyData skey[1];
2097 SysScanDesc scan;
2098 HeapTuple tup;
2099 TYPCATEGORY tcategory;
2101 /* If it's a domain, look at the base type instead */
2102 type_id = getBaseType(type_id);
2104 tcategory = TypeCategory(type_id);
2107 * We scan through all the opclasses available for the access method,
2108 * looking for one that is marked default and matches the target type
2109 * (either exactly or binary-compatibly, but prefer an exact match).
2111 * We could find more than one binary-compatible match. If just one is
2112 * for a preferred type, use that one; otherwise we fail, forcing the user
2113 * to specify which one he wants. (The preferred-type special case is a
2114 * kluge for varchar: it's binary-compatible to both text and bpchar, so
2115 * we need a tiebreaker.) If we find more than one exact match, then
2116 * someone put bogus entries in pg_opclass.
2118 rel = table_open(OperatorClassRelationId, AccessShareLock);
2120 ScanKeyInit(&skey[0],
2121 Anum_pg_opclass_opcmethod,
2122 BTEqualStrategyNumber, F_OIDEQ,
2123 ObjectIdGetDatum(am_id));
2125 scan = systable_beginscan(rel, OpclassAmNameNspIndexId, true,
2126 NULL, 1, skey);
2128 while (HeapTupleIsValid(tup = systable_getnext(scan)))
2130 Form_pg_opclass opclass = (Form_pg_opclass) GETSTRUCT(tup);
2132 /* ignore altogether if not a default opclass */
2133 if (!opclass->opcdefault)
2134 continue;
2135 if (opclass->opcintype == type_id)
2137 nexact++;
2138 result = opclass->oid;
2140 else if (nexact == 0 &&
2141 IsBinaryCoercible(type_id, opclass->opcintype))
2143 if (IsPreferredType(tcategory, opclass->opcintype))
2145 ncompatiblepreferred++;
2146 result = opclass->oid;
2148 else if (ncompatiblepreferred == 0)
2150 ncompatible++;
2151 result = opclass->oid;
2156 systable_endscan(scan);
2158 table_close(rel, AccessShareLock);
2160 /* raise error if pg_opclass contains inconsistent data */
2161 if (nexact > 1)
2162 ereport(ERROR,
2163 (errcode(ERRCODE_DUPLICATE_OBJECT),
2164 errmsg("there are multiple default operator classes for data type %s",
2165 format_type_be(type_id))));
2167 if (nexact == 1 ||
2168 ncompatiblepreferred == 1 ||
2169 (ncompatiblepreferred == 0 && ncompatible == 1))
2170 return result;
2172 return InvalidOid;
2176 * makeObjectName()
2178 * Create a name for an implicitly created index, sequence, constraint,
2179 * extended statistics, etc.
2181 * The parameters are typically: the original table name, the original field
2182 * name, and a "type" string (such as "seq" or "pkey"). The field name
2183 * and/or type can be NULL if not relevant.
2185 * The result is a palloc'd string.
2187 * The basic result we want is "name1_name2_label", omitting "_name2" or
2188 * "_label" when those parameters are NULL. However, we must generate
2189 * a name with less than NAMEDATALEN characters! So, we truncate one or
2190 * both names if necessary to make a short-enough string. The label part
2191 * is never truncated (so it had better be reasonably short).
2193 * The caller is responsible for checking uniqueness of the generated
2194 * name and retrying as needed; retrying will be done by altering the
2195 * "label" string (which is why we never truncate that part).
2197 char *
2198 makeObjectName(const char *name1, const char *name2, const char *label)
2200 char *name;
2201 int overhead = 0; /* chars needed for label and underscores */
2202 int availchars; /* chars available for name(s) */
2203 int name1chars; /* chars allocated to name1 */
2204 int name2chars; /* chars allocated to name2 */
2205 int ndx;
2207 name1chars = strlen(name1);
2208 if (name2)
2210 name2chars = strlen(name2);
2211 overhead++; /* allow for separating underscore */
2213 else
2214 name2chars = 0;
2215 if (label)
2216 overhead += strlen(label) + 1;
2218 availchars = NAMEDATALEN - 1 - overhead;
2219 Assert(availchars > 0); /* else caller chose a bad label */
2222 * If we must truncate, preferentially truncate the longer name. This
2223 * logic could be expressed without a loop, but it's simple and obvious as
2224 * a loop.
2226 while (name1chars + name2chars > availchars)
2228 if (name1chars > name2chars)
2229 name1chars--;
2230 else
2231 name2chars--;
2234 name1chars = pg_mbcliplen(name1, name1chars, name1chars);
2235 if (name2)
2236 name2chars = pg_mbcliplen(name2, name2chars, name2chars);
2238 /* Now construct the string using the chosen lengths */
2239 name = palloc(name1chars + name2chars + overhead + 1);
2240 memcpy(name, name1, name1chars);
2241 ndx = name1chars;
2242 if (name2)
2244 name[ndx++] = '_';
2245 memcpy(name + ndx, name2, name2chars);
2246 ndx += name2chars;
2248 if (label)
2250 name[ndx++] = '_';
2251 strcpy(name + ndx, label);
2253 else
2254 name[ndx] = '\0';
2256 return name;
2260 * Select a nonconflicting name for a new relation. This is ordinarily
2261 * used to choose index names (which is why it's here) but it can also
2262 * be used for sequences, or any autogenerated relation kind.
2264 * name1, name2, and label are used the same way as for makeObjectName(),
2265 * except that the label can't be NULL; digits will be appended to the label
2266 * if needed to create a name that is unique within the specified namespace.
2268 * If isconstraint is true, we also avoid choosing a name matching any
2269 * existing constraint in the same namespace. (This is stricter than what
2270 * Postgres itself requires, but the SQL standard says that constraint names
2271 * should be unique within schemas, so we follow that for autogenerated
2272 * constraint names.)
2274 * Note: it is theoretically possible to get a collision anyway, if someone
2275 * else chooses the same name concurrently. This is fairly unlikely to be
2276 * a problem in practice, especially if one is holding an exclusive lock on
2277 * the relation identified by name1. However, if choosing multiple names
2278 * within a single command, you'd better create the new object and do
2279 * CommandCounterIncrement before choosing the next one!
2281 * Returns a palloc'd string.
2283 char *
2284 ChooseRelationName(const char *name1, const char *name2,
2285 const char *label, Oid namespaceid,
2286 bool isconstraint)
2288 int pass = 0;
2289 char *relname = NULL;
2290 char modlabel[NAMEDATALEN];
2292 /* try the unmodified label first */
2293 strlcpy(modlabel, label, sizeof(modlabel));
2295 for (;;)
2297 relname = makeObjectName(name1, name2, modlabel);
2299 if (!OidIsValid(get_relname_relid(relname, namespaceid)))
2301 if (!isconstraint ||
2302 !ConstraintNameExists(relname, namespaceid))
2303 break;
2306 /* found a conflict, so try a new name component */
2307 pfree(relname);
2308 snprintf(modlabel, sizeof(modlabel), "%s%d", label, ++pass);
2311 return relname;
2315 * Select the name to be used for an index.
2317 * The argument list is pretty ad-hoc :-(
2319 static char *
2320 ChooseIndexName(const char *tabname, Oid namespaceId,
2321 List *colnames, List *exclusionOpNames,
2322 bool primary, bool isconstraint)
2324 char *indexname;
2326 if (primary)
2328 /* the primary key's name does not depend on the specific column(s) */
2329 indexname = ChooseRelationName(tabname,
2330 NULL,
2331 "pkey",
2332 namespaceId,
2333 true);
2335 else if (exclusionOpNames != NIL)
2337 indexname = ChooseRelationName(tabname,
2338 ChooseIndexNameAddition(colnames),
2339 "excl",
2340 namespaceId,
2341 true);
2343 else if (isconstraint)
2345 indexname = ChooseRelationName(tabname,
2346 ChooseIndexNameAddition(colnames),
2347 "key",
2348 namespaceId,
2349 true);
2351 else
2353 indexname = ChooseRelationName(tabname,
2354 ChooseIndexNameAddition(colnames),
2355 "idx",
2356 namespaceId,
2357 false);
2360 return indexname;
2364 * Generate "name2" for a new index given the list of column names for it
2365 * (as produced by ChooseIndexColumnNames). This will be passed to
2366 * ChooseRelationName along with the parent table name and a suitable label.
2368 * We know that less than NAMEDATALEN characters will actually be used,
2369 * so we can truncate the result once we've generated that many.
2371 * XXX See also ChooseForeignKeyConstraintNameAddition and
2372 * ChooseExtendedStatisticNameAddition.
2374 static char *
2375 ChooseIndexNameAddition(List *colnames)
2377 char buf[NAMEDATALEN * 2];
2378 int buflen = 0;
2379 ListCell *lc;
2381 buf[0] = '\0';
2382 foreach(lc, colnames)
2384 const char *name = (const char *) lfirst(lc);
2386 if (buflen > 0)
2387 buf[buflen++] = '_'; /* insert _ between names */
2390 * At this point we have buflen <= NAMEDATALEN. name should be less
2391 * than NAMEDATALEN already, but use strlcpy for paranoia.
2393 strlcpy(buf + buflen, name, NAMEDATALEN);
2394 buflen += strlen(buf + buflen);
2395 if (buflen >= NAMEDATALEN)
2396 break;
2398 return pstrdup(buf);
2402 * Select the actual names to be used for the columns of an index, given the
2403 * list of IndexElems for the columns. This is mostly about ensuring the
2404 * names are unique so we don't get a conflicting-attribute-names error.
2406 * Returns a List of plain strings (char *, not String nodes).
2408 static List *
2409 ChooseIndexColumnNames(List *indexElems)
2411 List *result = NIL;
2412 ListCell *lc;
2414 foreach(lc, indexElems)
2416 IndexElem *ielem = (IndexElem *) lfirst(lc);
2417 const char *origname;
2418 const char *curname;
2419 int i;
2420 char buf[NAMEDATALEN];
2422 /* Get the preliminary name from the IndexElem */
2423 if (ielem->indexcolname)
2424 origname = ielem->indexcolname; /* caller-specified name */
2425 else if (ielem->name)
2426 origname = ielem->name; /* simple column reference */
2427 else
2428 origname = "expr"; /* default name for expression */
2430 /* If it conflicts with any previous column, tweak it */
2431 curname = origname;
2432 for (i = 1;; i++)
2434 ListCell *lc2;
2435 char nbuf[32];
2436 int nlen;
2438 foreach(lc2, result)
2440 if (strcmp(curname, (char *) lfirst(lc2)) == 0)
2441 break;
2443 if (lc2 == NULL)
2444 break; /* found nonconflicting name */
2446 sprintf(nbuf, "%d", i);
2448 /* Ensure generated names are shorter than NAMEDATALEN */
2449 nlen = pg_mbcliplen(origname, strlen(origname),
2450 NAMEDATALEN - 1 - strlen(nbuf));
2451 memcpy(buf, origname, nlen);
2452 strcpy(buf + nlen, nbuf);
2453 curname = buf;
2456 /* And attach to the result list */
2457 result = lappend(result, pstrdup(curname));
2459 return result;
2463 * ExecReindex
2465 * Primary entry point for manual REINDEX commands. This is mainly a
2466 * preparation wrapper for the real operations that will happen in
2467 * each subroutine of REINDEX.
2469 void
2470 ExecReindex(ParseState *pstate, ReindexStmt *stmt, bool isTopLevel)
2472 ReindexParams params = {0};
2473 ListCell *lc;
2474 bool concurrently = false;
2475 bool verbose = false;
2476 char *tablespacename = NULL;
2478 /* Parse option list */
2479 foreach(lc, stmt->params)
2481 DefElem *opt = (DefElem *) lfirst(lc);
2483 if (strcmp(opt->defname, "verbose") == 0)
2484 verbose = defGetBoolean(opt);
2485 else if (strcmp(opt->defname, "concurrently") == 0)
2486 concurrently = defGetBoolean(opt);
2487 else if (strcmp(opt->defname, "tablespace") == 0)
2488 tablespacename = defGetString(opt);
2489 else
2490 ereport(ERROR,
2491 (errcode(ERRCODE_SYNTAX_ERROR),
2492 errmsg("unrecognized REINDEX option \"%s\"",
2493 opt->defname),
2494 parser_errposition(pstate, opt->location)));
2497 if (concurrently)
2498 PreventInTransactionBlock(isTopLevel,
2499 "REINDEX CONCURRENTLY");
2501 params.options =
2502 (verbose ? REINDEXOPT_VERBOSE : 0) |
2503 (concurrently ? REINDEXOPT_CONCURRENTLY : 0);
2506 * Assign the tablespace OID to move indexes to, with InvalidOid to do
2507 * nothing.
2509 if (tablespacename != NULL)
2511 params.tablespaceOid = get_tablespace_oid(tablespacename, false);
2513 /* Check permissions except when moving to database's default */
2514 if (OidIsValid(params.tablespaceOid) &&
2515 params.tablespaceOid != MyDatabaseTableSpace)
2517 AclResult aclresult;
2519 aclresult = pg_tablespace_aclcheck(params.tablespaceOid,
2520 GetUserId(), ACL_CREATE);
2521 if (aclresult != ACLCHECK_OK)
2522 aclcheck_error(aclresult, OBJECT_TABLESPACE,
2523 get_tablespace_name(params.tablespaceOid));
2526 else
2527 params.tablespaceOid = InvalidOid;
2529 switch (stmt->kind)
2531 case REINDEX_OBJECT_INDEX:
2532 ReindexIndex(stmt->relation, &params, isTopLevel);
2533 break;
2534 case REINDEX_OBJECT_TABLE:
2535 ReindexTable(stmt->relation, &params, isTopLevel);
2536 break;
2537 case REINDEX_OBJECT_SCHEMA:
2538 case REINDEX_OBJECT_SYSTEM:
2539 case REINDEX_OBJECT_DATABASE:
2542 * This cannot run inside a user transaction block; if we were
2543 * inside a transaction, then its commit- and
2544 * start-transaction-command calls would not have the intended
2545 * effect!
2547 PreventInTransactionBlock(isTopLevel,
2548 (stmt->kind == REINDEX_OBJECT_SCHEMA) ? "REINDEX SCHEMA" :
2549 (stmt->kind == REINDEX_OBJECT_SYSTEM) ? "REINDEX SYSTEM" :
2550 "REINDEX DATABASE");
2551 ReindexMultipleTables(stmt->name, stmt->kind, &params);
2552 break;
2553 default:
2554 elog(ERROR, "unrecognized object type: %d",
2555 (int) stmt->kind);
2556 break;
2561 * ReindexIndex
2562 * Recreate a specific index.
2564 static void
2565 ReindexIndex(RangeVar *indexRelation, ReindexParams *params, bool isTopLevel)
2567 struct ReindexIndexCallbackState state;
2568 Oid indOid;
2569 char persistence;
2570 char relkind;
2573 * Find and lock index, and check permissions on table; use callback to
2574 * obtain lock on table first, to avoid deadlock hazard. The lock level
2575 * used here must match the index lock obtained in reindex_index().
2577 * If it's a temporary index, we will perform a non-concurrent reindex,
2578 * even if CONCURRENTLY was requested. In that case, reindex_index() will
2579 * upgrade the lock, but that's OK, because other sessions can't hold
2580 * locks on our temporary table.
2582 state.params = *params;
2583 state.locked_table_oid = InvalidOid;
2584 indOid = RangeVarGetRelidExtended(indexRelation,
2585 (params->options & REINDEXOPT_CONCURRENTLY) != 0 ?
2586 ShareUpdateExclusiveLock : AccessExclusiveLock,
2588 RangeVarCallbackForReindexIndex,
2589 &state);
2592 * Obtain the current persistence and kind of the existing index. We
2593 * already hold a lock on the index.
2595 persistence = get_rel_persistence(indOid);
2596 relkind = get_rel_relkind(indOid);
2598 if (relkind == RELKIND_PARTITIONED_INDEX)
2599 ReindexPartitions(indOid, params, isTopLevel);
2600 else if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 &&
2601 persistence != RELPERSISTENCE_TEMP)
2602 ReindexRelationConcurrently(indOid, params);
2603 else
2605 ReindexParams newparams = *params;
2607 newparams.options |= REINDEXOPT_REPORT_PROGRESS;
2608 reindex_index(indOid, false, persistence, &newparams);
2613 * Check permissions on table before acquiring relation lock; also lock
2614 * the heap before the RangeVarGetRelidExtended takes the index lock, to avoid
2615 * deadlocks.
2617 static void
2618 RangeVarCallbackForReindexIndex(const RangeVar *relation,
2619 Oid relId, Oid oldRelId, void *arg)
2621 char relkind;
2622 struct ReindexIndexCallbackState *state = arg;
2623 LOCKMODE table_lockmode;
2626 * Lock level here should match table lock in reindex_index() for
2627 * non-concurrent case and table locks used by index_concurrently_*() for
2628 * concurrent case.
2630 table_lockmode = (state->params.options & REINDEXOPT_CONCURRENTLY) != 0 ?
2631 ShareUpdateExclusiveLock : ShareLock;
2634 * If we previously locked some other index's heap, and the name we're
2635 * looking up no longer refers to that relation, release the now-useless
2636 * lock.
2638 if (relId != oldRelId && OidIsValid(oldRelId))
2640 UnlockRelationOid(state->locked_table_oid, table_lockmode);
2641 state->locked_table_oid = InvalidOid;
2644 /* If the relation does not exist, there's nothing more to do. */
2645 if (!OidIsValid(relId))
2646 return;
2649 * If the relation does exist, check whether it's an index. But note that
2650 * the relation might have been dropped between the time we did the name
2651 * lookup and now. In that case, there's nothing to do.
2653 relkind = get_rel_relkind(relId);
2654 if (!relkind)
2655 return;
2656 if (relkind != RELKIND_INDEX &&
2657 relkind != RELKIND_PARTITIONED_INDEX)
2658 ereport(ERROR,
2659 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
2660 errmsg("\"%s\" is not an index", relation->relname)));
2662 /* Check permissions */
2663 if (!pg_class_ownercheck(relId, GetUserId()))
2664 aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_INDEX, relation->relname);
2666 /* Lock heap before index to avoid deadlock. */
2667 if (relId != oldRelId)
2669 Oid table_oid = IndexGetRelation(relId, true);
2672 * If the OID isn't valid, it means the index was concurrently
2673 * dropped, which is not a problem for us; just return normally.
2675 if (OidIsValid(table_oid))
2677 LockRelationOid(table_oid, table_lockmode);
2678 state->locked_table_oid = table_oid;
2684 * ReindexTable
2685 * Recreate all indexes of a table (and of its toast table, if any)
2687 static Oid
2688 ReindexTable(RangeVar *relation, ReindexParams *params, bool isTopLevel)
2690 Oid heapOid;
2691 bool result;
2694 * The lock level used here should match reindex_relation().
2696 * If it's a temporary table, we will perform a non-concurrent reindex,
2697 * even if CONCURRENTLY was requested. In that case, reindex_relation()
2698 * will upgrade the lock, but that's OK, because other sessions can't hold
2699 * locks on our temporary table.
2701 heapOid = RangeVarGetRelidExtended(relation,
2702 (params->options & REINDEXOPT_CONCURRENTLY) != 0 ?
2703 ShareUpdateExclusiveLock : ShareLock,
2705 RangeVarCallbackOwnsTable, NULL);
2707 if (get_rel_relkind(heapOid) == RELKIND_PARTITIONED_TABLE)
2708 ReindexPartitions(heapOid, params, isTopLevel);
2709 else if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 &&
2710 get_rel_persistence(heapOid) != RELPERSISTENCE_TEMP)
2712 result = ReindexRelationConcurrently(heapOid, params);
2714 if (!result)
2715 ereport(NOTICE,
2716 (errmsg("table \"%s\" has no indexes that can be reindexed concurrently",
2717 relation->relname)));
2719 else
2721 ReindexParams newparams = *params;
2723 newparams.options |= REINDEXOPT_REPORT_PROGRESS;
2724 result = reindex_relation(heapOid,
2725 REINDEX_REL_PROCESS_TOAST |
2726 REINDEX_REL_CHECK_CONSTRAINTS,
2727 &newparams);
2728 if (!result)
2729 ereport(NOTICE,
2730 (errmsg("table \"%s\" has no indexes to reindex",
2731 relation->relname)));
2734 return heapOid;
2738 * ReindexMultipleTables
2739 * Recreate indexes of tables selected by objectName/objectKind.
2741 * To reduce the probability of deadlocks, each table is reindexed in a
2742 * separate transaction, so we can release the lock on it right away.
2743 * That means this must not be called within a user transaction block!
2745 static void
2746 ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
2747 ReindexParams *params)
2749 Oid objectOid;
2750 Relation relationRelation;
2751 TableScanDesc scan;
2752 ScanKeyData scan_keys[1];
2753 HeapTuple tuple;
2754 MemoryContext private_context;
2755 MemoryContext old;
2756 List *relids = NIL;
2757 int num_keys;
2758 bool concurrent_warning = false;
2759 bool tablespace_warning = false;
2761 AssertArg(objectName);
2762 Assert(objectKind == REINDEX_OBJECT_SCHEMA ||
2763 objectKind == REINDEX_OBJECT_SYSTEM ||
2764 objectKind == REINDEX_OBJECT_DATABASE);
2766 if (objectKind == REINDEX_OBJECT_SYSTEM &&
2767 (params->options & REINDEXOPT_CONCURRENTLY) != 0)
2768 ereport(ERROR,
2769 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2770 errmsg("cannot reindex system catalogs concurrently")));
2773 * Get OID of object to reindex, being the database currently being used
2774 * by session for a database or for system catalogs, or the schema defined
2775 * by caller. At the same time do permission checks that need different
2776 * processing depending on the object type.
2778 if (objectKind == REINDEX_OBJECT_SCHEMA)
2780 objectOid = get_namespace_oid(objectName, false);
2782 if (!pg_namespace_ownercheck(objectOid, GetUserId()))
2783 aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_SCHEMA,
2784 objectName);
2786 else
2788 objectOid = MyDatabaseId;
2790 if (strcmp(objectName, get_database_name(objectOid)) != 0)
2791 ereport(ERROR,
2792 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2793 errmsg("can only reindex the currently open database")));
2794 if (!pg_database_ownercheck(objectOid, GetUserId()))
2795 aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE,
2796 objectName);
2800 * Create a memory context that will survive forced transaction commits we
2801 * do below. Since it is a child of PortalContext, it will go away
2802 * eventually even if we suffer an error; there's no need for special
2803 * abort cleanup logic.
2805 private_context = AllocSetContextCreate(PortalContext,
2806 "ReindexMultipleTables",
2807 ALLOCSET_SMALL_SIZES);
2810 * Define the search keys to find the objects to reindex. For a schema, we
2811 * select target relations using relnamespace, something not necessary for
2812 * a database-wide operation.
2814 if (objectKind == REINDEX_OBJECT_SCHEMA)
2816 num_keys = 1;
2817 ScanKeyInit(&scan_keys[0],
2818 Anum_pg_class_relnamespace,
2819 BTEqualStrategyNumber, F_OIDEQ,
2820 ObjectIdGetDatum(objectOid));
2822 else
2823 num_keys = 0;
2826 * Scan pg_class to build a list of the relations we need to reindex.
2828 * We only consider plain relations and materialized views here (toast
2829 * rels will be processed indirectly by reindex_relation).
2831 relationRelation = table_open(RelationRelationId, AccessShareLock);
2832 scan = table_beginscan_catalog(relationRelation, num_keys, scan_keys);
2833 while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
2835 Form_pg_class classtuple = (Form_pg_class) GETSTRUCT(tuple);
2836 Oid relid = classtuple->oid;
2839 * Only regular tables and matviews can have indexes, so ignore any
2840 * other kind of relation.
2842 * Partitioned tables/indexes are skipped but matching leaf partitions
2843 * are processed.
2845 if (classtuple->relkind != RELKIND_RELATION &&
2846 classtuple->relkind != RELKIND_MATVIEW)
2847 continue;
2849 /* Skip temp tables of other backends; we can't reindex them at all */
2850 if (classtuple->relpersistence == RELPERSISTENCE_TEMP &&
2851 !isTempNamespace(classtuple->relnamespace))
2852 continue;
2854 /* Check user/system classification, and optionally skip */
2855 if (objectKind == REINDEX_OBJECT_SYSTEM &&
2856 !IsSystemClass(relid, classtuple))
2857 continue;
2860 * The table can be reindexed if the user is superuser, the table
2861 * owner, or the database/schema owner (but in the latter case, only
2862 * if it's not a shared relation). pg_class_ownercheck includes the
2863 * superuser case, and depending on objectKind we already know that
2864 * the user has permission to run REINDEX on this database or schema
2865 * per the permission checks at the beginning of this routine.
2867 if (classtuple->relisshared &&
2868 !pg_class_ownercheck(relid, GetUserId()))
2869 continue;
2872 * Skip system tables, since index_create() would reject indexing them
2873 * concurrently (and it would likely fail if we tried).
2875 if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 &&
2876 IsCatalogRelationOid(relid))
2878 if (!concurrent_warning)
2879 ereport(WARNING,
2880 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2881 errmsg("cannot reindex system catalogs concurrently, skipping all")));
2882 concurrent_warning = true;
2883 continue;
2887 * If a new tablespace is set, check if this relation has to be
2888 * skipped.
2890 if (OidIsValid(params->tablespaceOid))
2892 bool skip_rel = false;
2895 * Mapped relations cannot be moved to different tablespaces (in
2896 * particular this eliminates all shared catalogs.).
2898 if (RELKIND_HAS_STORAGE(classtuple->relkind) &&
2899 !OidIsValid(classtuple->relfilenode))
2900 skip_rel = true;
2903 * A system relation is always skipped, even with
2904 * allow_system_table_mods enabled.
2906 if (IsSystemClass(relid, classtuple))
2907 skip_rel = true;
2909 if (skip_rel)
2911 if (!tablespace_warning)
2912 ereport(WARNING,
2913 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
2914 errmsg("cannot move system relations, skipping all")));
2915 tablespace_warning = true;
2916 continue;
2920 /* Save the list of relation OIDs in private context */
2921 old = MemoryContextSwitchTo(private_context);
2924 * We always want to reindex pg_class first if it's selected to be
2925 * reindexed. This ensures that if there is any corruption in
2926 * pg_class' indexes, they will be fixed before we process any other
2927 * tables. This is critical because reindexing itself will try to
2928 * update pg_class.
2930 if (relid == RelationRelationId)
2931 relids = lcons_oid(relid, relids);
2932 else
2933 relids = lappend_oid(relids, relid);
2935 MemoryContextSwitchTo(old);
2937 table_endscan(scan);
2938 table_close(relationRelation, AccessShareLock);
2941 * Process each relation listed in a separate transaction. Note that this
2942 * commits and then starts a new transaction immediately.
2944 ReindexMultipleInternal(relids, params);
2946 MemoryContextDelete(private_context);
2950 * Error callback specific to ReindexPartitions().
2952 static void
2953 reindex_error_callback(void *arg)
2955 ReindexErrorInfo *errinfo = (ReindexErrorInfo *) arg;
2957 Assert(RELKIND_HAS_PARTITIONS(errinfo->relkind));
2959 if (errinfo->relkind == RELKIND_PARTITIONED_TABLE)
2960 errcontext("while reindexing partitioned table \"%s.%s\"",
2961 errinfo->relnamespace, errinfo->relname);
2962 else if (errinfo->relkind == RELKIND_PARTITIONED_INDEX)
2963 errcontext("while reindexing partitioned index \"%s.%s\"",
2964 errinfo->relnamespace, errinfo->relname);
2968 * ReindexPartitions
2970 * Reindex a set of partitions, per the partitioned index or table given
2971 * by the caller.
2973 static void
2974 ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
2976 List *partitions = NIL;
2977 char relkind = get_rel_relkind(relid);
2978 char *relname = get_rel_name(relid);
2979 char *relnamespace = get_namespace_name(get_rel_namespace(relid));
2980 MemoryContext reindex_context;
2981 List *inhoids;
2982 ListCell *lc;
2983 ErrorContextCallback errcallback;
2984 ReindexErrorInfo errinfo;
2986 Assert(RELKIND_HAS_PARTITIONS(relkind));
2989 * Check if this runs in a transaction block, with an error callback to
2990 * provide more context under which a problem happens.
2992 errinfo.relname = pstrdup(relname);
2993 errinfo.relnamespace = pstrdup(relnamespace);
2994 errinfo.relkind = relkind;
2995 errcallback.callback = reindex_error_callback;
2996 errcallback.arg = (void *) &errinfo;
2997 errcallback.previous = error_context_stack;
2998 error_context_stack = &errcallback;
3000 PreventInTransactionBlock(isTopLevel,
3001 relkind == RELKIND_PARTITIONED_TABLE ?
3002 "REINDEX TABLE" : "REINDEX INDEX");
3004 /* Pop the error context stack */
3005 error_context_stack = errcallback.previous;
3008 * Create special memory context for cross-transaction storage.
3010 * Since it is a child of PortalContext, it will go away eventually even
3011 * if we suffer an error so there is no need for special abort cleanup
3012 * logic.
3014 reindex_context = AllocSetContextCreate(PortalContext, "Reindex",
3015 ALLOCSET_DEFAULT_SIZES);
3017 /* ShareLock is enough to prevent schema modifications */
3018 inhoids = find_all_inheritors(relid, ShareLock, NULL);
3021 * The list of relations to reindex are the physical partitions of the
3022 * tree so discard any partitioned table or index.
3024 foreach(lc, inhoids)
3026 Oid partoid = lfirst_oid(lc);
3027 char partkind = get_rel_relkind(partoid);
3028 MemoryContext old_context;
3031 * This discards partitioned tables, partitioned indexes and foreign
3032 * tables.
3034 if (!RELKIND_HAS_STORAGE(partkind))
3035 continue;
3037 Assert(partkind == RELKIND_INDEX ||
3038 partkind == RELKIND_RELATION);
3040 /* Save partition OID */
3041 old_context = MemoryContextSwitchTo(reindex_context);
3042 partitions = lappend_oid(partitions, partoid);
3043 MemoryContextSwitchTo(old_context);
3047 * Process each partition listed in a separate transaction. Note that
3048 * this commits and then starts a new transaction immediately.
3050 ReindexMultipleInternal(partitions, params);
3053 * Clean up working storage --- note we must do this after
3054 * StartTransactionCommand, else we might be trying to delete the active
3055 * context!
3057 MemoryContextDelete(reindex_context);
3061 * ReindexMultipleInternal
3063 * Reindex a list of relations, each one being processed in its own
3064 * transaction. This commits the existing transaction immediately,
3065 * and starts a new transaction when finished.
3067 static void
3068 ReindexMultipleInternal(List *relids, ReindexParams *params)
3070 ListCell *l;
3072 PopActiveSnapshot();
3073 CommitTransactionCommand();
3075 foreach(l, relids)
3077 Oid relid = lfirst_oid(l);
3078 char relkind;
3079 char relpersistence;
3081 StartTransactionCommand();
3083 /* functions in indexes may want a snapshot set */
3084 PushActiveSnapshot(GetTransactionSnapshot());
3086 /* check if the relation still exists */
3087 if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(relid)))
3089 PopActiveSnapshot();
3090 CommitTransactionCommand();
3091 continue;
3095 * Check permissions except when moving to database's default if a new
3096 * tablespace is chosen. Note that this check also happens in
3097 * ExecReindex(), but we do an extra check here as this runs across
3098 * multiple transactions.
3100 if (OidIsValid(params->tablespaceOid) &&
3101 params->tablespaceOid != MyDatabaseTableSpace)
3103 AclResult aclresult;
3105 aclresult = pg_tablespace_aclcheck(params->tablespaceOid,
3106 GetUserId(), ACL_CREATE);
3107 if (aclresult != ACLCHECK_OK)
3108 aclcheck_error(aclresult, OBJECT_TABLESPACE,
3109 get_tablespace_name(params->tablespaceOid));
3112 relkind = get_rel_relkind(relid);
3113 relpersistence = get_rel_persistence(relid);
3116 * Partitioned tables and indexes can never be processed directly, and
3117 * a list of their leaves should be built first.
3119 Assert(!RELKIND_HAS_PARTITIONS(relkind));
3121 if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 &&
3122 relpersistence != RELPERSISTENCE_TEMP)
3124 ReindexParams newparams = *params;
3126 newparams.options |= REINDEXOPT_MISSING_OK;
3127 (void) ReindexRelationConcurrently(relid, &newparams);
3128 /* ReindexRelationConcurrently() does the verbose output */
3130 else if (relkind == RELKIND_INDEX)
3132 ReindexParams newparams = *params;
3134 newparams.options |=
3135 REINDEXOPT_REPORT_PROGRESS | REINDEXOPT_MISSING_OK;
3136 reindex_index(relid, false, relpersistence, &newparams);
3137 PopActiveSnapshot();
3138 /* reindex_index() does the verbose output */
3140 else
3142 bool result;
3143 ReindexParams newparams = *params;
3145 newparams.options |=
3146 REINDEXOPT_REPORT_PROGRESS | REINDEXOPT_MISSING_OK;
3147 result = reindex_relation(relid,
3148 REINDEX_REL_PROCESS_TOAST |
3149 REINDEX_REL_CHECK_CONSTRAINTS,
3150 &newparams);
3152 if (result && (params->options & REINDEXOPT_VERBOSE) != 0)
3153 ereport(INFO,
3154 (errmsg("table \"%s.%s\" was reindexed",
3155 get_namespace_name(get_rel_namespace(relid)),
3156 get_rel_name(relid))));
3158 PopActiveSnapshot();
3161 CommitTransactionCommand();
3164 StartTransactionCommand();
3169 * ReindexRelationConcurrently - process REINDEX CONCURRENTLY for given
3170 * relation OID
3172 * 'relationOid' can either belong to an index, a table or a materialized
3173 * view. For tables and materialized views, all its indexes will be rebuilt,
3174 * excluding invalid indexes and any indexes used in exclusion constraints,
3175 * but including its associated toast table indexes. For indexes, the index
3176 * itself will be rebuilt.
3178 * The locks taken on parent tables and involved indexes are kept until the
3179 * transaction is committed, at which point a session lock is taken on each
3180 * relation. Both of these protect against concurrent schema changes.
3182 * Returns true if any indexes have been rebuilt (including toast table's
3183 * indexes, when relevant), otherwise returns false.
3185 * NOTE: This cannot be used on temporary relations. A concurrent build would
3186 * cause issues with ON COMMIT actions triggered by the transactions of the
3187 * concurrent build. Temporary relations are not subject to concurrent
3188 * concerns, so there's no need for the more complicated concurrent build,
3189 * anyway, and a non-concurrent reindex is more efficient.
3191 static bool
3192 ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
3194 typedef struct ReindexIndexInfo
3196 Oid indexId;
3197 Oid tableId;
3198 Oid amId;
3199 bool safe; /* for set_indexsafe_procflags */
3200 } ReindexIndexInfo;
3201 List *heapRelationIds = NIL;
3202 List *indexIds = NIL;
3203 List *newIndexIds = NIL;
3204 List *relationLocks = NIL;
3205 List *lockTags = NIL;
3206 ListCell *lc,
3207 *lc2;
3208 MemoryContext private_context;
3209 MemoryContext oldcontext;
3210 char relkind;
3211 char *relationName = NULL;
3212 char *relationNamespace = NULL;
3213 PGRUsage ru0;
3214 const int progress_index[] = {
3215 PROGRESS_CREATEIDX_COMMAND,
3216 PROGRESS_CREATEIDX_PHASE,
3217 PROGRESS_CREATEIDX_INDEX_OID,
3218 PROGRESS_CREATEIDX_ACCESS_METHOD_OID
3220 int64 progress_vals[4];
3223 * Create a memory context that will survive forced transaction commits we
3224 * do below. Since it is a child of PortalContext, it will go away
3225 * eventually even if we suffer an error; there's no need for special
3226 * abort cleanup logic.
3228 private_context = AllocSetContextCreate(PortalContext,
3229 "ReindexConcurrent",
3230 ALLOCSET_SMALL_SIZES);
3232 if ((params->options & REINDEXOPT_VERBOSE) != 0)
3234 /* Save data needed by REINDEX VERBOSE in private context */
3235 oldcontext = MemoryContextSwitchTo(private_context);
3237 relationName = get_rel_name(relationOid);
3238 relationNamespace = get_namespace_name(get_rel_namespace(relationOid));
3240 pg_rusage_init(&ru0);
3242 MemoryContextSwitchTo(oldcontext);
3245 relkind = get_rel_relkind(relationOid);
3248 * Extract the list of indexes that are going to be rebuilt based on the
3249 * relation Oid given by caller.
3251 switch (relkind)
3253 case RELKIND_RELATION:
3254 case RELKIND_MATVIEW:
3255 case RELKIND_TOASTVALUE:
3258 * In the case of a relation, find all its indexes including
3259 * toast indexes.
3261 Relation heapRelation;
3263 /* Save the list of relation OIDs in private context */
3264 oldcontext = MemoryContextSwitchTo(private_context);
3266 /* Track this relation for session locks */
3267 heapRelationIds = lappend_oid(heapRelationIds, relationOid);
3269 MemoryContextSwitchTo(oldcontext);
3271 if (IsCatalogRelationOid(relationOid))
3272 ereport(ERROR,
3273 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3274 errmsg("cannot reindex system catalogs concurrently")));
3276 /* Open relation to get its indexes */
3277 if ((params->options & REINDEXOPT_MISSING_OK) != 0)
3279 heapRelation = try_table_open(relationOid,
3280 ShareUpdateExclusiveLock);
3281 /* leave if relation does not exist */
3282 if (!heapRelation)
3283 break;
3285 else
3286 heapRelation = table_open(relationOid,
3287 ShareUpdateExclusiveLock);
3289 if (OidIsValid(params->tablespaceOid) &&
3290 IsSystemRelation(heapRelation))
3291 ereport(ERROR,
3292 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3293 errmsg("cannot move system relation \"%s\"",
3294 RelationGetRelationName(heapRelation))));
3296 /* Add all the valid indexes of relation to list */
3297 foreach(lc, RelationGetIndexList(heapRelation))
3299 Oid cellOid = lfirst_oid(lc);
3300 Relation indexRelation = index_open(cellOid,
3301 ShareUpdateExclusiveLock);
3303 if (!indexRelation->rd_index->indisvalid)
3304 ereport(WARNING,
3305 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3306 errmsg("cannot reindex invalid index \"%s.%s\" concurrently, skipping",
3307 get_namespace_name(get_rel_namespace(cellOid)),
3308 get_rel_name(cellOid))));
3309 else if (indexRelation->rd_index->indisexclusion)
3310 ereport(WARNING,
3311 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3312 errmsg("cannot reindex exclusion constraint index \"%s.%s\" concurrently, skipping",
3313 get_namespace_name(get_rel_namespace(cellOid)),
3314 get_rel_name(cellOid))));
3315 else
3317 ReindexIndexInfo *idx;
3319 /* Save the list of relation OIDs in private context */
3320 oldcontext = MemoryContextSwitchTo(private_context);
3322 idx = palloc(sizeof(ReindexIndexInfo));
3323 idx->indexId = cellOid;
3324 /* other fields set later */
3326 indexIds = lappend(indexIds, idx);
3328 MemoryContextSwitchTo(oldcontext);
3331 index_close(indexRelation, NoLock);
3334 /* Also add the toast indexes */
3335 if (OidIsValid(heapRelation->rd_rel->reltoastrelid))
3337 Oid toastOid = heapRelation->rd_rel->reltoastrelid;
3338 Relation toastRelation = table_open(toastOid,
3339 ShareUpdateExclusiveLock);
3341 /* Save the list of relation OIDs in private context */
3342 oldcontext = MemoryContextSwitchTo(private_context);
3344 /* Track this relation for session locks */
3345 heapRelationIds = lappend_oid(heapRelationIds, toastOid);
3347 MemoryContextSwitchTo(oldcontext);
3349 foreach(lc2, RelationGetIndexList(toastRelation))
3351 Oid cellOid = lfirst_oid(lc2);
3352 Relation indexRelation = index_open(cellOid,
3353 ShareUpdateExclusiveLock);
3355 if (!indexRelation->rd_index->indisvalid)
3356 ereport(WARNING,
3357 (errcode(ERRCODE_INDEX_CORRUPTED),
3358 errmsg("cannot reindex invalid index \"%s.%s\" concurrently, skipping",
3359 get_namespace_name(get_rel_namespace(cellOid)),
3360 get_rel_name(cellOid))));
3361 else
3363 ReindexIndexInfo *idx;
3366 * Save the list of relation OIDs in private
3367 * context
3369 oldcontext = MemoryContextSwitchTo(private_context);
3371 idx = palloc(sizeof(ReindexIndexInfo));
3372 idx->indexId = cellOid;
3373 indexIds = lappend(indexIds, idx);
3374 /* other fields set later */
3376 MemoryContextSwitchTo(oldcontext);
3379 index_close(indexRelation, NoLock);
3382 table_close(toastRelation, NoLock);
3385 table_close(heapRelation, NoLock);
3386 break;
3388 case RELKIND_INDEX:
3390 Oid heapId = IndexGetRelation(relationOid,
3391 (params->options & REINDEXOPT_MISSING_OK) != 0);
3392 Relation heapRelation;
3393 ReindexIndexInfo *idx;
3395 /* if relation is missing, leave */
3396 if (!OidIsValid(heapId))
3397 break;
3399 if (IsCatalogRelationOid(heapId))
3400 ereport(ERROR,
3401 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3402 errmsg("cannot reindex system catalogs concurrently")));
3405 * Don't allow reindex for an invalid index on TOAST table, as
3406 * if rebuilt it would not be possible to drop it. Match
3407 * error message in reindex_index().
3409 if (IsToastNamespace(get_rel_namespace(relationOid)) &&
3410 !get_index_isvalid(relationOid))
3411 ereport(ERROR,
3412 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3413 errmsg("cannot reindex invalid index on TOAST table")));
3416 * Check if parent relation can be locked and if it exists,
3417 * this needs to be done at this stage as the list of indexes
3418 * to rebuild is not complete yet, and REINDEXOPT_MISSING_OK
3419 * should not be used once all the session locks are taken.
3421 if ((params->options & REINDEXOPT_MISSING_OK) != 0)
3423 heapRelation = try_table_open(heapId,
3424 ShareUpdateExclusiveLock);
3425 /* leave if relation does not exist */
3426 if (!heapRelation)
3427 break;
3429 else
3430 heapRelation = table_open(heapId,
3431 ShareUpdateExclusiveLock);
3433 if (OidIsValid(params->tablespaceOid) &&
3434 IsSystemRelation(heapRelation))
3435 ereport(ERROR,
3436 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3437 errmsg("cannot move system relation \"%s\"",
3438 get_rel_name(relationOid))));
3440 table_close(heapRelation, NoLock);
3442 /* Save the list of relation OIDs in private context */
3443 oldcontext = MemoryContextSwitchTo(private_context);
3445 /* Track the heap relation of this index for session locks */
3446 heapRelationIds = list_make1_oid(heapId);
3449 * Save the list of relation OIDs in private context. Note
3450 * that invalid indexes are allowed here.
3452 idx = palloc(sizeof(ReindexIndexInfo));
3453 idx->indexId = relationOid;
3454 indexIds = lappend(indexIds, idx);
3455 /* other fields set later */
3457 MemoryContextSwitchTo(oldcontext);
3458 break;
3461 case RELKIND_PARTITIONED_TABLE:
3462 case RELKIND_PARTITIONED_INDEX:
3463 default:
3464 /* Return error if type of relation is not supported */
3465 ereport(ERROR,
3466 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
3467 errmsg("cannot reindex this type of relation concurrently")));
3468 break;
3472 * Definitely no indexes, so leave. Any checks based on
3473 * REINDEXOPT_MISSING_OK should be done only while the list of indexes to
3474 * work on is built as the session locks taken before this transaction
3475 * commits will make sure that they cannot be dropped by a concurrent
3476 * session until this operation completes.
3478 if (indexIds == NIL)
3480 PopActiveSnapshot();
3481 return false;
3484 /* It's not a shared catalog, so refuse to move it to shared tablespace */
3485 if (params->tablespaceOid == GLOBALTABLESPACE_OID)
3486 ereport(ERROR,
3487 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3488 errmsg("cannot move non-shared relation to tablespace \"%s\"",
3489 get_tablespace_name(params->tablespaceOid))));
3491 Assert(heapRelationIds != NIL);
3493 /*-----
3494 * Now we have all the indexes we want to process in indexIds.
3496 * The phases now are:
3498 * 1. create new indexes in the catalog
3499 * 2. build new indexes
3500 * 3. let new indexes catch up with tuples inserted in the meantime
3501 * 4. swap index names
3502 * 5. mark old indexes as dead
3503 * 6. drop old indexes
3505 * We process each phase for all indexes before moving to the next phase,
3506 * for efficiency.
3510 * Phase 1 of REINDEX CONCURRENTLY
3512 * Create a new index with the same properties as the old one, but it is
3513 * only registered in catalogs and will be built later. Then get session
3514 * locks on all involved tables. See analogous code in DefineIndex() for
3515 * more detailed comments.
3518 foreach(lc, indexIds)
3520 char *concurrentName;
3521 ReindexIndexInfo *idx = lfirst(lc);
3522 ReindexIndexInfo *newidx;
3523 Oid newIndexId;
3524 Relation indexRel;
3525 Relation heapRel;
3526 Relation newIndexRel;
3527 LockRelId *lockrelid;
3528 Oid tablespaceid;
3530 indexRel = index_open(idx->indexId, ShareUpdateExclusiveLock);
3531 heapRel = table_open(indexRel->rd_index->indrelid,
3532 ShareUpdateExclusiveLock);
3534 /* determine safety of this index for set_indexsafe_procflags */
3535 idx->safe = (indexRel->rd_indexprs == NIL &&
3536 indexRel->rd_indpred == NIL);
3537 idx->tableId = RelationGetRelid(heapRel);
3538 idx->amId = indexRel->rd_rel->relam;
3540 /* This function shouldn't be called for temporary relations. */
3541 if (indexRel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
3542 elog(ERROR, "cannot reindex a temporary table concurrently");
3544 pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
3545 idx->tableId);
3547 progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY;
3548 progress_vals[1] = 0; /* initializing */
3549 progress_vals[2] = idx->indexId;
3550 progress_vals[3] = idx->amId;
3551 pgstat_progress_update_multi_param(4, progress_index, progress_vals);
3553 /* Choose a temporary relation name for the new index */
3554 concurrentName = ChooseRelationName(get_rel_name(idx->indexId),
3555 NULL,
3556 "ccnew",
3557 get_rel_namespace(indexRel->rd_index->indrelid),
3558 false);
3560 /* Choose the new tablespace, indexes of toast tables are not moved */
3561 if (OidIsValid(params->tablespaceOid) &&
3562 heapRel->rd_rel->relkind != RELKIND_TOASTVALUE)
3563 tablespaceid = params->tablespaceOid;
3564 else
3565 tablespaceid = indexRel->rd_rel->reltablespace;
3567 /* Create new index definition based on given index */
3568 newIndexId = index_concurrently_create_copy(heapRel,
3569 idx->indexId,
3570 tablespaceid,
3571 concurrentName);
3574 * Now open the relation of the new index, a session-level lock is
3575 * also needed on it.
3577 newIndexRel = index_open(newIndexId, ShareUpdateExclusiveLock);
3580 * Save the list of OIDs and locks in private context
3582 oldcontext = MemoryContextSwitchTo(private_context);
3584 newidx = palloc(sizeof(ReindexIndexInfo));
3585 newidx->indexId = newIndexId;
3586 newidx->safe = idx->safe;
3587 newidx->tableId = idx->tableId;
3588 newidx->amId = idx->amId;
3590 newIndexIds = lappend(newIndexIds, newidx);
3593 * Save lockrelid to protect each relation from drop then close
3594 * relations. The lockrelid on parent relation is not taken here to
3595 * avoid multiple locks taken on the same relation, instead we rely on
3596 * parentRelationIds built earlier.
3598 lockrelid = palloc(sizeof(*lockrelid));
3599 *lockrelid = indexRel->rd_lockInfo.lockRelId;
3600 relationLocks = lappend(relationLocks, lockrelid);
3601 lockrelid = palloc(sizeof(*lockrelid));
3602 *lockrelid = newIndexRel->rd_lockInfo.lockRelId;
3603 relationLocks = lappend(relationLocks, lockrelid);
3605 MemoryContextSwitchTo(oldcontext);
3607 index_close(indexRel, NoLock);
3608 index_close(newIndexRel, NoLock);
3609 table_close(heapRel, NoLock);
3613 * Save the heap lock for following visibility checks with other backends
3614 * might conflict with this session.
3616 foreach(lc, heapRelationIds)
3618 Relation heapRelation = table_open(lfirst_oid(lc), ShareUpdateExclusiveLock);
3619 LockRelId *lockrelid;
3620 LOCKTAG *heaplocktag;
3622 /* Save the list of locks in private context */
3623 oldcontext = MemoryContextSwitchTo(private_context);
3625 /* Add lockrelid of heap relation to the list of locked relations */
3626 lockrelid = palloc(sizeof(*lockrelid));
3627 *lockrelid = heapRelation->rd_lockInfo.lockRelId;
3628 relationLocks = lappend(relationLocks, lockrelid);
3630 heaplocktag = (LOCKTAG *) palloc(sizeof(LOCKTAG));
3632 /* Save the LOCKTAG for this parent relation for the wait phase */
3633 SET_LOCKTAG_RELATION(*heaplocktag, lockrelid->dbId, lockrelid->relId);
3634 lockTags = lappend(lockTags, heaplocktag);
3636 MemoryContextSwitchTo(oldcontext);
3638 /* Close heap relation */
3639 table_close(heapRelation, NoLock);
3642 /* Get a session-level lock on each table. */
3643 foreach(lc, relationLocks)
3645 LockRelId *lockrelid = (LockRelId *) lfirst(lc);
3647 LockRelationIdForSession(lockrelid, ShareUpdateExclusiveLock);
3650 PopActiveSnapshot();
3651 CommitTransactionCommand();
3652 StartTransactionCommand();
3655 * Because we don't take a snapshot in this transaction, there's no need
3656 * to set the PROC_IN_SAFE_IC flag here.
3660 * Phase 2 of REINDEX CONCURRENTLY
3662 * Build the new indexes in a separate transaction for each index to avoid
3663 * having open transactions for an unnecessary long time. But before
3664 * doing that, wait until no running transactions could have the table of
3665 * the index open with the old list of indexes. See "phase 2" in
3666 * DefineIndex() for more details.
3669 pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
3670 PROGRESS_CREATEIDX_PHASE_WAIT_1);
3671 WaitForLockersMultiple(lockTags, ShareLock, true);
3672 CommitTransactionCommand();
3674 foreach(lc, newIndexIds)
3676 ReindexIndexInfo *newidx = lfirst(lc);
3678 /* Start new transaction for this index's concurrent build */
3679 StartTransactionCommand();
3682 * Check for user-requested abort. This is inside a transaction so as
3683 * xact.c does not issue a useless WARNING, and ensures that
3684 * session-level locks are cleaned up on abort.
3686 CHECK_FOR_INTERRUPTS();
3688 /* Tell concurrent indexing to ignore us, if index qualifies */
3689 if (newidx->safe)
3690 set_indexsafe_procflags();
3692 /* Set ActiveSnapshot since functions in the indexes may need it */
3693 PushActiveSnapshot(GetTransactionSnapshot());
3696 * Update progress for the index to build, with the correct parent
3697 * table involved.
3699 pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX, newidx->tableId);
3700 progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY;
3701 progress_vals[1] = PROGRESS_CREATEIDX_PHASE_BUILD;
3702 progress_vals[2] = newidx->indexId;
3703 progress_vals[3] = newidx->amId;
3704 pgstat_progress_update_multi_param(4, progress_index, progress_vals);
3706 /* Perform concurrent build of new index */
3707 index_concurrently_build(newidx->tableId, newidx->indexId);
3709 PopActiveSnapshot();
3710 CommitTransactionCommand();
3713 StartTransactionCommand();
3716 * Because we don't take a snapshot or Xid in this transaction, there's no
3717 * need to set the PROC_IN_SAFE_IC flag here.
3721 * Phase 3 of REINDEX CONCURRENTLY
3723 * During this phase the old indexes catch up with any new tuples that
3724 * were created during the previous phase. See "phase 3" in DefineIndex()
3725 * for more details.
3728 pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
3729 PROGRESS_CREATEIDX_PHASE_WAIT_2);
3730 WaitForLockersMultiple(lockTags, ShareLock, true);
3731 CommitTransactionCommand();
3733 foreach(lc, newIndexIds)
3735 ReindexIndexInfo *newidx = lfirst(lc);
3736 TransactionId limitXmin;
3737 Snapshot snapshot;
3739 StartTransactionCommand();
3742 * Check for user-requested abort. This is inside a transaction so as
3743 * xact.c does not issue a useless WARNING, and ensures that
3744 * session-level locks are cleaned up on abort.
3746 CHECK_FOR_INTERRUPTS();
3748 /* Tell concurrent indexing to ignore us, if index qualifies */
3749 if (newidx->safe)
3750 set_indexsafe_procflags();
3753 * Take the "reference snapshot" that will be used by validate_index()
3754 * to filter candidate tuples.
3756 snapshot = RegisterSnapshot(GetTransactionSnapshot());
3757 PushActiveSnapshot(snapshot);
3760 * Update progress for the index to build, with the correct parent
3761 * table involved.
3763 pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
3764 newidx->tableId);
3765 progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY;
3766 progress_vals[1] = PROGRESS_CREATEIDX_PHASE_VALIDATE_IDXSCAN;
3767 progress_vals[2] = newidx->indexId;
3768 progress_vals[3] = newidx->amId;
3769 pgstat_progress_update_multi_param(4, progress_index, progress_vals);
3771 validate_index(newidx->tableId, newidx->indexId, snapshot);
3774 * We can now do away with our active snapshot, we still need to save
3775 * the xmin limit to wait for older snapshots.
3777 limitXmin = snapshot->xmin;
3779 PopActiveSnapshot();
3780 UnregisterSnapshot(snapshot);
3783 * To ensure no deadlocks, we must commit and start yet another
3784 * transaction, and do our wait before any snapshot has been taken in
3785 * it.
3787 CommitTransactionCommand();
3788 StartTransactionCommand();
3791 * The index is now valid in the sense that it contains all currently
3792 * interesting tuples. But since it might not contain tuples deleted
3793 * just before the reference snap was taken, we have to wait out any
3794 * transactions that might have older snapshots.
3796 * Because we don't take a snapshot or Xid in this transaction,
3797 * there's no need to set the PROC_IN_SAFE_IC flag here.
3799 pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
3800 PROGRESS_CREATEIDX_PHASE_WAIT_3);
3801 WaitForOlderSnapshots(limitXmin, true);
3803 CommitTransactionCommand();
3807 * Phase 4 of REINDEX CONCURRENTLY
3809 * Now that the new indexes have been validated, swap each new index with
3810 * its corresponding old index.
3812 * We mark the new indexes as valid and the old indexes as not valid at
3813 * the same time to make sure we only get constraint violations from the
3814 * indexes with the correct names.
3817 StartTransactionCommand();
3820 * Because this transaction only does catalog manipulations and doesn't do
3821 * any index operations, we can set the PROC_IN_SAFE_IC flag here
3822 * unconditionally.
3824 set_indexsafe_procflags();
3826 forboth(lc, indexIds, lc2, newIndexIds)
3828 ReindexIndexInfo *oldidx = lfirst(lc);
3829 ReindexIndexInfo *newidx = lfirst(lc2);
3830 char *oldName;
3833 * Check for user-requested abort. This is inside a transaction so as
3834 * xact.c does not issue a useless WARNING, and ensures that
3835 * session-level locks are cleaned up on abort.
3837 CHECK_FOR_INTERRUPTS();
3839 /* Choose a relation name for old index */
3840 oldName = ChooseRelationName(get_rel_name(oldidx->indexId),
3841 NULL,
3842 "ccold",
3843 get_rel_namespace(oldidx->tableId),
3844 false);
3847 * Swap old index with the new one. This also marks the new one as
3848 * valid and the old one as not valid.
3850 index_concurrently_swap(newidx->indexId, oldidx->indexId, oldName);
3853 * Invalidate the relcache for the table, so that after this commit
3854 * all sessions will refresh any cached plans that might reference the
3855 * index.
3857 CacheInvalidateRelcacheByRelid(oldidx->tableId);
3860 * CCI here so that subsequent iterations see the oldName in the
3861 * catalog and can choose a nonconflicting name for their oldName.
3862 * Otherwise, this could lead to conflicts if a table has two indexes
3863 * whose names are equal for the first NAMEDATALEN-minus-a-few
3864 * characters.
3866 CommandCounterIncrement();
3869 /* Commit this transaction and make index swaps visible */
3870 CommitTransactionCommand();
3871 StartTransactionCommand();
3874 * While we could set PROC_IN_SAFE_IC if all indexes qualified, there's no
3875 * real need for that, because we only acquire an Xid after the wait is
3876 * done, and that lasts for a very short period.
3880 * Phase 5 of REINDEX CONCURRENTLY
3882 * Mark the old indexes as dead. First we must wait until no running
3883 * transaction could be using the index for a query. See also
3884 * index_drop() for more details.
3887 pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
3888 PROGRESS_CREATEIDX_PHASE_WAIT_4);
3889 WaitForLockersMultiple(lockTags, AccessExclusiveLock, true);
3891 foreach(lc, indexIds)
3893 ReindexIndexInfo *oldidx = lfirst(lc);
3896 * Check for user-requested abort. This is inside a transaction so as
3897 * xact.c does not issue a useless WARNING, and ensures that
3898 * session-level locks are cleaned up on abort.
3900 CHECK_FOR_INTERRUPTS();
3902 index_concurrently_set_dead(oldidx->tableId, oldidx->indexId);
3905 /* Commit this transaction to make the updates visible. */
3906 CommitTransactionCommand();
3907 StartTransactionCommand();
3910 * While we could set PROC_IN_SAFE_IC if all indexes qualified, there's no
3911 * real need for that, because we only acquire an Xid after the wait is
3912 * done, and that lasts for a very short period.
3916 * Phase 6 of REINDEX CONCURRENTLY
3918 * Drop the old indexes.
3921 pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
3922 PROGRESS_CREATEIDX_PHASE_WAIT_5);
3923 WaitForLockersMultiple(lockTags, AccessExclusiveLock, true);
3925 PushActiveSnapshot(GetTransactionSnapshot());
3928 ObjectAddresses *objects = new_object_addresses();
3930 foreach(lc, indexIds)
3932 ReindexIndexInfo *idx = lfirst(lc);
3933 ObjectAddress object;
3935 object.classId = RelationRelationId;
3936 object.objectId = idx->indexId;
3937 object.objectSubId = 0;
3939 add_exact_object_address(&object, objects);
3943 * Use PERFORM_DELETION_CONCURRENT_LOCK so that index_drop() uses the
3944 * right lock level.
3946 performMultipleDeletions(objects, DROP_RESTRICT,
3947 PERFORM_DELETION_CONCURRENT_LOCK | PERFORM_DELETION_INTERNAL);
3950 PopActiveSnapshot();
3951 CommitTransactionCommand();
3954 * Finally, release the session-level lock on the table.
3956 foreach(lc, relationLocks)
3958 LockRelId *lockrelid = (LockRelId *) lfirst(lc);
3960 UnlockRelationIdForSession(lockrelid, ShareUpdateExclusiveLock);
3963 /* Start a new transaction to finish process properly */
3964 StartTransactionCommand();
3966 /* Log what we did */
3967 if ((params->options & REINDEXOPT_VERBOSE) != 0)
3969 if (relkind == RELKIND_INDEX)
3970 ereport(INFO,
3971 (errmsg("index \"%s.%s\" was reindexed",
3972 relationNamespace, relationName),
3973 errdetail("%s.",
3974 pg_rusage_show(&ru0))));
3975 else
3977 foreach(lc, newIndexIds)
3979 ReindexIndexInfo *idx = lfirst(lc);
3980 Oid indOid = idx->indexId;
3982 ereport(INFO,
3983 (errmsg("index \"%s.%s\" was reindexed",
3984 get_namespace_name(get_rel_namespace(indOid)),
3985 get_rel_name(indOid))));
3986 /* Don't show rusage here, since it's not per index. */
3989 ereport(INFO,
3990 (errmsg("table \"%s.%s\" was reindexed",
3991 relationNamespace, relationName),
3992 errdetail("%s.",
3993 pg_rusage_show(&ru0))));
3997 MemoryContextDelete(private_context);
3999 pgstat_progress_end_command();
4001 return true;
4005 * Insert or delete an appropriate pg_inherits tuple to make the given index
4006 * be a partition of the indicated parent index.
4008 * This also corrects the pg_depend information for the affected index.
4010 void
4011 IndexSetParentIndex(Relation partitionIdx, Oid parentOid)
4013 Relation pg_inherits;
4014 ScanKeyData key[2];
4015 SysScanDesc scan;
4016 Oid partRelid = RelationGetRelid(partitionIdx);
4017 HeapTuple tuple;
4018 bool fix_dependencies;
4020 /* Make sure this is an index */
4021 Assert(partitionIdx->rd_rel->relkind == RELKIND_INDEX ||
4022 partitionIdx->rd_rel->relkind == RELKIND_PARTITIONED_INDEX);
4025 * Scan pg_inherits for rows linking our index to some parent.
4027 pg_inherits = relation_open(InheritsRelationId, RowExclusiveLock);
4028 ScanKeyInit(&key[0],
4029 Anum_pg_inherits_inhrelid,
4030 BTEqualStrategyNumber, F_OIDEQ,
4031 ObjectIdGetDatum(partRelid));
4032 ScanKeyInit(&key[1],
4033 Anum_pg_inherits_inhseqno,
4034 BTEqualStrategyNumber, F_INT4EQ,
4035 Int32GetDatum(1));
4036 scan = systable_beginscan(pg_inherits, InheritsRelidSeqnoIndexId, true,
4037 NULL, 2, key);
4038 tuple = systable_getnext(scan);
4040 if (!HeapTupleIsValid(tuple))
4042 if (parentOid == InvalidOid)
4045 * No pg_inherits row, and no parent wanted: nothing to do in this
4046 * case.
4048 fix_dependencies = false;
4050 else
4052 StoreSingleInheritance(partRelid, parentOid, 1);
4053 fix_dependencies = true;
4056 else
4058 Form_pg_inherits inhForm = (Form_pg_inherits) GETSTRUCT(tuple);
4060 if (parentOid == InvalidOid)
4063 * There exists a pg_inherits row, which we want to clear; do so.
4065 CatalogTupleDelete(pg_inherits, &tuple->t_self);
4066 fix_dependencies = true;
4068 else
4071 * A pg_inherits row exists. If it's the same we want, then we're
4072 * good; if it differs, that amounts to a corrupt catalog and
4073 * should not happen.
4075 if (inhForm->inhparent != parentOid)
4077 /* unexpected: we should not get called in this case */
4078 elog(ERROR, "bogus pg_inherit row: inhrelid %u inhparent %u",
4079 inhForm->inhrelid, inhForm->inhparent);
4082 /* already in the right state */
4083 fix_dependencies = false;
4087 /* done with pg_inherits */
4088 systable_endscan(scan);
4089 relation_close(pg_inherits, RowExclusiveLock);
4091 /* set relhassubclass if an index partition has been added to the parent */
4092 if (OidIsValid(parentOid))
4093 SetRelationHasSubclass(parentOid, true);
4095 /* set relispartition correctly on the partition */
4096 update_relispartition(partRelid, OidIsValid(parentOid));
4098 if (fix_dependencies)
4101 * Insert/delete pg_depend rows. If setting a parent, add PARTITION
4102 * dependencies on the parent index and the table; if removing a
4103 * parent, delete PARTITION dependencies.
4105 if (OidIsValid(parentOid))
4107 ObjectAddress partIdx;
4108 ObjectAddress parentIdx;
4109 ObjectAddress partitionTbl;
4111 ObjectAddressSet(partIdx, RelationRelationId, partRelid);
4112 ObjectAddressSet(parentIdx, RelationRelationId, parentOid);
4113 ObjectAddressSet(partitionTbl, RelationRelationId,
4114 partitionIdx->rd_index->indrelid);
4115 recordDependencyOn(&partIdx, &parentIdx,
4116 DEPENDENCY_PARTITION_PRI);
4117 recordDependencyOn(&partIdx, &partitionTbl,
4118 DEPENDENCY_PARTITION_SEC);
4120 else
4122 deleteDependencyRecordsForClass(RelationRelationId, partRelid,
4123 RelationRelationId,
4124 DEPENDENCY_PARTITION_PRI);
4125 deleteDependencyRecordsForClass(RelationRelationId, partRelid,
4126 RelationRelationId,
4127 DEPENDENCY_PARTITION_SEC);
4130 /* make our updates visible */
4131 CommandCounterIncrement();
4136 * Subroutine of IndexSetParentIndex to update the relispartition flag of the
4137 * given index to the given value.
4139 static void
4140 update_relispartition(Oid relationId, bool newval)
4142 HeapTuple tup;
4143 Relation classRel;
4145 classRel = table_open(RelationRelationId, RowExclusiveLock);
4146 tup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relationId));
4147 if (!HeapTupleIsValid(tup))
4148 elog(ERROR, "cache lookup failed for relation %u", relationId);
4149 Assert(((Form_pg_class) GETSTRUCT(tup))->relispartition != newval);
4150 ((Form_pg_class) GETSTRUCT(tup))->relispartition = newval;
4151 CatalogTupleUpdate(classRel, &tup->t_self, tup);
4152 heap_freetuple(tup);
4153 table_close(classRel, RowExclusiveLock);
4157 * Set the PROC_IN_SAFE_IC flag in MyProc->statusFlags.
4159 * When doing concurrent index builds, we can set this flag
4160 * to tell other processes concurrently running CREATE
4161 * INDEX CONCURRENTLY or REINDEX CONCURRENTLY to ignore us when
4162 * doing their waits for concurrent snapshots. On one hand it
4163 * avoids pointlessly waiting for a process that's not interesting
4164 * anyway; but more importantly it avoids deadlocks in some cases.
4166 * This can be done safely only for indexes that don't execute any
4167 * expressions that could access other tables, so index must not be
4168 * expressional nor partial. Caller is responsible for only calling
4169 * this routine when that assumption holds true.
4171 * (The flag is reset automatically at transaction end, so it must be
4172 * set for each transaction.)
4174 static inline void
4175 set_indexsafe_procflags(void)
4178 * This should only be called before installing xid or xmin in MyProc;
4179 * otherwise, concurrent processes could see an Xmin that moves backwards.
4181 Assert(MyProc->xid == InvalidTransactionId &&
4182 MyProc->xmin == InvalidTransactionId);
4184 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
4185 MyProc->statusFlags |= PROC_IN_SAFE_IC;
4186 ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
4187 LWLockRelease(ProcArrayLock);