1 /*-------------------------------------------------------------------------
4 * general index access method routines
6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/access/index/genam.c
14 * many of the old access method routines have been turned into
15 * macros and moved to genam.h -cim 4/30/91
17 *-------------------------------------------------------------------------
22 #include "access/genam.h"
23 #include "access/heapam.h"
24 #include "access/relscan.h"
25 #include "access/tableam.h"
26 #include "access/transam.h"
27 #include "catalog/index.h"
28 #include "lib/stringinfo.h"
29 #include "miscadmin.h"
30 #include "storage/bufmgr.h"
31 #include "utils/acl.h"
32 #include "utils/builtins.h"
33 #include "utils/lsyscache.h"
34 #include "utils/rel.h"
35 #include "utils/rls.h"
36 #include "utils/ruleutils.h"
37 #include "utils/snapmgr.h"
38 #include "utils/syscache.h"
41 /* ----------------------------------------------------------------
42 * general access method routines
44 * All indexed access methods use an identical scan structure.
45 * We don't know how the various AMs do locking, however, so we don't
46 * do anything about that here.
48 * The intent is that an AM implementor will define a beginscan routine
49 * that calls RelationGetIndexScan to fill in the scan, and then does
50 * whatever kind of locking the AM requires.
52 * At the end of a scan, the AM's endscan routine undoes the locking,
53 * but does *not* call IndexScanEnd --- the higher-level index_endscan
54 * routine does that. (We can't do it in the AM because index_endscan
55 * still needs to touch the IndexScanDesc after calling the AM.)
57 * Because of this, the AM does not have a choice whether to call
58 * RelationGetIndexScan or not; its beginscan routine must return an
59 * object made by RelationGetIndexScan. This is kinda ugly but not
60 * worth cleaning up now.
61 * ----------------------------------------------------------------
65 * RelationGetIndexScan -- Create and fill an IndexScanDesc.
67 * This routine creates an index scan structure and sets up its initial contents.
71 * indexRelation -- index relation for scan.
72 * nkeys -- count of scan keys (index qual conditions).
73 * norderbys -- count of index order-by operators.
76 * An initialized IndexScanDesc.
80 RelationGetIndexScan(Relation indexRelation
, int nkeys
, int norderbys
)
84 scan
= (IndexScanDesc
) palloc(sizeof(IndexScanDescData
));
86 scan
->heapRelation
= NULL
; /* may be set later */
87 scan
->xs_heapfetch
= NULL
;
88 scan
->indexRelation
= indexRelation
;
89 scan
->xs_snapshot
= InvalidSnapshot
; /* caller must initialize this */
90 scan
->numberOfKeys
= nkeys
;
91 scan
->numberOfOrderBys
= norderbys
;
94 * We allocate key workspace here, but it won't get filled until amrescan.
97 scan
->keyData
= (ScanKey
) palloc(sizeof(ScanKeyData
) * nkeys
);
101 scan
->orderByData
= (ScanKey
) palloc(sizeof(ScanKeyData
) * norderbys
);
103 scan
->orderByData
= NULL
;
105 scan
->xs_want_itup
= false; /* may be set later */
108 * During recovery we ignore killed tuples and don't bother to kill them
109 * either. We do this because the xmin on the primary node could easily be
110 * later than the xmin on the standby node, so that what the primary
111 * thinks is killed is supposed to be visible on standby. So for correct
112 * MVCC for queries during recovery we must ignore these hints and check
113 * all tuples. Do *not* set ignore_killed_tuples to true when running in a
114 * transaction that was started during recovery. xactStartedInRecovery
115 * should not be altered by index AMs.
117 scan
->kill_prior_tuple
= false;
118 scan
->xactStartedInRecovery
= TransactionStartedDuringRecovery();
119 scan
->ignore_killed_tuples
= !scan
->xactStartedInRecovery
;
123 scan
->xs_itup
= NULL
;
124 scan
->xs_itupdesc
= NULL
;
125 scan
->xs_hitup
= NULL
;
126 scan
->xs_hitupdesc
= NULL
;
132 * IndexScanEnd -- End an index scan.
134 * This routine just releases the storage acquired by
135 * RelationGetIndexScan(). Any AM-level resources are
136 * assumed to already have been released by the AM's endscan routine.
144 IndexScanEnd(IndexScanDesc scan
)
146 if (scan
->keyData
!= NULL
)
147 pfree(scan
->keyData
);
148 if (scan
->orderByData
!= NULL
)
149 pfree(scan
->orderByData
);
155 * BuildIndexValueDescription
157 * Construct a string describing the contents of an index entry, in the
158 * form "(key_name, ...)=(key_value, ...)". This is currently used
159 * for building unique-constraint and exclusion-constraint error messages,
160 * so only key columns of the index are checked and printed.
162 * Note that if the user does not have permissions to view all of the
163 * columns involved then a NULL is returned. Returning a partial key seems
164 * unlikely to be useful and we have no way to know which of the columns the
165 * user provided (unlike in ExecBuildSlotValueDescription).
167 * The passed-in values/nulls arrays are the "raw" input to the index AM,
168 * e.g. results of FormIndexDatum --- this is not necessarily what is stored
169 * in the index, but it's what the user perceives to be stored.
171 * Note: if you change anything here, check whether
172 * ExecBuildSlotPartitionKeyDescription() in execMain.c needs a similar change.
176 BuildIndexValueDescription(Relation indexRelation
,
177 Datum
*values
, bool *isnull
)
180 Form_pg_index idxrec
;
184 Oid indexrelid
= RelationGetRelid(indexRelation
);
188 indnkeyatts
= IndexRelationGetNumberOfKeyAttributes(indexRelation
);
191 * Check permissions: if the user does not have access to view all of the
192 * key columns then return NULL to avoid leaking data.
194 * First check if RLS is enabled for the relation. If so, return NULL to
195 * avoid leaking data.
197 * Next we need to check table-level SELECT access and then, if there is
198 * no access there, check column-level permissions.
200 idxrec
= indexRelation
->rd_index
;
201 indrelid
= idxrec
->indrelid
;
202 Assert(indexrelid
== idxrec
->indexrelid
);
204 /* RLS check: if RLS is enabled then we don't return anything. */
205 if (check_enable_rls(indrelid
, InvalidOid
, true) == RLS_ENABLED
)
208 /* Table-level SELECT is enough, if the user has it */
209 aclresult
= pg_class_aclcheck(indrelid
, GetUserId(), ACL_SELECT
);
210 if (aclresult
!= ACLCHECK_OK
)
213 * No table-level access, so step through the columns in the index and
214 * make sure the user has SELECT rights on all of them.
216 for (keyno
= 0; keyno
< indnkeyatts
; keyno
++)
218 AttrNumber attnum
= idxrec
->indkey
.values
[keyno
];
221 * Note that if attnum == InvalidAttrNumber, then this is an index
222 * based on an expression and we return no detail rather than try
223 * to figure out what column(s) the expression includes and if the
224 * user has SELECT rights on them.
226 if (attnum
== InvalidAttrNumber
||
227 pg_attribute_aclcheck(indrelid
, attnum
, GetUserId(),
228 ACL_SELECT
) != ACLCHECK_OK
)
230 /* No access, so clean up and return */
236 initStringInfo(&buf
);
237 appendStringInfo(&buf
, "(%s)=(",
238 pg_get_indexdef_columns(indexrelid
, true));
240 for (i
= 0; i
< indnkeyatts
; i
++)
252 * The provided data is not necessarily of the type stored in the
253 * index; rather it is of the index opclass's input type. So look
254 * at rd_opcintype not the index tupdesc.
256 * Note: this is a bit shaky for opclasses that have pseudotype
257 * input types such as ANYARRAY or RECORD. Currently, the
258 * typoutput functions associated with the pseudotypes will work
259 * okay, but we might have to try harder in future.
261 getTypeOutputInfo(indexRelation
->rd_opcintype
[i
],
262 &foutoid
, &typisvarlena
);
263 val
= OidOutputFunctionCall(foutoid
, values
[i
]);
267 appendStringInfoString(&buf
, ", ");
268 appendStringInfoString(&buf
, val
);
271 appendStringInfoChar(&buf
, ')');
277 * Get the latestRemovedXid from the table entries pointed at by the index
278 * tuples being deleted.
281 index_compute_xid_horizon_for_tuples(Relation irel
,
284 OffsetNumber
*itemnos
,
287 ItemPointerData
*ttids
=
288 (ItemPointerData
*) palloc(sizeof(ItemPointerData
) * nitems
);
289 TransactionId latestRemovedXid
= InvalidTransactionId
;
290 Page ipage
= BufferGetPage(ibuf
);
293 /* identify what the index tuples about to be deleted point to */
294 for (int i
= 0; i
< nitems
; i
++)
298 iitemid
= PageGetItemId(ipage
, itemnos
[i
]);
299 itup
= (IndexTuple
) PageGetItem(ipage
, iitemid
);
301 ItemPointerCopy(&itup
->t_tid
, &ttids
[i
]);
304 /* determine the actual xid horizon */
306 table_compute_xid_horizon_for_tuples(hrel
, ttids
, nitems
);
310 return latestRemovedXid
;
314 /* ----------------------------------------------------------------
315 * heap-or-index-scan access to system catalogs
317 * These functions support system catalog accesses that normally use
318 * an index but need to be capable of being switched to heap scans
319 * if the system indexes are unavailable.
321 * The specified scan keys must be compatible with the named index.
322 * Generally this means that they must constrain either all columns
323 * of the index, or the first K columns of an N-column index.
325 * These routines could work with non-system tables, actually,
326 * but they're only useful when there is a known index to use with
327 * the given scan keys; so in practice they're only good for
328 * predetermined types of scans of system catalogs.
329 * ----------------------------------------------------------------
333 * systable_beginscan --- set up for heap-or-index scan
335 * rel: catalog to scan, already opened and suitably locked
336 * indexId: OID of index to conditionally use
337 * indexOK: if false, forces a heap scan (see notes below)
338 * snapshot: time qual to use (NULL for a recent catalog snapshot)
339 * nkeys, key: scan keys
341 * The attribute numbers in the scan key should be set for the heap case.
342 * If we choose to use the index, we reset them to 1..n to reference the index
343 * columns. Note this means there must be one scankey qualification per
344 * index column! This is checked by the Asserts in the normal, index-using
345 * case, but won't be checked if the heapscan path is taken.
347 * The routine checks the normal cases for whether an indexscan is safe,
348 * but caller can make additional checks and pass indexOK=false if needed.
349 * In the standard case, indexOK can simply be the constant true.
352 systable_beginscan(Relation heapRelation
,
356 int nkeys
, ScanKey key
)
362 !IgnoreSystemIndexes
&&
363 !ReindexIsProcessingIndex(indexId
))
364 irel
= index_open(indexId
, AccessShareLock
);
368 sysscan
= (SysScanDesc
) palloc(sizeof(SysScanDescData
));
370 sysscan
->heap_rel
= heapRelation
;
371 sysscan
->irel
= irel
;
372 sysscan
->slot
= table_slot_create(heapRelation
, NULL
);
374 if (snapshot
== NULL
)
376 Oid relid
= RelationGetRelid(heapRelation
);
378 snapshot
= RegisterSnapshot(GetCatalogSnapshot(relid
));
379 sysscan
->snapshot
= snapshot
;
383 /* Caller is responsible for any snapshot. */
384 sysscan
->snapshot
= NULL
;
391 /* Change attribute numbers to be index column numbers. */
392 for (i
= 0; i
< nkeys
; i
++)
396 for (j
= 0; j
< IndexRelationGetNumberOfAttributes(irel
); j
++)
398 if (key
[i
].sk_attno
== irel
->rd_index
->indkey
.values
[j
])
400 key
[i
].sk_attno
= j
+ 1;
404 if (j
== IndexRelationGetNumberOfAttributes(irel
))
405 elog(ERROR
, "column is not in index");
408 sysscan
->iscan
= index_beginscan(heapRelation
, irel
,
410 index_rescan(sysscan
->iscan
, key
, nkeys
, NULL
, 0);
411 sysscan
->scan
= NULL
;
416 * We disallow synchronized scans when forced to use a heapscan on a
417 * catalog. In most cases the desired rows are near the front, so
418 * that the unpredictable start point of a syncscan is a serious
419 * disadvantage; and there are no compensating advantages, because
420 * it's unlikely that such scans will occur in parallel.
422 sysscan
->scan
= table_beginscan_strat(heapRelation
, snapshot
,
425 sysscan
->iscan
= NULL
;
432 * systable_getnext --- get next tuple in a heap-or-index scan
434 * Returns NULL if no more tuples available.
436 * Note that returned tuple is a reference to data in a disk buffer;
437 * it must not be modified, and should be presumed inaccessible after
438 * next getnext() or endscan() call.
440 * XXX: It'd probably make sense to offer a slot based interface, at least
444 systable_getnext(SysScanDesc sysscan
)
446 HeapTuple htup
= NULL
;
450 if (index_getnext_slot(sysscan
->iscan
, ForwardScanDirection
, sysscan
->slot
))
454 htup
= ExecFetchSlotHeapTuple(sysscan
->slot
, false, &shouldFree
);
458 * We currently don't need to support lossy index operators for
459 * any system catalog scan. It could be done here, using the scan
460 * keys to drive the operator calls, if we arranged to save the
461 * heap attnums during systable_beginscan(); this is practical
462 * because we still wouldn't need to support indexes on expressions.
465 if (sysscan
->iscan
->xs_recheck
)
466 elog(ERROR
, "system catalog scans with lossy index conditions are not implemented");
471 if (table_scan_getnextslot(sysscan
->scan
, ForwardScanDirection
, sysscan
->slot
))
475 htup
= ExecFetchSlotHeapTuple(sysscan
->slot
, false, &shouldFree
);
484 * systable_recheck_tuple --- recheck visibility of most-recently-fetched tuple
486 * In particular, determine if this tuple would be visible to a catalog scan
487 * that started now. We don't handle the case of a non-MVCC scan snapshot,
488 * because no caller needs that yet.
490 * This is useful to test whether an object was deleted while we waited to
491 * acquire lock on it.
493 * Note: we don't actually *need* the tuple to be passed in, but it's a
494 * good crosscheck that the caller is interested in the right tuple.
497 systable_recheck_tuple(SysScanDesc sysscan
, HeapTuple tup
)
502 Assert(tup
== ExecFetchSlotHeapTuple(sysscan
->slot
, false, NULL
));
505 * Trust that table_tuple_satisfies_snapshot() and its subsidiaries
506 * (commonly LockBuffer() and HeapTupleSatisfiesMVCC()) do not themselves
507 * acquire snapshots, so we need not register the snapshot. Those
508 * facilities are too low-level to have any business scanning tables.
510 freshsnap
= GetCatalogSnapshot(RelationGetRelid(sysscan
->heap_rel
));
512 result
= table_tuple_satisfies_snapshot(sysscan
->heap_rel
,
520 * systable_endscan --- close scan, release resources
522 * Note that it's still up to the caller to close the heap relation.
525 systable_endscan(SysScanDesc sysscan
)
529 ExecDropSingleTupleTableSlot(sysscan
->slot
);
530 sysscan
->slot
= NULL
;
535 index_endscan(sysscan
->iscan
);
536 index_close(sysscan
->irel
, AccessShareLock
);
539 table_endscan(sysscan
->scan
);
541 if (sysscan
->snapshot
)
542 UnregisterSnapshot(sysscan
->snapshot
);
549 * systable_beginscan_ordered --- set up for ordered catalog scan
551 * These routines have essentially the same API as systable_beginscan etc,
552 * except that they guarantee to return multiple matching tuples in
553 * index order. Also, for largely historical reasons, the index to use
554 * is opened and locked by the caller, not here.
556 * Currently we do not support non-index-based scans here. (In principle
557 * we could do a heapscan and sort, but the uses are in places that
558 * probably don't need to still work with corrupted catalog indexes.)
559 * For the moment, therefore, these functions are merely the thinnest of
560 * wrappers around index_beginscan/index_getnext. The main reason for their
561 * existence is to centralize possible future support of lossy operators
565 systable_beginscan_ordered(Relation heapRelation
,
566 Relation indexRelation
,
568 int nkeys
, ScanKey key
)
573 /* REINDEX can probably be a hard error here ... */
574 if (ReindexIsProcessingIndex(RelationGetRelid(indexRelation
)))
576 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED
),
577 errmsg("cannot access index \"%s\" while it is being reindexed",
578 RelationGetRelationName(indexRelation
))));
579 /* ... but we only throw a warning about violating IgnoreSystemIndexes */
580 if (IgnoreSystemIndexes
)
581 elog(WARNING
, "using index \"%s\" despite IgnoreSystemIndexes",
582 RelationGetRelationName(indexRelation
));
584 sysscan
= (SysScanDesc
) palloc(sizeof(SysScanDescData
));
586 sysscan
->heap_rel
= heapRelation
;
587 sysscan
->irel
= indexRelation
;
588 sysscan
->slot
= table_slot_create(heapRelation
, NULL
);
590 if (snapshot
== NULL
)
592 Oid relid
= RelationGetRelid(heapRelation
);
594 snapshot
= RegisterSnapshot(GetCatalogSnapshot(relid
));
595 sysscan
->snapshot
= snapshot
;
599 /* Caller is responsible for any snapshot. */
600 sysscan
->snapshot
= NULL
;
603 /* Change attribute numbers to be index column numbers. */
604 for (i
= 0; i
< nkeys
; i
++)
608 for (j
= 0; j
< IndexRelationGetNumberOfAttributes(indexRelation
); j
++)
610 if (key
[i
].sk_attno
== indexRelation
->rd_index
->indkey
.values
[j
])
612 key
[i
].sk_attno
= j
+ 1;
616 if (j
== IndexRelationGetNumberOfAttributes(indexRelation
))
617 elog(ERROR
, "column is not in index");
620 sysscan
->iscan
= index_beginscan(heapRelation
, indexRelation
,
622 index_rescan(sysscan
->iscan
, key
, nkeys
, NULL
, 0);
623 sysscan
->scan
= NULL
;
629 * systable_getnext_ordered --- get next tuple in an ordered catalog scan
632 systable_getnext_ordered(SysScanDesc sysscan
, ScanDirection direction
)
634 HeapTuple htup
= NULL
;
636 Assert(sysscan
->irel
);
637 if (index_getnext_slot(sysscan
->iscan
, direction
, sysscan
->slot
))
638 htup
= ExecFetchSlotHeapTuple(sysscan
->slot
, false, NULL
);
640 /* See notes in systable_getnext */
641 if (htup
&& sysscan
->iscan
->xs_recheck
)
642 elog(ERROR
, "system catalog scans with lossy index conditions are not implemented");
648 * systable_endscan_ordered --- close scan, release resources
651 systable_endscan_ordered(SysScanDesc sysscan
)
655 ExecDropSingleTupleTableSlot(sysscan
->slot
);
656 sysscan
->slot
= NULL
;
659 Assert(sysscan
->irel
);
660 index_endscan(sysscan
->iscan
);
661 if (sysscan
->snapshot
)
662 UnregisterSnapshot(sysscan
->snapshot
);