Update copyright for 2022
[pgsql.git] / src / backend / access / gin / ginutil.c
blob3d15701a01e167b3f7120670fd66788183e5330c
1 /*-------------------------------------------------------------------------
3 * ginutil.c
4 * Utility routines for the Postgres inverted index access method.
7 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
10 * IDENTIFICATION
11 * src/backend/access/gin/ginutil.c
12 *-------------------------------------------------------------------------
15 #include "postgres.h"
17 #include "access/gin_private.h"
18 #include "access/ginxlog.h"
19 #include "access/reloptions.h"
20 #include "access/xloginsert.h"
21 #include "catalog/pg_collation.h"
22 #include "catalog/pg_type.h"
23 #include "commands/vacuum.h"
24 #include "miscadmin.h"
25 #include "storage/indexfsm.h"
26 #include "storage/lmgr.h"
27 #include "storage/predicate.h"
28 #include "utils/builtins.h"
29 #include "utils/index_selfuncs.h"
30 #include "utils/typcache.h"
34 * GIN handler function: return IndexAmRoutine with access method parameters
35 * and callbacks.
37 Datum
38 ginhandler(PG_FUNCTION_ARGS)
40 IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
42 amroutine->amstrategies = 0;
43 amroutine->amsupport = GINNProcs;
44 amroutine->amoptsprocnum = GIN_OPTIONS_PROC;
45 amroutine->amcanorder = false;
46 amroutine->amcanorderbyop = false;
47 amroutine->amcanbackward = false;
48 amroutine->amcanunique = false;
49 amroutine->amcanmulticol = true;
50 amroutine->amoptionalkey = true;
51 amroutine->amsearcharray = false;
52 amroutine->amsearchnulls = false;
53 amroutine->amstorage = true;
54 amroutine->amclusterable = false;
55 amroutine->ampredlocks = true;
56 amroutine->amcanparallel = false;
57 amroutine->amcaninclude = false;
58 amroutine->amusemaintenanceworkmem = true;
59 amroutine->amhotblocking = true;
60 amroutine->amparallelvacuumoptions =
61 VACUUM_OPTION_PARALLEL_BULKDEL | VACUUM_OPTION_PARALLEL_CLEANUP;
62 amroutine->amkeytype = InvalidOid;
64 amroutine->ambuild = ginbuild;
65 amroutine->ambuildempty = ginbuildempty;
66 amroutine->aminsert = gininsert;
67 amroutine->ambulkdelete = ginbulkdelete;
68 amroutine->amvacuumcleanup = ginvacuumcleanup;
69 amroutine->amcanreturn = NULL;
70 amroutine->amcostestimate = gincostestimate;
71 amroutine->amoptions = ginoptions;
72 amroutine->amproperty = NULL;
73 amroutine->ambuildphasename = NULL;
74 amroutine->amvalidate = ginvalidate;
75 amroutine->amadjustmembers = ginadjustmembers;
76 amroutine->ambeginscan = ginbeginscan;
77 amroutine->amrescan = ginrescan;
78 amroutine->amgettuple = NULL;
79 amroutine->amgetbitmap = gingetbitmap;
80 amroutine->amendscan = ginendscan;
81 amroutine->ammarkpos = NULL;
82 amroutine->amrestrpos = NULL;
83 amroutine->amestimateparallelscan = NULL;
84 amroutine->aminitparallelscan = NULL;
85 amroutine->amparallelrescan = NULL;
87 PG_RETURN_POINTER(amroutine);
91 * initGinState: fill in an empty GinState struct to describe the index
93 * Note: assorted subsidiary data is allocated in the CurrentMemoryContext.
95 void
96 initGinState(GinState *state, Relation index)
98 TupleDesc origTupdesc = RelationGetDescr(index);
99 int i;
101 MemSet(state, 0, sizeof(GinState));
103 state->index = index;
104 state->oneCol = (origTupdesc->natts == 1);
105 state->origTupdesc = origTupdesc;
107 for (i = 0; i < origTupdesc->natts; i++)
109 Form_pg_attribute attr = TupleDescAttr(origTupdesc, i);
111 if (state->oneCol)
112 state->tupdesc[i] = state->origTupdesc;
113 else
115 state->tupdesc[i] = CreateTemplateTupleDesc(2);
117 TupleDescInitEntry(state->tupdesc[i], (AttrNumber) 1, NULL,
118 INT2OID, -1, 0);
119 TupleDescInitEntry(state->tupdesc[i], (AttrNumber) 2, NULL,
120 attr->atttypid,
121 attr->atttypmod,
122 attr->attndims);
123 TupleDescInitEntryCollation(state->tupdesc[i], (AttrNumber) 2,
124 attr->attcollation);
128 * If the compare proc isn't specified in the opclass definition, look
129 * up the index key type's default btree comparator.
131 if (index_getprocid(index, i + 1, GIN_COMPARE_PROC) != InvalidOid)
133 fmgr_info_copy(&(state->compareFn[i]),
134 index_getprocinfo(index, i + 1, GIN_COMPARE_PROC),
135 CurrentMemoryContext);
137 else
139 TypeCacheEntry *typentry;
141 typentry = lookup_type_cache(attr->atttypid,
142 TYPECACHE_CMP_PROC_FINFO);
143 if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
144 ereport(ERROR,
145 (errcode(ERRCODE_UNDEFINED_FUNCTION),
146 errmsg("could not identify a comparison function for type %s",
147 format_type_be(attr->atttypid))));
148 fmgr_info_copy(&(state->compareFn[i]),
149 &(typentry->cmp_proc_finfo),
150 CurrentMemoryContext);
153 /* Opclass must always provide extract procs */
154 fmgr_info_copy(&(state->extractValueFn[i]),
155 index_getprocinfo(index, i + 1, GIN_EXTRACTVALUE_PROC),
156 CurrentMemoryContext);
157 fmgr_info_copy(&(state->extractQueryFn[i]),
158 index_getprocinfo(index, i + 1, GIN_EXTRACTQUERY_PROC),
159 CurrentMemoryContext);
162 * Check opclass capability to do tri-state or binary logic consistent
163 * check.
165 if (index_getprocid(index, i + 1, GIN_TRICONSISTENT_PROC) != InvalidOid)
167 fmgr_info_copy(&(state->triConsistentFn[i]),
168 index_getprocinfo(index, i + 1, GIN_TRICONSISTENT_PROC),
169 CurrentMemoryContext);
172 if (index_getprocid(index, i + 1, GIN_CONSISTENT_PROC) != InvalidOid)
174 fmgr_info_copy(&(state->consistentFn[i]),
175 index_getprocinfo(index, i + 1, GIN_CONSISTENT_PROC),
176 CurrentMemoryContext);
179 if (state->consistentFn[i].fn_oid == InvalidOid &&
180 state->triConsistentFn[i].fn_oid == InvalidOid)
182 elog(ERROR, "missing GIN support function (%d or %d) for attribute %d of index \"%s\"",
183 GIN_CONSISTENT_PROC, GIN_TRICONSISTENT_PROC,
184 i + 1, RelationGetRelationName(index));
188 * Check opclass capability to do partial match.
190 if (index_getprocid(index, i + 1, GIN_COMPARE_PARTIAL_PROC) != InvalidOid)
192 fmgr_info_copy(&(state->comparePartialFn[i]),
193 index_getprocinfo(index, i + 1, GIN_COMPARE_PARTIAL_PROC),
194 CurrentMemoryContext);
195 state->canPartialMatch[i] = true;
197 else
199 state->canPartialMatch[i] = false;
203 * If the index column has a specified collation, we should honor that
204 * while doing comparisons. However, we may have a collatable storage
205 * type for a noncollatable indexed data type (for instance, hstore
206 * uses text index entries). If there's no index collation then
207 * specify default collation in case the support functions need
208 * collation. This is harmless if the support functions don't care
209 * about collation, so we just do it unconditionally. (We could
210 * alternatively call get_typcollation, but that seems like expensive
211 * overkill --- there aren't going to be any cases where a GIN storage
212 * type has a nondefault collation.)
214 if (OidIsValid(index->rd_indcollation[i]))
215 state->supportCollation[i] = index->rd_indcollation[i];
216 else
217 state->supportCollation[i] = DEFAULT_COLLATION_OID;
222 * Extract attribute (column) number of stored entry from GIN tuple
224 OffsetNumber
225 gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple)
227 OffsetNumber colN;
229 if (ginstate->oneCol)
231 /* column number is not stored explicitly */
232 colN = FirstOffsetNumber;
234 else
236 Datum res;
237 bool isnull;
240 * First attribute is always int16, so we can safely use any tuple
241 * descriptor to obtain first attribute of tuple
243 res = index_getattr(tuple, FirstOffsetNumber, ginstate->tupdesc[0],
244 &isnull);
245 Assert(!isnull);
247 colN = DatumGetUInt16(res);
248 Assert(colN >= FirstOffsetNumber && colN <= ginstate->origTupdesc->natts);
251 return colN;
255 * Extract stored datum (and possible null category) from GIN tuple
257 Datum
258 gintuple_get_key(GinState *ginstate, IndexTuple tuple,
259 GinNullCategory *category)
261 Datum res;
262 bool isnull;
264 if (ginstate->oneCol)
267 * Single column index doesn't store attribute numbers in tuples
269 res = index_getattr(tuple, FirstOffsetNumber, ginstate->origTupdesc,
270 &isnull);
272 else
275 * Since the datum type depends on which index column it's from, we
276 * must be careful to use the right tuple descriptor here.
278 OffsetNumber colN = gintuple_get_attrnum(ginstate, tuple);
280 res = index_getattr(tuple, OffsetNumberNext(FirstOffsetNumber),
281 ginstate->tupdesc[colN - 1],
282 &isnull);
285 if (isnull)
286 *category = GinGetNullCategory(tuple, ginstate);
287 else
288 *category = GIN_CAT_NORM_KEY;
290 return res;
294 * Allocate a new page (either by recycling, or by extending the index file)
295 * The returned buffer is already pinned and exclusive-locked
296 * Caller is responsible for initializing the page by calling GinInitBuffer
298 Buffer
299 GinNewBuffer(Relation index)
301 Buffer buffer;
302 bool needLock;
304 /* First, try to get a page from FSM */
305 for (;;)
307 BlockNumber blkno = GetFreeIndexPage(index);
309 if (blkno == InvalidBlockNumber)
310 break;
312 buffer = ReadBuffer(index, blkno);
315 * We have to guard against the possibility that someone else already
316 * recycled this page; the buffer may be locked if so.
318 if (ConditionalLockBuffer(buffer))
320 if (GinPageIsRecyclable(BufferGetPage(buffer)))
321 return buffer; /* OK to use */
323 LockBuffer(buffer, GIN_UNLOCK);
326 /* Can't use it, so release buffer and try again */
327 ReleaseBuffer(buffer);
330 /* Must extend the file */
331 needLock = !RELATION_IS_LOCAL(index);
332 if (needLock)
333 LockRelationForExtension(index, ExclusiveLock);
335 buffer = ReadBuffer(index, P_NEW);
336 LockBuffer(buffer, GIN_EXCLUSIVE);
338 if (needLock)
339 UnlockRelationForExtension(index, ExclusiveLock);
341 return buffer;
344 void
345 GinInitPage(Page page, uint32 f, Size pageSize)
347 GinPageOpaque opaque;
349 PageInit(page, pageSize, sizeof(GinPageOpaqueData));
351 opaque = GinPageGetOpaque(page);
352 opaque->flags = f;
353 opaque->rightlink = InvalidBlockNumber;
356 void
357 GinInitBuffer(Buffer b, uint32 f)
359 GinInitPage(BufferGetPage(b), f, BufferGetPageSize(b));
362 void
363 GinInitMetabuffer(Buffer b)
365 GinMetaPageData *metadata;
366 Page page = BufferGetPage(b);
368 GinInitPage(page, GIN_META, BufferGetPageSize(b));
370 metadata = GinPageGetMeta(page);
372 metadata->head = metadata->tail = InvalidBlockNumber;
373 metadata->tailFreeSize = 0;
374 metadata->nPendingPages = 0;
375 metadata->nPendingHeapTuples = 0;
376 metadata->nTotalPages = 0;
377 metadata->nEntryPages = 0;
378 metadata->nDataPages = 0;
379 metadata->nEntries = 0;
380 metadata->ginVersion = GIN_CURRENT_VERSION;
383 * Set pd_lower just past the end of the metadata. This is essential,
384 * because without doing so, metadata will be lost if xlog.c compresses
385 * the page.
387 ((PageHeader) page)->pd_lower =
388 ((char *) metadata + sizeof(GinMetaPageData)) - (char *) page;
392 * Compare two keys of the same index column
395 ginCompareEntries(GinState *ginstate, OffsetNumber attnum,
396 Datum a, GinNullCategory categorya,
397 Datum b, GinNullCategory categoryb)
399 /* if not of same null category, sort by that first */
400 if (categorya != categoryb)
401 return (categorya < categoryb) ? -1 : 1;
403 /* all null items in same category are equal */
404 if (categorya != GIN_CAT_NORM_KEY)
405 return 0;
407 /* both not null, so safe to call the compareFn */
408 return DatumGetInt32(FunctionCall2Coll(&ginstate->compareFn[attnum - 1],
409 ginstate->supportCollation[attnum - 1],
410 a, b));
414 * Compare two keys of possibly different index columns
417 ginCompareAttEntries(GinState *ginstate,
418 OffsetNumber attnuma, Datum a, GinNullCategory categorya,
419 OffsetNumber attnumb, Datum b, GinNullCategory categoryb)
421 /* attribute number is the first sort key */
422 if (attnuma != attnumb)
423 return (attnuma < attnumb) ? -1 : 1;
425 return ginCompareEntries(ginstate, attnuma, a, categorya, b, categoryb);
430 * Support for sorting key datums in ginExtractEntries
432 * Note: we only have to worry about null and not-null keys here;
433 * ginExtractEntries never generates more than one placeholder null,
434 * so it doesn't have to sort those.
436 typedef struct
438 Datum datum;
439 bool isnull;
440 } keyEntryData;
442 typedef struct
444 FmgrInfo *cmpDatumFunc;
445 Oid collation;
446 bool haveDups;
447 } cmpEntriesArg;
449 static int
450 cmpEntries(const void *a, const void *b, void *arg)
452 const keyEntryData *aa = (const keyEntryData *) a;
453 const keyEntryData *bb = (const keyEntryData *) b;
454 cmpEntriesArg *data = (cmpEntriesArg *) arg;
455 int res;
457 if (aa->isnull)
459 if (bb->isnull)
460 res = 0; /* NULL "=" NULL */
461 else
462 res = 1; /* NULL ">" not-NULL */
464 else if (bb->isnull)
465 res = -1; /* not-NULL "<" NULL */
466 else
467 res = DatumGetInt32(FunctionCall2Coll(data->cmpDatumFunc,
468 data->collation,
469 aa->datum, bb->datum));
472 * Detect if we have any duplicates. If there are equal keys, qsort must
473 * compare them at some point, else it wouldn't know whether one should go
474 * before or after the other.
476 if (res == 0)
477 data->haveDups = true;
479 return res;
484 * Extract the index key values from an indexable item
486 * The resulting key values are sorted, and any duplicates are removed.
487 * This avoids generating redundant index entries.
489 Datum *
490 ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
491 Datum value, bool isNull,
492 int32 *nentries, GinNullCategory **categories)
494 Datum *entries;
495 bool *nullFlags;
496 int32 i;
499 * We don't call the extractValueFn on a null item. Instead generate a
500 * placeholder.
502 if (isNull)
504 *nentries = 1;
505 entries = (Datum *) palloc(sizeof(Datum));
506 entries[0] = (Datum) 0;
507 *categories = (GinNullCategory *) palloc(sizeof(GinNullCategory));
508 (*categories)[0] = GIN_CAT_NULL_ITEM;
509 return entries;
512 /* OK, call the opclass's extractValueFn */
513 nullFlags = NULL; /* in case extractValue doesn't set it */
514 entries = (Datum *)
515 DatumGetPointer(FunctionCall3Coll(&ginstate->extractValueFn[attnum - 1],
516 ginstate->supportCollation[attnum - 1],
517 value,
518 PointerGetDatum(nentries),
519 PointerGetDatum(&nullFlags)));
522 * Generate a placeholder if the item contained no keys.
524 if (entries == NULL || *nentries <= 0)
526 *nentries = 1;
527 entries = (Datum *) palloc(sizeof(Datum));
528 entries[0] = (Datum) 0;
529 *categories = (GinNullCategory *) palloc(sizeof(GinNullCategory));
530 (*categories)[0] = GIN_CAT_EMPTY_ITEM;
531 return entries;
535 * If the extractValueFn didn't create a nullFlags array, create one,
536 * assuming that everything's non-null.
538 if (nullFlags == NULL)
539 nullFlags = (bool *) palloc0(*nentries * sizeof(bool));
542 * If there's more than one key, sort and unique-ify.
544 * XXX Using qsort here is notationally painful, and the overhead is
545 * pretty bad too. For small numbers of keys it'd likely be better to use
546 * a simple insertion sort.
548 if (*nentries > 1)
550 keyEntryData *keydata;
551 cmpEntriesArg arg;
553 keydata = (keyEntryData *) palloc(*nentries * sizeof(keyEntryData));
554 for (i = 0; i < *nentries; i++)
556 keydata[i].datum = entries[i];
557 keydata[i].isnull = nullFlags[i];
560 arg.cmpDatumFunc = &ginstate->compareFn[attnum - 1];
561 arg.collation = ginstate->supportCollation[attnum - 1];
562 arg.haveDups = false;
563 qsort_arg(keydata, *nentries, sizeof(keyEntryData),
564 cmpEntries, (void *) &arg);
566 if (arg.haveDups)
568 /* there are duplicates, must get rid of 'em */
569 int32 j;
571 entries[0] = keydata[0].datum;
572 nullFlags[0] = keydata[0].isnull;
573 j = 1;
574 for (i = 1; i < *nentries; i++)
576 if (cmpEntries(&keydata[i - 1], &keydata[i], &arg) != 0)
578 entries[j] = keydata[i].datum;
579 nullFlags[j] = keydata[i].isnull;
580 j++;
583 *nentries = j;
585 else
587 /* easy, no duplicates */
588 for (i = 0; i < *nentries; i++)
590 entries[i] = keydata[i].datum;
591 nullFlags[i] = keydata[i].isnull;
595 pfree(keydata);
599 * Create GinNullCategory representation from nullFlags.
601 *categories = (GinNullCategory *) palloc0(*nentries * sizeof(GinNullCategory));
602 for (i = 0; i < *nentries; i++)
603 (*categories)[i] = (nullFlags[i] ? GIN_CAT_NULL_KEY : GIN_CAT_NORM_KEY);
605 return entries;
608 bytea *
609 ginoptions(Datum reloptions, bool validate)
611 static const relopt_parse_elt tab[] = {
612 {"fastupdate", RELOPT_TYPE_BOOL, offsetof(GinOptions, useFastUpdate)},
613 {"gin_pending_list_limit", RELOPT_TYPE_INT, offsetof(GinOptions,
614 pendingListCleanupSize)}
617 return (bytea *) build_reloptions(reloptions, validate,
618 RELOPT_KIND_GIN,
619 sizeof(GinOptions),
620 tab, lengthof(tab));
624 * Fetch index's statistical data into *stats
626 * Note: in the result, nPendingPages can be trusted to be up-to-date,
627 * as can ginVersion; but the other fields are as of the last VACUUM.
629 void
630 ginGetStats(Relation index, GinStatsData *stats)
632 Buffer metabuffer;
633 Page metapage;
634 GinMetaPageData *metadata;
636 metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
637 LockBuffer(metabuffer, GIN_SHARE);
638 metapage = BufferGetPage(metabuffer);
639 metadata = GinPageGetMeta(metapage);
641 stats->nPendingPages = metadata->nPendingPages;
642 stats->nTotalPages = metadata->nTotalPages;
643 stats->nEntryPages = metadata->nEntryPages;
644 stats->nDataPages = metadata->nDataPages;
645 stats->nEntries = metadata->nEntries;
646 stats->ginVersion = metadata->ginVersion;
648 UnlockReleaseBuffer(metabuffer);
652 * Write the given statistics to the index's metapage
654 * Note: nPendingPages and ginVersion are *not* copied over
656 void
657 ginUpdateStats(Relation index, const GinStatsData *stats, bool is_build)
659 Buffer metabuffer;
660 Page metapage;
661 GinMetaPageData *metadata;
663 metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
664 LockBuffer(metabuffer, GIN_EXCLUSIVE);
665 metapage = BufferGetPage(metabuffer);
666 metadata = GinPageGetMeta(metapage);
668 START_CRIT_SECTION();
670 metadata->nTotalPages = stats->nTotalPages;
671 metadata->nEntryPages = stats->nEntryPages;
672 metadata->nDataPages = stats->nDataPages;
673 metadata->nEntries = stats->nEntries;
676 * Set pd_lower just past the end of the metadata. This is essential,
677 * because without doing so, metadata will be lost if xlog.c compresses
678 * the page. (We must do this here because pre-v11 versions of PG did not
679 * set the metapage's pd_lower correctly, so a pg_upgraded index might
680 * contain the wrong value.)
682 ((PageHeader) metapage)->pd_lower =
683 ((char *) metadata + sizeof(GinMetaPageData)) - (char *) metapage;
685 MarkBufferDirty(metabuffer);
687 if (RelationNeedsWAL(index) && !is_build)
689 XLogRecPtr recptr;
690 ginxlogUpdateMeta data;
692 data.node = index->rd_node;
693 data.ntuples = 0;
694 data.newRightlink = data.prevTail = InvalidBlockNumber;
695 memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));
697 XLogBeginInsert();
698 XLogRegisterData((char *) &data, sizeof(ginxlogUpdateMeta));
699 XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
701 recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE);
702 PageSetLSN(metapage, recptr);
705 UnlockReleaseBuffer(metabuffer);
707 END_CRIT_SECTION();