Update copyright for 2022
[pgsql.git] / src / backend / access / brin / brin_tuple.c
blobc0e2dbd23baad18e51982e41822f28712ebd9af2
1 /*
2 * brin_tuple.c
3 * Method implementations for tuples in BRIN indexes.
5 * Intended usage is that code outside this file only deals with
6 * BrinMemTuples, and convert to and from the on-disk representation through
7 * functions in this file.
9 * NOTES
11 * A BRIN tuple is similar to a heap tuple, with a few key differences. The
12 * first interesting difference is that the tuple header is much simpler, only
13 * containing its total length and a small area for flags. Also, the stored
14 * data does not match the relation tuple descriptor exactly: for each
15 * attribute in the descriptor, the index tuple carries an arbitrary number
16 * of values, depending on the opclass.
18 * Also, for each column of the index relation there are two null bits: one
19 * (hasnulls) stores whether any tuple within the page range has that column
20 * set to null; the other one (allnulls) stores whether the column values are
21 * all null. If allnulls is true, then the tuple data area does not contain
22 * values for that column at all; whereas it does if the hasnulls is set.
23 * Note the size of the null bitmask may not be the same as that of the
24 * datum array.
26 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
27 * Portions Copyright (c) 1994, Regents of the University of California
29 * IDENTIFICATION
30 * src/backend/access/brin/brin_tuple.c
32 #include "postgres.h"
34 #include "access/brin_tuple.h"
35 #include "access/detoast.h"
36 #include "access/heaptoast.h"
37 #include "access/htup_details.h"
38 #include "access/toast_internals.h"
39 #include "access/tupdesc.h"
40 #include "access/tupmacs.h"
41 #include "utils/datum.h"
42 #include "utils/memutils.h"
46 * This enables de-toasting of index entries. Needed until VACUUM is
47 * smart enough to rebuild indexes from scratch.
49 #define TOAST_INDEX_HACK
52 static inline void brin_deconstruct_tuple(BrinDesc *brdesc,
53 char *tp, bits8 *nullbits, bool nulls,
54 Datum *values, bool *allnulls, bool *hasnulls);
58 * Return a tuple descriptor used for on-disk storage of BRIN tuples.
60 static TupleDesc
61 brtuple_disk_tupdesc(BrinDesc *brdesc)
63 /* We cache these in the BrinDesc */
64 if (brdesc->bd_disktdesc == NULL)
66 int i;
67 int j;
68 AttrNumber attno = 1;
69 TupleDesc tupdesc;
70 MemoryContext oldcxt;
72 /* make sure it's in the bdesc's context */
73 oldcxt = MemoryContextSwitchTo(brdesc->bd_context);
75 tupdesc = CreateTemplateTupleDesc(brdesc->bd_totalstored);
77 for (i = 0; i < brdesc->bd_tupdesc->natts; i++)
79 for (j = 0; j < brdesc->bd_info[i]->oi_nstored; j++)
80 TupleDescInitEntry(tupdesc, attno++, NULL,
81 brdesc->bd_info[i]->oi_typcache[j]->type_id,
82 -1, 0);
85 MemoryContextSwitchTo(oldcxt);
87 brdesc->bd_disktdesc = tupdesc;
90 return brdesc->bd_disktdesc;
94 * Generate a new on-disk tuple to be inserted in a BRIN index.
96 * See brin_form_placeholder_tuple if you touch this.
98 BrinTuple *
99 brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple,
100 Size *size)
102 Datum *values;
103 bool *nulls;
104 bool anynulls = false;
105 BrinTuple *rettuple;
106 int keyno;
107 int idxattno;
108 uint16 phony_infomask = 0;
109 bits8 *phony_nullbitmap;
110 Size len,
111 hoff,
112 data_len;
113 int i;
115 #ifdef TOAST_INDEX_HACK
116 Datum *untoasted_values;
117 int nuntoasted = 0;
118 #endif
120 Assert(brdesc->bd_totalstored > 0);
122 values = (Datum *) palloc(sizeof(Datum) * brdesc->bd_totalstored);
123 nulls = (bool *) palloc0(sizeof(bool) * brdesc->bd_totalstored);
124 phony_nullbitmap = (bits8 *)
125 palloc(sizeof(bits8) * BITMAPLEN(brdesc->bd_totalstored));
127 #ifdef TOAST_INDEX_HACK
128 untoasted_values = (Datum *) palloc(sizeof(Datum) * brdesc->bd_totalstored);
129 #endif
132 * Set up the values/nulls arrays for heap_fill_tuple
134 idxattno = 0;
135 for (keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++)
137 int datumno;
140 * "allnulls" is set when there's no nonnull value in any row in the
141 * column; when this happens, there is no data to store. Thus set the
142 * nullable bits for all data elements of this column and we're done.
144 if (tuple->bt_columns[keyno].bv_allnulls)
146 for (datumno = 0;
147 datumno < brdesc->bd_info[keyno]->oi_nstored;
148 datumno++)
149 nulls[idxattno++] = true;
150 anynulls = true;
151 continue;
155 * The "hasnulls" bit is set when there are some null values in the
156 * data. We still need to store a real value, but the presence of
157 * this means we need a null bitmap.
159 if (tuple->bt_columns[keyno].bv_hasnulls)
160 anynulls = true;
162 /* If needed, serialize the values before forming the on-disk tuple. */
163 if (tuple->bt_columns[keyno].bv_serialize)
165 tuple->bt_columns[keyno].bv_serialize(brdesc,
166 tuple->bt_columns[keyno].bv_mem_value,
167 tuple->bt_columns[keyno].bv_values);
171 * Now obtain the values of each stored datum. Note that some values
172 * might be toasted, and we cannot rely on the original heap values
173 * sticking around forever, so we must detoast them. Also try to
174 * compress them.
176 for (datumno = 0;
177 datumno < brdesc->bd_info[keyno]->oi_nstored;
178 datumno++)
180 Datum value = tuple->bt_columns[keyno].bv_values[datumno];
182 #ifdef TOAST_INDEX_HACK
184 /* We must look at the stored type, not at the index descriptor. */
185 TypeCacheEntry *atttype = brdesc->bd_info[keyno]->oi_typcache[datumno];
187 /* Do we need to free the value at the end? */
188 bool free_value = false;
190 /* For non-varlena types we don't need to do anything special */
191 if (atttype->typlen != -1)
193 values[idxattno++] = value;
194 continue;
198 * Do nothing if value is not of varlena type. We don't need to
199 * care about NULL values here, thanks to bv_allnulls above.
201 * If value is stored EXTERNAL, must fetch it so we are not
202 * depending on outside storage.
204 * XXX Is this actually true? Could it be that the summary is NULL
205 * even for range with non-NULL data? E.g. degenerate bloom filter
206 * may be thrown away, etc.
208 if (VARATT_IS_EXTERNAL(DatumGetPointer(value)))
210 value = PointerGetDatum(detoast_external_attr((struct varlena *)
211 DatumGetPointer(value)));
212 free_value = true;
216 * If value is above size target, and is of a compressible
217 * datatype, try to compress it in-line.
219 if (!VARATT_IS_EXTENDED(DatumGetPointer(value)) &&
220 VARSIZE(DatumGetPointer(value)) > TOAST_INDEX_TARGET &&
221 (atttype->typstorage == TYPSTORAGE_EXTENDED ||
222 atttype->typstorage == TYPSTORAGE_MAIN))
224 Datum cvalue;
225 char compression;
226 Form_pg_attribute att = TupleDescAttr(brdesc->bd_tupdesc,
227 keyno);
230 * If the BRIN summary and indexed attribute use the same data
231 * type and it has a valid compression method, we can use the
232 * same compression method. Otherwise we have to use the
233 * default method.
235 if (att->atttypid == atttype->type_id)
236 compression = att->attcompression;
237 else
238 compression = InvalidCompressionMethod;
240 cvalue = toast_compress_datum(value, compression);
242 if (DatumGetPointer(cvalue) != NULL)
244 /* successful compression */
245 if (free_value)
246 pfree(DatumGetPointer(value));
248 value = cvalue;
249 free_value = true;
254 * If we untoasted / compressed the value, we need to free it
255 * after forming the index tuple.
257 if (free_value)
258 untoasted_values[nuntoasted++] = value;
260 #endif
262 values[idxattno++] = value;
266 /* Assert we did not overrun temp arrays */
267 Assert(idxattno <= brdesc->bd_totalstored);
269 /* compute total space needed */
270 len = SizeOfBrinTuple;
271 if (anynulls)
274 * We need a double-length bitmap on an on-disk BRIN index tuple; the
275 * first half stores the "allnulls" bits, the second stores
276 * "hasnulls".
278 len += BITMAPLEN(brdesc->bd_tupdesc->natts * 2);
281 len = hoff = MAXALIGN(len);
283 data_len = heap_compute_data_size(brtuple_disk_tupdesc(brdesc),
284 values, nulls);
285 len += data_len;
287 len = MAXALIGN(len);
289 rettuple = palloc0(len);
290 rettuple->bt_blkno = blkno;
291 rettuple->bt_info = hoff;
293 /* Assert that hoff fits in the space available */
294 Assert((rettuple->bt_info & BRIN_OFFSET_MASK) == hoff);
297 * The infomask and null bitmap as computed by heap_fill_tuple are useless
298 * to us. However, that function will not accept a null infomask; and we
299 * need to pass a valid null bitmap so that it will correctly skip
300 * outputting null attributes in the data area.
302 heap_fill_tuple(brtuple_disk_tupdesc(brdesc),
303 values,
304 nulls,
305 (char *) rettuple + hoff,
306 data_len,
307 &phony_infomask,
308 phony_nullbitmap);
310 /* done with these */
311 pfree(values);
312 pfree(nulls);
313 pfree(phony_nullbitmap);
315 #ifdef TOAST_INDEX_HACK
316 for (i = 0; i < nuntoasted; i++)
317 pfree(DatumGetPointer(untoasted_values[i]));
318 #endif
321 * Now fill in the real null bitmasks. allnulls first.
323 if (anynulls)
325 bits8 *bitP;
326 int bitmask;
328 rettuple->bt_info |= BRIN_NULLS_MASK;
331 * Note that we reverse the sense of null bits in this module: we
332 * store a 1 for a null attribute rather than a 0. So we must reverse
333 * the sense of the att_isnull test in brin_deconstruct_tuple as well.
335 bitP = ((bits8 *) ((char *) rettuple + SizeOfBrinTuple)) - 1;
336 bitmask = HIGHBIT;
337 for (keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++)
339 if (bitmask != HIGHBIT)
340 bitmask <<= 1;
341 else
343 bitP += 1;
344 *bitP = 0x0;
345 bitmask = 1;
348 if (!tuple->bt_columns[keyno].bv_allnulls)
349 continue;
351 *bitP |= bitmask;
353 /* hasnulls bits follow */
354 for (keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++)
356 if (bitmask != HIGHBIT)
357 bitmask <<= 1;
358 else
360 bitP += 1;
361 *bitP = 0x0;
362 bitmask = 1;
365 if (!tuple->bt_columns[keyno].bv_hasnulls)
366 continue;
368 *bitP |= bitmask;
372 if (tuple->bt_placeholder)
373 rettuple->bt_info |= BRIN_PLACEHOLDER_MASK;
375 *size = len;
376 return rettuple;
380 * Generate a new on-disk tuple with no data values, marked as placeholder.
382 * This is a cut-down version of brin_form_tuple.
384 BrinTuple *
385 brin_form_placeholder_tuple(BrinDesc *brdesc, BlockNumber blkno, Size *size)
387 Size len;
388 Size hoff;
389 BrinTuple *rettuple;
390 int keyno;
391 bits8 *bitP;
392 int bitmask;
394 /* compute total space needed: always add nulls */
395 len = SizeOfBrinTuple;
396 len += BITMAPLEN(brdesc->bd_tupdesc->natts * 2);
397 len = hoff = MAXALIGN(len);
399 rettuple = palloc0(len);
400 rettuple->bt_blkno = blkno;
401 rettuple->bt_info = hoff;
402 rettuple->bt_info |= BRIN_NULLS_MASK | BRIN_PLACEHOLDER_MASK;
404 bitP = ((bits8 *) ((char *) rettuple + SizeOfBrinTuple)) - 1;
405 bitmask = HIGHBIT;
406 /* set allnulls true for all attributes */
407 for (keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++)
409 if (bitmask != HIGHBIT)
410 bitmask <<= 1;
411 else
413 bitP += 1;
414 *bitP = 0x0;
415 bitmask = 1;
418 *bitP |= bitmask;
420 /* no need to set hasnulls */
422 *size = len;
423 return rettuple;
427 * Free a tuple created by brin_form_tuple
429 void
430 brin_free_tuple(BrinTuple *tuple)
432 pfree(tuple);
436 * Given a brin tuple of size len, create a copy of it. If 'dest' is not
437 * NULL, its size is destsz, and can be used as output buffer; if the tuple
438 * to be copied does not fit, it is enlarged by repalloc, and the size is
439 * updated to match. This avoids palloc/free cycles when many brin tuples
440 * are being processed in loops.
442 BrinTuple *
443 brin_copy_tuple(BrinTuple *tuple, Size len, BrinTuple *dest, Size *destsz)
445 if (!destsz || *destsz == 0)
446 dest = palloc(len);
447 else if (len > *destsz)
449 dest = repalloc(dest, len);
450 *destsz = len;
453 memcpy(dest, tuple, len);
455 return dest;
459 * Return whether two BrinTuples are bitwise identical.
461 bool
462 brin_tuples_equal(const BrinTuple *a, Size alen, const BrinTuple *b, Size blen)
464 if (alen != blen)
465 return false;
466 if (memcmp(a, b, alen) != 0)
467 return false;
468 return true;
472 * Create a new BrinMemTuple from scratch, and initialize it to an empty
473 * state.
475 * Note: we don't provide any means to free a deformed tuple, so make sure to
476 * use a temporary memory context.
478 BrinMemTuple *
479 brin_new_memtuple(BrinDesc *brdesc)
481 BrinMemTuple *dtup;
482 long basesize;
484 basesize = MAXALIGN(sizeof(BrinMemTuple) +
485 sizeof(BrinValues) * brdesc->bd_tupdesc->natts);
486 dtup = palloc0(basesize + sizeof(Datum) * brdesc->bd_totalstored);
488 dtup->bt_values = palloc(sizeof(Datum) * brdesc->bd_totalstored);
489 dtup->bt_allnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts);
490 dtup->bt_hasnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts);
492 dtup->bt_context = AllocSetContextCreate(CurrentMemoryContext,
493 "brin dtuple",
494 ALLOCSET_DEFAULT_SIZES);
496 brin_memtuple_initialize(dtup, brdesc);
498 return dtup;
502 * Reset a BrinMemTuple to initial state. We return the same tuple, for
503 * notational convenience.
505 BrinMemTuple *
506 brin_memtuple_initialize(BrinMemTuple *dtuple, BrinDesc *brdesc)
508 int i;
509 char *currdatum;
511 MemoryContextReset(dtuple->bt_context);
513 currdatum = (char *) dtuple +
514 MAXALIGN(sizeof(BrinMemTuple) +
515 sizeof(BrinValues) * brdesc->bd_tupdesc->natts);
516 for (i = 0; i < brdesc->bd_tupdesc->natts; i++)
518 dtuple->bt_columns[i].bv_attno = i + 1;
519 dtuple->bt_columns[i].bv_allnulls = true;
520 dtuple->bt_columns[i].bv_hasnulls = false;
521 dtuple->bt_columns[i].bv_values = (Datum *) currdatum;
523 dtuple->bt_columns[i].bv_mem_value = PointerGetDatum(NULL);
524 dtuple->bt_columns[i].bv_serialize = NULL;
525 dtuple->bt_columns[i].bv_context = dtuple->bt_context;
527 currdatum += sizeof(Datum) * brdesc->bd_info[i]->oi_nstored;
530 return dtuple;
534 * Convert a BrinTuple back to a BrinMemTuple. This is the reverse of
535 * brin_form_tuple.
537 * As an optimization, the caller can pass a previously allocated 'dMemtuple'.
538 * This avoids having to allocate it here, which can be useful when this
539 * function is called many times in a loop. It is caller's responsibility
540 * that the given BrinMemTuple matches what we need here.
542 * Note we don't need the "on disk tupdesc" here; we rely on our own routine to
543 * deconstruct the tuple from the on-disk format.
545 BrinMemTuple *
546 brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple, BrinMemTuple *dMemtuple)
548 BrinMemTuple *dtup;
549 Datum *values;
550 bool *allnulls;
551 bool *hasnulls;
552 char *tp;
553 bits8 *nullbits;
554 int keyno;
555 int valueno;
556 MemoryContext oldcxt;
558 dtup = dMemtuple ? brin_memtuple_initialize(dMemtuple, brdesc) :
559 brin_new_memtuple(brdesc);
561 if (BrinTupleIsPlaceholder(tuple))
562 dtup->bt_placeholder = true;
563 dtup->bt_blkno = tuple->bt_blkno;
565 values = dtup->bt_values;
566 allnulls = dtup->bt_allnulls;
567 hasnulls = dtup->bt_hasnulls;
569 tp = (char *) tuple + BrinTupleDataOffset(tuple);
571 if (BrinTupleHasNulls(tuple))
572 nullbits = (bits8 *) ((char *) tuple + SizeOfBrinTuple);
573 else
574 nullbits = NULL;
575 brin_deconstruct_tuple(brdesc,
576 tp, nullbits, BrinTupleHasNulls(tuple),
577 values, allnulls, hasnulls);
580 * Iterate to assign each of the values to the corresponding item in the
581 * values array of each column. The copies occur in the tuple's context.
583 oldcxt = MemoryContextSwitchTo(dtup->bt_context);
584 for (valueno = 0, keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++)
586 int i;
588 if (allnulls[keyno])
590 valueno += brdesc->bd_info[keyno]->oi_nstored;
591 continue;
595 * We would like to skip datumCopy'ing the values datum in some cases,
596 * caller permitting ...
598 for (i = 0; i < brdesc->bd_info[keyno]->oi_nstored; i++)
599 dtup->bt_columns[keyno].bv_values[i] =
600 datumCopy(values[valueno++],
601 brdesc->bd_info[keyno]->oi_typcache[i]->typbyval,
602 brdesc->bd_info[keyno]->oi_typcache[i]->typlen);
604 dtup->bt_columns[keyno].bv_hasnulls = hasnulls[keyno];
605 dtup->bt_columns[keyno].bv_allnulls = false;
607 dtup->bt_columns[keyno].bv_mem_value = PointerGetDatum(NULL);
608 dtup->bt_columns[keyno].bv_serialize = NULL;
609 dtup->bt_columns[keyno].bv_context = dtup->bt_context;
612 MemoryContextSwitchTo(oldcxt);
614 return dtup;
618 * brin_deconstruct_tuple
619 * Guts of attribute extraction from an on-disk BRIN tuple.
621 * Its arguments are:
622 * brdesc BRIN descriptor for the stored tuple
623 * tp pointer to the tuple data area
624 * nullbits pointer to the tuple nulls bitmask
625 * nulls "has nulls" bit in tuple infomask
626 * values output values, array of size brdesc->bd_totalstored
627 * allnulls output "allnulls", size brdesc->bd_tupdesc->natts
628 * hasnulls output "hasnulls", size brdesc->bd_tupdesc->natts
630 * Output arrays must have been allocated by caller.
632 static inline void
633 brin_deconstruct_tuple(BrinDesc *brdesc,
634 char *tp, bits8 *nullbits, bool nulls,
635 Datum *values, bool *allnulls, bool *hasnulls)
637 int attnum;
638 int stored;
639 TupleDesc diskdsc;
640 long off;
643 * First iterate to natts to obtain both null flags for each attribute.
644 * Note that we reverse the sense of the att_isnull test, because we store
645 * 1 for a null value (rather than a 1 for a not null value as is the
646 * att_isnull convention used elsewhere.) See brin_form_tuple.
648 for (attnum = 0; attnum < brdesc->bd_tupdesc->natts; attnum++)
651 * the "all nulls" bit means that all values in the page range for
652 * this column are nulls. Therefore there are no values in the tuple
653 * data area.
655 allnulls[attnum] = nulls && !att_isnull(attnum, nullbits);
658 * the "has nulls" bit means that some tuples have nulls, but others
659 * have not-null values. Therefore we know the tuple contains data
660 * for this column.
662 * The hasnulls bits follow the allnulls bits in the same bitmask.
664 hasnulls[attnum] =
665 nulls && !att_isnull(brdesc->bd_tupdesc->natts + attnum, nullbits);
669 * Iterate to obtain each attribute's stored values. Note that since we
670 * may reuse attribute entries for more than one column, we cannot cache
671 * offsets here.
673 diskdsc = brtuple_disk_tupdesc(brdesc);
674 stored = 0;
675 off = 0;
676 for (attnum = 0; attnum < brdesc->bd_tupdesc->natts; attnum++)
678 int datumno;
680 if (allnulls[attnum])
682 stored += brdesc->bd_info[attnum]->oi_nstored;
683 continue;
686 for (datumno = 0;
687 datumno < brdesc->bd_info[attnum]->oi_nstored;
688 datumno++)
690 Form_pg_attribute thisatt = TupleDescAttr(diskdsc, stored);
692 if (thisatt->attlen == -1)
694 off = att_align_pointer(off, thisatt->attalign, -1,
695 tp + off);
697 else
699 /* not varlena, so safe to use att_align_nominal */
700 off = att_align_nominal(off, thisatt->attalign);
703 values[stored++] = fetchatt(thisatt, tp + off);
705 off = att_addlength_pointer(off, thisatt->attlen, tp + off);