3 * Method implementations for tuples in BRIN indexes.
5 * Intended usage is that code outside this file only deals with
6 * BrinMemTuples, and convert to and from the on-disk representation through
7 * functions in this file.
11 * A BRIN tuple is similar to a heap tuple, with a few key differences. The
12 * first interesting difference is that the tuple header is much simpler, only
13 * containing its total length and a small area for flags. Also, the stored
14 * data does not match the relation tuple descriptor exactly: for each
15 * attribute in the descriptor, the index tuple carries an arbitrary number
16 * of values, depending on the opclass.
18 * Also, for each column of the index relation there are two null bits: one
19 * (hasnulls) stores whether any tuple within the page range has that column
20 * set to null; the other one (allnulls) stores whether the column values are
21 * all null. If allnulls is true, then the tuple data area does not contain
22 * values for that column at all; whereas it does if the hasnulls is set.
23 * Note the size of the null bitmask may not be the same as that of the
26 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
27 * Portions Copyright (c) 1994, Regents of the University of California
30 * src/backend/access/brin/brin_tuple.c
34 #include "access/brin_tuple.h"
35 #include "access/detoast.h"
36 #include "access/heaptoast.h"
37 #include "access/htup_details.h"
38 #include "access/toast_internals.h"
39 #include "access/tupdesc.h"
40 #include "access/tupmacs.h"
41 #include "utils/datum.h"
42 #include "utils/memutils.h"
46 * This enables de-toasting of index entries. Needed until VACUUM is
47 * smart enough to rebuild indexes from scratch.
49 #define TOAST_INDEX_HACK
52 static inline void brin_deconstruct_tuple(BrinDesc
*brdesc
,
53 char *tp
, bits8
*nullbits
, bool nulls
,
54 Datum
*values
, bool *allnulls
, bool *hasnulls
);
58 * Return a tuple descriptor used for on-disk storage of BRIN tuples.
61 brtuple_disk_tupdesc(BrinDesc
*brdesc
)
63 /* We cache these in the BrinDesc */
64 if (brdesc
->bd_disktdesc
== NULL
)
72 /* make sure it's in the bdesc's context */
73 oldcxt
= MemoryContextSwitchTo(brdesc
->bd_context
);
75 tupdesc
= CreateTemplateTupleDesc(brdesc
->bd_totalstored
);
77 for (i
= 0; i
< brdesc
->bd_tupdesc
->natts
; i
++)
79 for (j
= 0; j
< brdesc
->bd_info
[i
]->oi_nstored
; j
++)
80 TupleDescInitEntry(tupdesc
, attno
++, NULL
,
81 brdesc
->bd_info
[i
]->oi_typcache
[j
]->type_id
,
85 MemoryContextSwitchTo(oldcxt
);
87 brdesc
->bd_disktdesc
= tupdesc
;
90 return brdesc
->bd_disktdesc
;
94 * Generate a new on-disk tuple to be inserted in a BRIN index.
96 * See brin_form_placeholder_tuple if you touch this.
99 brin_form_tuple(BrinDesc
*brdesc
, BlockNumber blkno
, BrinMemTuple
*tuple
,
104 bool anynulls
= false;
108 uint16 phony_infomask
= 0;
109 bits8
*phony_nullbitmap
;
115 #ifdef TOAST_INDEX_HACK
116 Datum
*untoasted_values
;
120 Assert(brdesc
->bd_totalstored
> 0);
122 values
= (Datum
*) palloc(sizeof(Datum
) * brdesc
->bd_totalstored
);
123 nulls
= (bool *) palloc0(sizeof(bool) * brdesc
->bd_totalstored
);
124 phony_nullbitmap
= (bits8
*)
125 palloc(sizeof(bits8
) * BITMAPLEN(brdesc
->bd_totalstored
));
127 #ifdef TOAST_INDEX_HACK
128 untoasted_values
= (Datum
*) palloc(sizeof(Datum
) * brdesc
->bd_totalstored
);
132 * Set up the values/nulls arrays for heap_fill_tuple
135 for (keyno
= 0; keyno
< brdesc
->bd_tupdesc
->natts
; keyno
++)
140 * "allnulls" is set when there's no nonnull value in any row in the
141 * column; when this happens, there is no data to store. Thus set the
142 * nullable bits for all data elements of this column and we're done.
144 if (tuple
->bt_columns
[keyno
].bv_allnulls
)
147 datumno
< brdesc
->bd_info
[keyno
]->oi_nstored
;
149 nulls
[idxattno
++] = true;
155 * The "hasnulls" bit is set when there are some null values in the
156 * data. We still need to store a real value, but the presence of
157 * this means we need a null bitmap.
159 if (tuple
->bt_columns
[keyno
].bv_hasnulls
)
162 /* If needed, serialize the values before forming the on-disk tuple. */
163 if (tuple
->bt_columns
[keyno
].bv_serialize
)
165 tuple
->bt_columns
[keyno
].bv_serialize(brdesc
,
166 tuple
->bt_columns
[keyno
].bv_mem_value
,
167 tuple
->bt_columns
[keyno
].bv_values
);
171 * Now obtain the values of each stored datum. Note that some values
172 * might be toasted, and we cannot rely on the original heap values
173 * sticking around forever, so we must detoast them. Also try to
177 datumno
< brdesc
->bd_info
[keyno
]->oi_nstored
;
180 Datum value
= tuple
->bt_columns
[keyno
].bv_values
[datumno
];
182 #ifdef TOAST_INDEX_HACK
184 /* We must look at the stored type, not at the index descriptor. */
185 TypeCacheEntry
*atttype
= brdesc
->bd_info
[keyno
]->oi_typcache
[datumno
];
187 /* Do we need to free the value at the end? */
188 bool free_value
= false;
190 /* For non-varlena types we don't need to do anything special */
191 if (atttype
->typlen
!= -1)
193 values
[idxattno
++] = value
;
198 * Do nothing if value is not of varlena type. We don't need to
199 * care about NULL values here, thanks to bv_allnulls above.
201 * If value is stored EXTERNAL, must fetch it so we are not
202 * depending on outside storage.
204 * XXX Is this actually true? Could it be that the summary is NULL
205 * even for range with non-NULL data? E.g. degenerate bloom filter
206 * may be thrown away, etc.
208 if (VARATT_IS_EXTERNAL(DatumGetPointer(value
)))
210 value
= PointerGetDatum(detoast_external_attr((struct varlena
*)
211 DatumGetPointer(value
)));
216 * If value is above size target, and is of a compressible
217 * datatype, try to compress it in-line.
219 if (!VARATT_IS_EXTENDED(DatumGetPointer(value
)) &&
220 VARSIZE(DatumGetPointer(value
)) > TOAST_INDEX_TARGET
&&
221 (atttype
->typstorage
== TYPSTORAGE_EXTENDED
||
222 atttype
->typstorage
== TYPSTORAGE_MAIN
))
226 Form_pg_attribute att
= TupleDescAttr(brdesc
->bd_tupdesc
,
230 * If the BRIN summary and indexed attribute use the same data
231 * type and it has a valid compression method, we can use the
232 * same compression method. Otherwise we have to use the
235 if (att
->atttypid
== atttype
->type_id
)
236 compression
= att
->attcompression
;
238 compression
= InvalidCompressionMethod
;
240 cvalue
= toast_compress_datum(value
, compression
);
242 if (DatumGetPointer(cvalue
) != NULL
)
244 /* successful compression */
246 pfree(DatumGetPointer(value
));
254 * If we untoasted / compressed the value, we need to free it
255 * after forming the index tuple.
258 untoasted_values
[nuntoasted
++] = value
;
262 values
[idxattno
++] = value
;
266 /* Assert we did not overrun temp arrays */
267 Assert(idxattno
<= brdesc
->bd_totalstored
);
269 /* compute total space needed */
270 len
= SizeOfBrinTuple
;
274 * We need a double-length bitmap on an on-disk BRIN index tuple; the
275 * first half stores the "allnulls" bits, the second stores
278 len
+= BITMAPLEN(brdesc
->bd_tupdesc
->natts
* 2);
281 len
= hoff
= MAXALIGN(len
);
283 data_len
= heap_compute_data_size(brtuple_disk_tupdesc(brdesc
),
289 rettuple
= palloc0(len
);
290 rettuple
->bt_blkno
= blkno
;
291 rettuple
->bt_info
= hoff
;
293 /* Assert that hoff fits in the space available */
294 Assert((rettuple
->bt_info
& BRIN_OFFSET_MASK
) == hoff
);
297 * The infomask and null bitmap as computed by heap_fill_tuple are useless
298 * to us. However, that function will not accept a null infomask; and we
299 * need to pass a valid null bitmap so that it will correctly skip
300 * outputting null attributes in the data area.
302 heap_fill_tuple(brtuple_disk_tupdesc(brdesc
),
305 (char *) rettuple
+ hoff
,
310 /* done with these */
313 pfree(phony_nullbitmap
);
315 #ifdef TOAST_INDEX_HACK
316 for (i
= 0; i
< nuntoasted
; i
++)
317 pfree(DatumGetPointer(untoasted_values
[i
]));
321 * Now fill in the real null bitmasks. allnulls first.
328 rettuple
->bt_info
|= BRIN_NULLS_MASK
;
331 * Note that we reverse the sense of null bits in this module: we
332 * store a 1 for a null attribute rather than a 0. So we must reverse
333 * the sense of the att_isnull test in brin_deconstruct_tuple as well.
335 bitP
= ((bits8
*) ((char *) rettuple
+ SizeOfBrinTuple
)) - 1;
337 for (keyno
= 0; keyno
< brdesc
->bd_tupdesc
->natts
; keyno
++)
339 if (bitmask
!= HIGHBIT
)
348 if (!tuple
->bt_columns
[keyno
].bv_allnulls
)
353 /* hasnulls bits follow */
354 for (keyno
= 0; keyno
< brdesc
->bd_tupdesc
->natts
; keyno
++)
356 if (bitmask
!= HIGHBIT
)
365 if (!tuple
->bt_columns
[keyno
].bv_hasnulls
)
372 if (tuple
->bt_placeholder
)
373 rettuple
->bt_info
|= BRIN_PLACEHOLDER_MASK
;
380 * Generate a new on-disk tuple with no data values, marked as placeholder.
382 * This is a cut-down version of brin_form_tuple.
385 brin_form_placeholder_tuple(BrinDesc
*brdesc
, BlockNumber blkno
, Size
*size
)
394 /* compute total space needed: always add nulls */
395 len
= SizeOfBrinTuple
;
396 len
+= BITMAPLEN(brdesc
->bd_tupdesc
->natts
* 2);
397 len
= hoff
= MAXALIGN(len
);
399 rettuple
= palloc0(len
);
400 rettuple
->bt_blkno
= blkno
;
401 rettuple
->bt_info
= hoff
;
402 rettuple
->bt_info
|= BRIN_NULLS_MASK
| BRIN_PLACEHOLDER_MASK
;
404 bitP
= ((bits8
*) ((char *) rettuple
+ SizeOfBrinTuple
)) - 1;
406 /* set allnulls true for all attributes */
407 for (keyno
= 0; keyno
< brdesc
->bd_tupdesc
->natts
; keyno
++)
409 if (bitmask
!= HIGHBIT
)
420 /* no need to set hasnulls */
427 * Free a tuple created by brin_form_tuple
430 brin_free_tuple(BrinTuple
*tuple
)
436 * Given a brin tuple of size len, create a copy of it. If 'dest' is not
437 * NULL, its size is destsz, and can be used as output buffer; if the tuple
438 * to be copied does not fit, it is enlarged by repalloc, and the size is
439 * updated to match. This avoids palloc/free cycles when many brin tuples
440 * are being processed in loops.
443 brin_copy_tuple(BrinTuple
*tuple
, Size len
, BrinTuple
*dest
, Size
*destsz
)
445 if (!destsz
|| *destsz
== 0)
447 else if (len
> *destsz
)
449 dest
= repalloc(dest
, len
);
453 memcpy(dest
, tuple
, len
);
459 * Return whether two BrinTuples are bitwise identical.
462 brin_tuples_equal(const BrinTuple
*a
, Size alen
, const BrinTuple
*b
, Size blen
)
466 if (memcmp(a
, b
, alen
) != 0)
472 * Create a new BrinMemTuple from scratch, and initialize it to an empty
475 * Note: we don't provide any means to free a deformed tuple, so make sure to
476 * use a temporary memory context.
479 brin_new_memtuple(BrinDesc
*brdesc
)
484 basesize
= MAXALIGN(sizeof(BrinMemTuple
) +
485 sizeof(BrinValues
) * brdesc
->bd_tupdesc
->natts
);
486 dtup
= palloc0(basesize
+ sizeof(Datum
) * brdesc
->bd_totalstored
);
488 dtup
->bt_values
= palloc(sizeof(Datum
) * brdesc
->bd_totalstored
);
489 dtup
->bt_allnulls
= palloc(sizeof(bool) * brdesc
->bd_tupdesc
->natts
);
490 dtup
->bt_hasnulls
= palloc(sizeof(bool) * brdesc
->bd_tupdesc
->natts
);
492 dtup
->bt_context
= AllocSetContextCreate(CurrentMemoryContext
,
494 ALLOCSET_DEFAULT_SIZES
);
496 brin_memtuple_initialize(dtup
, brdesc
);
502 * Reset a BrinMemTuple to initial state. We return the same tuple, for
503 * notational convenience.
506 brin_memtuple_initialize(BrinMemTuple
*dtuple
, BrinDesc
*brdesc
)
511 MemoryContextReset(dtuple
->bt_context
);
513 currdatum
= (char *) dtuple
+
514 MAXALIGN(sizeof(BrinMemTuple
) +
515 sizeof(BrinValues
) * brdesc
->bd_tupdesc
->natts
);
516 for (i
= 0; i
< brdesc
->bd_tupdesc
->natts
; i
++)
518 dtuple
->bt_columns
[i
].bv_attno
= i
+ 1;
519 dtuple
->bt_columns
[i
].bv_allnulls
= true;
520 dtuple
->bt_columns
[i
].bv_hasnulls
= false;
521 dtuple
->bt_columns
[i
].bv_values
= (Datum
*) currdatum
;
523 dtuple
->bt_columns
[i
].bv_mem_value
= PointerGetDatum(NULL
);
524 dtuple
->bt_columns
[i
].bv_serialize
= NULL
;
525 dtuple
->bt_columns
[i
].bv_context
= dtuple
->bt_context
;
527 currdatum
+= sizeof(Datum
) * brdesc
->bd_info
[i
]->oi_nstored
;
534 * Convert a BrinTuple back to a BrinMemTuple. This is the reverse of
537 * As an optimization, the caller can pass a previously allocated 'dMemtuple'.
538 * This avoids having to allocate it here, which can be useful when this
539 * function is called many times in a loop. It is caller's responsibility
540 * that the given BrinMemTuple matches what we need here.
542 * Note we don't need the "on disk tupdesc" here; we rely on our own routine to
543 * deconstruct the tuple from the on-disk format.
546 brin_deform_tuple(BrinDesc
*brdesc
, BrinTuple
*tuple
, BrinMemTuple
*dMemtuple
)
556 MemoryContext oldcxt
;
558 dtup
= dMemtuple
? brin_memtuple_initialize(dMemtuple
, brdesc
) :
559 brin_new_memtuple(brdesc
);
561 if (BrinTupleIsPlaceholder(tuple
))
562 dtup
->bt_placeholder
= true;
563 dtup
->bt_blkno
= tuple
->bt_blkno
;
565 values
= dtup
->bt_values
;
566 allnulls
= dtup
->bt_allnulls
;
567 hasnulls
= dtup
->bt_hasnulls
;
569 tp
= (char *) tuple
+ BrinTupleDataOffset(tuple
);
571 if (BrinTupleHasNulls(tuple
))
572 nullbits
= (bits8
*) ((char *) tuple
+ SizeOfBrinTuple
);
575 brin_deconstruct_tuple(brdesc
,
576 tp
, nullbits
, BrinTupleHasNulls(tuple
),
577 values
, allnulls
, hasnulls
);
580 * Iterate to assign each of the values to the corresponding item in the
581 * values array of each column. The copies occur in the tuple's context.
583 oldcxt
= MemoryContextSwitchTo(dtup
->bt_context
);
584 for (valueno
= 0, keyno
= 0; keyno
< brdesc
->bd_tupdesc
->natts
; keyno
++)
590 valueno
+= brdesc
->bd_info
[keyno
]->oi_nstored
;
595 * We would like to skip datumCopy'ing the values datum in some cases,
596 * caller permitting ...
598 for (i
= 0; i
< brdesc
->bd_info
[keyno
]->oi_nstored
; i
++)
599 dtup
->bt_columns
[keyno
].bv_values
[i
] =
600 datumCopy(values
[valueno
++],
601 brdesc
->bd_info
[keyno
]->oi_typcache
[i
]->typbyval
,
602 brdesc
->bd_info
[keyno
]->oi_typcache
[i
]->typlen
);
604 dtup
->bt_columns
[keyno
].bv_hasnulls
= hasnulls
[keyno
];
605 dtup
->bt_columns
[keyno
].bv_allnulls
= false;
607 dtup
->bt_columns
[keyno
].bv_mem_value
= PointerGetDatum(NULL
);
608 dtup
->bt_columns
[keyno
].bv_serialize
= NULL
;
609 dtup
->bt_columns
[keyno
].bv_context
= dtup
->bt_context
;
612 MemoryContextSwitchTo(oldcxt
);
618 * brin_deconstruct_tuple
619 * Guts of attribute extraction from an on-disk BRIN tuple.
622 * brdesc BRIN descriptor for the stored tuple
623 * tp pointer to the tuple data area
624 * nullbits pointer to the tuple nulls bitmask
625 * nulls "has nulls" bit in tuple infomask
626 * values output values, array of size brdesc->bd_totalstored
627 * allnulls output "allnulls", size brdesc->bd_tupdesc->natts
628 * hasnulls output "hasnulls", size brdesc->bd_tupdesc->natts
630 * Output arrays must have been allocated by caller.
633 brin_deconstruct_tuple(BrinDesc
*brdesc
,
634 char *tp
, bits8
*nullbits
, bool nulls
,
635 Datum
*values
, bool *allnulls
, bool *hasnulls
)
643 * First iterate to natts to obtain both null flags for each attribute.
644 * Note that we reverse the sense of the att_isnull test, because we store
645 * 1 for a null value (rather than a 1 for a not null value as is the
646 * att_isnull convention used elsewhere.) See brin_form_tuple.
648 for (attnum
= 0; attnum
< brdesc
->bd_tupdesc
->natts
; attnum
++)
651 * the "all nulls" bit means that all values in the page range for
652 * this column are nulls. Therefore there are no values in the tuple
655 allnulls
[attnum
] = nulls
&& !att_isnull(attnum
, nullbits
);
658 * the "has nulls" bit means that some tuples have nulls, but others
659 * have not-null values. Therefore we know the tuple contains data
662 * The hasnulls bits follow the allnulls bits in the same bitmask.
665 nulls
&& !att_isnull(brdesc
->bd_tupdesc
->natts
+ attnum
, nullbits
);
669 * Iterate to obtain each attribute's stored values. Note that since we
670 * may reuse attribute entries for more than one column, we cannot cache
673 diskdsc
= brtuple_disk_tupdesc(brdesc
);
676 for (attnum
= 0; attnum
< brdesc
->bd_tupdesc
->natts
; attnum
++)
680 if (allnulls
[attnum
])
682 stored
+= brdesc
->bd_info
[attnum
]->oi_nstored
;
687 datumno
< brdesc
->bd_info
[attnum
]->oi_nstored
;
690 Form_pg_attribute thisatt
= TupleDescAttr(diskdsc
, stored
);
692 if (thisatt
->attlen
== -1)
694 off
= att_align_pointer(off
, thisatt
->attalign
, -1,
699 /* not varlena, so safe to use att_align_nominal */
700 off
= att_align_nominal(off
, thisatt
->attalign
);
703 values
[stored
++] = fetchatt(thisatt
, tp
+ off
);
705 off
= att_addlength_pointer(off
, thisatt
->attlen
, tp
+ off
);