1 /*-------------------------------------------------------------------------
4 * Functions to investigate heap pages
6 * We check the input to these functions for corrupt pointers etc. that
7 * might cause crashes, but at the same time we try to print out as much
8 * information as possible, even if it's nonsense. That's because if a
9 * page is corrupt, we don't know why and how exactly it is corrupt, so we
10 * let the user judge it.
12 * These functions are restricted to superusers for the fear of introducing
13 * security holes if the input checking isn't as water-tight as it should be.
14 * You'd need to be superuser to obtain a raw page image anyway, so
15 * there's hardly any use case for using these without superuser-rights
18 * Copyright (c) 2007-2022, PostgreSQL Global Development Group
21 * contrib/pageinspect/heapfuncs.c
23 *-------------------------------------------------------------------------
28 #include "access/htup_details.h"
29 #include "access/relation.h"
30 #include "catalog/pg_am_d.h"
31 #include "catalog/pg_type.h"
33 #include "mb/pg_wchar.h"
34 #include "miscadmin.h"
35 #include "pageinspect.h"
36 #include "port/pg_bitutils.h"
37 #include "utils/array.h"
38 #include "utils/builtins.h"
39 #include "utils/rel.h"
/*
 * Tuples can no longer be created with OIDs, but a cluster brought forward
 * with pg_upgrade from an older release may still hold tuples that carry
 * one, and it is worth displaying.
 *
 * When HEAP_HASOID_OLD is set in t_infomask, the OID is read from the
 * sizeof(Oid) bytes that end at offset t_hoff from the start of the header.
 * Otherwise the macro yields InvalidOid.  Note that 'tup' is evaluated more
 * than once.
 */
#define HeapTupleHeaderGetOidOld(tup) \
	((((tup)->t_infomask & HEAP_HASOID_OLD) != 0) ? \
	 *((Oid *) ((char *) (tup) + (tup)->t_hoff - sizeof(Oid))) : \
	 InvalidOid)
58 * Converts a bits8-array of 'len' bits to a human-readable
59 * c-string representation.
62 bits_to_text(bits8
*bits
, int len
)
67 str
= palloc(len
+ 1);
69 for (i
= 0; i
< len
; i
++)
70 str
[i
] = (bits
[(i
/ 8)] & (1 << (i
% 8))) ? '1' : '0';
81 * Converts a c-string representation of bits into a bits8-array. This is
82 * the reverse operation of previous routine.
85 text_to_bits(char *str
, int len
)
91 bits
= palloc(len
+ 1);
98 if ((str
[off
] == '0') || (str
[off
] == '1'))
99 byte
= byte
| ((str
[off
] - '0') << off
% 8);
102 (errcode(ERRCODE_DATA_CORRUPTED
),
103 errmsg("invalid character \"%.*s\" in t_bits string",
104 pg_mblen(str
+ off
), str
+ off
)));
107 bits
[off
/ 8] = byte
;
118 * Allows inspection of line pointers and tuple headers of a heap page.
120 PG_FUNCTION_INFO_V1(heap_page_items
);
122 typedef struct heap_page_items_state
127 } heap_page_items_state
;
130 heap_page_items(PG_FUNCTION_ARGS
)
132 bytea
*raw_page
= PG_GETARG_BYTEA_P(0);
133 heap_page_items_state
*inter_call_data
= NULL
;
134 FuncCallContext
*fctx
;
139 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE
),
140 errmsg("must be superuser to use raw page functions")));
142 raw_page_size
= VARSIZE(raw_page
) - VARHDRSZ
;
144 if (SRF_IS_FIRSTCALL())
149 if (raw_page_size
< SizeOfPageHeaderData
)
151 (errcode(ERRCODE_INVALID_PARAMETER_VALUE
),
152 errmsg("input page too small (%d bytes)", raw_page_size
)));
154 fctx
= SRF_FIRSTCALL_INIT();
155 mctx
= MemoryContextSwitchTo(fctx
->multi_call_memory_ctx
);
157 inter_call_data
= palloc(sizeof(heap_page_items_state
));
159 /* Build a tuple descriptor for our result type */
160 if (get_call_result_type(fcinfo
, NULL
, &tupdesc
) != TYPEFUNC_COMPOSITE
)
161 elog(ERROR
, "return type must be a row type");
163 inter_call_data
->tupd
= tupdesc
;
165 inter_call_data
->offset
= FirstOffsetNumber
;
166 inter_call_data
->page
= VARDATA(raw_page
);
168 fctx
->max_calls
= PageGetMaxOffsetNumber(inter_call_data
->page
);
169 fctx
->user_fctx
= inter_call_data
;
171 MemoryContextSwitchTo(mctx
);
174 fctx
= SRF_PERCALL_SETUP();
175 inter_call_data
= fctx
->user_fctx
;
177 if (fctx
->call_cntr
< fctx
->max_calls
)
179 Page page
= inter_call_data
->page
;
180 HeapTuple resultTuple
;
189 memset(nulls
, 0, sizeof(nulls
));
191 /* Extract information from the line pointer */
193 id
= PageGetItemId(page
, inter_call_data
->offset
);
195 lp_offset
= ItemIdGetOffset(id
);
196 lp_flags
= ItemIdGetFlags(id
);
197 lp_len
= ItemIdGetLength(id
);
199 values
[0] = UInt16GetDatum(inter_call_data
->offset
);
200 values
[1] = UInt16GetDatum(lp_offset
);
201 values
[2] = UInt16GetDatum(lp_flags
);
202 values
[3] = UInt16GetDatum(lp_len
);
205 * We do just enough validity checking to make sure we don't reference
206 * data outside the page passed to us. The page could be corrupt in
207 * many other ways, but at least we won't crash.
209 if (ItemIdHasStorage(id
) &&
210 lp_len
>= MinHeapTupleSize
&&
211 lp_offset
== MAXALIGN(lp_offset
) &&
212 lp_offset
+ lp_len
<= raw_page_size
)
214 HeapTupleHeader tuphdr
;
215 bytea
*tuple_data_bytea
;
218 /* Extract information from the tuple header */
220 tuphdr
= (HeapTupleHeader
) PageGetItem(page
, id
);
222 values
[4] = UInt32GetDatum(HeapTupleHeaderGetRawXmin(tuphdr
));
223 values
[5] = UInt32GetDatum(HeapTupleHeaderGetRawXmax(tuphdr
));
224 /* shared with xvac */
225 values
[6] = UInt32GetDatum(HeapTupleHeaderGetRawCommandId(tuphdr
));
226 values
[7] = PointerGetDatum(&tuphdr
->t_ctid
);
227 values
[8] = UInt32GetDatum(tuphdr
->t_infomask2
);
228 values
[9] = UInt32GetDatum(tuphdr
->t_infomask
);
229 values
[10] = UInt8GetDatum(tuphdr
->t_hoff
);
231 /* Copy raw tuple data into bytea attribute */
232 tuple_data_len
= lp_len
- tuphdr
->t_hoff
;
233 tuple_data_bytea
= (bytea
*) palloc(tuple_data_len
+ VARHDRSZ
);
234 SET_VARSIZE(tuple_data_bytea
, tuple_data_len
+ VARHDRSZ
);
235 memcpy(VARDATA(tuple_data_bytea
), (char *) tuphdr
+ tuphdr
->t_hoff
,
237 values
[13] = PointerGetDatum(tuple_data_bytea
);
240 * We already checked that the item is completely within the raw
241 * page passed to us, with the length given in the line pointer.
242 * Let's check that t_hoff doesn't point over lp_len, before using
243 * it to access t_bits and oid.
245 if (tuphdr
->t_hoff
>= SizeofHeapTupleHeader
&&
246 tuphdr
->t_hoff
<= lp_len
&&
247 tuphdr
->t_hoff
== MAXALIGN(tuphdr
->t_hoff
))
249 if (tuphdr
->t_infomask
& HEAP_HASNULL
)
254 BITMAPLEN(HeapTupleHeaderGetNatts(tuphdr
)) * BITS_PER_BYTE
;
255 values
[11] = CStringGetTextDatum(bits_to_text(tuphdr
->t_bits
, bits_len
));
260 if (tuphdr
->t_infomask
& HEAP_HASOID_OLD
)
261 values
[12] = HeapTupleHeaderGetOidOld(tuphdr
);
274 * The line pointer is not used, or it's invalid. Set the rest of
279 for (i
= 4; i
<= 13; i
++)
283 /* Build and return the result tuple. */
284 resultTuple
= heap_form_tuple(inter_call_data
->tupd
, values
, nulls
);
285 result
= HeapTupleGetDatum(resultTuple
);
287 inter_call_data
->offset
++;
289 SRF_RETURN_NEXT(fctx
, result
);
292 SRF_RETURN_DONE(fctx
);
296 * tuple_data_split_internal
298 * Split raw tuple data taken directly from a page into an array of bytea
299 * elements. This routine does a lookup on NULL values and creates array
300 * elements accordingly. This is a reimplementation of nocachegetattr()
301 * in heaptuple.c simplified for educational purposes.
304 tuple_data_split_internal(Oid relid
, char *tupdata
,
305 uint16 tupdata_len
, uint16 t_infomask
,
306 uint16 t_infomask2
, bits8
*t_bits
,
309 ArrayBuildState
*raw_attrs
;
316 /* Get tuple descriptor from relation OID */
317 rel
= relation_open(relid
, AccessShareLock
);
318 tupdesc
= RelationGetDescr(rel
);
320 raw_attrs
= initArrayResult(BYTEAOID
, CurrentMemoryContext
, false);
321 nattrs
= tupdesc
->natts
;
323 if (rel
->rd_rel
->relam
!= HEAP_TABLE_AM_OID
)
324 ereport(ERROR
, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED
),
325 errmsg("only heap AM is supported")));
327 if (nattrs
< (t_infomask2
& HEAP_NATTS_MASK
))
329 (errcode(ERRCODE_DATA_CORRUPTED
),
330 errmsg("number of attributes in tuple header is greater than number of attributes in tuple descriptor")));
332 for (i
= 0; i
< nattrs
; i
++)
334 Form_pg_attribute attr
;
336 bytea
*attr_data
= NULL
;
338 attr
= TupleDescAttr(tupdesc
, i
);
341 * Tuple header can specify fewer attributes than tuple descriptor as
342 * ALTER TABLE ADD COLUMN without DEFAULT keyword does not actually
343 * change tuples in pages, so attributes with numbers greater than
344 * (t_infomask2 & HEAP_NATTS_MASK) should be treated as NULL.
346 if (i
>= (t_infomask2
& HEAP_NATTS_MASK
))
349 is_null
= (t_infomask
& HEAP_HASNULL
) && att_isnull(i
, t_bits
);
355 if (attr
->attlen
== -1)
357 off
= att_align_pointer(off
, attr
->attalign
, -1,
361 * As VARSIZE_ANY throws an exception if it can't properly
362 * detect the type of external storage in macros VARTAG_SIZE,
363 * this check is repeated to have a nicer error handling.
365 if (VARATT_IS_EXTERNAL(tupdata
+ off
) &&
366 !VARATT_IS_EXTERNAL_ONDISK(tupdata
+ off
) &&
367 !VARATT_IS_EXTERNAL_INDIRECT(tupdata
+ off
))
369 (errcode(ERRCODE_DATA_CORRUPTED
),
370 errmsg("first byte of varlena attribute is incorrect for attribute %d", i
)));
372 len
= VARSIZE_ANY(tupdata
+ off
);
376 off
= att_align_nominal(off
, attr
->attalign
);
380 if (tupdata_len
< off
+ len
)
382 (errcode(ERRCODE_DATA_CORRUPTED
),
383 errmsg("unexpected end of tuple data")));
385 if (attr
->attlen
== -1 && do_detoast
)
386 attr_data
= DatumGetByteaPCopy(tupdata
+ off
);
389 attr_data
= (bytea
*) palloc(len
+ VARHDRSZ
);
390 SET_VARSIZE(attr_data
, len
+ VARHDRSZ
);
391 memcpy(VARDATA(attr_data
), tupdata
+ off
, len
);
394 off
= att_addlength_pointer(off
, attr
->attlen
,
398 raw_attrs
= accumArrayResult(raw_attrs
, PointerGetDatum(attr_data
),
399 is_null
, BYTEAOID
, CurrentMemoryContext
);
404 if (tupdata_len
!= off
)
406 (errcode(ERRCODE_DATA_CORRUPTED
),
407 errmsg("end of tuple reached without looking at all its data")));
409 relation_close(rel
, AccessShareLock
);
411 return makeArrayResult(raw_attrs
, CurrentMemoryContext
);
417 * Split raw tuple data taken directly from page into distinct elements
418 * taking into account null values.
420 PG_FUNCTION_INFO_V1(tuple_data_split
);
423 tuple_data_split(PG_FUNCTION_ARGS
)
430 bool do_detoast
= false;
431 bits8
*t_bits
= NULL
;
434 relid
= PG_GETARG_OID(0);
435 raw_data
= PG_ARGISNULL(1) ? NULL
: PG_GETARG_BYTEA_P(1);
436 t_infomask
= PG_GETARG_INT16(2);
437 t_infomask2
= PG_GETARG_INT16(3);
438 t_bits_str
= PG_ARGISNULL(4) ? NULL
:
439 text_to_cstring(PG_GETARG_TEXT_PP(4));
442 do_detoast
= PG_GETARG_BOOL(5);
446 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE
),
447 errmsg("must be superuser to use raw page functions")));
453 * Convert t_bits string back to the bits8 array as represented in the
456 if (t_infomask
& HEAP_HASNULL
)
461 bits_len
= BITMAPLEN(t_infomask2
& HEAP_NATTS_MASK
) * BITS_PER_BYTE
;
464 (errcode(ERRCODE_DATA_CORRUPTED
),
465 errmsg("t_bits string must not be NULL")));
467 bits_str_len
= strlen(t_bits_str
);
468 if (bits_len
!= bits_str_len
)
470 (errcode(ERRCODE_DATA_CORRUPTED
),
471 errmsg("unexpected length of t_bits string: %zu, expected %zu",
472 bits_str_len
, bits_len
)));
474 /* do the conversion */
475 t_bits
= text_to_bits(t_bits_str
, bits_str_len
);
481 (errcode(ERRCODE_DATA_CORRUPTED
),
482 errmsg("t_bits string is expected to be NULL, but instead it is %zu bytes long",
483 strlen(t_bits_str
))));
486 /* Split tuple data */
487 res
= tuple_data_split_internal(relid
, (char *) raw_data
+ VARHDRSZ
,
488 VARSIZE(raw_data
) - VARHDRSZ
,
489 t_infomask
, t_infomask2
, t_bits
,
495 PG_RETURN_ARRAYTYPE_P(res
);
499 * heap_tuple_infomask_flags
501 * Decode into a human-readable format t_infomask and t_infomask2 associated
502 * to a tuple. All the flags are described in access/htup_details.h.
504 PG_FUNCTION_INFO_V1(heap_tuple_infomask_flags
);
507 heap_tuple_infomask_flags(PG_FUNCTION_ARGS
)
509 #define HEAP_TUPLE_INFOMASK_COLS 2
510 Datum values
[HEAP_TUPLE_INFOMASK_COLS
];
511 bool nulls
[HEAP_TUPLE_INFOMASK_COLS
];
512 uint16 t_infomask
= PG_GETARG_INT16(0);
513 uint16 t_infomask2
= PG_GETARG_INT16(1);
523 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE
),
524 errmsg("must be superuser to use raw page functions")));
526 /* Build a tuple descriptor for our result type */
527 if (get_call_result_type(fcinfo
, NULL
, &tupdesc
) != TYPEFUNC_COMPOSITE
)
528 elog(ERROR
, "return type must be a row type");
530 bitcnt
= pg_popcount((const char *) &t_infomask
, sizeof(uint16
)) +
531 pg_popcount((const char *) &t_infomask2
, sizeof(uint16
));
533 /* Initialize values and NULL flags arrays */
534 MemSet(values
, 0, sizeof(values
));
535 MemSet(nulls
, 0, sizeof(nulls
));
537 /* If no flags, return a set of empty arrays */
540 values
[0] = PointerGetDatum(construct_empty_array(TEXTOID
));
541 values
[1] = PointerGetDatum(construct_empty_array(TEXTOID
));
542 tuple
= heap_form_tuple(tupdesc
, values
, nulls
);
543 PG_RETURN_DATUM(HeapTupleGetDatum(tuple
));
546 /* build set of raw flags */
547 flags
= (Datum
*) palloc0(sizeof(Datum
) * bitcnt
);
549 /* decode t_infomask */
550 if ((t_infomask
& HEAP_HASNULL
) != 0)
551 flags
[cnt
++] = CStringGetTextDatum("HEAP_HASNULL");
552 if ((t_infomask
& HEAP_HASVARWIDTH
) != 0)
553 flags
[cnt
++] = CStringGetTextDatum("HEAP_HASVARWIDTH");
554 if ((t_infomask
& HEAP_HASEXTERNAL
) != 0)
555 flags
[cnt
++] = CStringGetTextDatum("HEAP_HASEXTERNAL");
556 if ((t_infomask
& HEAP_HASOID_OLD
) != 0)
557 flags
[cnt
++] = CStringGetTextDatum("HEAP_HASOID_OLD");
558 if ((t_infomask
& HEAP_XMAX_KEYSHR_LOCK
) != 0)
559 flags
[cnt
++] = CStringGetTextDatum("HEAP_XMAX_KEYSHR_LOCK");
560 if ((t_infomask
& HEAP_COMBOCID
) != 0)
561 flags
[cnt
++] = CStringGetTextDatum("HEAP_COMBOCID");
562 if ((t_infomask
& HEAP_XMAX_EXCL_LOCK
) != 0)
563 flags
[cnt
++] = CStringGetTextDatum("HEAP_XMAX_EXCL_LOCK");
564 if ((t_infomask
& HEAP_XMAX_LOCK_ONLY
) != 0)
565 flags
[cnt
++] = CStringGetTextDatum("HEAP_XMAX_LOCK_ONLY");
566 if ((t_infomask
& HEAP_XMIN_COMMITTED
) != 0)
567 flags
[cnt
++] = CStringGetTextDatum("HEAP_XMIN_COMMITTED");
568 if ((t_infomask
& HEAP_XMIN_INVALID
) != 0)
569 flags
[cnt
++] = CStringGetTextDatum("HEAP_XMIN_INVALID");
570 if ((t_infomask
& HEAP_XMAX_COMMITTED
) != 0)
571 flags
[cnt
++] = CStringGetTextDatum("HEAP_XMAX_COMMITTED");
572 if ((t_infomask
& HEAP_XMAX_INVALID
) != 0)
573 flags
[cnt
++] = CStringGetTextDatum("HEAP_XMAX_INVALID");
574 if ((t_infomask
& HEAP_XMAX_IS_MULTI
) != 0)
575 flags
[cnt
++] = CStringGetTextDatum("HEAP_XMAX_IS_MULTI");
576 if ((t_infomask
& HEAP_UPDATED
) != 0)
577 flags
[cnt
++] = CStringGetTextDatum("HEAP_UPDATED");
578 if ((t_infomask
& HEAP_MOVED_OFF
) != 0)
579 flags
[cnt
++] = CStringGetTextDatum("HEAP_MOVED_OFF");
580 if ((t_infomask
& HEAP_MOVED_IN
) != 0)
581 flags
[cnt
++] = CStringGetTextDatum("HEAP_MOVED_IN");
583 /* decode t_infomask2 */
584 if ((t_infomask2
& HEAP_KEYS_UPDATED
) != 0)
585 flags
[cnt
++] = CStringGetTextDatum("HEAP_KEYS_UPDATED");
586 if ((t_infomask2
& HEAP_HOT_UPDATED
) != 0)
587 flags
[cnt
++] = CStringGetTextDatum("HEAP_HOT_UPDATED");
588 if ((t_infomask2
& HEAP_ONLY_TUPLE
) != 0)
589 flags
[cnt
++] = CStringGetTextDatum("HEAP_ONLY_TUPLE");
592 Assert(cnt
<= bitcnt
);
593 a
= construct_array(flags
, cnt
, TEXTOID
, -1, false, TYPALIGN_INT
);
594 values
[0] = PointerGetDatum(a
);
597 * Build set of combined flags. Use the same array as previously, this
598 * keeps the code simple.
601 MemSet(flags
, 0, sizeof(Datum
) * bitcnt
);
603 /* decode combined masks of t_infomask */
604 if ((t_infomask
& HEAP_XMAX_SHR_LOCK
) == HEAP_XMAX_SHR_LOCK
)
605 flags
[cnt
++] = CStringGetTextDatum("HEAP_XMAX_SHR_LOCK");
606 if ((t_infomask
& HEAP_XMIN_FROZEN
) == HEAP_XMIN_FROZEN
)
607 flags
[cnt
++] = CStringGetTextDatum("HEAP_XMIN_FROZEN");
608 if ((t_infomask
& HEAP_MOVED
) == HEAP_MOVED
)
609 flags
[cnt
++] = CStringGetTextDatum("HEAP_MOVED");
611 /* Build an empty array if there are no combined flags */
613 a
= construct_empty_array(TEXTOID
);
615 a
= construct_array(flags
, cnt
, TEXTOID
, -1, false, TYPALIGN_INT
);
617 values
[1] = PointerGetDatum(a
);
619 /* Returns the record as Datum */
620 tuple
= heap_form_tuple(tupdesc
, values
, nulls
);
621 PG_RETURN_DATUM(HeapTupleGetDatum(tuple
));