Update copyright for 2022
[pgsql.git] / contrib / pageinspect / heapfuncs.c
blob3dd1a9bc2ab164c294c3e3e50290bf77400a1c7c
/*-------------------------------------------------------------------------
 *
 * heapfuncs.c
 *	  Functions to investigate heap pages
 *
 * We check the input to these functions for corrupt pointers etc. that
 * might cause crashes, but at the same time we try to print out as much
 * information as possible, even if it's nonsense. That's because if a
 * page is corrupt, we don't know why and how exactly it is corrupt, so we
 * let the user judge it.
 *
 * These functions are restricted to superusers for fear of introducing
 * security holes if the input checking isn't as watertight as it should be.
 * You'd need to be superuser to obtain a raw page image anyway, so
 * there's hardly any use case for using these without superuser rights.
 *
 * Copyright (c) 2007-2022, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  contrib/pageinspect/heapfuncs.c
 *
 *-------------------------------------------------------------------------
 */
26 #include "postgres.h"
28 #include "access/htup_details.h"
29 #include "access/relation.h"
30 #include "catalog/pg_am_d.h"
31 #include "catalog/pg_type.h"
32 #include "funcapi.h"
33 #include "mb/pg_wchar.h"
34 #include "miscadmin.h"
35 #include "pageinspect.h"
36 #include "port/pg_bitutils.h"
37 #include "utils/array.h"
38 #include "utils/builtins.h"
39 #include "utils/rel.h"
/*
 * Creating tuples with OIDs is no longer supported, but a cluster that was
 * upgraded with pg_upgrade from an older version may still contain tuples
 * that carry one.  It seems worthwhile to display that.
 *
 * Evaluates to the Oid stored in the sizeof(Oid) bytes that end at offset
 * t_hoff of the tuple header when HEAP_HASOID_OLD is set in t_infomask, and
 * to InvalidOid otherwise.  Note that "tup" is evaluated more than once.
 */
#define HeapTupleHeaderGetOidOld(tup) \
	(((tup)->t_infomask & HEAP_HASOID_OLD) \
	 ? *((Oid *) ((char *) (tup) + (tup)->t_hoff - sizeof(Oid))) \
	 : InvalidOid)
56 * bits_to_text
58 * Converts a bits8-array of 'len' bits to a human-readable
59 * c-string representation.
61 static char *
62 bits_to_text(bits8 *bits, int len)
64 int i;
65 char *str;
67 str = palloc(len + 1);
69 for (i = 0; i < len; i++)
70 str[i] = (bits[(i / 8)] & (1 << (i % 8))) ? '1' : '0';
72 str[i] = '\0';
74 return str;
79 * text_to_bits
81 * Converts a c-string representation of bits into a bits8-array. This is
82 * the reverse operation of previous routine.
84 static bits8 *
85 text_to_bits(char *str, int len)
87 bits8 *bits;
88 int off = 0;
89 char byte = 0;
91 bits = palloc(len + 1);
93 while (off < len)
95 if (off % 8 == 0)
96 byte = 0;
98 if ((str[off] == '0') || (str[off] == '1'))
99 byte = byte | ((str[off] - '0') << off % 8);
100 else
101 ereport(ERROR,
102 (errcode(ERRCODE_DATA_CORRUPTED),
103 errmsg("invalid character \"%.*s\" in t_bits string",
104 pg_mblen(str + off), str + off)));
106 if (off % 8 == 7)
107 bits[off / 8] = byte;
109 off++;
112 return bits;
116 * heap_page_items
118 * Allows inspection of line pointers and tuple headers of a heap page.
120 PG_FUNCTION_INFO_V1(heap_page_items);
122 typedef struct heap_page_items_state
124 TupleDesc tupd;
125 Page page;
126 uint16 offset;
127 } heap_page_items_state;
129 Datum
130 heap_page_items(PG_FUNCTION_ARGS)
132 bytea *raw_page = PG_GETARG_BYTEA_P(0);
133 heap_page_items_state *inter_call_data = NULL;
134 FuncCallContext *fctx;
135 int raw_page_size;
137 if (!superuser())
138 ereport(ERROR,
139 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
140 errmsg("must be superuser to use raw page functions")));
142 raw_page_size = VARSIZE(raw_page) - VARHDRSZ;
144 if (SRF_IS_FIRSTCALL())
146 TupleDesc tupdesc;
147 MemoryContext mctx;
149 if (raw_page_size < SizeOfPageHeaderData)
150 ereport(ERROR,
151 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
152 errmsg("input page too small (%d bytes)", raw_page_size)));
154 fctx = SRF_FIRSTCALL_INIT();
155 mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
157 inter_call_data = palloc(sizeof(heap_page_items_state));
159 /* Build a tuple descriptor for our result type */
160 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
161 elog(ERROR, "return type must be a row type");
163 inter_call_data->tupd = tupdesc;
165 inter_call_data->offset = FirstOffsetNumber;
166 inter_call_data->page = VARDATA(raw_page);
168 fctx->max_calls = PageGetMaxOffsetNumber(inter_call_data->page);
169 fctx->user_fctx = inter_call_data;
171 MemoryContextSwitchTo(mctx);
174 fctx = SRF_PERCALL_SETUP();
175 inter_call_data = fctx->user_fctx;
177 if (fctx->call_cntr < fctx->max_calls)
179 Page page = inter_call_data->page;
180 HeapTuple resultTuple;
181 Datum result;
182 ItemId id;
183 Datum values[14];
184 bool nulls[14];
185 uint16 lp_offset;
186 uint16 lp_flags;
187 uint16 lp_len;
189 memset(nulls, 0, sizeof(nulls));
191 /* Extract information from the line pointer */
193 id = PageGetItemId(page, inter_call_data->offset);
195 lp_offset = ItemIdGetOffset(id);
196 lp_flags = ItemIdGetFlags(id);
197 lp_len = ItemIdGetLength(id);
199 values[0] = UInt16GetDatum(inter_call_data->offset);
200 values[1] = UInt16GetDatum(lp_offset);
201 values[2] = UInt16GetDatum(lp_flags);
202 values[3] = UInt16GetDatum(lp_len);
205 * We do just enough validity checking to make sure we don't reference
206 * data outside the page passed to us. The page could be corrupt in
207 * many other ways, but at least we won't crash.
209 if (ItemIdHasStorage(id) &&
210 lp_len >= MinHeapTupleSize &&
211 lp_offset == MAXALIGN(lp_offset) &&
212 lp_offset + lp_len <= raw_page_size)
214 HeapTupleHeader tuphdr;
215 bytea *tuple_data_bytea;
216 int tuple_data_len;
218 /* Extract information from the tuple header */
220 tuphdr = (HeapTupleHeader) PageGetItem(page, id);
222 values[4] = UInt32GetDatum(HeapTupleHeaderGetRawXmin(tuphdr));
223 values[5] = UInt32GetDatum(HeapTupleHeaderGetRawXmax(tuphdr));
224 /* shared with xvac */
225 values[6] = UInt32GetDatum(HeapTupleHeaderGetRawCommandId(tuphdr));
226 values[7] = PointerGetDatum(&tuphdr->t_ctid);
227 values[8] = UInt32GetDatum(tuphdr->t_infomask2);
228 values[9] = UInt32GetDatum(tuphdr->t_infomask);
229 values[10] = UInt8GetDatum(tuphdr->t_hoff);
231 /* Copy raw tuple data into bytea attribute */
232 tuple_data_len = lp_len - tuphdr->t_hoff;
233 tuple_data_bytea = (bytea *) palloc(tuple_data_len + VARHDRSZ);
234 SET_VARSIZE(tuple_data_bytea, tuple_data_len + VARHDRSZ);
235 memcpy(VARDATA(tuple_data_bytea), (char *) tuphdr + tuphdr->t_hoff,
236 tuple_data_len);
237 values[13] = PointerGetDatum(tuple_data_bytea);
240 * We already checked that the item is completely within the raw
241 * page passed to us, with the length given in the line pointer.
242 * Let's check that t_hoff doesn't point over lp_len, before using
243 * it to access t_bits and oid.
245 if (tuphdr->t_hoff >= SizeofHeapTupleHeader &&
246 tuphdr->t_hoff <= lp_len &&
247 tuphdr->t_hoff == MAXALIGN(tuphdr->t_hoff))
249 if (tuphdr->t_infomask & HEAP_HASNULL)
251 int bits_len;
253 bits_len =
254 BITMAPLEN(HeapTupleHeaderGetNatts(tuphdr)) * BITS_PER_BYTE;
255 values[11] = CStringGetTextDatum(bits_to_text(tuphdr->t_bits, bits_len));
257 else
258 nulls[11] = true;
260 if (tuphdr->t_infomask & HEAP_HASOID_OLD)
261 values[12] = HeapTupleHeaderGetOidOld(tuphdr);
262 else
263 nulls[12] = true;
265 else
267 nulls[11] = true;
268 nulls[12] = true;
271 else
274 * The line pointer is not used, or it's invalid. Set the rest of
275 * the fields to NULL
277 int i;
279 for (i = 4; i <= 13; i++)
280 nulls[i] = true;
283 /* Build and return the result tuple. */
284 resultTuple = heap_form_tuple(inter_call_data->tupd, values, nulls);
285 result = HeapTupleGetDatum(resultTuple);
287 inter_call_data->offset++;
289 SRF_RETURN_NEXT(fctx, result);
291 else
292 SRF_RETURN_DONE(fctx);
296 * tuple_data_split_internal
298 * Split raw tuple data taken directly from a page into an array of bytea
299 * elements. This routine does a lookup on NULL values and creates array
300 * elements accordingly. This is a reimplementation of nocachegetattr()
301 * in heaptuple.c simplified for educational purposes.
303 static Datum
304 tuple_data_split_internal(Oid relid, char *tupdata,
305 uint16 tupdata_len, uint16 t_infomask,
306 uint16 t_infomask2, bits8 *t_bits,
307 bool do_detoast)
309 ArrayBuildState *raw_attrs;
310 int nattrs;
311 int i;
312 int off = 0;
313 Relation rel;
314 TupleDesc tupdesc;
316 /* Get tuple descriptor from relation OID */
317 rel = relation_open(relid, AccessShareLock);
318 tupdesc = RelationGetDescr(rel);
320 raw_attrs = initArrayResult(BYTEAOID, CurrentMemoryContext, false);
321 nattrs = tupdesc->natts;
323 if (rel->rd_rel->relam != HEAP_TABLE_AM_OID)
324 ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
325 errmsg("only heap AM is supported")));
327 if (nattrs < (t_infomask2 & HEAP_NATTS_MASK))
328 ereport(ERROR,
329 (errcode(ERRCODE_DATA_CORRUPTED),
330 errmsg("number of attributes in tuple header is greater than number of attributes in tuple descriptor")));
332 for (i = 0; i < nattrs; i++)
334 Form_pg_attribute attr;
335 bool is_null;
336 bytea *attr_data = NULL;
338 attr = TupleDescAttr(tupdesc, i);
341 * Tuple header can specify fewer attributes than tuple descriptor as
342 * ALTER TABLE ADD COLUMN without DEFAULT keyword does not actually
343 * change tuples in pages, so attributes with numbers greater than
344 * (t_infomask2 & HEAP_NATTS_MASK) should be treated as NULL.
346 if (i >= (t_infomask2 & HEAP_NATTS_MASK))
347 is_null = true;
348 else
349 is_null = (t_infomask & HEAP_HASNULL) && att_isnull(i, t_bits);
351 if (!is_null)
353 int len;
355 if (attr->attlen == -1)
357 off = att_align_pointer(off, attr->attalign, -1,
358 tupdata + off);
361 * As VARSIZE_ANY throws an exception if it can't properly
362 * detect the type of external storage in macros VARTAG_SIZE,
363 * this check is repeated to have a nicer error handling.
365 if (VARATT_IS_EXTERNAL(tupdata + off) &&
366 !VARATT_IS_EXTERNAL_ONDISK(tupdata + off) &&
367 !VARATT_IS_EXTERNAL_INDIRECT(tupdata + off))
368 ereport(ERROR,
369 (errcode(ERRCODE_DATA_CORRUPTED),
370 errmsg("first byte of varlena attribute is incorrect for attribute %d", i)));
372 len = VARSIZE_ANY(tupdata + off);
374 else
376 off = att_align_nominal(off, attr->attalign);
377 len = attr->attlen;
380 if (tupdata_len < off + len)
381 ereport(ERROR,
382 (errcode(ERRCODE_DATA_CORRUPTED),
383 errmsg("unexpected end of tuple data")));
385 if (attr->attlen == -1 && do_detoast)
386 attr_data = DatumGetByteaPCopy(tupdata + off);
387 else
389 attr_data = (bytea *) palloc(len + VARHDRSZ);
390 SET_VARSIZE(attr_data, len + VARHDRSZ);
391 memcpy(VARDATA(attr_data), tupdata + off, len);
394 off = att_addlength_pointer(off, attr->attlen,
395 tupdata + off);
398 raw_attrs = accumArrayResult(raw_attrs, PointerGetDatum(attr_data),
399 is_null, BYTEAOID, CurrentMemoryContext);
400 if (attr_data)
401 pfree(attr_data);
404 if (tupdata_len != off)
405 ereport(ERROR,
406 (errcode(ERRCODE_DATA_CORRUPTED),
407 errmsg("end of tuple reached without looking at all its data")));
409 relation_close(rel, AccessShareLock);
411 return makeArrayResult(raw_attrs, CurrentMemoryContext);
415 * tuple_data_split
417 * Split raw tuple data taken directly from page into distinct elements
418 * taking into account null values.
420 PG_FUNCTION_INFO_V1(tuple_data_split);
422 Datum
423 tuple_data_split(PG_FUNCTION_ARGS)
425 Oid relid;
426 bytea *raw_data;
427 uint16 t_infomask;
428 uint16 t_infomask2;
429 char *t_bits_str;
430 bool do_detoast = false;
431 bits8 *t_bits = NULL;
432 Datum res;
434 relid = PG_GETARG_OID(0);
435 raw_data = PG_ARGISNULL(1) ? NULL : PG_GETARG_BYTEA_P(1);
436 t_infomask = PG_GETARG_INT16(2);
437 t_infomask2 = PG_GETARG_INT16(3);
438 t_bits_str = PG_ARGISNULL(4) ? NULL :
439 text_to_cstring(PG_GETARG_TEXT_PP(4));
441 if (PG_NARGS() >= 6)
442 do_detoast = PG_GETARG_BOOL(5);
444 if (!superuser())
445 ereport(ERROR,
446 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
447 errmsg("must be superuser to use raw page functions")));
449 if (!raw_data)
450 PG_RETURN_NULL();
453 * Convert t_bits string back to the bits8 array as represented in the
454 * tuple header.
456 if (t_infomask & HEAP_HASNULL)
458 size_t bits_str_len;
459 size_t bits_len;
461 bits_len = BITMAPLEN(t_infomask2 & HEAP_NATTS_MASK) * BITS_PER_BYTE;
462 if (!t_bits_str)
463 ereport(ERROR,
464 (errcode(ERRCODE_DATA_CORRUPTED),
465 errmsg("t_bits string must not be NULL")));
467 bits_str_len = strlen(t_bits_str);
468 if (bits_len != bits_str_len)
469 ereport(ERROR,
470 (errcode(ERRCODE_DATA_CORRUPTED),
471 errmsg("unexpected length of t_bits string: %zu, expected %zu",
472 bits_str_len, bits_len)));
474 /* do the conversion */
475 t_bits = text_to_bits(t_bits_str, bits_str_len);
477 else
479 if (t_bits_str)
480 ereport(ERROR,
481 (errcode(ERRCODE_DATA_CORRUPTED),
482 errmsg("t_bits string is expected to be NULL, but instead it is %zu bytes long",
483 strlen(t_bits_str))));
486 /* Split tuple data */
487 res = tuple_data_split_internal(relid, (char *) raw_data + VARHDRSZ,
488 VARSIZE(raw_data) - VARHDRSZ,
489 t_infomask, t_infomask2, t_bits,
490 do_detoast);
492 if (t_bits)
493 pfree(t_bits);
495 PG_RETURN_ARRAYTYPE_P(res);
499 * heap_tuple_infomask_flags
501 * Decode into a human-readable format t_infomask and t_infomask2 associated
502 * to a tuple. All the flags are described in access/htup_details.h.
504 PG_FUNCTION_INFO_V1(heap_tuple_infomask_flags);
506 Datum
507 heap_tuple_infomask_flags(PG_FUNCTION_ARGS)
509 #define HEAP_TUPLE_INFOMASK_COLS 2
510 Datum values[HEAP_TUPLE_INFOMASK_COLS];
511 bool nulls[HEAP_TUPLE_INFOMASK_COLS];
512 uint16 t_infomask = PG_GETARG_INT16(0);
513 uint16 t_infomask2 = PG_GETARG_INT16(1);
514 int cnt = 0;
515 ArrayType *a;
516 int bitcnt;
517 Datum *flags;
518 TupleDesc tupdesc;
519 HeapTuple tuple;
521 if (!superuser())
522 ereport(ERROR,
523 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
524 errmsg("must be superuser to use raw page functions")));
526 /* Build a tuple descriptor for our result type */
527 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
528 elog(ERROR, "return type must be a row type");
530 bitcnt = pg_popcount((const char *) &t_infomask, sizeof(uint16)) +
531 pg_popcount((const char *) &t_infomask2, sizeof(uint16));
533 /* Initialize values and NULL flags arrays */
534 MemSet(values, 0, sizeof(values));
535 MemSet(nulls, 0, sizeof(nulls));
537 /* If no flags, return a set of empty arrays */
538 if (bitcnt <= 0)
540 values[0] = PointerGetDatum(construct_empty_array(TEXTOID));
541 values[1] = PointerGetDatum(construct_empty_array(TEXTOID));
542 tuple = heap_form_tuple(tupdesc, values, nulls);
543 PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
546 /* build set of raw flags */
547 flags = (Datum *) palloc0(sizeof(Datum) * bitcnt);
549 /* decode t_infomask */
550 if ((t_infomask & HEAP_HASNULL) != 0)
551 flags[cnt++] = CStringGetTextDatum("HEAP_HASNULL");
552 if ((t_infomask & HEAP_HASVARWIDTH) != 0)
553 flags[cnt++] = CStringGetTextDatum("HEAP_HASVARWIDTH");
554 if ((t_infomask & HEAP_HASEXTERNAL) != 0)
555 flags[cnt++] = CStringGetTextDatum("HEAP_HASEXTERNAL");
556 if ((t_infomask & HEAP_HASOID_OLD) != 0)
557 flags[cnt++] = CStringGetTextDatum("HEAP_HASOID_OLD");
558 if ((t_infomask & HEAP_XMAX_KEYSHR_LOCK) != 0)
559 flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_KEYSHR_LOCK");
560 if ((t_infomask & HEAP_COMBOCID) != 0)
561 flags[cnt++] = CStringGetTextDatum("HEAP_COMBOCID");
562 if ((t_infomask & HEAP_XMAX_EXCL_LOCK) != 0)
563 flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_EXCL_LOCK");
564 if ((t_infomask & HEAP_XMAX_LOCK_ONLY) != 0)
565 flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_LOCK_ONLY");
566 if ((t_infomask & HEAP_XMIN_COMMITTED) != 0)
567 flags[cnt++] = CStringGetTextDatum("HEAP_XMIN_COMMITTED");
568 if ((t_infomask & HEAP_XMIN_INVALID) != 0)
569 flags[cnt++] = CStringGetTextDatum("HEAP_XMIN_INVALID");
570 if ((t_infomask & HEAP_XMAX_COMMITTED) != 0)
571 flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_COMMITTED");
572 if ((t_infomask & HEAP_XMAX_INVALID) != 0)
573 flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_INVALID");
574 if ((t_infomask & HEAP_XMAX_IS_MULTI) != 0)
575 flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_IS_MULTI");
576 if ((t_infomask & HEAP_UPDATED) != 0)
577 flags[cnt++] = CStringGetTextDatum("HEAP_UPDATED");
578 if ((t_infomask & HEAP_MOVED_OFF) != 0)
579 flags[cnt++] = CStringGetTextDatum("HEAP_MOVED_OFF");
580 if ((t_infomask & HEAP_MOVED_IN) != 0)
581 flags[cnt++] = CStringGetTextDatum("HEAP_MOVED_IN");
583 /* decode t_infomask2 */
584 if ((t_infomask2 & HEAP_KEYS_UPDATED) != 0)
585 flags[cnt++] = CStringGetTextDatum("HEAP_KEYS_UPDATED");
586 if ((t_infomask2 & HEAP_HOT_UPDATED) != 0)
587 flags[cnt++] = CStringGetTextDatum("HEAP_HOT_UPDATED");
588 if ((t_infomask2 & HEAP_ONLY_TUPLE) != 0)
589 flags[cnt++] = CStringGetTextDatum("HEAP_ONLY_TUPLE");
591 /* build value */
592 Assert(cnt <= bitcnt);
593 a = construct_array(flags, cnt, TEXTOID, -1, false, TYPALIGN_INT);
594 values[0] = PointerGetDatum(a);
597 * Build set of combined flags. Use the same array as previously, this
598 * keeps the code simple.
600 cnt = 0;
601 MemSet(flags, 0, sizeof(Datum) * bitcnt);
603 /* decode combined masks of t_infomask */
604 if ((t_infomask & HEAP_XMAX_SHR_LOCK) == HEAP_XMAX_SHR_LOCK)
605 flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_SHR_LOCK");
606 if ((t_infomask & HEAP_XMIN_FROZEN) == HEAP_XMIN_FROZEN)
607 flags[cnt++] = CStringGetTextDatum("HEAP_XMIN_FROZEN");
608 if ((t_infomask & HEAP_MOVED) == HEAP_MOVED)
609 flags[cnt++] = CStringGetTextDatum("HEAP_MOVED");
611 /* Build an empty array if there are no combined flags */
612 if (cnt == 0)
613 a = construct_empty_array(TEXTOID);
614 else
615 a = construct_array(flags, cnt, TEXTOID, -1, false, TYPALIGN_INT);
616 pfree(flags);
617 values[1] = PointerGetDatum(a);
619 /* Returns the record as Datum */
620 tuple = heap_form_tuple(tupdesc, values, nulls);
621 PG_RETURN_DATUM(HeapTupleGetDatum(tuple));