/*-------------------------------------------------------------------------
 *
 * Concurrent ("lazy") vacuuming.
 *
 * The major space usage for LAZY VACUUM is storage for the array of dead
 * tuple TIDs, with the next biggest need being storage for per-disk-page
 * free space info.  We want to ensure we can vacuum even the very largest
 * relations with finite memory space usage.  To do that, we set upper bounds
 * on the number of tuples and pages we will keep track of at once.
 *
 * We are willing to use at most maintenance_work_mem memory space to keep
 * track of dead tuples.  We initially allocate an array of TIDs of that size,
 * with an upper limit that depends on table size (this limit ensures we don't
 * allocate a huge area uselessly for vacuuming small tables).  If the array
 * threatens to overflow, we suspend the heap scan phase and perform a pass of
 * index cleanup and page compaction, then resume the heap scan with an empty
 * TID array.
 *
 * If we're processing a table with no indexes, we can just vacuum each page
 * as we go; there's no need to save up multiple tuples to minimize the number
 * of index scans performed.  So we don't use maintenance_work_mem memory for
 * the TID array, just enough to hold as many heap tuples as fit on one page.
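 *
 * To give a rough sense of scale: with maintenance_work_mem set to, say,
 * 64MB, and ItemPointerData being 6 bytes, the TID array can remember about
 * 11 million dead tuples before a round of index cleanup is forced.  (These
 * numbers are only illustrative; lazy_space_alloc below computes the actual
 * limit.)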
 *
 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/genam.h"
#include "access/heapam.h"
#include "access/transam.h"
#include "access/visibilitymap.h"
#include "catalog/storage.h"
#include "commands/dbcommands.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/lmgr.h"
#include "utils/inval.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/pg_rusage.h"
#include "utils/tqual.h"

/*
 * Space/time tradeoff parameters: do these need to be user-tunable?
 *
 * To consider truncating the relation, we want there to be at least
 * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
 * is less) potentially-freeable pages.
 */
#define REL_TRUNCATE_MINIMUM	1000
#define REL_TRUNCATE_FRACTION	16
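
/*
 * For example, a 64000-page relation qualifies once Min(1000, 64000 / 16)
 * = 1000 tail pages look freeable, while an 800-page relation needs only
 * 800 / 16 = 50 freeable pages.  (Worked numbers for illustration only.)
 */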

/*
 * Guesstimation of number of dead tuples per page.  This is used to
 * provide an upper limit to memory allocated when vacuuming small
 * tables.
 */
#define LAZY_ALLOC_TUPLES		MaxHeapTuplesPerPage
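
/*
 * With the default 8K block size, MaxHeapTuplesPerPage works out to 291,
 * so, for instance, a 100-block table is capped at 29100 TID slots
 * (roughly 170KB) no matter how large maintenance_work_mem is.
 */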

typedef struct LVRelStats
{
	/* hasindex = true means two-pass strategy; false means one-pass */
	bool		hasindex;
	/* Overall statistics about rel */
	BlockNumber rel_pages;
	double		rel_tuples;		/* counts only tuples on scanned pages */
	BlockNumber pages_removed;
	double		tuples_deleted;
	BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
	/* List of TIDs of tuples we intend to delete */
	/* NB: this list is ordered by TID address */
	int			num_dead_tuples;	/* current # of entries */
	int			max_dead_tuples;	/* # slots allocated in array */
	ItemPointer dead_tuples;	/* array of ItemPointerData */
	int			num_index_scans;
	bool		scanned_all;	/* have we scanned all pages (this far)? */
} LVRelStats;

/* A few variables that don't seem worth passing around as parameters */
static int	elevel = -1;

static TransactionId OldestXmin;
static TransactionId FreezeLimit;

static BufferAccessStrategy vac_strategy;

/* non-export function prototypes */
static void lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
			   Relation *Irel, int nindexes, bool scan_all);
static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats);
static void lazy_vacuum_index(Relation indrel,
				  IndexBulkDeleteResult **stats,
				  LVRelStats *vacrelstats);
static void lazy_cleanup_index(Relation indrel,
				   IndexBulkDeleteResult *stats,
				   LVRelStats *vacrelstats);
static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
				 int tupindex, LVRelStats *vacrelstats);
static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats);
static BlockNumber count_nondeletable_pages(Relation onerel,
						 LVRelStats *vacrelstats);
static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks);
static void lazy_record_dead_tuple(LVRelStats *vacrelstats,
					   ItemPointer itemptr);
static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
static int	vac_cmp_itemptr(const void *left, const void *right);

/*
 *	lazy_vacuum_rel() -- perform LAZY VACUUM for one heap relation
 *
 *		This routine vacuums a single heap, cleans out its indexes, and
 *		updates its relpages and reltuples statistics.
 *
 *		At entry, we have already established a transaction and opened
 *		and locked the relation.
 */
void
lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
				BufferAccessStrategy bstrategy, bool *scanned_all)
{
	LVRelStats *vacrelstats;
	Relation   *Irel;
	int			nindexes;
	BlockNumber possibly_freeable;
	PGRUsage	ru0;
	TimestampTz starttime = 0;
	bool		scan_all;
	TransactionId freezeTableLimit;

	pg_rusage_init(&ru0);

	/* measure elapsed time iff autovacuum logging requires it */
	if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration > 0)
		starttime = GetCurrentTimestamp();

	if (vacstmt->verbose)
		elevel = INFO;
	else
		elevel = DEBUG2;

	vac_strategy = bstrategy;

	vacuum_set_xid_limits(vacstmt->freeze_min_age, vacstmt->freeze_table_age,
						  onerel->rd_rel->relisshared,
						  &OldestXmin, &FreezeLimit, &freezeTableLimit);
	scan_all = TransactionIdPrecedesOrEquals(onerel->rd_rel->relfrozenxid,
											 freezeTableLimit);

	vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));

	vacrelstats->num_index_scans = 0;
	vacrelstats->scanned_all = true;	/* will be cleared if we skip a page */

	/* Open all indexes of the relation */
	vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
	vacrelstats->hasindex = (nindexes > 0);

	/* Do the vacuuming */
	lazy_scan_heap(onerel, vacrelstats, Irel, nindexes, scan_all);

	/* Done with indexes */
	vac_close_indexes(nindexes, Irel, NoLock);

	/*
	 * Optionally truncate the relation.
	 *
	 * Don't even think about it unless we have a shot at releasing a goodly
	 * number of pages.  Otherwise, the time taken isn't worth it.
	 */
	possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
	if (possibly_freeable > 0 &&
		(possibly_freeable >= REL_TRUNCATE_MINIMUM ||
		 possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION))
		lazy_truncate_heap(onerel, vacrelstats);

	/* Vacuum the Free Space Map */
	FreeSpaceMapVacuum(onerel);

	/*
	 * Update statistics in pg_class.  But only if we didn't skip any pages;
	 * the tuple count only includes tuples from the pages we've visited, and
	 * we haven't frozen tuples in unvisited pages either.  The page count is
	 * accurate in any case, but because we use the reltuples / relpages
	 * ratio in the planner, it's better to not update relpages either if we
	 * can't update reltuples.
	 */
	if (vacrelstats->scanned_all)
		vac_update_relstats(onerel,
							vacrelstats->rel_pages, vacrelstats->rel_tuples,
							vacrelstats->hasindex,
							FreezeLimit);

	/* report results to the stats collector, too */
	pgstat_report_vacuum(RelationGetRelid(onerel),
						 onerel->rd_rel->relisshared,
						 vacrelstats->scanned_all,
						 vacstmt->analyze, vacrelstats->rel_tuples);

	/* and log the action if appropriate */
	if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
	{
		if (Log_autovacuum_min_duration == 0 ||
			TimestampDifferenceExceeds(starttime, GetCurrentTimestamp(),
									   Log_autovacuum_min_duration))
			ereport(LOG,
					(errmsg("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n"
							"pages: %d removed, %d remain\n"
							"tuples: %.0f removed, %.0f remain\n"
							"system usage: %s",
							get_database_name(MyDatabaseId),
							get_namespace_name(RelationGetNamespace(onerel)),
							RelationGetRelationName(onerel),
							vacrelstats->num_index_scans,
							vacrelstats->pages_removed, vacrelstats->rel_pages,
							vacrelstats->tuples_deleted, vacrelstats->rel_tuples,
							pg_rusage_show(&ru0))));
	}

	*scanned_all = vacrelstats->scanned_all;
}

/*
 *	lazy_scan_heap() -- scan an open heap relation
 *
 *		This routine sets commit status bits, builds lists of dead tuples
 *		and pages with free space, and calculates statistics on the number
 *		of live tuples in the heap.  When done, or when we run low on space
 *		for dead-tuple TIDs, invoke vacuuming of indexes and heap.
 *
 *		If there are no indexes then we just vacuum each dirty page as we
 *		process it, since there's no point in gathering many tuples.
 */
static void
lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
			   Relation *Irel, int nindexes, bool scan_all)
{
	BlockNumber nblocks,
				blkno;
	HeapTupleData tuple;
	char	   *relname;
	BlockNumber empty_pages,
				vacuumed_pages,
				scanned_pages;
	double		num_tuples,
				tups_vacuumed,
				nkeep,
				nunused;
	IndexBulkDeleteResult **indstats;
	int			i;
	PGRUsage	ru0;
	Buffer		vmbuffer = InvalidBuffer;

	pg_rusage_init(&ru0);

	relname = RelationGetRelationName(onerel);
	ereport(elevel,
			(errmsg("vacuuming \"%s.%s\"",
					get_namespace_name(RelationGetNamespace(onerel)),
					relname)));

	empty_pages = vacuumed_pages = scanned_pages = 0;
	num_tuples = tups_vacuumed = nkeep = nunused = 0;

	indstats = (IndexBulkDeleteResult **)
		palloc0(nindexes * sizeof(IndexBulkDeleteResult *));

	nblocks = RelationGetNumberOfBlocks(onerel);
	vacrelstats->rel_pages = nblocks;
	vacrelstats->nonempty_pages = 0;

	lazy_space_alloc(vacrelstats, nblocks);

	for (blkno = 0; blkno < nblocks; blkno++)
	{
		Buffer		buf;
		Page		page;
		OffsetNumber offnum,
					maxoff;
		bool		tupgone,
					hastup;
		int			prev_dead_count;
		OffsetNumber frozen[MaxOffsetNumber];
		int			nfrozen;
		Size		freespace;
		bool		all_visible_according_to_vm = false;
		bool		all_visible;

		/*
		 * Skip pages that don't require vacuuming according to the
		 * visibility map, unless we're scanning the whole relation to
		 * freeze tuples.
		 */
		if (!scan_all)
		{
			all_visible_according_to_vm =
				visibilitymap_test(onerel, blkno, &vmbuffer);
			if (all_visible_according_to_vm)
			{
				vacrelstats->scanned_all = false;
				continue;
			}
		}

		vacuum_delay_point();

		scanned_pages++;

		/*
		 * If we are close to overrunning the available space for dead-tuple
		 * TIDs, pause and do a cycle of vacuuming before we tackle this page.
		 */
		if ((vacrelstats->max_dead_tuples - vacrelstats->num_dead_tuples) < MaxHeapTuplesPerPage &&
			vacrelstats->num_dead_tuples > 0)
		{
			/* Remove index entries */
			for (i = 0; i < nindexes; i++)
				lazy_vacuum_index(Irel[i],
								  &indstats[i],
								  vacrelstats);
			/* Remove tuples from heap */
			lazy_vacuum_heap(onerel, vacrelstats);
			/* Forget the now-vacuumed tuples, and press on */
			vacrelstats->num_dead_tuples = 0;
			vacrelstats->num_index_scans++;
		}

		buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
								 RBM_NORMAL, vac_strategy);

		/* We need buffer cleanup lock so that we can prune HOT chains. */
		LockBufferForCleanup(buf);

		page = BufferGetPage(buf);

		if (PageIsNew(page))
		{
			/*
			 * An all-zeroes page could be left over if a backend extends the
			 * relation but crashes before initializing the page.  Reclaim
			 * such pages for use.
			 *
			 * We have to be careful here because we could be looking at a
			 * page that someone has just added to the relation and not yet
			 * been able to initialize (see RelationGetBufferForTuple). To
			 * protect against that, release the buffer lock, grab the
			 * relation extension lock momentarily, and re-lock the buffer. If
			 * the page is still uninitialized by then, it must be left over
			 * from a crashed backend, and we can initialize it.
			 *
			 * We don't really need the relation lock when this is a new or
			 * temp relation, but it's probably not worth the code space to
			 * check that, since this surely isn't a critical path.
			 *
			 * Note: the comparable code in vacuum.c need not worry because
			 * it's got exclusive lock on the whole relation.
			 */
			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
			LockRelationForExtension(onerel, ExclusiveLock);
			UnlockRelationForExtension(onerel, ExclusiveLock);
			LockBufferForCleanup(buf);
			if (PageIsNew(page))
			{
				ereport(WARNING,
						(errmsg("relation \"%s\" page %u is uninitialized --- fixing",
								relname, blkno)));
				PageInit(page, BufferGetPageSize(buf), 0);
				empty_pages++;
			}
			freespace = PageGetHeapFreeSpace(page);
			MarkBufferDirty(buf);
			UnlockReleaseBuffer(buf);

			RecordPageWithFreeSpace(onerel, blkno, freespace);
			continue;
		}

		if (PageIsEmpty(page))
		{
			empty_pages++;
			freespace = PageGetHeapFreeSpace(page);

			if (!PageIsAllVisible(page))
			{
				SetBufferCommitInfoNeedsSave(buf);
				PageSetAllVisible(page);
			}

			LockBuffer(buf, BUFFER_LOCK_UNLOCK);

			/* Update the visibility map */
			if (!all_visible_according_to_vm)
			{
				visibilitymap_pin(onerel, blkno, &vmbuffer);
				LockBuffer(buf, BUFFER_LOCK_SHARE);
				if (PageIsAllVisible(page))
					visibilitymap_set(onerel, blkno, PageGetLSN(page), &vmbuffer);
				LockBuffer(buf, BUFFER_LOCK_UNLOCK);
			}

			ReleaseBuffer(buf);
			RecordPageWithFreeSpace(onerel, blkno, freespace);
			continue;
		}

		/*
		 * Prune all HOT-update chains in this page.
		 *
		 * We count tuples removed by the pruning step as removed by VACUUM.
		 */
		tups_vacuumed += heap_page_prune(onerel, buf, OldestXmin,
										 false, false);

		/*
		 * Now scan the page to collect vacuumable items and check for tuples
		 * requiring freezing.
		 */
		all_visible = true;
		nfrozen = 0;
		hastup = false;
		prev_dead_count = vacrelstats->num_dead_tuples;
		maxoff = PageGetMaxOffsetNumber(page);
		for (offnum = FirstOffsetNumber;
			 offnum <= maxoff;
			 offnum = OffsetNumberNext(offnum))
		{
			ItemId		itemid;

			itemid = PageGetItemId(page, offnum);

			/* Unused items require no processing, but we count 'em */
			if (!ItemIdIsUsed(itemid))
			{
				nunused += 1;
				continue;
			}

			/* Redirect items mustn't be touched */
			if (ItemIdIsRedirected(itemid))
			{
				hastup = true;	/* this page won't be truncatable */
				continue;
			}

			ItemPointerSet(&(tuple.t_self), blkno, offnum);

			/*
			 * DEAD item pointers are to be vacuumed normally; but we don't
			 * count them in tups_vacuumed, else we'd be double-counting (at
			 * least in the common case where heap_page_prune() just freed up
			 * a non-HOT tuple).
			 */
			if (ItemIdIsDead(itemid))
			{
				lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
				all_visible = false;
				continue;
			}

			Assert(ItemIdIsNormal(itemid));

			tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
			tuple.t_len = ItemIdGetLength(itemid);

			tupgone = false;

			switch (HeapTupleSatisfiesVacuum(tuple.t_data, OldestXmin, buf))
			{
				case HEAPTUPLE_DEAD:

					/*
					 * Ordinarily, DEAD tuples would have been removed by
					 * heap_page_prune(), but it's possible that the tuple
					 * state changed since heap_page_prune() looked.  In
					 * particular an INSERT_IN_PROGRESS tuple could have
					 * changed to DEAD if the inserter aborted.  So this
					 * cannot be considered an error condition.
					 *
					 * If the tuple is HOT-updated then it must only be
					 * removed by a prune operation; so we keep it just as if
					 * it were RECENTLY_DEAD.  Also, if it's a heap-only
					 * tuple, we choose to keep it, because it'll be a lot
					 * cheaper to get rid of it in the next pruning pass than
					 * to treat it like an indexed tuple.
					 */
					if (HeapTupleIsHotUpdated(&tuple) ||
						HeapTupleIsHeapOnly(&tuple))
						nkeep += 1;
					else
						tupgone = true; /* we can delete the tuple */
					all_visible = false;
					break;
				case HEAPTUPLE_LIVE:
					/* Tuple is good --- but let's do some validity checks */
					if (onerel->rd_rel->relhasoids &&
						!OidIsValid(HeapTupleGetOid(&tuple)))
						elog(WARNING, "relation \"%s\" TID %u/%u: OID is invalid",
							 relname, blkno, offnum);

					/*
					 * Is the tuple definitely visible to all transactions?
					 *
					 * NB: Like with per-tuple hint bits, we can't set the
					 * PD_ALL_VISIBLE flag if the inserter committed
					 * asynchronously.  See SetHintBits for more info.  Check
					 * that the HEAP_XMIN_COMMITTED hint bit is set because
					 * of that.
					 */
					if (all_visible)
					{
						TransactionId xmin;

						if (!(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
						{
							all_visible = false;
							break;
						}

						/*
						 * The inserter definitely committed.  But is it
						 * old enough that everyone sees it as committed?
						 */
						xmin = HeapTupleHeaderGetXmin(tuple.t_data);
						if (!TransactionIdPrecedes(xmin, OldestXmin))
						{
							all_visible = false;
							break;
						}
					}
					break;
				case HEAPTUPLE_RECENTLY_DEAD:

					/*
					 * If tuple is recently deleted then we must not remove
					 * it from relation.
					 */
					nkeep += 1;
					all_visible = false;
					break;
				case HEAPTUPLE_INSERT_IN_PROGRESS:
					/* This is an expected case during concurrent vacuum */
					all_visible = false;
					break;
				case HEAPTUPLE_DELETE_IN_PROGRESS:
					/* This is an expected case during concurrent vacuum */
					all_visible = false;
					break;
				default:
					elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
					break;
			}

			if (tupgone)
			{
				lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
				tups_vacuumed += 1;
			}
			else
			{
				num_tuples += 1;
				hastup = true;

				/*
				 * Each non-removable tuple must be checked to see if it
				 * needs freezing.  Note we already have exclusive buffer
				 * lock.
				 */
				if (heap_freeze_tuple(tuple.t_data, FreezeLimit,
									  InvalidBuffer))
					frozen[nfrozen++] = offnum;
			}
		}						/* scan along page */

		/*
		 * If we froze any tuples, mark the buffer dirty, and write a WAL
		 * record recording the changes.  We must log the changes to be
		 * crash-safe against future truncation of CLOG.
		 */
		if (nfrozen > 0)
		{
			MarkBufferDirty(buf);
			/* no XLOG for temp tables, though */
			if (!onerel->rd_istemp)
			{
				XLogRecPtr	recptr;

				recptr = log_heap_freeze(onerel, buf, FreezeLimit,
										 frozen, nfrozen);
				PageSetLSN(page, recptr);
				PageSetTLI(page, ThisTimeLineID);
			}
		}

		/*
		 * If there are no indexes then we can vacuum the page right now
		 * instead of doing a second scan.
		 */
		if (nindexes == 0 &&
			vacrelstats->num_dead_tuples > 0)
		{
			/* Remove tuples from heap */
			lazy_vacuum_page(onerel, blkno, buf, 0, vacrelstats);
			/* Forget the now-vacuumed tuples, and press on */
			vacrelstats->num_dead_tuples = 0;
			vacuumed_pages++;
		}

		freespace = PageGetHeapFreeSpace(page);

		/* Update the all-visible flag on the page */
		if (!PageIsAllVisible(page) && all_visible)
		{
			SetBufferCommitInfoNeedsSave(buf);
			PageSetAllVisible(page);
		}
		else if (PageIsAllVisible(page) && !all_visible)
		{
			elog(WARNING, "PD_ALL_VISIBLE flag was incorrectly set");
			SetBufferCommitInfoNeedsSave(buf);
			PageClearAllVisible(page);

			/*
			 * Normally, we would drop the lock on the heap page before
			 * updating the visibility map, but since this is a can't-happen
			 * case anyway, don't bother.
			 */
			visibilitymap_clear(onerel, blkno);
		}

		LockBuffer(buf, BUFFER_LOCK_UNLOCK);

		/* Update the visibility map */
		if (!all_visible_according_to_vm && all_visible)
		{
			visibilitymap_pin(onerel, blkno, &vmbuffer);
			LockBuffer(buf, BUFFER_LOCK_SHARE);
			if (PageIsAllVisible(page))
				visibilitymap_set(onerel, blkno, PageGetLSN(page), &vmbuffer);
			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
		}

		ReleaseBuffer(buf);

		/* Remember the location of the last page with nonremovable tuples */
		if (hastup)
			vacrelstats->nonempty_pages = blkno + 1;

		/*
		 * If we remembered any tuples for deletion, then the page will be
		 * visited again by lazy_vacuum_heap, which will compute and record
		 * its post-compaction free space.  If not, then we're done with this
		 * page, so remember its free space as-is.  (This path will always be
		 * taken if there are no indexes.)
		 */
		if (vacrelstats->num_dead_tuples == prev_dead_count)
			RecordPageWithFreeSpace(onerel, blkno, freespace);
	}

	/* save stats for use later */
	vacrelstats->rel_tuples = num_tuples;
	vacrelstats->tuples_deleted = tups_vacuumed;

	/* If any tuples need to be deleted, perform final vacuum cycle */
	/* XXX put a threshold on min number of tuples here? */
	if (vacrelstats->num_dead_tuples > 0)
	{
		/* Remove index entries */
		for (i = 0; i < nindexes; i++)
			lazy_vacuum_index(Irel[i],
							  &indstats[i],
							  vacrelstats);
		/* Remove tuples from heap */
		lazy_vacuum_heap(onerel, vacrelstats);
		vacrelstats->num_index_scans++;
	}

	/* Release the pin on the visibility map page */
	if (BufferIsValid(vmbuffer))
	{
		ReleaseBuffer(vmbuffer);
		vmbuffer = InvalidBuffer;
	}

	/* Do post-vacuum cleanup and statistics update for each index */
	for (i = 0; i < nindexes; i++)
		lazy_cleanup_index(Irel[i], indstats[i], vacrelstats);

	/* If no indexes, make log report that lazy_vacuum_heap would've made */
	if (vacuumed_pages)
		ereport(elevel,
				(errmsg("\"%s\": removed %.0f row versions in %u pages",
						RelationGetRelationName(onerel),
						tups_vacuumed, vacuumed_pages)));

	ereport(elevel,
			(errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u out of %u pages",
					RelationGetRelationName(onerel),
					tups_vacuumed, num_tuples, scanned_pages, nblocks),
			 errdetail("%.0f dead row versions cannot be removed yet.\n"
					   "There were %.0f unused item pointers.\n"
					   "%u pages are entirely empty.\n"
					   "%s.",
					   nkeep,
					   nunused,
					   empty_pages,
					   pg_rusage_show(&ru0))));
}

/*
 *	lazy_vacuum_heap() -- second pass over the heap
 *
 *		This routine marks dead tuples as unused and compacts out free
 *		space on their pages.  Pages not having dead tuples recorded from
 *		lazy_scan_heap are not visited at all.
 *
 *		Note: the reason for doing this as a second pass is we cannot remove
 *		the tuples until we've removed their index entries, and we want to
 *		process index entry removal in batches as large as possible.
 */
static void
lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
{
	int			tupindex;
	int			npages;
	PGRUsage	ru0;

	pg_rusage_init(&ru0);
	npages = 0;

	tupindex = 0;
	while (tupindex < vacrelstats->num_dead_tuples)
	{
		BlockNumber tblk;
		Buffer		buf;
		Page		page;
		Size		freespace;

		vacuum_delay_point();

		tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
		buf = ReadBufferExtended(onerel, MAIN_FORKNUM, tblk, RBM_NORMAL,
								 vac_strategy);
		LockBufferForCleanup(buf);
		tupindex = lazy_vacuum_page(onerel, tblk, buf, tupindex, vacrelstats);

		/* Now that we've compacted the page, record its available space */
		page = BufferGetPage(buf);
		freespace = PageGetHeapFreeSpace(page);

		UnlockReleaseBuffer(buf);
		RecordPageWithFreeSpace(onerel, tblk, freespace);
		npages++;
	}

	ereport(elevel,
			(errmsg("\"%s\": removed %d row versions in %d pages",
					RelationGetRelationName(onerel),
					tupindex, npages),
			 errdetail("%s.",
					   pg_rusage_show(&ru0))));
}

/*
 *	lazy_vacuum_page() -- free dead tuples on a page
 *					 and repair its fragmentation.
 *
 * Caller must hold pin and buffer cleanup lock on the buffer.
 *
 * tupindex is the index in vacrelstats->dead_tuples of the first dead
 * tuple for this page.  We assume the rest follow sequentially.
 * The return value is the first tupindex after the tuples of this page.
 */
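/*
 * For instance, if dead_tuples holds (5,1) (5,3) (7,2) and we are called
 * with blkno = 5 and tupindex = 0, offsets 1 and 3 on block 5 are marked
 * unused and 2 is returned, pointing at the first TID of block 7.  (The
 * TIDs here are made up for illustration.)
 */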
static int
lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
				 int tupindex, LVRelStats *vacrelstats)
{
	Page		page = BufferGetPage(buffer);
	OffsetNumber unused[MaxOffsetNumber];
	int			uncnt = 0;

	START_CRIT_SECTION();

	for (; tupindex < vacrelstats->num_dead_tuples; tupindex++)
	{
		BlockNumber tblk;
		OffsetNumber toff;
		ItemId		itemid;

		tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
		if (tblk != blkno)
			break;				/* past end of tuples for this block */
		toff = ItemPointerGetOffsetNumber(&vacrelstats->dead_tuples[tupindex]);
		itemid = PageGetItemId(page, toff);
		ItemIdSetUnused(itemid);
		unused[uncnt++] = toff;
	}

	PageRepairFragmentation(page);

	MarkBufferDirty(buffer);

	/* XLOG stuff */
	if (!onerel->rd_istemp)
	{
		XLogRecPtr	recptr;

		recptr = log_heap_clean(onerel, buffer,
								NULL, 0, NULL, 0,
								unused, uncnt,
								false);
		PageSetLSN(page, recptr);
		PageSetTLI(page, ThisTimeLineID);
	}

	END_CRIT_SECTION();

	return tupindex;
}

/*
 *	lazy_vacuum_index() -- vacuum one index relation.
 *
 *		Delete all the index entries pointing to tuples listed in
 *		vacrelstats->dead_tuples, and update running statistics.
 */
static void
lazy_vacuum_index(Relation indrel,
				  IndexBulkDeleteResult **stats,
				  LVRelStats *vacrelstats)
{
	IndexVacuumInfo ivinfo;
	PGRUsage	ru0;

	pg_rusage_init(&ru0);

	ivinfo.index = indrel;
	ivinfo.vacuum_full = false;
	ivinfo.message_level = elevel;
	/* We don't yet know rel_tuples, so pass -1 */
	ivinfo.num_heap_tuples = -1;
	ivinfo.strategy = vac_strategy;

	/* Do bulk deletion */
	*stats = index_bulk_delete(&ivinfo, *stats,
							   lazy_tid_reaped, (void *) vacrelstats);

	ereport(elevel,
			(errmsg("scanned index \"%s\" to remove %d row versions",
					RelationGetRelationName(indrel),
					vacrelstats->num_dead_tuples),
			 errdetail("%s.", pg_rusage_show(&ru0))));
}

/*
 *	lazy_cleanup_index() -- do post-vacuum cleanup for one index relation.
 */
static void
lazy_cleanup_index(Relation indrel,
				   IndexBulkDeleteResult *stats,
				   LVRelStats *vacrelstats)
{
	IndexVacuumInfo ivinfo;
	PGRUsage	ru0;

	pg_rusage_init(&ru0);

	ivinfo.index = indrel;
	ivinfo.vacuum_full = false;
	ivinfo.message_level = elevel;
	ivinfo.num_heap_tuples = vacrelstats->rel_tuples;
	ivinfo.strategy = vac_strategy;

	stats = index_vacuum_cleanup(&ivinfo, stats);

	if (!stats)
		return;

	/* now update statistics in pg_class */
	vac_update_relstats(indrel,
						stats->num_pages, stats->num_index_tuples,
						false, InvalidTransactionId);

	ereport(elevel,
			(errmsg("index \"%s\" now contains %.0f row versions in %u pages",
					RelationGetRelationName(indrel),
					stats->num_index_tuples,
					stats->num_pages),
			 errdetail("%.0f index row versions were removed.\n"
					   "%u index pages have been deleted, %u are currently reusable.\n"
					   "%s.",
					   stats->tuples_removed,
					   stats->pages_deleted, stats->pages_free,
					   pg_rusage_show(&ru0))));

	pfree(stats);
}

/*
 * lazy_truncate_heap - try to truncate off any empty pages at the end
 */
static void
lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
{
	BlockNumber old_rel_pages = vacrelstats->rel_pages;
	BlockNumber new_rel_pages;
	PGRUsage	ru0;

	pg_rusage_init(&ru0);

	/*
	 * We need full exclusive lock on the relation in order to do truncation.
	 * If we can't get it, give up rather than waiting --- we don't want to
	 * block other backends, and we don't want to deadlock (which is quite
	 * possible considering we already hold a lower-grade lock).
	 */
	if (!ConditionalLockRelation(onerel, AccessExclusiveLock))
		return;

	/*
	 * Now that we have exclusive lock, look to see if the rel has grown
	 * whilst we were vacuuming with non-exclusive lock.  If so, give up; the
	 * newly added pages presumably contain non-deletable tuples.
	 */
	new_rel_pages = RelationGetNumberOfBlocks(onerel);
	if (new_rel_pages != old_rel_pages)
	{
		/* might as well use the latest news when we update pg_class stats */
		vacrelstats->rel_pages = new_rel_pages;
		UnlockRelation(onerel, AccessExclusiveLock);
		return;
	}

	/*
	 * Scan backwards from the end to verify that the end pages actually
	 * contain no tuples.  This is *necessary*, not optional, because other
	 * backends could have added tuples to these pages whilst we were
	 * vacuuming.
	 */
	new_rel_pages = count_nondeletable_pages(onerel, vacrelstats);

	if (new_rel_pages >= old_rel_pages)
	{
		/* can't do anything after all */
		UnlockRelation(onerel, AccessExclusiveLock);
		return;
	}

	/*
	 * Okay to truncate.
	 */
	RelationTruncate(onerel, new_rel_pages);

	/* force relcache inval so all backends reset their rd_targblock */
	CacheInvalidateRelcache(onerel);

	/*
	 * Note: once we have truncated, we *must* keep the exclusive lock until
	 * commit.  The sinval message won't be sent until commit, and other
	 * backends must see it and reset their rd_targblock values before they
	 * can safely access the table again.
	 */

	/* update statistics */
	vacrelstats->rel_pages = new_rel_pages;
	vacrelstats->pages_removed = old_rel_pages - new_rel_pages;

	ereport(elevel,
			(errmsg("\"%s\": truncated %u to %u pages",
					RelationGetRelationName(onerel),
					old_rel_pages, new_rel_pages),
			 errdetail("%s.",
					   pg_rusage_show(&ru0))));
}

/*
 * Rescan end pages to verify that they are (still) empty of tuples.
 *
 * Returns number of nondeletable pages (last nonempty page + 1).
 */
static BlockNumber
count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
{
	BlockNumber blkno;

	/* Strange coding of loop control is needed because blkno is unsigned */
	blkno = vacrelstats->rel_pages;
	while (blkno > vacrelstats->nonempty_pages)
	{
		Buffer		buf;
		Page		page;
		OffsetNumber offnum,
					maxoff;
		bool		hastup;

		/*
		 * We don't insert a vacuum delay point here, because we have an
		 * exclusive lock on the table which we want to hold for as short a
		 * time as possible.  We still need to check for interrupts however.
		 */
		CHECK_FOR_INTERRUPTS();

		blkno--;

		buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
								 RBM_NORMAL, vac_strategy);

		/* In this phase we only need shared access to the buffer */
		LockBuffer(buf, BUFFER_LOCK_SHARE);

		page = BufferGetPage(buf);

		if (PageIsNew(page) || PageIsEmpty(page))
		{
			/* PageIsNew probably shouldn't happen... */
			UnlockReleaseBuffer(buf);
			continue;
		}

		hastup = false;
		maxoff = PageGetMaxOffsetNumber(page);
		for (offnum = FirstOffsetNumber;
			 offnum <= maxoff;
			 offnum = OffsetNumberNext(offnum))
		{
			ItemId		itemid;

			itemid = PageGetItemId(page, offnum);

			/*
			 * Note: any non-unused item should be taken as a reason to keep
			 * this page.  We formerly thought that DEAD tuples could be
			 * thrown away, but that's not so, because we'd not have cleaned
			 * out their index entries.
			 */
			if (ItemIdIsUsed(itemid))
			{
				hastup = true;
				break;			/* can stop scanning */
			}
		}						/* scan along page */

		UnlockReleaseBuffer(buf);

		/* Done scanning if we found a tuple here */
		if (hastup)
			return blkno + 1;
	}

	/*
	 * If we fall out of the loop, all the previously-thought-to-be-empty
	 * pages still are; we need not bother to look at the last known-nonempty
	 * page.
	 */
	return vacrelstats->nonempty_pages;
}

/*
 * lazy_space_alloc - space allocation decisions for lazy vacuum
 *
 * See the comments at the head of this file for rationale.
 */
static void
lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks)
{
	long		maxtuples;

	if (vacrelstats->hasindex)
	{
		maxtuples = (maintenance_work_mem * 1024L) / sizeof(ItemPointerData);
		maxtuples = Min(maxtuples, INT_MAX);
		maxtuples = Min(maxtuples, MaxAllocSize / sizeof(ItemPointerData));

		/* curious coding here to ensure the multiplication can't overflow */
		if ((BlockNumber) (maxtuples / LAZY_ALLOC_TUPLES) > relblocks)
			maxtuples = relblocks * LAZY_ALLOC_TUPLES;

		/* stay sane if small maintenance_work_mem */
		maxtuples = Max(maxtuples, MaxHeapTuplesPerPage);
	}
	else
	{
		maxtuples = MaxHeapTuplesPerPage;
	}

	vacrelstats->num_dead_tuples = 0;
	vacrelstats->max_dead_tuples = (int) maxtuples;
	vacrelstats->dead_tuples = (ItemPointer)
		palloc(maxtuples * sizeof(ItemPointerData));
}

/*
 * lazy_record_dead_tuple - remember one deletable tuple
 */
static void
lazy_record_dead_tuple(LVRelStats *vacrelstats,
					   ItemPointer itemptr)
{
	/*
	 * The array shouldn't overflow under normal behavior, but perhaps it
	 * could if we are given a really small maintenance_work_mem.  In that
	 * case, just forget the last few tuples (we'll get 'em next time).
	 */
	if (vacrelstats->num_dead_tuples < vacrelstats->max_dead_tuples)
	{
		vacrelstats->dead_tuples[vacrelstats->num_dead_tuples] = *itemptr;
		vacrelstats->num_dead_tuples++;
	}
}

/*
 *	lazy_tid_reaped() -- is a particular tid deletable?
 *
 *		This has the right signature to be an IndexBulkDeleteCallback.
 *
 *		Assumes dead_tuples array is in sorted order.
 */
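/*
 * As a rough illustration of the cost: each probe is O(log n) in the number
 * of remembered dead tuples, so an array of, say, 11 million TIDs costs
 * about 23 comparisons per index entry examined.
 */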
static bool
lazy_tid_reaped(ItemPointer itemptr, void *state)
{
	LVRelStats *vacrelstats = (LVRelStats *) state;
	ItemPointer res;

	res = (ItemPointer) bsearch((void *) itemptr,
								(void *) vacrelstats->dead_tuples,
								vacrelstats->num_dead_tuples,
								sizeof(ItemPointerData),
								vac_cmp_itemptr);

	return (res != NULL);
}

/*
 * Comparator routines for use with qsort() and bsearch().
 */
static int
vac_cmp_itemptr(const void *left, const void *right)
{
	BlockNumber lblk,
				rblk;
	OffsetNumber loff,
				roff;

	lblk = ItemPointerGetBlockNumber((ItemPointer) left);
	rblk = ItemPointerGetBlockNumber((ItemPointer) right);

	if (lblk < rblk)
		return -1;
	if (lblk > rblk)
		return 1;

	loff = ItemPointerGetOffsetNumber((ItemPointer) left);
	roff = ItemPointerGetOffsetNumber((ItemPointer) right);

	if (loff < roff)
		return -1;
	if (loff > roff)
		return 1;

	return 0;
}