Properly access a buffer's LSN using existing access macros instead of abusing
[PostgreSQL.git] / src / backend / storage / smgr / smgr.c
bloba0c601dc28ed85f8fc89e3723be46ee3b3d8e9f7
1 /*-------------------------------------------------------------------------
3 * smgr.c
4 * public interface routines to storage manager switch.
6 * All file system operations in POSTGRES dispatch through these
7 * routines.
9 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
10 * Portions Copyright (c) 1994, Regents of the University of California
13 * IDENTIFICATION
14 * $PostgreSQL$
16 *-------------------------------------------------------------------------
18 #include "postgres.h"
20 #include "access/xact.h"
21 #include "access/xlogutils.h"
22 #include "commands/tablespace.h"
23 #include "storage/bufmgr.h"
24 #include "storage/ipc.h"
25 #include "storage/smgr.h"
26 #include "utils/hsearch.h"
27 #include "utils/memutils.h"
31 * This struct of function pointers defines the API between smgr.c and
32 * any individual storage manager module. Note that smgr subfunctions are
33 * generally expected to report problems via elog(ERROR). An exception is
34 * that smgr_unlink should use elog(WARNING), rather than erroring out,
35 * because we normally unlink relations during post-commit/abort cleanup,
36 * and so it's too late to raise an error. Also, various conditions that
37 * would normally be errors should be allowed during bootstrap and/or WAL
38 * recovery --- see comments in md.c for details.
40 typedef struct f_smgr
42 void (*smgr_init) (void); /* may be NULL */
43 void (*smgr_shutdown) (void); /* may be NULL */
44 void (*smgr_close) (SMgrRelation reln, ForkNumber forknum);
45 void (*smgr_create) (SMgrRelation reln, ForkNumber forknum,
46 bool isRedo);
47 bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum);
48 void (*smgr_unlink) (RelFileNode rnode, ForkNumber forknum,
49 bool isRedo);
50 void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum,
51 BlockNumber blocknum, char *buffer, bool isTemp);
52 void (*smgr_read) (SMgrRelation reln, ForkNumber forknum,
53 BlockNumber blocknum, char *buffer);
54 void (*smgr_write) (SMgrRelation reln, ForkNumber forknum,
55 BlockNumber blocknum, char *buffer, bool isTemp);
56 BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum);
57 void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum,
58 BlockNumber nblocks, bool isTemp);
59 void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum);
60 void (*smgr_commit) (void); /* may be NULL */
61 void (*smgr_abort) (void); /* may be NULL */
62 void (*smgr_pre_ckpt) (void); /* may be NULL */
63 void (*smgr_sync) (void); /* may be NULL */
64 void (*smgr_post_ckpt) (void); /* may be NULL */
65 } f_smgr;
68 static const f_smgr smgrsw[] = {
69 /* magnetic disk */
70 {mdinit, NULL, mdclose, mdcreate, mdexists, mdunlink, mdextend,
71 mdread, mdwrite, mdnblocks, mdtruncate, mdimmedsync,
72 NULL, NULL, mdpreckpt, mdsync, mdpostckpt
76 static const int NSmgr = lengthof(smgrsw);
80 * Each backend has a hashtable that stores all extant SMgrRelation objects.
82 static HTAB *SMgrRelationHash = NULL;
85 * We keep a list of all relations (represented as RelFileNode values)
86 * that have been created or deleted in the current transaction. When
87 * a relation is created, we create the physical file immediately, but
88 * remember it so that we can delete the file again if the current
89 * transaction is aborted. Conversely, a deletion request is NOT
90 * executed immediately, but is just entered in the list. When and if
91 * the transaction commits, we can delete the physical file.
93 * To handle subtransactions, every entry is marked with its transaction
94 * nesting level. At subtransaction commit, we reassign the subtransaction's
95 * entries to the parent nesting level. At subtransaction abort, we can
96 * immediately execute the abort-time actions for all entries of the current
97 * nesting level.
99 * NOTE: the list is kept in TopMemoryContext to be sure it won't disappear
100 * unbetimes. It'd probably be OK to keep it in TopTransactionContext,
101 * but I'm being paranoid.
104 typedef struct PendingRelDelete
106 RelFileNode relnode; /* relation that may need to be deleted */
107 ForkNumber forknum; /* fork number that may need to be deleted */
108 int which; /* which storage manager? */
109 bool isTemp; /* is it a temporary relation? */
110 bool atCommit; /* T=delete at commit; F=delete at abort */
111 int nestLevel; /* xact nesting level of request */
112 struct PendingRelDelete *next; /* linked-list link */
113 } PendingRelDelete;
115 static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */
119 * Declarations for smgr-related XLOG records
121 * Note: we log file creation and truncation here, but logging of deletion
122 * actions is handled by xact.c, because it is part of transaction commit.
125 /* XLOG gives us high 4 bits */
126 #define XLOG_SMGR_CREATE 0x10
127 #define XLOG_SMGR_TRUNCATE 0x20
129 typedef struct xl_smgr_create
131 RelFileNode rnode;
132 ForkNumber forknum;
133 } xl_smgr_create;
135 typedef struct xl_smgr_truncate
137 BlockNumber blkno;
138 RelFileNode rnode;
139 ForkNumber forknum;
140 } xl_smgr_truncate;
143 /* local function prototypes */
144 static void smgrshutdown(int code, Datum arg);
145 static void smgr_internal_unlink(RelFileNode rnode, ForkNumber forknum,
146 int which, bool isTemp, bool isRedo);
150 * smgrinit(), smgrshutdown() -- Initialize or shut down storage
151 * managers.
153 * Note: smgrinit is called during backend startup (normal or standalone
154 * case), *not* during postmaster start. Therefore, any resources created
155 * here or destroyed in smgrshutdown are backend-local.
157 void
158 smgrinit(void)
160 int i;
162 for (i = 0; i < NSmgr; i++)
164 if (smgrsw[i].smgr_init)
165 (*(smgrsw[i].smgr_init)) ();
168 /* register the shutdown proc */
169 on_proc_exit(smgrshutdown, 0);
173 * on_proc_exit hook for smgr cleanup during backend shutdown
175 static void
176 smgrshutdown(int code, Datum arg)
178 int i;
180 for (i = 0; i < NSmgr; i++)
182 if (smgrsw[i].smgr_shutdown)
183 (*(smgrsw[i].smgr_shutdown)) ();
188 * smgropen() -- Return an SMgrRelation object, creating it if need be.
190 * This does not attempt to actually open the object.
192 SMgrRelation
193 smgropen(RelFileNode rnode)
195 SMgrRelation reln;
196 bool found;
198 if (SMgrRelationHash == NULL)
200 /* First time through: initialize the hash table */
201 HASHCTL ctl;
203 MemSet(&ctl, 0, sizeof(ctl));
204 ctl.keysize = sizeof(RelFileNode);
205 ctl.entrysize = sizeof(SMgrRelationData);
206 ctl.hash = tag_hash;
207 SMgrRelationHash = hash_create("smgr relation table", 400,
208 &ctl, HASH_ELEM | HASH_FUNCTION);
211 /* Look up or create an entry */
212 reln = (SMgrRelation) hash_search(SMgrRelationHash,
213 (void *) &rnode,
214 HASH_ENTER, &found);
216 /* Initialize it if not present before */
217 if (!found)
219 int forknum;
221 /* hash_search already filled in the lookup key */
222 reln->smgr_owner = NULL;
223 reln->smgr_which = 0; /* we only have md.c at present */
225 /* mark it not open */
226 for(forknum = 0; forknum <= MAX_FORKNUM; forknum++)
227 reln->md_fd[forknum] = NULL;
230 return reln;
234 * smgrsetowner() -- Establish a long-lived reference to an SMgrRelation object
236 * There can be only one owner at a time; this is sufficient since currently
237 * the only such owners exist in the relcache.
239 void
240 smgrsetowner(SMgrRelation *owner, SMgrRelation reln)
243 * First, unhook any old owner. (Normally there shouldn't be any, but it
244 * seems possible that this can happen during swap_relation_files()
245 * depending on the order of processing. It's ok to close the old
246 * relcache entry early in that case.)
248 if (reln->smgr_owner)
249 *(reln->smgr_owner) = NULL;
251 /* Now establish the ownership relationship. */
252 reln->smgr_owner = owner;
253 *owner = reln;
257 * smgrexists() -- Does the underlying file for a fork exist?
259 bool
260 smgrexists(SMgrRelation reln, ForkNumber forknum)
262 return (*(smgrsw[reln->smgr_which].smgr_exists)) (reln, forknum);
266 * smgrclose() -- Close and delete an SMgrRelation object.
268 void
269 smgrclose(SMgrRelation reln)
271 SMgrRelation *owner;
272 ForkNumber forknum;
274 for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
275 (*(smgrsw[reln->smgr_which].smgr_close)) (reln, forknum);
277 owner = reln->smgr_owner;
279 if (hash_search(SMgrRelationHash,
280 (void *) &(reln->smgr_rnode),
281 HASH_REMOVE, NULL) == NULL)
282 elog(ERROR, "SMgrRelation hashtable corrupted");
285 * Unhook the owner pointer, if any. We do this last since in the remote
286 * possibility of failure above, the SMgrRelation object will still exist.
288 if (owner)
289 *owner = NULL;
293 * smgrcloseall() -- Close all existing SMgrRelation objects.
295 void
296 smgrcloseall(void)
298 HASH_SEQ_STATUS status;
299 SMgrRelation reln;
301 /* Nothing to do if hashtable not set up */
302 if (SMgrRelationHash == NULL)
303 return;
305 hash_seq_init(&status, SMgrRelationHash);
307 while ((reln = (SMgrRelation) hash_seq_search(&status)) != NULL)
308 smgrclose(reln);
312 * smgrclosenode() -- Close SMgrRelation object for given RelFileNode,
313 * if one exists.
315 * This has the same effects as smgrclose(smgropen(rnode)), but it avoids
316 * uselessly creating a hashtable entry only to drop it again when no
317 * such entry exists already.
319 void
320 smgrclosenode(RelFileNode rnode)
322 SMgrRelation reln;
324 /* Nothing to do if hashtable not set up */
325 if (SMgrRelationHash == NULL)
326 return;
328 reln = (SMgrRelation) hash_search(SMgrRelationHash,
329 (void *) &rnode,
330 HASH_FIND, NULL);
331 if (reln != NULL)
332 smgrclose(reln);
336 * smgrcreate() -- Create a new relation.
338 * Given an already-created (but presumably unused) SMgrRelation,
339 * cause the underlying disk file or other storage for the fork
340 * to be created.
342 * If isRedo is true, it is okay for the underlying file to exist
343 * already because we are in a WAL replay sequence. In this case
344 * we should make no PendingRelDelete entry; the WAL sequence will
345 * tell whether to drop the file.
347 void
348 smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isTemp, bool isRedo)
350 XLogRecPtr lsn;
351 XLogRecData rdata;
352 xl_smgr_create xlrec;
353 PendingRelDelete *pending;
356 * Exit quickly in WAL replay mode if we've already opened the file.
357 * If it's open, it surely must exist.
359 if (isRedo && reln->md_fd[forknum] != NULL)
360 return;
363 * We may be using the target table space for the first time in this
364 * database, so create a per-database subdirectory if needed.
366 * XXX this is a fairly ugly violation of module layering, but this seems
367 * to be the best place to put the check. Maybe TablespaceCreateDbspace
368 * should be here and not in commands/tablespace.c? But that would imply
369 * importing a lot of stuff that smgr.c oughtn't know, either.
371 TablespaceCreateDbspace(reln->smgr_rnode.spcNode,
372 reln->smgr_rnode.dbNode,
373 isRedo);
375 (*(smgrsw[reln->smgr_which].smgr_create)) (reln, forknum, isRedo);
377 if (isRedo)
378 return;
381 * Make an XLOG entry showing the file creation. If we abort, the file
382 * will be dropped at abort time.
384 xlrec.rnode = reln->smgr_rnode;
385 xlrec.forknum = forknum;
387 rdata.data = (char *) &xlrec;
388 rdata.len = sizeof(xlrec);
389 rdata.buffer = InvalidBuffer;
390 rdata.next = NULL;
392 lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE, &rdata);
394 /* Add the relation to the list of stuff to delete at abort */
395 pending = (PendingRelDelete *)
396 MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
397 pending->relnode = reln->smgr_rnode;
398 pending->forknum = forknum;
399 pending->which = reln->smgr_which;
400 pending->isTemp = isTemp;
401 pending->atCommit = false; /* delete if abort */
402 pending->nestLevel = GetCurrentTransactionNestLevel();
403 pending->next = pendingDeletes;
404 pendingDeletes = pending;
408 * smgrscheduleunlink() -- Schedule unlinking a relation at xact commit.
410 * The fork is marked to be removed from the store if we successfully
411 * commit the current transaction.
413 void
414 smgrscheduleunlink(SMgrRelation reln, ForkNumber forknum, bool isTemp)
416 PendingRelDelete *pending;
418 /* Add the relation to the list of stuff to delete at commit */
419 pending = (PendingRelDelete *)
420 MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
421 pending->relnode = reln->smgr_rnode;
422 pending->forknum = forknum;
423 pending->which = reln->smgr_which;
424 pending->isTemp = isTemp;
425 pending->atCommit = true; /* delete if commit */
426 pending->nestLevel = GetCurrentTransactionNestLevel();
427 pending->next = pendingDeletes;
428 pendingDeletes = pending;
431 * NOTE: if the relation was created in this transaction, it will now be
432 * present in the pending-delete list twice, once with atCommit true and
433 * once with atCommit false. Hence, it will be physically deleted at end
434 * of xact in either case (and the other entry will be ignored by
435 * smgrDoPendingDeletes, so no error will occur). We could instead remove
436 * the existing list entry and delete the physical file immediately, but
437 * for now I'll keep the logic simple.
442 * smgrdounlink() -- Immediately unlink a relation.
444 * The specified fork of the relation is removed from the store. This
445 * should not be used during transactional operations, since it can't be
446 * undone.
448 * If isRedo is true, it is okay for the underlying file to be gone
449 * already.
451 void
452 smgrdounlink(SMgrRelation reln, ForkNumber forknum, bool isTemp, bool isRedo)
454 RelFileNode rnode = reln->smgr_rnode;
455 int which = reln->smgr_which;
457 /* Close the fork */
458 (*(smgrsw[which].smgr_close)) (reln, forknum);
460 smgr_internal_unlink(rnode, forknum, which, isTemp, isRedo);
464 * Shared subroutine that actually does the unlink ...
466 static void
467 smgr_internal_unlink(RelFileNode rnode, ForkNumber forknum,
468 int which, bool isTemp, bool isRedo)
471 * Get rid of any remaining buffers for the relation. bufmgr will just
472 * drop them without bothering to write the contents.
474 DropRelFileNodeBuffers(rnode, forknum, isTemp, 0);
477 * It'd be nice to tell the stats collector to forget it immediately, too.
478 * But we can't because we don't know the OID (and in cases involving
479 * relfilenode swaps, it's not always clear which table OID to forget,
480 * anyway).
484 * And delete the physical files.
486 * Note: smgr_unlink must treat deletion failure as a WARNING, not an
487 * ERROR, because we've already decided to commit or abort the current
488 * xact.
490 (*(smgrsw[which].smgr_unlink)) (rnode, forknum, isRedo);
494 * smgrextend() -- Add a new block to a file.
496 * The semantics are nearly the same as smgrwrite(): write at the
497 * specified position. However, this is to be used for the case of
498 * extending a relation (i.e., blocknum is at or beyond the current
499 * EOF). Note that we assume writing a block beyond current EOF
500 * causes intervening file space to become filled with zeroes.
502 void
503 smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
504 char *buffer, bool isTemp)
506 (*(smgrsw[reln->smgr_which].smgr_extend)) (reln, forknum, blocknum,
507 buffer, isTemp);
511 * smgrread() -- read a particular block from a relation into the supplied
512 * buffer.
514 * This routine is called from the buffer manager in order to
515 * instantiate pages in the shared buffer cache. All storage managers
516 * return pages in the format that POSTGRES expects.
518 void
519 smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
520 char *buffer)
522 (*(smgrsw[reln->smgr_which].smgr_read)) (reln, forknum, blocknum, buffer);
526 * smgrwrite() -- Write the supplied buffer out.
528 * This is to be used only for updating already-existing blocks of a
529 * relation (ie, those before the current EOF). To extend a relation,
530 * use smgrextend().
532 * This is not a synchronous write -- the block is not necessarily
533 * on disk at return, only dumped out to the kernel. However,
534 * provisions will be made to fsync the write before the next checkpoint.
536 * isTemp indicates that the relation is a temp table (ie, is managed
537 * by the local-buffer manager). In this case no provisions need be
538 * made to fsync the write before checkpointing.
540 void
541 smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
542 char *buffer, bool isTemp)
544 (*(smgrsw[reln->smgr_which].smgr_write)) (reln, forknum, blocknum,
545 buffer, isTemp);
549 * smgrnblocks() -- Calculate the number of blocks in the
550 * supplied relation.
552 BlockNumber
553 smgrnblocks(SMgrRelation reln, ForkNumber forknum)
555 return (*(smgrsw[reln->smgr_which].smgr_nblocks)) (reln, forknum);
559 * smgrtruncate() -- Truncate supplied relation to the specified number
560 * of blocks
562 void
563 smgrtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks,
564 bool isTemp)
567 * Get rid of any buffers for the about-to-be-deleted blocks. bufmgr will
568 * just drop them without bothering to write the contents.
570 DropRelFileNodeBuffers(reln->smgr_rnode, forknum, isTemp, nblocks);
572 /* Do the truncation */
573 (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, forknum, nblocks,
574 isTemp);
576 if (!isTemp)
579 * Make an XLOG entry showing the file truncation.
581 XLogRecPtr lsn;
582 XLogRecData rdata;
583 xl_smgr_truncate xlrec;
585 xlrec.blkno = nblocks;
586 xlrec.rnode = reln->smgr_rnode;
587 xlrec.forknum = forknum;
589 rdata.data = (char *) &xlrec;
590 rdata.len = sizeof(xlrec);
591 rdata.buffer = InvalidBuffer;
592 rdata.next = NULL;
594 lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_TRUNCATE, &rdata);
599 * smgrimmedsync() -- Force the specified relation to stable storage.
601 * Synchronously force all previous writes to the specified relation
602 * down to disk.
604 * This is useful for building completely new relations (eg, new
605 * indexes). Instead of incrementally WAL-logging the index build
606 * steps, we can just write completed index pages to disk with smgrwrite
607 * or smgrextend, and then fsync the completed index file before
608 * committing the transaction. (This is sufficient for purposes of
609 * crash recovery, since it effectively duplicates forcing a checkpoint
610 * for the completed index. But it is *not* sufficient if one wishes
611 * to use the WAL log for PITR or replication purposes: in that case
612 * we have to make WAL entries as well.)
614 * The preceding writes should specify isTemp = true to avoid
615 * duplicative fsyncs.
617 * Note that you need to do FlushRelationBuffers() first if there is
618 * any possibility that there are dirty buffers for the relation;
619 * otherwise the sync is not very meaningful.
621 void
622 smgrimmedsync(SMgrRelation reln, ForkNumber forknum)
624 (*(smgrsw[reln->smgr_which].smgr_immedsync)) (reln, forknum);
629 * PostPrepare_smgr -- Clean up after a successful PREPARE
631 * What we have to do here is throw away the in-memory state about pending
632 * relation deletes. It's all been recorded in the 2PC state file and
633 * it's no longer smgr's job to worry about it.
635 void
636 PostPrepare_smgr(void)
638 PendingRelDelete *pending;
639 PendingRelDelete *next;
641 for (pending = pendingDeletes; pending != NULL; pending = next)
643 next = pending->next;
644 pendingDeletes = next;
645 /* must explicitly free the list entry */
646 pfree(pending);
652 * smgrDoPendingDeletes() -- Take care of relation deletes at end of xact.
654 * This also runs when aborting a subxact; we want to clean up a failed
655 * subxact immediately.
657 void
658 smgrDoPendingDeletes(bool isCommit)
660 int nestLevel = GetCurrentTransactionNestLevel();
661 PendingRelDelete *pending;
662 PendingRelDelete *prev;
663 PendingRelDelete *next;
665 prev = NULL;
666 for (pending = pendingDeletes; pending != NULL; pending = next)
668 next = pending->next;
669 if (pending->nestLevel < nestLevel)
671 /* outer-level entries should not be processed yet */
672 prev = pending;
674 else
676 /* unlink list entry first, so we don't retry on failure */
677 if (prev)
678 prev->next = next;
679 else
680 pendingDeletes = next;
681 /* do deletion if called for */
682 if (pending->atCommit == isCommit)
683 smgr_internal_unlink(pending->relnode,
684 pending->forknum,
685 pending->which,
686 pending->isTemp,
687 false);
688 /* must explicitly free the list entry */
689 pfree(pending);
690 /* prev does not change */
696 * smgrGetPendingDeletes() -- Get a list of relations to be deleted.
698 * The return value is the number of relations scheduled for termination.
699 * *ptr is set to point to a freshly-palloc'd array of RelFileForks.
700 * If there are no relations to be deleted, *ptr is set to NULL.
702 * If haveNonTemp isn't NULL, the bool it points to gets set to true if
703 * there is any non-temp table pending to be deleted; false if not.
705 * Note that the list does not include anything scheduled for termination
706 * by upper-level transactions.
709 smgrGetPendingDeletes(bool forCommit, RelFileFork **ptr, bool *haveNonTemp)
711 int nestLevel = GetCurrentTransactionNestLevel();
712 int nrels;
713 RelFileFork *rptr;
714 PendingRelDelete *pending;
716 nrels = 0;
717 if (haveNonTemp)
718 *haveNonTemp = false;
719 for (pending = pendingDeletes; pending != NULL; pending = pending->next)
721 if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit)
722 nrels++;
724 if (nrels == 0)
726 *ptr = NULL;
727 return 0;
729 rptr = (RelFileFork *) palloc(nrels * sizeof(RelFileFork));
730 *ptr = rptr;
731 for (pending = pendingDeletes; pending != NULL; pending = pending->next)
733 if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit)
735 rptr->rnode = pending->relnode;
736 rptr->forknum = pending->forknum;
737 rptr++;
739 if (haveNonTemp && !pending->isTemp)
740 *haveNonTemp = true;
742 return nrels;
746 * AtSubCommit_smgr() --- Take care of subtransaction commit.
748 * Reassign all items in the pending-deletes list to the parent transaction.
750 void
751 AtSubCommit_smgr(void)
753 int nestLevel = GetCurrentTransactionNestLevel();
754 PendingRelDelete *pending;
756 for (pending = pendingDeletes; pending != NULL; pending = pending->next)
758 if (pending->nestLevel >= nestLevel)
759 pending->nestLevel = nestLevel - 1;
764 * AtSubAbort_smgr() --- Take care of subtransaction abort.
766 * Delete created relations and forget about deleted relations.
767 * We can execute these operations immediately because we know this
768 * subtransaction will not commit.
770 void
771 AtSubAbort_smgr(void)
773 smgrDoPendingDeletes(false);
777 * smgrcommit() -- Prepare to commit changes made during the current
778 * transaction.
780 * This is called before we actually commit.
782 void
783 smgrcommit(void)
785 int i;
787 for (i = 0; i < NSmgr; i++)
789 if (smgrsw[i].smgr_commit)
790 (*(smgrsw[i].smgr_commit)) ();
795 * smgrabort() -- Clean up after transaction abort.
797 void
798 smgrabort(void)
800 int i;
802 for (i = 0; i < NSmgr; i++)
804 if (smgrsw[i].smgr_abort)
805 (*(smgrsw[i].smgr_abort)) ();
810 * smgrpreckpt() -- Prepare for checkpoint.
812 void
813 smgrpreckpt(void)
815 int i;
817 for (i = 0; i < NSmgr; i++)
819 if (smgrsw[i].smgr_pre_ckpt)
820 (*(smgrsw[i].smgr_pre_ckpt)) ();
825 * smgrsync() -- Sync files to disk during checkpoint.
827 void
828 smgrsync(void)
830 int i;
832 for (i = 0; i < NSmgr; i++)
834 if (smgrsw[i].smgr_sync)
835 (*(smgrsw[i].smgr_sync)) ();
840 * smgrpostckpt() -- Post-checkpoint cleanup.
842 void
843 smgrpostckpt(void)
845 int i;
847 for (i = 0; i < NSmgr; i++)
849 if (smgrsw[i].smgr_post_ckpt)
850 (*(smgrsw[i].smgr_post_ckpt)) ();
855 void
856 smgr_redo(XLogRecPtr lsn, XLogRecord *record)
858 uint8 info = record->xl_info & ~XLR_INFO_MASK;
860 if (info == XLOG_SMGR_CREATE)
862 xl_smgr_create *xlrec = (xl_smgr_create *) XLogRecGetData(record);
863 SMgrRelation reln;
865 reln = smgropen(xlrec->rnode);
866 smgrcreate(reln, xlrec->forknum, false, true);
868 else if (info == XLOG_SMGR_TRUNCATE)
870 xl_smgr_truncate *xlrec = (xl_smgr_truncate *) XLogRecGetData(record);
871 SMgrRelation reln;
873 reln = smgropen(xlrec->rnode);
876 * Forcibly create relation if it doesn't exist (which suggests that
877 * it was dropped somewhere later in the WAL sequence). As in
878 * XLogOpenRelation, we prefer to recreate the rel and replay the log
879 * as best we can until the drop is seen.
881 smgrcreate(reln, xlrec->forknum, false, true);
883 /* Can't use smgrtruncate because it would try to xlog */
886 * First, force bufmgr to drop any buffers it has for the to-be-
887 * truncated blocks. We must do this, else subsequent XLogReadBuffer
888 * operations will not re-extend the file properly.
890 DropRelFileNodeBuffers(xlrec->rnode, xlrec->forknum, false,
891 xlrec->blkno);
893 /* Do the truncation */
894 (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln,
895 xlrec->forknum,
896 xlrec->blkno,
897 false);
899 /* Also tell xlogutils.c about it */
900 XLogTruncateRelation(xlrec->rnode, xlrec->forknum, xlrec->blkno);
902 else
903 elog(PANIC, "smgr_redo: unknown op code %u", info);
906 void
907 smgr_desc(StringInfo buf, uint8 xl_info, char *rec)
909 uint8 info = xl_info & ~XLR_INFO_MASK;
911 if (info == XLOG_SMGR_CREATE)
913 xl_smgr_create *xlrec = (xl_smgr_create *) rec;
915 appendStringInfo(buf, "file create: %u/%u/%u/%u",
916 xlrec->rnode.spcNode, xlrec->rnode.dbNode,
917 xlrec->rnode.relNode, xlrec->forknum);
919 else if (info == XLOG_SMGR_TRUNCATE)
921 xl_smgr_truncate *xlrec = (xl_smgr_truncate *) rec;
923 appendStringInfo(buf, "file truncate: %u/%u/%u/%u to %u blocks",
924 xlrec->rnode.spcNode, xlrec->rnode.dbNode,
925 xlrec->rnode.relNode, xlrec->forknum,
926 xlrec->blkno);
928 else
929 appendStringInfo(buf, "UNKNOWN");