/*-------------------------------------------------------------------------
 *
 * dsm.c
 *	  manage dynamic shared memory segments
 *
 * This file provides a set of services to make programming with dynamic
 * shared memory segments more convenient.  Unlike the low-level
 * facilities provided by dsm_impl.h and dsm_impl.c, mappings and segments
 * created using this module will be cleaned up automatically.  Mappings
 * will be removed when the resource owner under which they were created
 * is cleaned up, unless dsm_pin_mapping() is used, in which case they
 * have session lifespan.  Segments will be removed when there are no
 * remaining mappings, or at postmaster shutdown in any case.  After a
 * hard postmaster crash, remaining segments will be removed, if they
 * still exist, at the next postmaster startup.
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/storage/ipc/dsm.c
 *
 *-------------------------------------------------------------------------
 */
36 #include "common/pg_prng.h"
37 #include "lib/ilist.h"
38 #include "miscadmin.h"
39 #include "port/pg_bitutils.h"
40 #include "storage/dsm.h"
41 #include "storage/ipc.h"
42 #include "storage/lwlock.h"
43 #include "storage/pg_shmem.h"
44 #include "utils/freepage.h"
45 #include "utils/guc.h"
46 #include "utils/memutils.h"
47 #include "utils/resowner_private.h"
49 #define PG_DYNSHMEM_CONTROL_MAGIC 0x9a503d32
51 #define PG_DYNSHMEM_FIXED_SLOTS 64
52 #define PG_DYNSHMEM_SLOTS_PER_BACKEND 5
54 #define INVALID_CONTROL_SLOT ((uint32) -1)
56 /* Backend-local tracking for on-detach callbacks. */
57 typedef struct dsm_segment_detach_callback
59 on_dsm_detach_callback function
;
62 } dsm_segment_detach_callback
;
64 /* Backend-local state for a dynamic shared memory segment. */
67 dlist_node node
; /* List link in dsm_segment_list. */
68 ResourceOwner resowner
; /* Resource owner. */
69 dsm_handle handle
; /* Segment name. */
70 uint32 control_slot
; /* Slot in control segment. */
71 void *impl_private
; /* Implementation-specific private data. */
72 void *mapped_address
; /* Mapping address, or NULL if unmapped. */
73 Size mapped_size
; /* Size of our mapping. */
74 slist_head on_detach
; /* On-detach callbacks. */
77 /* Shared-memory state for a dynamic shared memory segment. */
78 typedef struct dsm_control_item
81 uint32 refcnt
; /* 2+ = active, 1 = moribund, 0 = gone */
84 void *impl_private_pm_handle
; /* only needed on Windows */
88 /* Layout of the dynamic shared memory control segment. */
89 typedef struct dsm_control_header
94 dsm_control_item item
[FLEXIBLE_ARRAY_MEMBER
];
97 static void dsm_cleanup_for_mmap(void);
98 static void dsm_postmaster_shutdown(int code
, Datum arg
);
99 static dsm_segment
*dsm_create_descriptor(void);
100 static bool dsm_control_segment_sane(dsm_control_header
*control
,
102 static uint64
dsm_control_bytes_needed(uint32 nitems
);
103 static inline dsm_handle
make_main_region_dsm_handle(int slot
);
104 static inline bool is_main_region_dsm_handle(dsm_handle handle
);
106 /* Has this backend initialized the dynamic shared memory system yet? */
107 static bool dsm_init_done
= false;
109 /* Preallocated DSM space in the main shared memory region. */
110 static void *dsm_main_space_begin
= NULL
;
113 * List of dynamic shared memory segments used by this backend.
115 * At process exit time, we must decrement the reference count of each
116 * segment we have attached; this list makes it possible to find all such
119 * This list should always be empty in the postmaster. We could probably
120 * allow the postmaster to map dynamic shared memory segments before it
121 * begins to start child processes, provided that each process adjusted
122 * the reference counts for those segments in the control segment at
123 * startup time, but there's no obvious need for such a facility, which
124 * would also be complex to handle in the EXEC_BACKEND case. Once the
125 * postmaster has begun spawning children, there's an additional problem:
126 * each new mapping would require an update to the control segment,
127 * which requires locking, in which the postmaster must not be involved.
129 static dlist_head dsm_segment_list
= DLIST_STATIC_INIT(dsm_segment_list
);
132 * Control segment information.
134 * Unlike ordinary shared memory segments, the control segment is not
135 * reference counted; instead, it lasts for the postmaster's entire
136 * life cycle. For simplicity, it doesn't have a dsm_segment object either.
138 static dsm_handle dsm_control_handle
;
139 static dsm_control_header
*dsm_control
;
140 static Size dsm_control_mapped_size
= 0;
141 static void *dsm_control_impl_private
= NULL
;
144 * Start up the dynamic shared memory system.
146 * This is called just once during each cluster lifetime, at postmaster
150 dsm_postmaster_startup(PGShmemHeader
*shim
)
152 void *dsm_control_address
= NULL
;
156 Assert(!IsUnderPostmaster
);
159 * If we're using the mmap implementations, clean up any leftovers.
160 * Cleanup isn't needed on Windows, and happens earlier in startup for
161 * POSIX and System V shared memory, via a direct call to
162 * dsm_cleanup_using_control_segment.
164 if (dynamic_shared_memory_type
== DSM_IMPL_MMAP
)
165 dsm_cleanup_for_mmap();
167 /* Determine size for new control segment. */
168 maxitems
= PG_DYNSHMEM_FIXED_SLOTS
169 + PG_DYNSHMEM_SLOTS_PER_BACKEND
* MaxBackends
;
170 elog(DEBUG2
, "dynamic shared memory system will support %u segments",
172 segsize
= dsm_control_bytes_needed(maxitems
);
175 * Loop until we find an unused identifier for the new control segment. We
176 * sometimes use 0 as a sentinel value indicating that no control segment
177 * is known to exist, so avoid using that value for a real control
182 Assert(dsm_control_address
== NULL
);
183 Assert(dsm_control_mapped_size
== 0);
184 /* Use even numbers only */
185 dsm_control_handle
= pg_prng_uint32(&pg_global_prng_state
) << 1;
186 if (dsm_control_handle
== DSM_HANDLE_INVALID
)
188 if (dsm_impl_op(DSM_OP_CREATE
, dsm_control_handle
, segsize
,
189 &dsm_control_impl_private
, &dsm_control_address
,
190 &dsm_control_mapped_size
, ERROR
))
193 dsm_control
= dsm_control_address
;
194 on_shmem_exit(dsm_postmaster_shutdown
, PointerGetDatum(shim
));
196 "created dynamic shared memory control segment %u (%zu bytes)",
197 dsm_control_handle
, segsize
);
198 shim
->dsm_control
= dsm_control_handle
;
200 /* Initialize control segment. */
201 dsm_control
->magic
= PG_DYNSHMEM_CONTROL_MAGIC
;
202 dsm_control
->nitems
= 0;
203 dsm_control
->maxitems
= maxitems
;
207 * Determine whether the control segment from the previous postmaster
208 * invocation still exists. If so, remove the dynamic shared memory
209 * segments to which it refers, and then the control segment itself.
212 dsm_cleanup_using_control_segment(dsm_handle old_control_handle
)
214 void *mapped_address
= NULL
;
215 void *junk_mapped_address
= NULL
;
216 void *impl_private
= NULL
;
217 void *junk_impl_private
= NULL
;
218 Size mapped_size
= 0;
219 Size junk_mapped_size
= 0;
222 dsm_control_header
*old_control
;
225 * Try to attach the segment. If this fails, it probably just means that
226 * the operating system has been rebooted and the segment no longer
227 * exists, or an unrelated process has used the same shm ID. So just fall
230 if (!dsm_impl_op(DSM_OP_ATTACH
, old_control_handle
, 0, &impl_private
,
231 &mapped_address
, &mapped_size
, DEBUG1
))
235 * We've managed to reattach it, but the contents might not be sane. If
236 * they aren't, we disregard the segment after all.
238 old_control
= (dsm_control_header
*) mapped_address
;
239 if (!dsm_control_segment_sane(old_control
, mapped_size
))
241 dsm_impl_op(DSM_OP_DETACH
, old_control_handle
, 0, &impl_private
,
242 &mapped_address
, &mapped_size
, LOG
);
247 * OK, the control segment looks basically valid, so we can use it to get
248 * a list of segments that need to be removed.
250 nitems
= old_control
->nitems
;
251 for (i
= 0; i
< nitems
; ++i
)
256 /* If the reference count is 0, the slot is actually unused. */
257 refcnt
= old_control
->item
[i
].refcnt
;
261 /* If it was using the main shmem area, there is nothing to do. */
262 handle
= old_control
->item
[i
].handle
;
263 if (is_main_region_dsm_handle(handle
))
266 /* Log debugging information. */
267 elog(DEBUG2
, "cleaning up orphaned dynamic shared memory with ID %u (reference count %u)",
270 /* Destroy the referenced segment. */
271 dsm_impl_op(DSM_OP_DESTROY
, handle
, 0, &junk_impl_private
,
272 &junk_mapped_address
, &junk_mapped_size
, LOG
);
275 /* Destroy the old control segment, too. */
277 "cleaning up dynamic shared memory control segment with ID %u",
279 dsm_impl_op(DSM_OP_DESTROY
, old_control_handle
, 0, &impl_private
,
280 &mapped_address
, &mapped_size
, LOG
);
284 * When we're using the mmap shared memory implementation, "shared memory"
285 * segments might even manage to survive an operating system reboot.
286 * But there's no guarantee as to exactly what will survive: some segments
287 * may survive, and others may not, and the contents of some may be out
288 * of date. In particular, the control segment may be out of date, so we
289 * can't rely on it to figure out what to remove. However, since we know
290 * what directory contains the files we used as shared memory, we can simply
291 * scan the directory and blow everything away that shouldn't be there.
294 dsm_cleanup_for_mmap(void)
299 /* Scan the directory for something with a name of the correct format. */
300 dir
= AllocateDir(PG_DYNSHMEM_DIR
);
302 while ((dent
= ReadDir(dir
, PG_DYNSHMEM_DIR
)) != NULL
)
304 if (strncmp(dent
->d_name
, PG_DYNSHMEM_MMAP_FILE_PREFIX
,
305 strlen(PG_DYNSHMEM_MMAP_FILE_PREFIX
)) == 0)
307 char buf
[MAXPGPATH
+ sizeof(PG_DYNSHMEM_DIR
)];
309 snprintf(buf
, sizeof(buf
), PG_DYNSHMEM_DIR
"/%s", dent
->d_name
);
311 elog(DEBUG2
, "removing file \"%s\"", buf
);
313 /* We found a matching file; so remove it. */
314 if (unlink(buf
) != 0)
316 (errcode_for_file_access(),
317 errmsg("could not remove file \"%s\": %m", buf
)));
321 /* Cleanup complete. */
326 * At shutdown time, we iterate over the control segment and remove all
327 * remaining dynamic shared memory segments. We avoid throwing errors here;
328 * the postmaster is shutting down either way, and this is just non-critical
332 dsm_postmaster_shutdown(int code
, Datum arg
)
336 void *dsm_control_address
;
337 void *junk_mapped_address
= NULL
;
338 void *junk_impl_private
= NULL
;
339 Size junk_mapped_size
= 0;
340 PGShmemHeader
*shim
= (PGShmemHeader
*) DatumGetPointer(arg
);
343 * If some other backend exited uncleanly, it might have corrupted the
344 * control segment while it was dying. In that case, we warn and ignore
345 * the contents of the control segment. This may end up leaving behind
346 * stray shared memory segments, but there's not much we can do about that
347 * if the metadata is gone.
349 nitems
= dsm_control
->nitems
;
350 if (!dsm_control_segment_sane(dsm_control
, dsm_control_mapped_size
))
353 (errmsg("dynamic shared memory control segment is corrupt")));
357 /* Remove any remaining segments. */
358 for (i
= 0; i
< nitems
; ++i
)
362 /* If the reference count is 0, the slot is actually unused. */
363 if (dsm_control
->item
[i
].refcnt
== 0)
366 handle
= dsm_control
->item
[i
].handle
;
367 if (is_main_region_dsm_handle(handle
))
370 /* Log debugging information. */
371 elog(DEBUG2
, "cleaning up orphaned dynamic shared memory with ID %u",
374 /* Destroy the segment. */
375 dsm_impl_op(DSM_OP_DESTROY
, handle
, 0, &junk_impl_private
,
376 &junk_mapped_address
, &junk_mapped_size
, LOG
);
379 /* Remove the control segment itself. */
381 "cleaning up dynamic shared memory control segment with ID %u",
383 dsm_control_address
= dsm_control
;
384 dsm_impl_op(DSM_OP_DESTROY
, dsm_control_handle
, 0,
385 &dsm_control_impl_private
, &dsm_control_address
,
386 &dsm_control_mapped_size
, LOG
);
387 dsm_control
= dsm_control_address
;
388 shim
->dsm_control
= 0;
392 * Prepare this backend for dynamic shared memory usage. Under EXEC_BACKEND,
393 * we must reread the state file and map the control segment; in other cases,
394 * we'll have inherited the postmaster's mapping and global variables.
397 dsm_backend_startup(void)
401 void *control_address
= NULL
;
403 /* Attach control segment. */
404 Assert(dsm_control_handle
!= 0);
405 dsm_impl_op(DSM_OP_ATTACH
, dsm_control_handle
, 0,
406 &dsm_control_impl_private
, &control_address
,
407 &dsm_control_mapped_size
, ERROR
);
408 dsm_control
= control_address
;
409 /* If control segment doesn't look sane, something is badly wrong. */
410 if (!dsm_control_segment_sane(dsm_control
, dsm_control_mapped_size
))
412 dsm_impl_op(DSM_OP_DETACH
, dsm_control_handle
, 0,
413 &dsm_control_impl_private
, &control_address
,
414 &dsm_control_mapped_size
, WARNING
);
416 (errcode(ERRCODE_INTERNAL_ERROR
),
417 errmsg("dynamic shared memory control segment is not valid")));
422 dsm_init_done
= true;
#ifdef EXEC_BACKEND
/*
 * When running under EXEC_BACKEND, we get a callback here when the main
 * shared memory segment is re-attached, so that we can record the control
 * handle retrieved from it.
 */
void
dsm_set_control_handle(dsm_handle h)
{
	Assert(dsm_control_handle == 0 && h != 0);
	dsm_control_handle = h;
}
#endif
440 * Reserve some space in the main shared memory segment for DSM segments.
443 dsm_estimate_size(void)
445 return 1024 * 1024 * (size_t) min_dynamic_shared_memory
;
449 * Initialize space in the main shared memory segment for DSM segments.
454 size_t size
= dsm_estimate_size();
460 dsm_main_space_begin
= ShmemInitStruct("Preallocated DSM", size
, &found
);
463 FreePageManager
*fpm
= (FreePageManager
*) dsm_main_space_begin
;
464 size_t first_page
= 0;
467 /* Reserve space for the FreePageManager. */
468 while (first_page
* FPM_PAGE_SIZE
< sizeof(FreePageManager
))
471 /* Initialize it and give it all the rest of the space. */
472 FreePageManagerInitialize(fpm
, dsm_main_space_begin
);
473 pages
= (size
/ FPM_PAGE_SIZE
) - first_page
;
474 FreePageManagerPut(fpm
, first_page
, pages
);
479 * Create a new dynamic shared memory segment.
481 * If there is a non-NULL CurrentResourceOwner, the new segment is associated
482 * with it and must be detached before the resource owner releases, or a
483 * warning will be logged. If CurrentResourceOwner is NULL, the segment
484 * remains attached until explicitly detached or the session ends.
485 * Creating with a NULL CurrentResourceOwner is equivalent to creating
486 * with a non-NULL CurrentResourceOwner and then calling dsm_pin_mapping.
489 dsm_create(Size size
, int flags
)
495 size_t first_page
= 0;
496 FreePageManager
*dsm_main_space_fpm
= dsm_main_space_begin
;
497 bool using_main_dsm_region
= false;
499 /* Unsafe in postmaster (and pointless in a stand-alone backend). */
500 Assert(IsUnderPostmaster
);
503 dsm_backend_startup();
505 /* Create a new segment descriptor. */
506 seg
= dsm_create_descriptor();
509 * Lock the control segment while we try to allocate from the main shared
510 * memory area, if configured.
512 if (dsm_main_space_fpm
)
514 npages
= size
/ FPM_PAGE_SIZE
;
515 if (size
% FPM_PAGE_SIZE
> 0)
518 LWLockAcquire(DynamicSharedMemoryControlLock
, LW_EXCLUSIVE
);
519 if (FreePageManagerGet(dsm_main_space_fpm
, npages
, &first_page
))
521 /* We can carve out a piece of the main shared memory segment. */
522 seg
->mapped_address
= (char *) dsm_main_space_begin
+
523 first_page
* FPM_PAGE_SIZE
;
524 seg
->mapped_size
= npages
* FPM_PAGE_SIZE
;
525 using_main_dsm_region
= true;
526 /* We'll choose a handle below. */
530 if (!using_main_dsm_region
)
533 * We need to create a new memory segment. Loop until we find an
534 * unused segment identifier.
536 if (dsm_main_space_fpm
)
537 LWLockRelease(DynamicSharedMemoryControlLock
);
540 Assert(seg
->mapped_address
== NULL
&& seg
->mapped_size
== 0);
541 /* Use even numbers only */
542 seg
->handle
= pg_prng_uint32(&pg_global_prng_state
) << 1;
543 if (seg
->handle
== DSM_HANDLE_INVALID
) /* Reserve sentinel */
545 if (dsm_impl_op(DSM_OP_CREATE
, seg
->handle
, size
, &seg
->impl_private
,
546 &seg
->mapped_address
, &seg
->mapped_size
, ERROR
))
549 LWLockAcquire(DynamicSharedMemoryControlLock
, LW_EXCLUSIVE
);
552 /* Search the control segment for an unused slot. */
553 nitems
= dsm_control
->nitems
;
554 for (i
= 0; i
< nitems
; ++i
)
556 if (dsm_control
->item
[i
].refcnt
== 0)
558 if (using_main_dsm_region
)
560 seg
->handle
= make_main_region_dsm_handle(i
);
561 dsm_control
->item
[i
].first_page
= first_page
;
562 dsm_control
->item
[i
].npages
= npages
;
565 Assert(!is_main_region_dsm_handle(seg
->handle
));
566 dsm_control
->item
[i
].handle
= seg
->handle
;
567 /* refcnt of 1 triggers destruction, so start at 2 */
568 dsm_control
->item
[i
].refcnt
= 2;
569 dsm_control
->item
[i
].impl_private_pm_handle
= NULL
;
570 dsm_control
->item
[i
].pinned
= false;
571 seg
->control_slot
= i
;
572 LWLockRelease(DynamicSharedMemoryControlLock
);
577 /* Verify that we can support an additional mapping. */
578 if (nitems
>= dsm_control
->maxitems
)
580 if (using_main_dsm_region
)
581 FreePageManagerPut(dsm_main_space_fpm
, first_page
, npages
);
582 LWLockRelease(DynamicSharedMemoryControlLock
);
583 if (!using_main_dsm_region
)
584 dsm_impl_op(DSM_OP_DESTROY
, seg
->handle
, 0, &seg
->impl_private
,
585 &seg
->mapped_address
, &seg
->mapped_size
, WARNING
);
586 if (seg
->resowner
!= NULL
)
587 ResourceOwnerForgetDSM(seg
->resowner
, seg
);
588 dlist_delete(&seg
->node
);
591 if ((flags
& DSM_CREATE_NULL_IF_MAXSEGMENTS
) != 0)
594 (errcode(ERRCODE_INSUFFICIENT_RESOURCES
),
595 errmsg("too many dynamic shared memory segments")));
598 /* Enter the handle into a new array slot. */
599 if (using_main_dsm_region
)
601 seg
->handle
= make_main_region_dsm_handle(nitems
);
602 dsm_control
->item
[i
].first_page
= first_page
;
603 dsm_control
->item
[i
].npages
= npages
;
605 dsm_control
->item
[nitems
].handle
= seg
->handle
;
606 /* refcnt of 1 triggers destruction, so start at 2 */
607 dsm_control
->item
[nitems
].refcnt
= 2;
608 dsm_control
->item
[nitems
].impl_private_pm_handle
= NULL
;
609 dsm_control
->item
[nitems
].pinned
= false;
610 seg
->control_slot
= nitems
;
611 dsm_control
->nitems
++;
612 LWLockRelease(DynamicSharedMemoryControlLock
);
618 * Attach a dynamic shared memory segment.
620 * See comments for dsm_segment_handle() for an explanation of how this
621 * is intended to be used.
623 * This function will return NULL if the segment isn't known to the system.
624 * This can happen if we're asked to attach the segment, but then everyone
625 * else detaches it (causing it to be destroyed) before we get around to
628 * If there is a non-NULL CurrentResourceOwner, the attached segment is
629 * associated with it and must be detached before the resource owner releases,
630 * or a warning will be logged. Otherwise the segment remains attached until
631 * explicitly detached or the session ends. See the note atop dsm_create().
634 dsm_attach(dsm_handle h
)
641 /* Unsafe in postmaster (and pointless in a stand-alone backend). */
642 Assert(IsUnderPostmaster
);
645 dsm_backend_startup();
648 * Since this is just a debugging cross-check, we could leave it out
649 * altogether, or include it only in assert-enabled builds. But since the
650 * list of attached segments should normally be very short, let's include
651 * it always for right now.
653 * If you're hitting this error, you probably want to attempt to find an
654 * existing mapping via dsm_find_mapping() before calling dsm_attach() to
657 dlist_foreach(iter
, &dsm_segment_list
)
659 seg
= dlist_container(dsm_segment
, node
, iter
.cur
);
660 if (seg
->handle
== h
)
661 elog(ERROR
, "can't attach the same segment more than once");
664 /* Create a new segment descriptor. */
665 seg
= dsm_create_descriptor();
668 /* Bump reference count for this segment in shared memory. */
669 LWLockAcquire(DynamicSharedMemoryControlLock
, LW_EXCLUSIVE
);
670 nitems
= dsm_control
->nitems
;
671 for (i
= 0; i
< nitems
; ++i
)
674 * If the reference count is 0, the slot is actually unused. If the
675 * reference count is 1, the slot is still in use, but the segment is
676 * in the process of going away; even if the handle matches, another
677 * slot may already have started using the same handle value by
678 * coincidence so we have to keep searching.
680 if (dsm_control
->item
[i
].refcnt
<= 1)
683 /* If the handle doesn't match, it's not the slot we want. */
684 if (dsm_control
->item
[i
].handle
!= seg
->handle
)
687 /* Otherwise we've found a match. */
688 dsm_control
->item
[i
].refcnt
++;
689 seg
->control_slot
= i
;
690 if (is_main_region_dsm_handle(seg
->handle
))
692 seg
->mapped_address
= (char *) dsm_main_space_begin
+
693 dsm_control
->item
[i
].first_page
* FPM_PAGE_SIZE
;
694 seg
->mapped_size
= dsm_control
->item
[i
].npages
* FPM_PAGE_SIZE
;
698 LWLockRelease(DynamicSharedMemoryControlLock
);
701 * If we didn't find the handle we're looking for in the control segment,
702 * it probably means that everyone else who had it mapped, including the
703 * original creator, died before we got to this point. It's up to the
704 * caller to decide what to do about that.
706 if (seg
->control_slot
== INVALID_CONTROL_SLOT
)
712 /* Here's where we actually try to map the segment. */
713 if (!is_main_region_dsm_handle(seg
->handle
))
714 dsm_impl_op(DSM_OP_ATTACH
, seg
->handle
, 0, &seg
->impl_private
,
715 &seg
->mapped_address
, &seg
->mapped_size
, ERROR
);
721 * At backend shutdown time, detach any segments that are still attached.
722 * (This is similar to dsm_detach_all, except that there's no reason to
723 * unmap the control segment before exiting, so we don't bother.)
726 dsm_backend_shutdown(void)
728 while (!dlist_is_empty(&dsm_segment_list
))
732 seg
= dlist_head_element(dsm_segment
, node
, &dsm_segment_list
);
738 * Detach all shared memory segments, including the control segments. This
739 * should be called, along with PGSharedMemoryDetach, in processes that
740 * might inherit mappings but are not intended to be connected to dynamic
746 void *control_address
= dsm_control
;
748 while (!dlist_is_empty(&dsm_segment_list
))
752 seg
= dlist_head_element(dsm_segment
, node
, &dsm_segment_list
);
756 if (control_address
!= NULL
)
757 dsm_impl_op(DSM_OP_DETACH
, dsm_control_handle
, 0,
758 &dsm_control_impl_private
, &control_address
,
759 &dsm_control_mapped_size
, ERROR
);
763 * Detach from a shared memory segment, destroying the segment if we
764 * remove the last reference.
766 * This function should never fail. It will often be invoked when aborting
767 * a transaction, and a further error won't serve any purpose. It's not a
768 * complete disaster if we fail to unmap or destroy the segment; it means a
769 * resource leak, but that doesn't necessarily preclude further operations.
772 dsm_detach(dsm_segment
*seg
)
775 * Invoke registered callbacks. Just in case one of those callbacks
776 * throws a further error that brings us back here, pop the callback
777 * before invoking it, to avoid infinite error recursion. Don't allow
778 * interrupts while running the individual callbacks in non-error code
779 * paths, to avoid leaving cleanup work unfinished if we're interrupted by
780 * a statement timeout or similar.
783 while (!slist_is_empty(&seg
->on_detach
))
786 dsm_segment_detach_callback
*cb
;
787 on_dsm_detach_callback function
;
790 node
= slist_pop_head_node(&seg
->on_detach
);
791 cb
= slist_container(dsm_segment_detach_callback
, node
, node
);
792 function
= cb
->function
;
801 * Try to remove the mapping, if one exists. Normally, there will be, but
802 * maybe not, if we failed partway through a create or attach operation.
803 * We remove the mapping before decrementing the reference count so that
804 * the process that sees a zero reference count can be certain that no
805 * remaining mappings exist. Even if this fails, we pretend that it
806 * works, because retrying is likely to fail in the same way.
808 if (seg
->mapped_address
!= NULL
)
810 if (!is_main_region_dsm_handle(seg
->handle
))
811 dsm_impl_op(DSM_OP_DETACH
, seg
->handle
, 0, &seg
->impl_private
,
812 &seg
->mapped_address
, &seg
->mapped_size
, WARNING
);
813 seg
->impl_private
= NULL
;
814 seg
->mapped_address
= NULL
;
815 seg
->mapped_size
= 0;
818 /* Reduce reference count, if we previously increased it. */
819 if (seg
->control_slot
!= INVALID_CONTROL_SLOT
)
822 uint32 control_slot
= seg
->control_slot
;
824 LWLockAcquire(DynamicSharedMemoryControlLock
, LW_EXCLUSIVE
);
825 Assert(dsm_control
->item
[control_slot
].handle
== seg
->handle
);
826 Assert(dsm_control
->item
[control_slot
].refcnt
> 1);
827 refcnt
= --dsm_control
->item
[control_slot
].refcnt
;
828 seg
->control_slot
= INVALID_CONTROL_SLOT
;
829 LWLockRelease(DynamicSharedMemoryControlLock
);
831 /* If new reference count is 1, try to destroy the segment. */
834 /* A pinned segment should never reach 1. */
835 Assert(!dsm_control
->item
[control_slot
].pinned
);
838 * If we fail to destroy the segment here, or are killed before we
839 * finish doing so, the reference count will remain at 1, which
840 * will mean that nobody else can attach to the segment. At
841 * postmaster shutdown time, or when a new postmaster is started
842 * after a hard kill, another attempt will be made to remove the
845 * The main case we're worried about here is being killed by a
846 * signal before we can finish removing the segment. In that
847 * case, it's important to be sure that the segment still gets
848 * removed. If we actually fail to remove the segment for some
849 * other reason, the postmaster may not have any better luck than
850 * we did. There's not much we can do about that, though.
852 if (is_main_region_dsm_handle(seg
->handle
) ||
853 dsm_impl_op(DSM_OP_DESTROY
, seg
->handle
, 0, &seg
->impl_private
,
854 &seg
->mapped_address
, &seg
->mapped_size
, WARNING
))
856 LWLockAcquire(DynamicSharedMemoryControlLock
, LW_EXCLUSIVE
);
857 if (is_main_region_dsm_handle(seg
->handle
))
858 FreePageManagerPut((FreePageManager
*) dsm_main_space_begin
,
859 dsm_control
->item
[control_slot
].first_page
,
860 dsm_control
->item
[control_slot
].npages
);
861 Assert(dsm_control
->item
[control_slot
].handle
== seg
->handle
);
862 Assert(dsm_control
->item
[control_slot
].refcnt
== 1);
863 dsm_control
->item
[control_slot
].refcnt
= 0;
864 LWLockRelease(DynamicSharedMemoryControlLock
);
869 /* Clean up our remaining backend-private data structures. */
870 if (seg
->resowner
!= NULL
)
871 ResourceOwnerForgetDSM(seg
->resowner
, seg
);
872 dlist_delete(&seg
->node
);
877 * Keep a dynamic shared memory mapping until end of session.
879 * By default, mappings are owned by the current resource owner, which
880 * typically means they stick around for the duration of the current query
884 dsm_pin_mapping(dsm_segment
*seg
)
886 if (seg
->resowner
!= NULL
)
888 ResourceOwnerForgetDSM(seg
->resowner
, seg
);
889 seg
->resowner
= NULL
;
894 * Arrange to remove a dynamic shared memory mapping at cleanup time.
896 * dsm_pin_mapping() can be used to preserve a mapping for the entire
897 * lifetime of a process; this function reverses that decision, making
898 * the segment owned by the current resource owner. This may be useful
899 * just before performing some operation that will invalidate the segment
900 * for future use by this backend.
903 dsm_unpin_mapping(dsm_segment
*seg
)
905 Assert(seg
->resowner
== NULL
);
906 ResourceOwnerEnlargeDSMs(CurrentResourceOwner
);
907 seg
->resowner
= CurrentResourceOwner
;
908 ResourceOwnerRememberDSM(seg
->resowner
, seg
);
912 * Keep a dynamic shared memory segment until postmaster shutdown, or until
913 * dsm_unpin_segment is called.
915 * This function should not be called more than once per segment, unless the
916 * segment is explicitly unpinned with dsm_unpin_segment in between calls.
918 * Note that this function does not arrange for the current process to
919 * keep the segment mapped indefinitely; if that behavior is desired,
920 * dsm_pin_mapping() should be used from each process that needs to
921 * retain the mapping.
924 dsm_pin_segment(dsm_segment
*seg
)
929 * Bump reference count for this segment in shared memory. This will
930 * ensure that even if there is no session which is attached to this
931 * segment, it will remain until postmaster shutdown or an explicit call
934 LWLockAcquire(DynamicSharedMemoryControlLock
, LW_EXCLUSIVE
);
935 if (dsm_control
->item
[seg
->control_slot
].pinned
)
936 elog(ERROR
, "cannot pin a segment that is already pinned");
937 dsm_impl_pin_segment(seg
->handle
, seg
->impl_private
, &handle
);
938 dsm_control
->item
[seg
->control_slot
].pinned
= true;
939 dsm_control
->item
[seg
->control_slot
].refcnt
++;
940 dsm_control
->item
[seg
->control_slot
].impl_private_pm_handle
= handle
;
941 LWLockRelease(DynamicSharedMemoryControlLock
);
945 * Unpin a dynamic shared memory segment that was previously pinned with
946 * dsm_pin_segment. This function should not be called unless dsm_pin_segment
947 * was previously called for this segment.
949 * The argument is a dsm_handle rather than a dsm_segment in case you want
950 * to unpin a segment to which you haven't attached. This turns out to be
951 * useful if, for example, a reference to one shared memory segment is stored
952 * within another shared memory segment. You might want to unpin the
953 * referenced segment before destroying the referencing segment.
956 dsm_unpin_segment(dsm_handle handle
)
958 uint32 control_slot
= INVALID_CONTROL_SLOT
;
959 bool destroy
= false;
962 /* Find the control slot for the given handle. */
963 LWLockAcquire(DynamicSharedMemoryControlLock
, LW_EXCLUSIVE
);
964 for (i
= 0; i
< dsm_control
->nitems
; ++i
)
966 /* Skip unused slots and segments that are concurrently going away. */
967 if (dsm_control
->item
[i
].refcnt
<= 1)
970 /* If we've found our handle, we can stop searching. */
971 if (dsm_control
->item
[i
].handle
== handle
)
979 * We should definitely have found the slot, and it should not already be
980 * in the process of going away, because this function should only be
981 * called on a segment which is pinned.
983 if (control_slot
== INVALID_CONTROL_SLOT
)
984 elog(ERROR
, "cannot unpin unknown segment handle");
985 if (!dsm_control
->item
[control_slot
].pinned
)
986 elog(ERROR
, "cannot unpin a segment that is not pinned");
987 Assert(dsm_control
->item
[control_slot
].refcnt
> 1);
990 * Allow implementation-specific code to run. We have to do this before
991 * releasing the lock, because impl_private_pm_handle may get modified by
992 * dsm_impl_unpin_segment.
994 dsm_impl_unpin_segment(handle
,
995 &dsm_control
->item
[control_slot
].impl_private_pm_handle
);
997 /* Note that 1 means no references (0 means unused slot). */
998 if (--dsm_control
->item
[control_slot
].refcnt
== 1)
1000 dsm_control
->item
[control_slot
].pinned
= false;
1002 /* Now we can release the lock. */
1003 LWLockRelease(DynamicSharedMemoryControlLock
);
1005 /* Clean up resources if that was the last reference. */
1008 void *junk_impl_private
= NULL
;
1009 void *junk_mapped_address
= NULL
;
1010 Size junk_mapped_size
= 0;
1013 * For an explanation of how error handling works in this case, see
1014 * comments in dsm_detach. Note that if we reach this point, the
1015 * current process certainly does not have the segment mapped, because
1016 * if it did, the reference count would have still been greater than 1
1017 * even after releasing the reference count held by the pin. The fact
1018 * that there can't be a dsm_segment for this handle makes it OK to
1019 * pass the mapped size, mapped address, and private data as NULL
1022 if (is_main_region_dsm_handle(handle
) ||
1023 dsm_impl_op(DSM_OP_DESTROY
, handle
, 0, &junk_impl_private
,
1024 &junk_mapped_address
, &junk_mapped_size
, WARNING
))
1026 LWLockAcquire(DynamicSharedMemoryControlLock
, LW_EXCLUSIVE
);
1027 if (is_main_region_dsm_handle(handle
))
1028 FreePageManagerPut((FreePageManager
*) dsm_main_space_begin
,
1029 dsm_control
->item
[control_slot
].first_page
,
1030 dsm_control
->item
[control_slot
].npages
);
1031 Assert(dsm_control
->item
[control_slot
].handle
== handle
);
1032 Assert(dsm_control
->item
[control_slot
].refcnt
== 1);
1033 dsm_control
->item
[control_slot
].refcnt
= 0;
1034 LWLockRelease(DynamicSharedMemoryControlLock
);
1040 * Find an existing mapping for a shared memory segment, if there is one.
1043 dsm_find_mapping(dsm_handle h
)
1048 dlist_foreach(iter
, &dsm_segment_list
)
1050 seg
= dlist_container(dsm_segment
, node
, iter
.cur
);
1051 if (seg
->handle
== h
)
1059 * Get the address at which a dynamic shared memory segment is mapped.
1062 dsm_segment_address(dsm_segment
*seg
)
1064 Assert(seg
->mapped_address
!= NULL
);
1065 return seg
->mapped_address
;
1069 * Get the size of a mapping.
1072 dsm_segment_map_length(dsm_segment
*seg
)
1074 Assert(seg
->mapped_address
!= NULL
);
1075 return seg
->mapped_size
;
1079 * Get a handle for a mapping.
1081 * To establish communication via dynamic shared memory between two backends,
1082 * one of them should first call dsm_create() to establish a new shared
1083 * memory mapping. That process should then call dsm_segment_handle() to
1084 * obtain a handle for the mapping, and pass that handle to the
1085 * coordinating backend via some means (e.g. bgw_main_arg, or via the
1086 * main shared memory segment). The recipient, once in possession of the
1087 * handle, should call dsm_attach().
1090 dsm_segment_handle(dsm_segment
*seg
)
1096 * Register an on-detach callback for a dynamic shared memory segment.
1099 on_dsm_detach(dsm_segment
*seg
, on_dsm_detach_callback function
, Datum arg
)
1101 dsm_segment_detach_callback
*cb
;
1103 cb
= MemoryContextAlloc(TopMemoryContext
,
1104 sizeof(dsm_segment_detach_callback
));
1105 cb
->function
= function
;
1107 slist_push_head(&seg
->on_detach
, &cb
->node
);
1111 * Unregister an on-detach callback for a dynamic shared memory segment.
1114 cancel_on_dsm_detach(dsm_segment
*seg
, on_dsm_detach_callback function
,
1117 slist_mutable_iter iter
;
1119 slist_foreach_modify(iter
, &seg
->on_detach
)
1121 dsm_segment_detach_callback
*cb
;
1123 cb
= slist_container(dsm_segment_detach_callback
, node
, iter
.cur
);
1124 if (cb
->function
== function
&& cb
->arg
== arg
)
1126 slist_delete_current(&iter
);
1134 * Discard all registered on-detach callbacks without executing them.
1137 reset_on_dsm_detach(void)
1141 dlist_foreach(iter
, &dsm_segment_list
)
1143 dsm_segment
*seg
= dlist_container(dsm_segment
, node
, iter
.cur
);
1145 /* Throw away explicit on-detach actions one by one. */
1146 while (!slist_is_empty(&seg
->on_detach
))
1149 dsm_segment_detach_callback
*cb
;
1151 node
= slist_pop_head_node(&seg
->on_detach
);
1152 cb
= slist_container(dsm_segment_detach_callback
, node
, node
);
1157 * Decrementing the reference count is a sort of implicit on-detach
1158 * action; make sure we don't do that, either.
1160 seg
->control_slot
= INVALID_CONTROL_SLOT
;
1165 * Create a segment descriptor.
1167 static dsm_segment
*
1168 dsm_create_descriptor(void)
1172 if (CurrentResourceOwner
)
1173 ResourceOwnerEnlargeDSMs(CurrentResourceOwner
);
1175 seg
= MemoryContextAlloc(TopMemoryContext
, sizeof(dsm_segment
));
1176 dlist_push_head(&dsm_segment_list
, &seg
->node
);
1178 /* seg->handle must be initialized by the caller */
1179 seg
->control_slot
= INVALID_CONTROL_SLOT
;
1180 seg
->impl_private
= NULL
;
1181 seg
->mapped_address
= NULL
;
1182 seg
->mapped_size
= 0;
1184 seg
->resowner
= CurrentResourceOwner
;
1185 if (CurrentResourceOwner
)
1186 ResourceOwnerRememberDSM(CurrentResourceOwner
, seg
);
1188 slist_init(&seg
->on_detach
);
1194 * Sanity check a control segment.
1196 * The goal here isn't to detect everything that could possibly be wrong with
1197 * the control segment; there's not enough information for that. Rather, the
1198 * goal is to make sure that someone can iterate over the items in the segment
1199 * without overrunning the end of the mapping and crashing. We also check
1200 * the magic number since, if that's messed up, this may not even be one of
1201 * our segments at all.
1204 dsm_control_segment_sane(dsm_control_header
*control
, Size mapped_size
)
1206 if (mapped_size
< offsetof(dsm_control_header
, item
))
1207 return false; /* Mapped size too short to read header. */
1208 if (control
->magic
!= PG_DYNSHMEM_CONTROL_MAGIC
)
1209 return false; /* Magic number doesn't match. */
1210 if (dsm_control_bytes_needed(control
->maxitems
) > mapped_size
)
1211 return false; /* Max item count won't fit in map. */
1212 if (control
->nitems
> control
->maxitems
)
1213 return false; /* Overfull. */
1218 * Compute the number of control-segment bytes needed to store a given
1222 dsm_control_bytes_needed(uint32 nitems
)
1224 return offsetof(dsm_control_header
, item
)
1225 + sizeof(dsm_control_item
) * (uint64
) nitems
;
1228 static inline dsm_handle
1229 make_main_region_dsm_handle(int slot
)
1234 * We need to create a handle that doesn't collide with any existing extra
1235 * segment created by dsm_impl_op(), so we'll make it odd. It also
1236 * mustn't collide with any other main area pseudo-segment, so we'll
1237 * include the slot number in some of the bits. We also want to make an
1238 * effort to avoid newly created and recently destroyed handles from being
1239 * confused, so we'll make the rest of the bits random.
1242 handle
|= slot
<< 1;
1243 handle
|= pg_prng_uint32(&pg_global_prng_state
) << (pg_leftmost_one_pos32(dsm_control
->maxitems
) + 1);
1248 is_main_region_dsm_handle(dsm_handle handle
)