Update copyright for 2022
[pgsql.git] / src / backend / storage / ipc / dsm.c
blobae751415925ebb68d294ca9606692fa6d8022edc
/*-------------------------------------------------------------------------
 *
 * dsm.c
 *	  manage dynamic shared memory segments
 *
 * This file provides a set of services to make programming with dynamic
 * shared memory segments more convenient.  Unlike the low-level
 * facilities provided by dsm_impl.h and dsm_impl.c, mappings and segments
 * created using this module will be cleaned up automatically.  Mappings
 * will be removed when the resource owner under which they were created
 * is cleaned up, unless dsm_pin_mapping() is used, in which case they
 * have session lifespan.  Segments will be removed when there are no
 * remaining mappings, or at postmaster shutdown in any case.  After a
 * hard postmaster crash, remaining segments will be removed, if they
 * still exist, at the next postmaster startup.
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/storage/ipc/dsm.c
 *
 *-------------------------------------------------------------------------
 */
27 #include "postgres.h"
29 #include <fcntl.h>
30 #include <unistd.h>
31 #ifndef WIN32
32 #include <sys/mman.h>
33 #endif
34 #include <sys/stat.h>
36 #include "common/pg_prng.h"
37 #include "lib/ilist.h"
38 #include "miscadmin.h"
39 #include "port/pg_bitutils.h"
40 #include "storage/dsm.h"
41 #include "storage/ipc.h"
42 #include "storage/lwlock.h"
43 #include "storage/pg_shmem.h"
44 #include "utils/freepage.h"
45 #include "utils/guc.h"
46 #include "utils/memutils.h"
47 #include "utils/resowner_private.h"
/* Magic number written into the control segment header when it is created. */
#define PG_DYNSHMEM_CONTROL_MAGIC		0x9a503d32

/*
 * Sizing of the control segment's slot array: a fixed number of slots plus
 * a per-backend allowance that is multiplied by MaxBackends at startup.
 */
#define PG_DYNSHMEM_FIXED_SLOTS			64
#define PG_DYNSHMEM_SLOTS_PER_BACKEND	5

/* control_slot value meaning "no slot in the control segment". */
#define INVALID_CONTROL_SLOT		((uint32) -1)
56 /* Backend-local tracking for on-detach callbacks. */
57 typedef struct dsm_segment_detach_callback
59 on_dsm_detach_callback function;
60 Datum arg;
61 slist_node node;
62 } dsm_segment_detach_callback;
64 /* Backend-local state for a dynamic shared memory segment. */
65 struct dsm_segment
67 dlist_node node; /* List link in dsm_segment_list. */
68 ResourceOwner resowner; /* Resource owner. */
69 dsm_handle handle; /* Segment name. */
70 uint32 control_slot; /* Slot in control segment. */
71 void *impl_private; /* Implementation-specific private data. */
72 void *mapped_address; /* Mapping address, or NULL if unmapped. */
73 Size mapped_size; /* Size of our mapping. */
74 slist_head on_detach; /* On-detach callbacks. */
77 /* Shared-memory state for a dynamic shared memory segment. */
78 typedef struct dsm_control_item
80 dsm_handle handle;
81 uint32 refcnt; /* 2+ = active, 1 = moribund, 0 = gone */
82 size_t first_page;
83 size_t npages;
84 void *impl_private_pm_handle; /* only needed on Windows */
85 bool pinned;
86 } dsm_control_item;
88 /* Layout of the dynamic shared memory control segment. */
89 typedef struct dsm_control_header
91 uint32 magic;
92 uint32 nitems;
93 uint32 maxitems;
94 dsm_control_item item[FLEXIBLE_ARRAY_MEMBER];
95 } dsm_control_header;
97 static void dsm_cleanup_for_mmap(void);
98 static void dsm_postmaster_shutdown(int code, Datum arg);
99 static dsm_segment *dsm_create_descriptor(void);
100 static bool dsm_control_segment_sane(dsm_control_header *control,
101 Size mapped_size);
102 static uint64 dsm_control_bytes_needed(uint32 nitems);
103 static inline dsm_handle make_main_region_dsm_handle(int slot);
104 static inline bool is_main_region_dsm_handle(dsm_handle handle);
/* Set once this backend has run dsm_backend_startup(). */
static bool dsm_init_done = false;

/*
 * Start of the DSM space preallocated inside the main shared memory region;
 * stays NULL when no such space was set up.
 */
static void *dsm_main_space_begin = NULL;
113 * List of dynamic shared memory segments used by this backend.
115 * At process exit time, we must decrement the reference count of each
116 * segment we have attached; this list makes it possible to find all such
117 * segments.
119 * This list should always be empty in the postmaster. We could probably
120 * allow the postmaster to map dynamic shared memory segments before it
121 * begins to start child processes, provided that each process adjusted
122 * the reference counts for those segments in the control segment at
123 * startup time, but there's no obvious need for such a facility, which
124 * would also be complex to handle in the EXEC_BACKEND case. Once the
125 * postmaster has begun spawning children, there's an additional problem:
126 * each new mapping would require an update to the control segment,
127 * which requires locking, in which the postmaster must not be involved.
129 static dlist_head dsm_segment_list = DLIST_STATIC_INIT(dsm_segment_list);
132 * Control segment information.
134 * Unlike ordinary shared memory segments, the control segment is not
135 * reference counted; instead, it lasts for the postmaster's entire
136 * life cycle. For simplicity, it doesn't have a dsm_segment object either.
138 static dsm_handle dsm_control_handle;
139 static dsm_control_header *dsm_control;
140 static Size dsm_control_mapped_size = 0;
141 static void *dsm_control_impl_private = NULL;
144 * Start up the dynamic shared memory system.
146 * This is called just once during each cluster lifetime, at postmaster
147 * startup time.
149 void
150 dsm_postmaster_startup(PGShmemHeader *shim)
152 void *dsm_control_address = NULL;
153 uint32 maxitems;
154 Size segsize;
156 Assert(!IsUnderPostmaster);
159 * If we're using the mmap implementations, clean up any leftovers.
160 * Cleanup isn't needed on Windows, and happens earlier in startup for
161 * POSIX and System V shared memory, via a direct call to
162 * dsm_cleanup_using_control_segment.
164 if (dynamic_shared_memory_type == DSM_IMPL_MMAP)
165 dsm_cleanup_for_mmap();
167 /* Determine size for new control segment. */
168 maxitems = PG_DYNSHMEM_FIXED_SLOTS
169 + PG_DYNSHMEM_SLOTS_PER_BACKEND * MaxBackends;
170 elog(DEBUG2, "dynamic shared memory system will support %u segments",
171 maxitems);
172 segsize = dsm_control_bytes_needed(maxitems);
175 * Loop until we find an unused identifier for the new control segment. We
176 * sometimes use 0 as a sentinel value indicating that no control segment
177 * is known to exist, so avoid using that value for a real control
178 * segment.
180 for (;;)
182 Assert(dsm_control_address == NULL);
183 Assert(dsm_control_mapped_size == 0);
184 /* Use even numbers only */
185 dsm_control_handle = pg_prng_uint32(&pg_global_prng_state) << 1;
186 if (dsm_control_handle == DSM_HANDLE_INVALID)
187 continue;
188 if (dsm_impl_op(DSM_OP_CREATE, dsm_control_handle, segsize,
189 &dsm_control_impl_private, &dsm_control_address,
190 &dsm_control_mapped_size, ERROR))
191 break;
193 dsm_control = dsm_control_address;
194 on_shmem_exit(dsm_postmaster_shutdown, PointerGetDatum(shim));
195 elog(DEBUG2,
196 "created dynamic shared memory control segment %u (%zu bytes)",
197 dsm_control_handle, segsize);
198 shim->dsm_control = dsm_control_handle;
200 /* Initialize control segment. */
201 dsm_control->magic = PG_DYNSHMEM_CONTROL_MAGIC;
202 dsm_control->nitems = 0;
203 dsm_control->maxitems = maxitems;
207 * Determine whether the control segment from the previous postmaster
208 * invocation still exists. If so, remove the dynamic shared memory
209 * segments to which it refers, and then the control segment itself.
211 void
212 dsm_cleanup_using_control_segment(dsm_handle old_control_handle)
214 void *mapped_address = NULL;
215 void *junk_mapped_address = NULL;
216 void *impl_private = NULL;
217 void *junk_impl_private = NULL;
218 Size mapped_size = 0;
219 Size junk_mapped_size = 0;
220 uint32 nitems;
221 uint32 i;
222 dsm_control_header *old_control;
225 * Try to attach the segment. If this fails, it probably just means that
226 * the operating system has been rebooted and the segment no longer
227 * exists, or an unrelated process has used the same shm ID. So just fall
228 * out quietly.
230 if (!dsm_impl_op(DSM_OP_ATTACH, old_control_handle, 0, &impl_private,
231 &mapped_address, &mapped_size, DEBUG1))
232 return;
235 * We've managed to reattach it, but the contents might not be sane. If
236 * they aren't, we disregard the segment after all.
238 old_control = (dsm_control_header *) mapped_address;
239 if (!dsm_control_segment_sane(old_control, mapped_size))
241 dsm_impl_op(DSM_OP_DETACH, old_control_handle, 0, &impl_private,
242 &mapped_address, &mapped_size, LOG);
243 return;
247 * OK, the control segment looks basically valid, so we can use it to get
248 * a list of segments that need to be removed.
250 nitems = old_control->nitems;
251 for (i = 0; i < nitems; ++i)
253 dsm_handle handle;
254 uint32 refcnt;
256 /* If the reference count is 0, the slot is actually unused. */
257 refcnt = old_control->item[i].refcnt;
258 if (refcnt == 0)
259 continue;
261 /* If it was using the main shmem area, there is nothing to do. */
262 handle = old_control->item[i].handle;
263 if (is_main_region_dsm_handle(handle))
264 continue;
266 /* Log debugging information. */
267 elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u (reference count %u)",
268 handle, refcnt);
270 /* Destroy the referenced segment. */
271 dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
272 &junk_mapped_address, &junk_mapped_size, LOG);
275 /* Destroy the old control segment, too. */
276 elog(DEBUG2,
277 "cleaning up dynamic shared memory control segment with ID %u",
278 old_control_handle);
279 dsm_impl_op(DSM_OP_DESTROY, old_control_handle, 0, &impl_private,
280 &mapped_address, &mapped_size, LOG);
284 * When we're using the mmap shared memory implementation, "shared memory"
285 * segments might even manage to survive an operating system reboot.
286 * But there's no guarantee as to exactly what will survive: some segments
287 * may survive, and others may not, and the contents of some may be out
288 * of date. In particular, the control segment may be out of date, so we
289 * can't rely on it to figure out what to remove. However, since we know
290 * what directory contains the files we used as shared memory, we can simply
291 * scan the directory and blow everything away that shouldn't be there.
293 static void
294 dsm_cleanup_for_mmap(void)
296 DIR *dir;
297 struct dirent *dent;
299 /* Scan the directory for something with a name of the correct format. */
300 dir = AllocateDir(PG_DYNSHMEM_DIR);
302 while ((dent = ReadDir(dir, PG_DYNSHMEM_DIR)) != NULL)
304 if (strncmp(dent->d_name, PG_DYNSHMEM_MMAP_FILE_PREFIX,
305 strlen(PG_DYNSHMEM_MMAP_FILE_PREFIX)) == 0)
307 char buf[MAXPGPATH + sizeof(PG_DYNSHMEM_DIR)];
309 snprintf(buf, sizeof(buf), PG_DYNSHMEM_DIR "/%s", dent->d_name);
311 elog(DEBUG2, "removing file \"%s\"", buf);
313 /* We found a matching file; so remove it. */
314 if (unlink(buf) != 0)
315 ereport(ERROR,
316 (errcode_for_file_access(),
317 errmsg("could not remove file \"%s\": %m", buf)));
321 /* Cleanup complete. */
322 FreeDir(dir);
326 * At shutdown time, we iterate over the control segment and remove all
327 * remaining dynamic shared memory segments. We avoid throwing errors here;
328 * the postmaster is shutting down either way, and this is just non-critical
329 * resource cleanup.
331 static void
332 dsm_postmaster_shutdown(int code, Datum arg)
334 uint32 nitems;
335 uint32 i;
336 void *dsm_control_address;
337 void *junk_mapped_address = NULL;
338 void *junk_impl_private = NULL;
339 Size junk_mapped_size = 0;
340 PGShmemHeader *shim = (PGShmemHeader *) DatumGetPointer(arg);
343 * If some other backend exited uncleanly, it might have corrupted the
344 * control segment while it was dying. In that case, we warn and ignore
345 * the contents of the control segment. This may end up leaving behind
346 * stray shared memory segments, but there's not much we can do about that
347 * if the metadata is gone.
349 nitems = dsm_control->nitems;
350 if (!dsm_control_segment_sane(dsm_control, dsm_control_mapped_size))
352 ereport(LOG,
353 (errmsg("dynamic shared memory control segment is corrupt")));
354 return;
357 /* Remove any remaining segments. */
358 for (i = 0; i < nitems; ++i)
360 dsm_handle handle;
362 /* If the reference count is 0, the slot is actually unused. */
363 if (dsm_control->item[i].refcnt == 0)
364 continue;
366 handle = dsm_control->item[i].handle;
367 if (is_main_region_dsm_handle(handle))
368 continue;
370 /* Log debugging information. */
371 elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u",
372 handle);
374 /* Destroy the segment. */
375 dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
376 &junk_mapped_address, &junk_mapped_size, LOG);
379 /* Remove the control segment itself. */
380 elog(DEBUG2,
381 "cleaning up dynamic shared memory control segment with ID %u",
382 dsm_control_handle);
383 dsm_control_address = dsm_control;
384 dsm_impl_op(DSM_OP_DESTROY, dsm_control_handle, 0,
385 &dsm_control_impl_private, &dsm_control_address,
386 &dsm_control_mapped_size, LOG);
387 dsm_control = dsm_control_address;
388 shim->dsm_control = 0;
392 * Prepare this backend for dynamic shared memory usage. Under EXEC_BACKEND,
393 * we must reread the state file and map the control segment; in other cases,
394 * we'll have inherited the postmaster's mapping and global variables.
396 static void
397 dsm_backend_startup(void)
399 #ifdef EXEC_BACKEND
401 void *control_address = NULL;
403 /* Attach control segment. */
404 Assert(dsm_control_handle != 0);
405 dsm_impl_op(DSM_OP_ATTACH, dsm_control_handle, 0,
406 &dsm_control_impl_private, &control_address,
407 &dsm_control_mapped_size, ERROR);
408 dsm_control = control_address;
409 /* If control segment doesn't look sane, something is badly wrong. */
410 if (!dsm_control_segment_sane(dsm_control, dsm_control_mapped_size))
412 dsm_impl_op(DSM_OP_DETACH, dsm_control_handle, 0,
413 &dsm_control_impl_private, &control_address,
414 &dsm_control_mapped_size, WARNING);
415 ereport(FATAL,
416 (errcode(ERRCODE_INTERNAL_ERROR),
417 errmsg("dynamic shared memory control segment is not valid")));
420 #endif
422 dsm_init_done = true;
#ifdef EXEC_BACKEND
/*
 * Under EXEC_BACKEND this is called back once the main shared memory
 * segment has been re-attached, so that the control handle read from it
 * can be remembered.  The handle must be nonzero and must not have been
 * set already.
 */
void
dsm_set_control_handle(dsm_handle h)
{
	Assert(dsm_control_handle == 0 && h != 0);
	dsm_control_handle = h;
}
#endif
440 * Reserve some space in the main shared memory segment for DSM segments.
442 size_t
443 dsm_estimate_size(void)
445 return 1024 * 1024 * (size_t) min_dynamic_shared_memory;
449 * Initialize space in the main shared memory segment for DSM segments.
451 void
452 dsm_shmem_init(void)
454 size_t size = dsm_estimate_size();
455 bool found;
457 if (size == 0)
458 return;
460 dsm_main_space_begin = ShmemInitStruct("Preallocated DSM", size, &found);
461 if (!found)
463 FreePageManager *fpm = (FreePageManager *) dsm_main_space_begin;
464 size_t first_page = 0;
465 size_t pages;
467 /* Reserve space for the FreePageManager. */
468 while (first_page * FPM_PAGE_SIZE < sizeof(FreePageManager))
469 ++first_page;
471 /* Initialize it and give it all the rest of the space. */
472 FreePageManagerInitialize(fpm, dsm_main_space_begin);
473 pages = (size / FPM_PAGE_SIZE) - first_page;
474 FreePageManagerPut(fpm, first_page, pages);
479 * Create a new dynamic shared memory segment.
481 * If there is a non-NULL CurrentResourceOwner, the new segment is associated
482 * with it and must be detached before the resource owner releases, or a
483 * warning will be logged. If CurrentResourceOwner is NULL, the segment
484 * remains attached until explicitly detached or the session ends.
485 * Creating with a NULL CurrentResourceOwner is equivalent to creating
486 * with a non-NULL CurrentResourceOwner and then calling dsm_pin_mapping.
488 dsm_segment *
489 dsm_create(Size size, int flags)
491 dsm_segment *seg;
492 uint32 i;
493 uint32 nitems;
494 size_t npages = 0;
495 size_t first_page = 0;
496 FreePageManager *dsm_main_space_fpm = dsm_main_space_begin;
497 bool using_main_dsm_region = false;
499 /* Unsafe in postmaster (and pointless in a stand-alone backend). */
500 Assert(IsUnderPostmaster);
502 if (!dsm_init_done)
503 dsm_backend_startup();
505 /* Create a new segment descriptor. */
506 seg = dsm_create_descriptor();
509 * Lock the control segment while we try to allocate from the main shared
510 * memory area, if configured.
512 if (dsm_main_space_fpm)
514 npages = size / FPM_PAGE_SIZE;
515 if (size % FPM_PAGE_SIZE > 0)
516 ++npages;
518 LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
519 if (FreePageManagerGet(dsm_main_space_fpm, npages, &first_page))
521 /* We can carve out a piece of the main shared memory segment. */
522 seg->mapped_address = (char *) dsm_main_space_begin +
523 first_page * FPM_PAGE_SIZE;
524 seg->mapped_size = npages * FPM_PAGE_SIZE;
525 using_main_dsm_region = true;
526 /* We'll choose a handle below. */
530 if (!using_main_dsm_region)
533 * We need to create a new memory segment. Loop until we find an
534 * unused segment identifier.
536 if (dsm_main_space_fpm)
537 LWLockRelease(DynamicSharedMemoryControlLock);
538 for (;;)
540 Assert(seg->mapped_address == NULL && seg->mapped_size == 0);
541 /* Use even numbers only */
542 seg->handle = pg_prng_uint32(&pg_global_prng_state) << 1;
543 if (seg->handle == DSM_HANDLE_INVALID) /* Reserve sentinel */
544 continue;
545 if (dsm_impl_op(DSM_OP_CREATE, seg->handle, size, &seg->impl_private,
546 &seg->mapped_address, &seg->mapped_size, ERROR))
547 break;
549 LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
552 /* Search the control segment for an unused slot. */
553 nitems = dsm_control->nitems;
554 for (i = 0; i < nitems; ++i)
556 if (dsm_control->item[i].refcnt == 0)
558 if (using_main_dsm_region)
560 seg->handle = make_main_region_dsm_handle(i);
561 dsm_control->item[i].first_page = first_page;
562 dsm_control->item[i].npages = npages;
564 else
565 Assert(!is_main_region_dsm_handle(seg->handle));
566 dsm_control->item[i].handle = seg->handle;
567 /* refcnt of 1 triggers destruction, so start at 2 */
568 dsm_control->item[i].refcnt = 2;
569 dsm_control->item[i].impl_private_pm_handle = NULL;
570 dsm_control->item[i].pinned = false;
571 seg->control_slot = i;
572 LWLockRelease(DynamicSharedMemoryControlLock);
573 return seg;
577 /* Verify that we can support an additional mapping. */
578 if (nitems >= dsm_control->maxitems)
580 if (using_main_dsm_region)
581 FreePageManagerPut(dsm_main_space_fpm, first_page, npages);
582 LWLockRelease(DynamicSharedMemoryControlLock);
583 if (!using_main_dsm_region)
584 dsm_impl_op(DSM_OP_DESTROY, seg->handle, 0, &seg->impl_private,
585 &seg->mapped_address, &seg->mapped_size, WARNING);
586 if (seg->resowner != NULL)
587 ResourceOwnerForgetDSM(seg->resowner, seg);
588 dlist_delete(&seg->node);
589 pfree(seg);
591 if ((flags & DSM_CREATE_NULL_IF_MAXSEGMENTS) != 0)
592 return NULL;
593 ereport(ERROR,
594 (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
595 errmsg("too many dynamic shared memory segments")));
598 /* Enter the handle into a new array slot. */
599 if (using_main_dsm_region)
601 seg->handle = make_main_region_dsm_handle(nitems);
602 dsm_control->item[i].first_page = first_page;
603 dsm_control->item[i].npages = npages;
605 dsm_control->item[nitems].handle = seg->handle;
606 /* refcnt of 1 triggers destruction, so start at 2 */
607 dsm_control->item[nitems].refcnt = 2;
608 dsm_control->item[nitems].impl_private_pm_handle = NULL;
609 dsm_control->item[nitems].pinned = false;
610 seg->control_slot = nitems;
611 dsm_control->nitems++;
612 LWLockRelease(DynamicSharedMemoryControlLock);
614 return seg;
618 * Attach a dynamic shared memory segment.
620 * See comments for dsm_segment_handle() for an explanation of how this
621 * is intended to be used.
623 * This function will return NULL if the segment isn't known to the system.
624 * This can happen if we're asked to attach the segment, but then everyone
625 * else detaches it (causing it to be destroyed) before we get around to
626 * attaching it.
628 * If there is a non-NULL CurrentResourceOwner, the attached segment is
629 * associated with it and must be detached before the resource owner releases,
630 * or a warning will be logged. Otherwise the segment remains attached until
631 * explicitly detached or the session ends. See the note atop dsm_create().
633 dsm_segment *
634 dsm_attach(dsm_handle h)
636 dsm_segment *seg;
637 dlist_iter iter;
638 uint32 i;
639 uint32 nitems;
641 /* Unsafe in postmaster (and pointless in a stand-alone backend). */
642 Assert(IsUnderPostmaster);
644 if (!dsm_init_done)
645 dsm_backend_startup();
648 * Since this is just a debugging cross-check, we could leave it out
649 * altogether, or include it only in assert-enabled builds. But since the
650 * list of attached segments should normally be very short, let's include
651 * it always for right now.
653 * If you're hitting this error, you probably want to attempt to find an
654 * existing mapping via dsm_find_mapping() before calling dsm_attach() to
655 * create a new one.
657 dlist_foreach(iter, &dsm_segment_list)
659 seg = dlist_container(dsm_segment, node, iter.cur);
660 if (seg->handle == h)
661 elog(ERROR, "can't attach the same segment more than once");
664 /* Create a new segment descriptor. */
665 seg = dsm_create_descriptor();
666 seg->handle = h;
668 /* Bump reference count for this segment in shared memory. */
669 LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
670 nitems = dsm_control->nitems;
671 for (i = 0; i < nitems; ++i)
674 * If the reference count is 0, the slot is actually unused. If the
675 * reference count is 1, the slot is still in use, but the segment is
676 * in the process of going away; even if the handle matches, another
677 * slot may already have started using the same handle value by
678 * coincidence so we have to keep searching.
680 if (dsm_control->item[i].refcnt <= 1)
681 continue;
683 /* If the handle doesn't match, it's not the slot we want. */
684 if (dsm_control->item[i].handle != seg->handle)
685 continue;
687 /* Otherwise we've found a match. */
688 dsm_control->item[i].refcnt++;
689 seg->control_slot = i;
690 if (is_main_region_dsm_handle(seg->handle))
692 seg->mapped_address = (char *) dsm_main_space_begin +
693 dsm_control->item[i].first_page * FPM_PAGE_SIZE;
694 seg->mapped_size = dsm_control->item[i].npages * FPM_PAGE_SIZE;
696 break;
698 LWLockRelease(DynamicSharedMemoryControlLock);
701 * If we didn't find the handle we're looking for in the control segment,
702 * it probably means that everyone else who had it mapped, including the
703 * original creator, died before we got to this point. It's up to the
704 * caller to decide what to do about that.
706 if (seg->control_slot == INVALID_CONTROL_SLOT)
708 dsm_detach(seg);
709 return NULL;
712 /* Here's where we actually try to map the segment. */
713 if (!is_main_region_dsm_handle(seg->handle))
714 dsm_impl_op(DSM_OP_ATTACH, seg->handle, 0, &seg->impl_private,
715 &seg->mapped_address, &seg->mapped_size, ERROR);
717 return seg;
721 * At backend shutdown time, detach any segments that are still attached.
722 * (This is similar to dsm_detach_all, except that there's no reason to
723 * unmap the control segment before exiting, so we don't bother.)
725 void
726 dsm_backend_shutdown(void)
728 while (!dlist_is_empty(&dsm_segment_list))
730 dsm_segment *seg;
732 seg = dlist_head_element(dsm_segment, node, &dsm_segment_list);
733 dsm_detach(seg);
738 * Detach all shared memory segments, including the control segments. This
739 * should be called, along with PGSharedMemoryDetach, in processes that
740 * might inherit mappings but are not intended to be connected to dynamic
741 * shared memory.
743 void
744 dsm_detach_all(void)
746 void *control_address = dsm_control;
748 while (!dlist_is_empty(&dsm_segment_list))
750 dsm_segment *seg;
752 seg = dlist_head_element(dsm_segment, node, &dsm_segment_list);
753 dsm_detach(seg);
756 if (control_address != NULL)
757 dsm_impl_op(DSM_OP_DETACH, dsm_control_handle, 0,
758 &dsm_control_impl_private, &control_address,
759 &dsm_control_mapped_size, ERROR);
763 * Detach from a shared memory segment, destroying the segment if we
764 * remove the last reference.
766 * This function should never fail. It will often be invoked when aborting
767 * a transaction, and a further error won't serve any purpose. It's not a
768 * complete disaster if we fail to unmap or destroy the segment; it means a
769 * resource leak, but that doesn't necessarily preclude further operations.
771 void
772 dsm_detach(dsm_segment *seg)
775 * Invoke registered callbacks. Just in case one of those callbacks
776 * throws a further error that brings us back here, pop the callback
777 * before invoking it, to avoid infinite error recursion. Don't allow
778 * interrupts while running the individual callbacks in non-error code
779 * paths, to avoid leaving cleanup work unfinished if we're interrupted by
780 * a statement timeout or similar.
782 HOLD_INTERRUPTS();
783 while (!slist_is_empty(&seg->on_detach))
785 slist_node *node;
786 dsm_segment_detach_callback *cb;
787 on_dsm_detach_callback function;
788 Datum arg;
790 node = slist_pop_head_node(&seg->on_detach);
791 cb = slist_container(dsm_segment_detach_callback, node, node);
792 function = cb->function;
793 arg = cb->arg;
794 pfree(cb);
796 function(seg, arg);
798 RESUME_INTERRUPTS();
801 * Try to remove the mapping, if one exists. Normally, there will be, but
802 * maybe not, if we failed partway through a create or attach operation.
803 * We remove the mapping before decrementing the reference count so that
804 * the process that sees a zero reference count can be certain that no
805 * remaining mappings exist. Even if this fails, we pretend that it
806 * works, because retrying is likely to fail in the same way.
808 if (seg->mapped_address != NULL)
810 if (!is_main_region_dsm_handle(seg->handle))
811 dsm_impl_op(DSM_OP_DETACH, seg->handle, 0, &seg->impl_private,
812 &seg->mapped_address, &seg->mapped_size, WARNING);
813 seg->impl_private = NULL;
814 seg->mapped_address = NULL;
815 seg->mapped_size = 0;
818 /* Reduce reference count, if we previously increased it. */
819 if (seg->control_slot != INVALID_CONTROL_SLOT)
821 uint32 refcnt;
822 uint32 control_slot = seg->control_slot;
824 LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
825 Assert(dsm_control->item[control_slot].handle == seg->handle);
826 Assert(dsm_control->item[control_slot].refcnt > 1);
827 refcnt = --dsm_control->item[control_slot].refcnt;
828 seg->control_slot = INVALID_CONTROL_SLOT;
829 LWLockRelease(DynamicSharedMemoryControlLock);
831 /* If new reference count is 1, try to destroy the segment. */
832 if (refcnt == 1)
834 /* A pinned segment should never reach 1. */
835 Assert(!dsm_control->item[control_slot].pinned);
838 * If we fail to destroy the segment here, or are killed before we
839 * finish doing so, the reference count will remain at 1, which
840 * will mean that nobody else can attach to the segment. At
841 * postmaster shutdown time, or when a new postmaster is started
842 * after a hard kill, another attempt will be made to remove the
843 * segment.
845 * The main case we're worried about here is being killed by a
846 * signal before we can finish removing the segment. In that
847 * case, it's important to be sure that the segment still gets
848 * removed. If we actually fail to remove the segment for some
849 * other reason, the postmaster may not have any better luck than
850 * we did. There's not much we can do about that, though.
852 if (is_main_region_dsm_handle(seg->handle) ||
853 dsm_impl_op(DSM_OP_DESTROY, seg->handle, 0, &seg->impl_private,
854 &seg->mapped_address, &seg->mapped_size, WARNING))
856 LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
857 if (is_main_region_dsm_handle(seg->handle))
858 FreePageManagerPut((FreePageManager *) dsm_main_space_begin,
859 dsm_control->item[control_slot].first_page,
860 dsm_control->item[control_slot].npages);
861 Assert(dsm_control->item[control_slot].handle == seg->handle);
862 Assert(dsm_control->item[control_slot].refcnt == 1);
863 dsm_control->item[control_slot].refcnt = 0;
864 LWLockRelease(DynamicSharedMemoryControlLock);
869 /* Clean up our remaining backend-private data structures. */
870 if (seg->resowner != NULL)
871 ResourceOwnerForgetDSM(seg->resowner, seg);
872 dlist_delete(&seg->node);
873 pfree(seg);
877 * Keep a dynamic shared memory mapping until end of session.
879 * By default, mappings are owned by the current resource owner, which
880 * typically means they stick around for the duration of the current query
881 * only.
883 void
884 dsm_pin_mapping(dsm_segment *seg)
886 if (seg->resowner != NULL)
888 ResourceOwnerForgetDSM(seg->resowner, seg);
889 seg->resowner = NULL;
894 * Arrange to remove a dynamic shared memory mapping at cleanup time.
896 * dsm_pin_mapping() can be used to preserve a mapping for the entire
897 * lifetime of a process; this function reverses that decision, making
898 * the segment owned by the current resource owner. This may be useful
899 * just before performing some operation that will invalidate the segment
900 * for future use by this backend.
902 void
903 dsm_unpin_mapping(dsm_segment *seg)
905 Assert(seg->resowner == NULL);
906 ResourceOwnerEnlargeDSMs(CurrentResourceOwner);
907 seg->resowner = CurrentResourceOwner;
908 ResourceOwnerRememberDSM(seg->resowner, seg);
912 * Keep a dynamic shared memory segment until postmaster shutdown, or until
913 * dsm_unpin_segment is called.
915 * This function should not be called more than once per segment, unless the
916 * segment is explicitly unpinned with dsm_unpin_segment in between calls.
918 * Note that this function does not arrange for the current process to
919 * keep the segment mapped indefinitely; if that behavior is desired,
920 * dsm_pin_mapping() should be used from each process that needs to
921 * retain the mapping.
923 void
924 dsm_pin_segment(dsm_segment *seg)
926 void *handle;
929 * Bump reference count for this segment in shared memory. This will
930 * ensure that even if there is no session which is attached to this
931 * segment, it will remain until postmaster shutdown or an explicit call
932 * to unpin.
934 LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
935 if (dsm_control->item[seg->control_slot].pinned)
936 elog(ERROR, "cannot pin a segment that is already pinned");
937 dsm_impl_pin_segment(seg->handle, seg->impl_private, &handle);
938 dsm_control->item[seg->control_slot].pinned = true;
939 dsm_control->item[seg->control_slot].refcnt++;
940 dsm_control->item[seg->control_slot].impl_private_pm_handle = handle;
941 LWLockRelease(DynamicSharedMemoryControlLock);
945 * Unpin a dynamic shared memory segment that was previously pinned with
946 * dsm_pin_segment. This function should not be called unless dsm_pin_segment
947 * was previously called for this segment.
949 * The argument is a dsm_handle rather than a dsm_segment in case you want
950 * to unpin a segment to which you haven't attached. This turns out to be
951 * useful if, for example, a reference to one shared memory segment is stored
952 * within another shared memory segment. You might want to unpin the
953 * referenced segment before destroying the referencing segment.
955 void
956 dsm_unpin_segment(dsm_handle handle)
958 uint32 control_slot = INVALID_CONTROL_SLOT;
959 bool destroy = false;
960 uint32 i;
962 /* Find the control slot for the given handle. */
963 LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
964 for (i = 0; i < dsm_control->nitems; ++i)
966 /* Skip unused slots and segments that are concurrently going away. */
967 if (dsm_control->item[i].refcnt <= 1)
968 continue;
970 /* If we've found our handle, we can stop searching. */
971 if (dsm_control->item[i].handle == handle)
973 control_slot = i;
974 break;
979 * We should definitely have found the slot, and it should not already be
980 * in the process of going away, because this function should only be
981 * called on a segment which is pinned.
983 if (control_slot == INVALID_CONTROL_SLOT)
984 elog(ERROR, "cannot unpin unknown segment handle");
985 if (!dsm_control->item[control_slot].pinned)
986 elog(ERROR, "cannot unpin a segment that is not pinned");
987 Assert(dsm_control->item[control_slot].refcnt > 1);
990 * Allow implementation-specific code to run. We have to do this before
991 * releasing the lock, because impl_private_pm_handle may get modified by
992 * dsm_impl_unpin_segment.
994 dsm_impl_unpin_segment(handle,
995 &dsm_control->item[control_slot].impl_private_pm_handle);
997 /* Note that 1 means no references (0 means unused slot). */
998 if (--dsm_control->item[control_slot].refcnt == 1)
999 destroy = true;
1000 dsm_control->item[control_slot].pinned = false;
1002 /* Now we can release the lock. */
1003 LWLockRelease(DynamicSharedMemoryControlLock);
1005 /* Clean up resources if that was the last reference. */
1006 if (destroy)
1008 void *junk_impl_private = NULL;
1009 void *junk_mapped_address = NULL;
1010 Size junk_mapped_size = 0;
1013 * For an explanation of how error handling works in this case, see
1014 * comments in dsm_detach. Note that if we reach this point, the
1015 * current process certainly does not have the segment mapped, because
1016 * if it did, the reference count would have still been greater than 1
1017 * even after releasing the reference count held by the pin. The fact
1018 * that there can't be a dsm_segment for this handle makes it OK to
1019 * pass the mapped size, mapped address, and private data as NULL
1020 * here.
1022 if (is_main_region_dsm_handle(handle) ||
1023 dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
1024 &junk_mapped_address, &junk_mapped_size, WARNING))
1026 LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
1027 if (is_main_region_dsm_handle(handle))
1028 FreePageManagerPut((FreePageManager *) dsm_main_space_begin,
1029 dsm_control->item[control_slot].first_page,
1030 dsm_control->item[control_slot].npages);
1031 Assert(dsm_control->item[control_slot].handle == handle);
1032 Assert(dsm_control->item[control_slot].refcnt == 1);
1033 dsm_control->item[control_slot].refcnt = 0;
1034 LWLockRelease(DynamicSharedMemoryControlLock);
1040 * Find an existing mapping for a shared memory segment, if there is one.
1042 dsm_segment *
1043 dsm_find_mapping(dsm_handle h)
1045 dlist_iter iter;
1046 dsm_segment *seg;
1048 dlist_foreach(iter, &dsm_segment_list)
1050 seg = dlist_container(dsm_segment, node, iter.cur);
1051 if (seg->handle == h)
1052 return seg;
1055 return NULL;
1059 * Get the address at which a dynamic shared memory segment is mapped.
1061 void *
1062 dsm_segment_address(dsm_segment *seg)
1064 Assert(seg->mapped_address != NULL);
1065 return seg->mapped_address;
1069 * Get the size of a mapping.
1071 Size
1072 dsm_segment_map_length(dsm_segment *seg)
1074 Assert(seg->mapped_address != NULL);
1075 return seg->mapped_size;
1079 * Get a handle for a mapping.
1081 * To establish communication via dynamic shared memory between two backends,
1082 * one of them should first call dsm_create() to establish a new shared
1083 * memory mapping. That process should then call dsm_segment_handle() to
1084 * obtain a handle for the mapping, and pass that handle to the
1085 * coordinating backend via some means (e.g. bgw_main_arg, or via the
1086 * main shared memory segment). The recipient, once in possession of the
1087 * handle, should call dsm_attach().
1089 dsm_handle
1090 dsm_segment_handle(dsm_segment *seg)
1092 return seg->handle;
1096 * Register an on-detach callback for a dynamic shared memory segment.
1098 void
1099 on_dsm_detach(dsm_segment *seg, on_dsm_detach_callback function, Datum arg)
1101 dsm_segment_detach_callback *cb;
1103 cb = MemoryContextAlloc(TopMemoryContext,
1104 sizeof(dsm_segment_detach_callback));
1105 cb->function = function;
1106 cb->arg = arg;
1107 slist_push_head(&seg->on_detach, &cb->node);
1111 * Unregister an on-detach callback for a dynamic shared memory segment.
1113 void
1114 cancel_on_dsm_detach(dsm_segment *seg, on_dsm_detach_callback function,
1115 Datum arg)
1117 slist_mutable_iter iter;
1119 slist_foreach_modify(iter, &seg->on_detach)
1121 dsm_segment_detach_callback *cb;
1123 cb = slist_container(dsm_segment_detach_callback, node, iter.cur);
1124 if (cb->function == function && cb->arg == arg)
1126 slist_delete_current(&iter);
1127 pfree(cb);
1128 break;
1134 * Discard all registered on-detach callbacks without executing them.
1136 void
1137 reset_on_dsm_detach(void)
1139 dlist_iter iter;
1141 dlist_foreach(iter, &dsm_segment_list)
1143 dsm_segment *seg = dlist_container(dsm_segment, node, iter.cur);
1145 /* Throw away explicit on-detach actions one by one. */
1146 while (!slist_is_empty(&seg->on_detach))
1148 slist_node *node;
1149 dsm_segment_detach_callback *cb;
1151 node = slist_pop_head_node(&seg->on_detach);
1152 cb = slist_container(dsm_segment_detach_callback, node, node);
1153 pfree(cb);
1157 * Decrementing the reference count is a sort of implicit on-detach
1158 * action; make sure we don't do that, either.
1160 seg->control_slot = INVALID_CONTROL_SLOT;
1165 * Create a segment descriptor.
1167 static dsm_segment *
1168 dsm_create_descriptor(void)
1170 dsm_segment *seg;
1172 if (CurrentResourceOwner)
1173 ResourceOwnerEnlargeDSMs(CurrentResourceOwner);
1175 seg = MemoryContextAlloc(TopMemoryContext, sizeof(dsm_segment));
1176 dlist_push_head(&dsm_segment_list, &seg->node);
1178 /* seg->handle must be initialized by the caller */
1179 seg->control_slot = INVALID_CONTROL_SLOT;
1180 seg->impl_private = NULL;
1181 seg->mapped_address = NULL;
1182 seg->mapped_size = 0;
1184 seg->resowner = CurrentResourceOwner;
1185 if (CurrentResourceOwner)
1186 ResourceOwnerRememberDSM(CurrentResourceOwner, seg);
1188 slist_init(&seg->on_detach);
1190 return seg;
1194 * Sanity check a control segment.
1196 * The goal here isn't to detect everything that could possibly be wrong with
1197 * the control segment; there's not enough information for that. Rather, the
1198 * goal is to make sure that someone can iterate over the items in the segment
1199 * without overrunning the end of the mapping and crashing. We also check
1200 * the magic number since, if that's messed up, this may not even be one of
1201 * our segments at all.
1203 static bool
1204 dsm_control_segment_sane(dsm_control_header *control, Size mapped_size)
1206 if (mapped_size < offsetof(dsm_control_header, item))
1207 return false; /* Mapped size too short to read header. */
1208 if (control->magic != PG_DYNSHMEM_CONTROL_MAGIC)
1209 return false; /* Magic number doesn't match. */
1210 if (dsm_control_bytes_needed(control->maxitems) > mapped_size)
1211 return false; /* Max item count won't fit in map. */
1212 if (control->nitems > control->maxitems)
1213 return false; /* Overfull. */
1214 return true;
1218 * Compute the number of control-segment bytes needed to store a given
1219 * number of items.
1221 static uint64
1222 dsm_control_bytes_needed(uint32 nitems)
1224 return offsetof(dsm_control_header, item)
1225 + sizeof(dsm_control_item) * (uint64) nitems;
1228 static inline dsm_handle
1229 make_main_region_dsm_handle(int slot)
1231 dsm_handle handle;
1234 * We need to create a handle that doesn't collide with any existing extra
1235 * segment created by dsm_impl_op(), so we'll make it odd. It also
1236 * mustn't collide with any other main area pseudo-segment, so we'll
1237 * include the slot number in some of the bits. We also want to make an
1238 * effort to avoid newly created and recently destroyed handles from being
1239 * confused, so we'll make the rest of the bits random.
1241 handle = 1;
1242 handle |= slot << 1;
1243 handle |= pg_prng_uint32(&pg_global_prng_state) << (pg_leftmost_one_pos32(dsm_control->maxitems) + 1);
1244 return handle;
1247 static inline bool
1248 is_main_region_dsm_handle(dsm_handle handle)
1250 return handle & 1;