Update copyright for 2022
[pgsql.git] / src / backend / storage / ipc / shmem.c
blobc682775db4405f75d0be782bc14c9f4515fdb604
1 /*-------------------------------------------------------------------------
3 * shmem.c
4 * create shared memory and initialize shared memory data structures.
6 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
10 * IDENTIFICATION
11 * src/backend/storage/ipc/shmem.c
13 *-------------------------------------------------------------------------
16 * POSTGRES processes share one or more regions of shared memory.
17 * The shared memory is created by a postmaster and is inherited
18 * by each backend via fork() (or, in some ports, via other OS-specific
19 * methods). The routines in this file are used for allocating and
20 * binding to shared memory data structures.
22 * NOTES:
23 * (a) There are three kinds of shared memory data structures
24 * available to POSTGRES: fixed-size structures, queues and hash
25 * tables. Fixed-size structures contain things like global variables
26 * for a module and should never be allocated after the shared memory
27 * initialization phase. Hash tables have a fixed maximum size, but
28 * their actual size can vary dynamically. When entries are added
29 * to the table, more space is allocated. Queues link data structures
30 * that have been allocated either within fixed-size structures or as hash
31 * buckets. Each shared data structure has a string name to identify
32 * it (assigned in the module that declares it).
34 * (b) During initialization, each module looks for its
35 * shared data structures in a hash table called the "Shmem Index".
36 * If the data structure is not present, the caller can allocate
37 * a new one and initialize it. If the data structure is present,
38 * the caller "attaches" to the structure by initializing a pointer
39 * in the local address space.
40 * The shmem index has two purposes: first, it gives us
41 * a simple model of how the world looks when a backend process
42 * initializes. If something is present in the shmem index,
43 * it is initialized. If it is not, it is uninitialized. Second,
44 * the shmem index allows us to allocate shared memory on demand
45 * instead of trying to preallocate structures and hard-wire the
46 * sizes and locations in header files. If you are using a lot
47 * of shared memory in a lot of different places (and changing
48 * things during development), this is important.
50 * (c) In standard Unix-ish environments, individual backends do not
51 * need to re-establish their local pointers into shared memory, because
52 * they inherit correct values of those variables via fork() from the
53 * postmaster. However, this does not work in the EXEC_BACKEND case.
54 * In ports using EXEC_BACKEND, new backends have to set up their local
55 * pointers using the method described in (b) above.
57 * (d) memory allocation model: shared memory can never be
58 * freed, once allocated. Each hash table has its own free list,
59 * so hash buckets can be reused when an item is deleted. However,
60 * if one hash table grows very large and then shrinks, its space
61 * cannot be redistributed to other tables. We could build a simple
62 * hash bucket garbage collector if need be. Right now, it seems
63 * unnecessary.
66 #include "postgres.h"
68 #include "access/transam.h"
69 #include "fmgr.h"
70 #include "funcapi.h"
71 #include "miscadmin.h"
72 #include "storage/lwlock.h"
73 #include "storage/pg_shmem.h"
74 #include "storage/shmem.h"
75 #include "storage/spin.h"
76 #include "utils/builtins.h"
78 static void *ShmemAllocRaw(Size size, Size *allocated_size);
80 /* shared memory global variables */
82 static PGShmemHeader *ShmemSegHdr; /* shared mem segment header */
84 static void *ShmemBase; /* start address of shared memory */
86 static void *ShmemEnd; /* end+1 address of shared memory */
88 slock_t *ShmemLock; /* spinlock for shared memory and LWLock
89 * allocation */
91 static HTAB *ShmemIndex = NULL; /* primary index hashtable for shmem */
95 * InitShmemAccess() --- set up basic pointers to shared memory.
97 * Note: the argument should be declared "PGShmemHeader *seghdr",
98 * but we use void to avoid having to include ipc.h in shmem.h.
100 void
101 InitShmemAccess(void *seghdr)
103 PGShmemHeader *shmhdr = (PGShmemHeader *) seghdr;
105 ShmemSegHdr = shmhdr;
106 ShmemBase = (void *) shmhdr;
107 ShmemEnd = (char *) ShmemBase + shmhdr->totalsize;
111 * InitShmemAllocation() --- set up shared-memory space allocation.
113 * This should be called only in the postmaster or a standalone backend.
115 void
116 InitShmemAllocation(void)
118 PGShmemHeader *shmhdr = ShmemSegHdr;
119 char *aligned;
121 Assert(shmhdr != NULL);
124 * Initialize the spinlock used by ShmemAlloc. We must use
125 * ShmemAllocUnlocked, since obviously ShmemAlloc can't be called yet.
127 ShmemLock = (slock_t *) ShmemAllocUnlocked(sizeof(slock_t));
129 SpinLockInit(ShmemLock);
132 * Allocations after this point should go through ShmemAlloc, which
133 * expects to allocate everything on cache line boundaries. Make sure the
134 * first allocation begins on a cache line boundary.
136 aligned = (char *)
137 (CACHELINEALIGN((((char *) shmhdr) + shmhdr->freeoffset)));
138 shmhdr->freeoffset = aligned - (char *) shmhdr;
140 /* ShmemIndex can't be set up yet (need LWLocks first) */
141 shmhdr->index = NULL;
142 ShmemIndex = (HTAB *) NULL;
145 * Initialize ShmemVariableCache for transaction manager. (This doesn't
146 * really belong here, but not worth moving.)
148 ShmemVariableCache = (VariableCache)
149 ShmemAlloc(sizeof(*ShmemVariableCache));
150 memset(ShmemVariableCache, 0, sizeof(*ShmemVariableCache));
154 * ShmemAlloc -- allocate max-aligned chunk from shared memory
156 * Throws error if request cannot be satisfied.
158 * Assumes ShmemLock and ShmemSegHdr are initialized.
160 void *
161 ShmemAlloc(Size size)
163 void *newSpace;
164 Size allocated_size;
166 newSpace = ShmemAllocRaw(size, &allocated_size);
167 if (!newSpace)
168 ereport(ERROR,
169 (errcode(ERRCODE_OUT_OF_MEMORY),
170 errmsg("out of shared memory (%zu bytes requested)",
171 size)));
172 return newSpace;
176 * ShmemAllocNoError -- allocate max-aligned chunk from shared memory
178 * As ShmemAlloc, but returns NULL if out of space, rather than erroring.
180 void *
181 ShmemAllocNoError(Size size)
183 Size allocated_size;
185 return ShmemAllocRaw(size, &allocated_size);
189 * ShmemAllocRaw -- allocate align chunk and return allocated size
191 * Also sets *allocated_size to the number of bytes allocated, which will
192 * be equal to the number requested plus any padding we choose to add.
194 static void *
195 ShmemAllocRaw(Size size, Size *allocated_size)
197 Size newStart;
198 Size newFree;
199 void *newSpace;
202 * Ensure all space is adequately aligned. We used to only MAXALIGN this
203 * space but experience has proved that on modern systems that is not good
204 * enough. Many parts of the system are very sensitive to critical data
205 * structures getting split across cache line boundaries. To avoid that,
206 * attempt to align the beginning of the allocation to a cache line
207 * boundary. The calling code will still need to be careful about how it
208 * uses the allocated space - e.g. by padding each element in an array of
209 * structures out to a power-of-two size - but without this, even that
210 * won't be sufficient.
212 size = CACHELINEALIGN(size);
213 *allocated_size = size;
215 Assert(ShmemSegHdr != NULL);
217 SpinLockAcquire(ShmemLock);
219 newStart = ShmemSegHdr->freeoffset;
221 newFree = newStart + size;
222 if (newFree <= ShmemSegHdr->totalsize)
224 newSpace = (void *) ((char *) ShmemBase + newStart);
225 ShmemSegHdr->freeoffset = newFree;
227 else
228 newSpace = NULL;
230 SpinLockRelease(ShmemLock);
232 /* note this assert is okay with newSpace == NULL */
233 Assert(newSpace == (void *) CACHELINEALIGN(newSpace));
235 return newSpace;
239 * ShmemAllocUnlocked -- allocate max-aligned chunk from shared memory
241 * Allocate space without locking ShmemLock. This should be used for,
242 * and only for, allocations that must happen before ShmemLock is ready.
244 * We consider maxalign, rather than cachealign, sufficient here.
246 void *
247 ShmemAllocUnlocked(Size size)
249 Size newStart;
250 Size newFree;
251 void *newSpace;
254 * Ensure allocated space is adequately aligned.
256 size = MAXALIGN(size);
258 Assert(ShmemSegHdr != NULL);
260 newStart = ShmemSegHdr->freeoffset;
262 newFree = newStart + size;
263 if (newFree > ShmemSegHdr->totalsize)
264 ereport(ERROR,
265 (errcode(ERRCODE_OUT_OF_MEMORY),
266 errmsg("out of shared memory (%zu bytes requested)",
267 size)));
268 ShmemSegHdr->freeoffset = newFree;
270 newSpace = (void *) ((char *) ShmemBase + newStart);
272 Assert(newSpace == (void *) MAXALIGN(newSpace));
274 return newSpace;
278 * ShmemAddrIsValid -- test if an address refers to shared memory
280 * Returns true if the pointer points within the shared memory segment.
282 bool
283 ShmemAddrIsValid(const void *addr)
285 return (addr >= ShmemBase) && (addr < ShmemEnd);
289 * InitShmemIndex() --- set up or attach to shmem index table.
291 void
292 InitShmemIndex(void)
294 HASHCTL info;
297 * Create the shared memory shmem index.
299 * Since ShmemInitHash calls ShmemInitStruct, which expects the ShmemIndex
300 * hashtable to exist already, we have a bit of a circularity problem in
301 * initializing the ShmemIndex itself. The special "ShmemIndex" hash
302 * table name will tell ShmemInitStruct to fake it.
304 info.keysize = SHMEM_INDEX_KEYSIZE;
305 info.entrysize = sizeof(ShmemIndexEnt);
307 ShmemIndex = ShmemInitHash("ShmemIndex",
308 SHMEM_INDEX_SIZE, SHMEM_INDEX_SIZE,
309 &info,
310 HASH_ELEM | HASH_STRINGS);
314 * ShmemInitHash -- Create and initialize, or attach to, a
315 * shared memory hash table.
317 * We assume caller is doing some kind of synchronization
318 * so that two processes don't try to create/initialize the same
319 * table at once. (In practice, all creations are done in the postmaster
320 * process; child processes should always be attaching to existing tables.)
322 * max_size is the estimated maximum number of hashtable entries. This is
323 * not a hard limit, but the access efficiency will degrade if it is
324 * exceeded substantially (since it's used to compute directory size and
325 * the hash table buckets will get overfull).
327 * init_size is the number of hashtable entries to preallocate. For a table
328 * whose maximum size is certain, this should be equal to max_size; that
329 * ensures that no run-time out-of-shared-memory failures can occur.
331 * *infoP and hash_flags must specify at least the entry sizes and key
332 * comparison semantics (see hash_create()). Flag bits and values specific
333 * to shared-memory hash tables are added here, except that callers may
334 * choose to specify HASH_PARTITION and/or HASH_FIXED_SIZE.
336 * Note: before Postgres 9.0, this function returned NULL for some failure
337 * cases. Now, it always throws error instead, so callers need not check
338 * for NULL.
340 HTAB *
341 ShmemInitHash(const char *name, /* table string name for shmem index */
342 long init_size, /* initial table size */
343 long max_size, /* max size of the table */
344 HASHCTL *infoP, /* info about key and bucket size */
345 int hash_flags) /* info about infoP */
347 bool found;
348 void *location;
351 * Hash tables allocated in shared memory have a fixed directory; it can't
352 * grow or other backends wouldn't be able to find it. So, make sure we
353 * make it big enough to start with.
355 * The shared memory allocator must be specified too.
357 infoP->dsize = infoP->max_dsize = hash_select_dirsize(max_size);
358 infoP->alloc = ShmemAllocNoError;
359 hash_flags |= HASH_SHARED_MEM | HASH_ALLOC | HASH_DIRSIZE;
361 /* look it up in the shmem index */
362 location = ShmemInitStruct(name,
363 hash_get_shared_size(infoP, hash_flags),
364 &found);
367 * if it already exists, attach to it rather than allocate and initialize
368 * new space
370 if (found)
371 hash_flags |= HASH_ATTACH;
373 /* Pass location of hashtable header to hash_create */
374 infoP->hctl = (HASHHDR *) location;
376 return hash_create(name, init_size, infoP, hash_flags);
380 * ShmemInitStruct -- Create/attach to a structure in shared memory.
382 * This is called during initialization to find or allocate
383 * a data structure in shared memory. If no other process
384 * has created the structure, this routine allocates space
385 * for it. If it exists already, a pointer to the existing
386 * structure is returned.
388 * Returns: pointer to the object. *foundPtr is set true if the object was
389 * already in the shmem index (hence, already initialized).
391 * Note: before Postgres 9.0, this function returned NULL for some failure
392 * cases. Now, it always throws error instead, so callers need not check
393 * for NULL.
395 void *
396 ShmemInitStruct(const char *name, Size size, bool *foundPtr)
398 ShmemIndexEnt *result;
399 void *structPtr;
401 LWLockAcquire(ShmemIndexLock, LW_EXCLUSIVE);
403 if (!ShmemIndex)
405 PGShmemHeader *shmemseghdr = ShmemSegHdr;
407 /* Must be trying to create/attach to ShmemIndex itself */
408 Assert(strcmp(name, "ShmemIndex") == 0);
410 if (IsUnderPostmaster)
412 /* Must be initializing a (non-standalone) backend */
413 Assert(shmemseghdr->index != NULL);
414 structPtr = shmemseghdr->index;
415 *foundPtr = true;
417 else
420 * If the shmem index doesn't exist, we are bootstrapping: we must
421 * be trying to init the shmem index itself.
423 * Notice that the ShmemIndexLock is released before the shmem
424 * index has been initialized. This should be OK because no other
425 * process can be accessing shared memory yet.
427 Assert(shmemseghdr->index == NULL);
428 structPtr = ShmemAlloc(size);
429 shmemseghdr->index = structPtr;
430 *foundPtr = false;
432 LWLockRelease(ShmemIndexLock);
433 return structPtr;
436 /* look it up in the shmem index */
437 result = (ShmemIndexEnt *)
438 hash_search(ShmemIndex, name, HASH_ENTER_NULL, foundPtr);
440 if (!result)
442 LWLockRelease(ShmemIndexLock);
443 ereport(ERROR,
444 (errcode(ERRCODE_OUT_OF_MEMORY),
445 errmsg("could not create ShmemIndex entry for data structure \"%s\"",
446 name)));
449 if (*foundPtr)
452 * Structure is in the shmem index so someone else has allocated it
453 * already. The size better be the same as the size we are trying to
454 * initialize to, or there is a name conflict (or worse).
456 if (result->size != size)
458 LWLockRelease(ShmemIndexLock);
459 ereport(ERROR,
460 (errmsg("ShmemIndex entry size is wrong for data structure"
461 " \"%s\": expected %zu, actual %zu",
462 name, size, result->size)));
464 structPtr = result->location;
466 else
468 Size allocated_size;
470 /* It isn't in the table yet. allocate and initialize it */
471 structPtr = ShmemAllocRaw(size, &allocated_size);
472 if (structPtr == NULL)
474 /* out of memory; remove the failed ShmemIndex entry */
475 hash_search(ShmemIndex, name, HASH_REMOVE, NULL);
476 LWLockRelease(ShmemIndexLock);
477 ereport(ERROR,
478 (errcode(ERRCODE_OUT_OF_MEMORY),
479 errmsg("not enough shared memory for data structure"
480 " \"%s\" (%zu bytes requested)",
481 name, size)));
483 result->size = size;
484 result->allocated_size = allocated_size;
485 result->location = structPtr;
488 LWLockRelease(ShmemIndexLock);
490 Assert(ShmemAddrIsValid(structPtr));
492 Assert(structPtr == (void *) CACHELINEALIGN(structPtr));
494 return structPtr;
499 * Add two Size values, checking for overflow
501 Size
502 add_size(Size s1, Size s2)
504 Size result;
506 result = s1 + s2;
507 /* We are assuming Size is an unsigned type here... */
508 if (result < s1 || result < s2)
509 ereport(ERROR,
510 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
511 errmsg("requested shared memory size overflows size_t")));
512 return result;
516 * Multiply two Size values, checking for overflow
518 Size
519 mul_size(Size s1, Size s2)
521 Size result;
523 if (s1 == 0 || s2 == 0)
524 return 0;
525 result = s1 * s2;
526 /* We are assuming Size is an unsigned type here... */
527 if (result / s2 != s1)
528 ereport(ERROR,
529 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
530 errmsg("requested shared memory size overflows size_t")));
531 return result;
534 /* SQL SRF showing allocated shared memory */
535 Datum
536 pg_get_shmem_allocations(PG_FUNCTION_ARGS)
538 #define PG_GET_SHMEM_SIZES_COLS 4
539 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
540 TupleDesc tupdesc;
541 Tuplestorestate *tupstore;
542 MemoryContext per_query_ctx;
543 MemoryContext oldcontext;
544 HASH_SEQ_STATUS hstat;
545 ShmemIndexEnt *ent;
546 Size named_allocated = 0;
547 Datum values[PG_GET_SHMEM_SIZES_COLS];
548 bool nulls[PG_GET_SHMEM_SIZES_COLS];
550 /* check to see if caller supports us returning a tuplestore */
551 if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
552 ereport(ERROR,
553 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
554 errmsg("set-valued function called in context that cannot accept a set")));
555 if (!(rsinfo->allowedModes & SFRM_Materialize))
556 ereport(ERROR,
557 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
558 errmsg("materialize mode required, but it is not allowed in this context")));
560 /* Build a tuple descriptor for our result type */
561 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
562 elog(ERROR, "return type must be a row type");
564 per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
565 oldcontext = MemoryContextSwitchTo(per_query_ctx);
567 tupstore = tuplestore_begin_heap(true, false, work_mem);
568 rsinfo->returnMode = SFRM_Materialize;
569 rsinfo->setResult = tupstore;
570 rsinfo->setDesc = tupdesc;
572 MemoryContextSwitchTo(oldcontext);
574 LWLockAcquire(ShmemIndexLock, LW_SHARED);
576 hash_seq_init(&hstat, ShmemIndex);
578 /* output all allocated entries */
579 memset(nulls, 0, sizeof(nulls));
580 while ((ent = (ShmemIndexEnt *) hash_seq_search(&hstat)) != NULL)
582 values[0] = CStringGetTextDatum(ent->key);
583 values[1] = Int64GetDatum((char *) ent->location - (char *) ShmemSegHdr);
584 values[2] = Int64GetDatum(ent->size);
585 values[3] = Int64GetDatum(ent->allocated_size);
586 named_allocated += ent->allocated_size;
588 tuplestore_putvalues(tupstore, tupdesc, values, nulls);
591 /* output shared memory allocated but not counted via the shmem index */
592 values[0] = CStringGetTextDatum("<anonymous>");
593 nulls[1] = true;
594 values[2] = Int64GetDatum(ShmemSegHdr->freeoffset - named_allocated);
595 values[3] = values[2];
596 tuplestore_putvalues(tupstore, tupdesc, values, nulls);
598 /* output as-of-yet unused shared memory */
599 nulls[0] = true;
600 values[1] = Int64GetDatum(ShmemSegHdr->freeoffset);
601 nulls[1] = false;
602 values[2] = Int64GetDatum(ShmemSegHdr->totalsize - ShmemSegHdr->freeoffset);
603 values[3] = values[2];
604 tuplestore_putvalues(tupstore, tupdesc, values, nulls);
606 LWLockRelease(ShmemIndexLock);
608 tuplestore_donestoring(tupstore);
610 return (Datum) 0;