Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * shmem.c
4 : * create shared memory and initialize shared memory data structures.
5 : *
6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/storage/ipc/shmem.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : /*
16 : * POSTGRES processes share one or more regions of shared memory.
17 : * The shared memory is created by a postmaster and is inherited
18 : * by each backend via fork() (or, in some ports, via other OS-specific
19 : * methods). The routines in this file are used for allocating and
20 : * binding to shared memory data structures.
21 : *
22 : * NOTES:
23 : * (a) There are three kinds of shared memory data structures
24 : * available to POSTGRES: fixed-size structures, queues and hash
25 : * tables. Fixed-size structures contain things like global variables
26 : * for a module and should never be allocated after the shared memory
27 : * initialization phase. Hash tables have a fixed maximum size, but
28 : * their actual size can vary dynamically. When entries are added
29 : * to the table, more space is allocated. Queues link data structures
30 : * that have been allocated either within fixed-size structures or as hash
31 : * buckets. Each shared data structure has a string name to identify
32 : * it (assigned in the module that declares it).
33 : *
34 : * (b) During initialization, each module looks for its
35 : * shared data structures in a hash table called the "Shmem Index".
36 : * If the data structure is not present, the caller can allocate
37 : * a new one and initialize it. If the data structure is present,
38 : * the caller "attaches" to the structure by initializing a pointer
39 : * in the local address space.
40 : * The shmem index has two purposes: first, it gives us
41 : * a simple model of how the world looks when a backend process
42 : * initializes. If something is present in the shmem index,
43 : * it is initialized. If it is not, it is uninitialized. Second,
44 : * the shmem index allows us to allocate shared memory on demand
45 : * instead of trying to preallocate structures and hard-wire the
46 : * sizes and locations in header files. If you are using a lot
47 : * of shared memory in a lot of different places (and changing
48 : * things during development), this is important.
49 : *
50 : * (c) In standard Unix-ish environments, individual backends do not
51 : * need to re-establish their local pointers into shared memory, because
52 : * they inherit correct values of those variables via fork() from the
53 : * postmaster. However, this does not work in the EXEC_BACKEND case.
54 : * In ports using EXEC_BACKEND, new backends have to set up their local
55 : * pointers using the method described in (b) above.
56 : *
57 : * (d) memory allocation model: shared memory can never be
58 : * freed, once allocated. Each hash table has its own free list,
59 : * so hash buckets can be reused when an item is deleted. However,
60 : * if one hash table grows very large and then shrinks, its space
61 : * cannot be redistributed to other tables. We could build a simple
62 : * hash bucket garbage collector if need be. Right now, it seems
63 : * unnecessary.
64 : */
65 :
66 : #include "postgres.h"
67 :
68 : #include "fmgr.h"
69 : #include "funcapi.h"
70 : #include "miscadmin.h"
71 : #include "storage/lwlock.h"
72 : #include "storage/pg_shmem.h"
73 : #include "storage/shmem.h"
74 : #include "storage/spin.h"
75 : #include "utils/builtins.h"
76 :
77 : static void *ShmemAllocRaw(Size size, Size *allocated_size);
78 :
79 : /* shared memory global variables */
80 :
81 : static PGShmemHeader *ShmemSegHdr; /* shared mem segment header */
82 :
83 : static void *ShmemBase; /* start address of shared memory */
84 :
85 : static void *ShmemEnd; /* end+1 address of shared memory */
86 :
87 : slock_t *ShmemLock; /* spinlock for shared memory and LWLock
88 : * allocation */
89 :
90 : static HTAB *ShmemIndex = NULL; /* primary index hashtable for shmem */
91 :
92 :
93 : /*
94 : * InitShmemAccess() --- set up basic pointers to shared memory.
95 : */
96 : void
97 1918 : InitShmemAccess(PGShmemHeader *seghdr)
98 : {
99 1918 : ShmemSegHdr = seghdr;
100 1918 : ShmemBase = seghdr;
101 1918 : ShmemEnd = (char *) ShmemBase + seghdr->totalsize;
102 1918 : }
103 :
104 : /*
105 : * InitShmemAllocation() --- set up shared-memory space allocation.
106 : *
107 : * This should be called only in the postmaster or a standalone backend.
108 : */
109 : void
110 1918 : InitShmemAllocation(void)
111 : {
112 1918 : PGShmemHeader *shmhdr = ShmemSegHdr;
113 : char *aligned;
114 :
115 : Assert(shmhdr != NULL);
116 :
117 : /*
118 : * Initialize the spinlock used by ShmemAlloc. We must use
119 : * ShmemAllocUnlocked, since obviously ShmemAlloc can't be called yet.
120 : */
121 1918 : ShmemLock = (slock_t *) ShmemAllocUnlocked(sizeof(slock_t));
122 :
123 1918 : SpinLockInit(ShmemLock);
124 :
125 : /*
126 : * Allocations after this point should go through ShmemAlloc, which
127 : * expects to allocate everything on cache line boundaries. Make sure the
128 : * first allocation begins on a cache line boundary.
129 : */
130 1918 : aligned = (char *)
131 1918 : (CACHELINEALIGN((((char *) shmhdr) + shmhdr->freeoffset)));
132 1918 : shmhdr->freeoffset = aligned - (char *) shmhdr;
133 :
134 : /* ShmemIndex can't be set up yet (need LWLocks first) */
135 1918 : shmhdr->index = NULL;
136 1918 : ShmemIndex = (HTAB *) NULL;
137 1918 : }
138 :
139 : /*
140 : * ShmemAlloc -- allocate max-aligned chunk from shared memory
141 : *
142 : * Throws error if request cannot be satisfied.
143 : *
144 : * Assumes ShmemLock and ShmemSegHdr are initialized.
145 : */
146 : void *
147 19188 : ShmemAlloc(Size size)
148 : {
149 : void *newSpace;
150 : Size allocated_size;
151 :
152 19188 : newSpace = ShmemAllocRaw(size, &allocated_size);
153 19188 : if (!newSpace)
154 0 : ereport(ERROR,
155 : (errcode(ERRCODE_OUT_OF_MEMORY),
156 : errmsg("out of shared memory (%zu bytes requested)",
157 : size)));
158 19188 : return newSpace;
159 : }
160 :
161 : /*
162 : * ShmemAllocNoError -- allocate max-aligned chunk from shared memory
163 : *
164 : * As ShmemAlloc, but returns NULL if out of space, rather than erroring.
165 : */
166 : void *
167 780326 : ShmemAllocNoError(Size size)
168 : {
169 : Size allocated_size;
170 :
171 780326 : return ShmemAllocRaw(size, &allocated_size);
172 : }
173 :
174 : /*
175 : * ShmemAllocRaw -- allocate align chunk and return allocated size
176 : *
177 : * Also sets *allocated_size to the number of bytes allocated, which will
178 : * be equal to the number requested plus any padding we choose to add.
179 : */
180 : static void *
181 920364 : ShmemAllocRaw(Size size, Size *allocated_size)
182 : {
183 : Size newStart;
184 : Size newFree;
185 : void *newSpace;
186 :
187 : /*
188 : * Ensure all space is adequately aligned. We used to only MAXALIGN this
189 : * space but experience has proved that on modern systems that is not good
190 : * enough. Many parts of the system are very sensitive to critical data
191 : * structures getting split across cache line boundaries. To avoid that,
192 : * attempt to align the beginning of the allocation to a cache line
193 : * boundary. The calling code will still need to be careful about how it
194 : * uses the allocated space - e.g. by padding each element in an array of
195 : * structures out to a power-of-two size - but without this, even that
196 : * won't be sufficient.
197 : */
198 920364 : size = CACHELINEALIGN(size);
199 920364 : *allocated_size = size;
200 :
201 : Assert(ShmemSegHdr != NULL);
202 :
203 920364 : SpinLockAcquire(ShmemLock);
204 :
205 920364 : newStart = ShmemSegHdr->freeoffset;
206 :
207 920364 : newFree = newStart + size;
208 920364 : if (newFree <= ShmemSegHdr->totalsize)
209 : {
210 920364 : newSpace = (char *) ShmemBase + newStart;
211 920364 : ShmemSegHdr->freeoffset = newFree;
212 : }
213 : else
214 0 : newSpace = NULL;
215 :
216 920364 : SpinLockRelease(ShmemLock);
217 :
218 : /* note this assert is okay with newSpace == NULL */
219 : Assert(newSpace == (void *) CACHELINEALIGN(newSpace));
220 :
221 920364 : return newSpace;
222 : }
223 :
224 : /*
225 : * ShmemAllocUnlocked -- allocate max-aligned chunk from shared memory
226 : *
227 : * Allocate space without locking ShmemLock. This should be used for,
228 : * and only for, allocations that must happen before ShmemLock is ready.
229 : *
230 : * We consider maxalign, rather than cachealign, sufficient here.
231 : */
232 : void *
233 3836 : ShmemAllocUnlocked(Size size)
234 : {
235 : Size newStart;
236 : Size newFree;
237 : void *newSpace;
238 :
239 : /*
240 : * Ensure allocated space is adequately aligned.
241 : */
242 3836 : size = MAXALIGN(size);
243 :
244 : Assert(ShmemSegHdr != NULL);
245 :
246 3836 : newStart = ShmemSegHdr->freeoffset;
247 :
248 3836 : newFree = newStart + size;
249 3836 : if (newFree > ShmemSegHdr->totalsize)
250 0 : ereport(ERROR,
251 : (errcode(ERRCODE_OUT_OF_MEMORY),
252 : errmsg("out of shared memory (%zu bytes requested)",
253 : size)));
254 3836 : ShmemSegHdr->freeoffset = newFree;
255 :
256 3836 : newSpace = (char *) ShmemBase + newStart;
257 :
258 : Assert(newSpace == (void *) MAXALIGN(newSpace));
259 :
260 3836 : return newSpace;
261 : }
262 :
263 : /*
264 : * ShmemAddrIsValid -- test if an address refers to shared memory
265 : *
266 : * Returns true if the pointer points within the shared memory segment.
267 : */
268 : bool
269 0 : ShmemAddrIsValid(const void *addr)
270 : {
271 0 : return (addr >= ShmemBase) && (addr < ShmemEnd);
272 : }
273 :
274 : /*
275 : * InitShmemIndex() --- set up or attach to shmem index table.
276 : */
277 : void
278 1918 : InitShmemIndex(void)
279 : {
280 : HASHCTL info;
281 :
282 : /*
283 : * Create the shared memory shmem index.
284 : *
285 : * Since ShmemInitHash calls ShmemInitStruct, which expects the ShmemIndex
286 : * hashtable to exist already, we have a bit of a circularity problem in
287 : * initializing the ShmemIndex itself. The special "ShmemIndex" hash
288 : * table name will tell ShmemInitStruct to fake it.
289 : */
290 1918 : info.keysize = SHMEM_INDEX_KEYSIZE;
291 1918 : info.entrysize = sizeof(ShmemIndexEnt);
292 :
293 1918 : ShmemIndex = ShmemInitHash("ShmemIndex",
294 : SHMEM_INDEX_SIZE, SHMEM_INDEX_SIZE,
295 : &info,
296 : HASH_ELEM | HASH_STRINGS);
297 1918 : }
298 :
299 : /*
300 : * ShmemInitHash -- Create and initialize, or attach to, a
301 : * shared memory hash table.
302 : *
303 : * We assume caller is doing some kind of synchronization
304 : * so that two processes don't try to create/initialize the same
305 : * table at once. (In practice, all creations are done in the postmaster
306 : * process; child processes should always be attaching to existing tables.)
307 : *
308 : * max_size is the estimated maximum number of hashtable entries. This is
309 : * not a hard limit, but the access efficiency will degrade if it is
310 : * exceeded substantially (since it's used to compute directory size and
311 : * the hash table buckets will get overfull).
312 : *
313 : * init_size is the number of hashtable entries to preallocate. For a table
314 : * whose maximum size is certain, this should be equal to max_size; that
315 : * ensures that no run-time out-of-shared-memory failures can occur.
316 : *
317 : * *infoP and hash_flags must specify at least the entry sizes and key
318 : * comparison semantics (see hash_create()). Flag bits and values specific
319 : * to shared-memory hash tables are added here, except that callers may
320 : * choose to specify HASH_PARTITION and/or HASH_FIXED_SIZE.
321 : *
322 : * Note: before Postgres 9.0, this function returned NULL for some failure
323 : * cases. Now, it always throws error instead, so callers need not check
324 : * for NULL.
325 : */
326 : HTAB *
327 17276 : ShmemInitHash(const char *name, /* table string name for shmem index */
328 : long init_size, /* initial table size */
329 : long max_size, /* max size of the table */
330 : HASHCTL *infoP, /* info about key and bucket size */
331 : int hash_flags) /* info about infoP */
332 : {
333 : bool found;
334 : void *location;
335 :
336 : /*
337 : * Hash tables allocated in shared memory have a fixed directory; it can't
338 : * grow or other backends wouldn't be able to find it. So, make sure we
339 : * make it big enough to start with.
340 : *
341 : * The shared memory allocator must be specified too.
342 : */
343 17276 : infoP->dsize = infoP->max_dsize = hash_select_dirsize(max_size);
344 17276 : infoP->alloc = ShmemAllocNoError;
345 17276 : hash_flags |= HASH_SHARED_MEM | HASH_ALLOC | HASH_DIRSIZE;
346 :
347 : /* look it up in the shmem index */
348 17276 : location = ShmemInitStruct(name,
349 : hash_get_shared_size(infoP, hash_flags),
350 : &found);
351 :
352 : /*
353 : * if it already exists, attach to it rather than allocate and initialize
354 : * new space
355 : */
356 17276 : if (found)
357 0 : hash_flags |= HASH_ATTACH;
358 :
359 : /* Pass location of hashtable header to hash_create */
360 17276 : infoP->hctl = (HASHHDR *) location;
361 :
362 17276 : return hash_create(name, init_size, infoP, hash_flags);
363 : }
364 :
365 : /*
366 : * ShmemInitStruct -- Create/attach to a structure in shared memory.
367 : *
368 : * This is called during initialization to find or allocate
369 : * a data structure in shared memory. If no other process
370 : * has created the structure, this routine allocates space
371 : * for it. If it exists already, a pointer to the existing
372 : * structure is returned.
373 : *
374 : * Returns: pointer to the object. *foundPtr is set true if the object was
375 : * already in the shmem index (hence, already initialized).
376 : *
377 : * Note: before Postgres 9.0, this function returned NULL for some failure
378 : * cases. Now, it always throws error instead, so callers need not check
379 : * for NULL.
380 : */
381 : void *
382 122768 : ShmemInitStruct(const char *name, Size size, bool *foundPtr)
383 : {
384 : ShmemIndexEnt *result;
385 : void *structPtr;
386 :
387 122768 : LWLockAcquire(ShmemIndexLock, LW_EXCLUSIVE);
388 :
389 122768 : if (!ShmemIndex)
390 : {
391 1918 : PGShmemHeader *shmemseghdr = ShmemSegHdr;
392 :
393 : /* Must be trying to create/attach to ShmemIndex itself */
394 : Assert(strcmp(name, "ShmemIndex") == 0);
395 :
396 1918 : if (IsUnderPostmaster)
397 : {
398 : /* Must be initializing a (non-standalone) backend */
399 : Assert(shmemseghdr->index != NULL);
400 0 : structPtr = shmemseghdr->index;
401 0 : *foundPtr = true;
402 : }
403 : else
404 : {
405 : /*
406 : * If the shmem index doesn't exist, we are bootstrapping: we must
407 : * be trying to init the shmem index itself.
408 : *
409 : * Notice that the ShmemIndexLock is released before the shmem
410 : * index has been initialized. This should be OK because no other
411 : * process can be accessing shared memory yet.
412 : */
413 : Assert(shmemseghdr->index == NULL);
414 1918 : structPtr = ShmemAlloc(size);
415 1918 : shmemseghdr->index = structPtr;
416 1918 : *foundPtr = false;
417 : }
418 1918 : LWLockRelease(ShmemIndexLock);
419 1918 : return structPtr;
420 : }
421 :
422 : /* look it up in the shmem index */
423 : result = (ShmemIndexEnt *)
424 120850 : hash_search(ShmemIndex, name, HASH_ENTER_NULL, foundPtr);
425 :
426 120850 : if (!result)
427 : {
428 0 : LWLockRelease(ShmemIndexLock);
429 0 : ereport(ERROR,
430 : (errcode(ERRCODE_OUT_OF_MEMORY),
431 : errmsg("could not create ShmemIndex entry for data structure \"%s\"",
432 : name)));
433 : }
434 :
435 120850 : if (*foundPtr)
436 : {
437 : /*
438 : * Structure is in the shmem index so someone else has allocated it
439 : * already. The size better be the same as the size we are trying to
440 : * initialize to, or there is a name conflict (or worse).
441 : */
442 0 : if (result->size != size)
443 : {
444 0 : LWLockRelease(ShmemIndexLock);
445 0 : ereport(ERROR,
446 : (errmsg("ShmemIndex entry size is wrong for data structure"
447 : " \"%s\": expected %zu, actual %zu",
448 : name, size, result->size)));
449 : }
450 0 : structPtr = result->location;
451 : }
452 : else
453 : {
454 : Size allocated_size;
455 :
456 : /* It isn't in the table yet. allocate and initialize it */
457 120850 : structPtr = ShmemAllocRaw(size, &allocated_size);
458 120850 : if (structPtr == NULL)
459 : {
460 : /* out of memory; remove the failed ShmemIndex entry */
461 0 : hash_search(ShmemIndex, name, HASH_REMOVE, NULL);
462 0 : LWLockRelease(ShmemIndexLock);
463 0 : ereport(ERROR,
464 : (errcode(ERRCODE_OUT_OF_MEMORY),
465 : errmsg("not enough shared memory for data structure"
466 : " \"%s\" (%zu bytes requested)",
467 : name, size)));
468 : }
469 120850 : result->size = size;
470 120850 : result->allocated_size = allocated_size;
471 120850 : result->location = structPtr;
472 : }
473 :
474 120850 : LWLockRelease(ShmemIndexLock);
475 :
476 : Assert(ShmemAddrIsValid(structPtr));
477 :
478 : Assert(structPtr == (void *) CACHELINEALIGN(structPtr));
479 :
480 120850 : return structPtr;
481 : }
482 :
483 :
484 : /*
485 : * Add two Size values, checking for overflow
486 : */
487 : Size
488 879038 : add_size(Size s1, Size s2)
489 : {
490 : Size result;
491 :
492 879038 : result = s1 + s2;
493 : /* We are assuming Size is an unsigned type here... */
494 879038 : if (result < s1 || result < s2)
495 0 : ereport(ERROR,
496 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
497 : errmsg("requested shared memory size overflows size_t")));
498 879038 : return result;
499 : }
500 :
501 : /*
502 : * Multiply two Size values, checking for overflow
503 : */
504 : Size
505 373578 : mul_size(Size s1, Size s2)
506 : {
507 : Size result;
508 :
509 373578 : if (s1 == 0 || s2 == 0)
510 19490 : return 0;
511 354088 : result = s1 * s2;
512 : /* We are assuming Size is an unsigned type here... */
513 354088 : if (result / s2 != s1)
514 0 : ereport(ERROR,
515 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
516 : errmsg("requested shared memory size overflows size_t")));
517 354088 : return result;
518 : }
519 :
520 : /* SQL SRF showing allocated shared memory */
521 : Datum
522 6 : pg_get_shmem_allocations(PG_FUNCTION_ARGS)
523 : {
524 : #define PG_GET_SHMEM_SIZES_COLS 4
525 6 : ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
526 : HASH_SEQ_STATUS hstat;
527 : ShmemIndexEnt *ent;
528 6 : Size named_allocated = 0;
529 : Datum values[PG_GET_SHMEM_SIZES_COLS];
530 : bool nulls[PG_GET_SHMEM_SIZES_COLS];
531 :
532 6 : InitMaterializedSRF(fcinfo, 0);
533 :
534 6 : LWLockAcquire(ShmemIndexLock, LW_SHARED);
535 :
536 6 : hash_seq_init(&hstat, ShmemIndex);
537 :
538 : /* output all allocated entries */
539 6 : memset(nulls, 0, sizeof(nulls));
540 388 : while ((ent = (ShmemIndexEnt *) hash_seq_search(&hstat)) != NULL)
541 : {
542 382 : values[0] = CStringGetTextDatum(ent->key);
543 382 : values[1] = Int64GetDatum((char *) ent->location - (char *) ShmemSegHdr);
544 382 : values[2] = Int64GetDatum(ent->size);
545 382 : values[3] = Int64GetDatum(ent->allocated_size);
546 382 : named_allocated += ent->allocated_size;
547 :
548 382 : tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
549 : values, nulls);
550 : }
551 :
552 : /* output shared memory allocated but not counted via the shmem index */
553 6 : values[0] = CStringGetTextDatum("<anonymous>");
554 6 : nulls[1] = true;
555 6 : values[2] = Int64GetDatum(ShmemSegHdr->freeoffset - named_allocated);
556 6 : values[3] = values[2];
557 6 : tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
558 :
559 : /* output as-of-yet unused shared memory */
560 6 : nulls[0] = true;
561 6 : values[1] = Int64GetDatum(ShmemSegHdr->freeoffset);
562 6 : nulls[1] = false;
563 6 : values[2] = Int64GetDatum(ShmemSegHdr->totalsize - ShmemSegHdr->freeoffset);
564 6 : values[3] = values[2];
565 6 : tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
566 :
567 6 : LWLockRelease(ShmemIndexLock);
568 :
569 6 : return (Datum) 0;
570 : }
|