Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * shmem.c
4 : * create shared memory and initialize shared memory data structures.
5 : *
6 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/storage/ipc/shmem.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : /*
16 : * POSTGRES processes share one or more regions of shared memory.
17 : * The shared memory is created by a postmaster and is inherited
18 : * by each backend via fork() (or, in some ports, via other OS-specific
19 : * methods). The routines in this file are used for allocating and
20 : * binding to shared memory data structures.
21 : *
22 : * NOTES:
23 : * (a) There are three kinds of shared memory data structures
24 : * available to POSTGRES: fixed-size structures, queues and hash
25 : * tables. Fixed-size structures contain things like global variables
26 : * for a module and should never be allocated after the shared memory
27 : * initialization phase. Hash tables have a fixed maximum size, but
28 : * their actual size can vary dynamically. When entries are added
29 : * to the table, more space is allocated. Queues link data structures
30 : * that have been allocated either within fixed-size structures or as hash
31 : * buckets. Each shared data structure has a string name to identify
32 : * it (assigned in the module that declares it).
33 : *
34 : * (b) During initialization, each module looks for its
35 : * shared data structures in a hash table called the "Shmem Index".
36 : * If the data structure is not present, the caller can allocate
37 : * a new one and initialize it. If the data structure is present,
38 : * the caller "attaches" to the structure by initializing a pointer
39 : * in the local address space.
40 : * The shmem index has two purposes: first, it gives us
41 : * a simple model of how the world looks when a backend process
42 : * initializes. If something is present in the shmem index,
43 : * it is initialized. If it is not, it is uninitialized. Second,
44 : * the shmem index allows us to allocate shared memory on demand
45 : * instead of trying to preallocate structures and hard-wire the
46 : * sizes and locations in header files. If you are using a lot
47 : * of shared memory in a lot of different places (and changing
48 : * things during development), this is important.
49 : *
50 : * (c) In standard Unix-ish environments, individual backends do not
51 : * need to re-establish their local pointers into shared memory, because
52 : * they inherit correct values of those variables via fork() from the
53 : * postmaster. However, this does not work in the EXEC_BACKEND case.
54 : * In ports using EXEC_BACKEND, new backends have to set up their local
55 : * pointers using the method described in (b) above.
56 : *
57 : * (d) memory allocation model: shared memory can never be
58 : * freed, once allocated. Each hash table has its own free list,
59 : * so hash buckets can be reused when an item is deleted. However,
60 : * if one hash table grows very large and then shrinks, its space
61 : * cannot be redistributed to other tables. We could build a simple
62 : * hash bucket garbage collector if need be. Right now, it seems
63 : * unnecessary.
64 : */
65 :
66 : #include "postgres.h"
67 :
68 : #include "fmgr.h"
69 : #include "funcapi.h"
70 : #include "miscadmin.h"
71 : #include "storage/lwlock.h"
72 : #include "storage/pg_shmem.h"
73 : #include "storage/shmem.h"
74 : #include "storage/spin.h"
75 : #include "utils/builtins.h"
76 :
77 : static void *ShmemAllocRaw(Size size, Size *allocated_size);
78 :
79 : /* shared memory global variables */
80 :
81 : static PGShmemHeader *ShmemSegHdr; /* shared mem segment header; set by InitShmemAccess() */
82 :
83 : static void *ShmemBase; /* start address of shared memory (same address as the segment header) */
84 :
85 : static void *ShmemEnd; /* end+1 address of shared memory, i.e. ShmemBase + totalsize */
86 :
87 : slock_t *ShmemLock; /* spinlock for shared memory and LWLock
88 : * allocation; created by InitShmemAllocation() */
89 :
90 : static HTAB *ShmemIndex = NULL; /* primary index hashtable for shmem; NULL until InitShmemIndex() runs */
91 :
92 :
93 : /*
94 : * InitShmemAccess() --- set up basic pointers to shared memory.
95 : *
96 : * Note: the argument should be declared "PGShmemHeader *seghdr",
97 : * but we use void to avoid having to include ipc.h in shmem.h.
98 : */
99 : void
100 1902 : InitShmemAccess(void *seghdr)
101 : {
102 1902 : PGShmemHeader *shmhdr = (PGShmemHeader *) seghdr;
103 :
104 1902 : ShmemSegHdr = shmhdr;
105 1902 : ShmemBase = (void *) shmhdr;
106 1902 : ShmemEnd = (char *) ShmemBase + shmhdr->totalsize;
107 1902 : }
108 :
109 : /*
110 : * InitShmemAllocation() --- set up shared-memory space allocation.
111 : *
112 : * This should be called only in the postmaster or a standalone backend.
113 : */
114 : void
115 1902 : InitShmemAllocation(void)
116 : {
117 1902 : PGShmemHeader *shmhdr = ShmemSegHdr;
118 : char *aligned;
119 :
120 : Assert(shmhdr != NULL);
121 :
122 : /*
123 : * Initialize the spinlock used by ShmemAlloc. We must use
124 : * ShmemAllocUnlocked, since obviously ShmemAlloc can't be called yet.
125 : */
126 1902 : ShmemLock = (slock_t *) ShmemAllocUnlocked(sizeof(slock_t));
127 :
128 1902 : SpinLockInit(ShmemLock);
129 :
130 : /*
131 : * Allocations after this point should go through ShmemAlloc, which
132 : * expects to allocate everything on cache line boundaries. Make sure the
133 : * first allocation begins on a cache line boundary.
134 : */
135 1902 : aligned = (char *)
136 1902 : (CACHELINEALIGN((((char *) shmhdr) + shmhdr->freeoffset)));
137 1902 : shmhdr->freeoffset = aligned - (char *) shmhdr;
138 :
139 : /* ShmemIndex can't be set up yet (need LWLocks first) */
140 1902 : shmhdr->index = NULL;
141 1902 : ShmemIndex = (HTAB *) NULL;
142 1902 : }
143 :
144 : /*
145 : * ShmemAlloc -- allocate max-aligned chunk from shared memory
146 : *
147 : * Throws error if request cannot be satisfied.
148 : *
149 : * Assumes ShmemLock and ShmemSegHdr are initialized.
150 : */
151 : void *
152 19028 : ShmemAlloc(Size size)
153 : {
154 : void *newSpace;
155 : Size allocated_size;
156 :
157 19028 : newSpace = ShmemAllocRaw(size, &allocated_size);
158 19028 : if (!newSpace)
159 0 : ereport(ERROR,
160 : (errcode(ERRCODE_OUT_OF_MEMORY),
161 : errmsg("out of shared memory (%zu bytes requested)",
162 : size)));
163 19028 : return newSpace;
164 : }
165 :
166 : /*
167 : * ShmemAllocNoError -- allocate max-aligned chunk from shared memory
168 : *
169 : * As ShmemAlloc, but returns NULL if out of space, rather than erroring.
170 : */
171 : void *
172 666608 : ShmemAllocNoError(Size size)
173 : {
174 : Size allocated_size;
175 :
176 666608 : return ShmemAllocRaw(size, &allocated_size);
177 : }
178 :
179 : /*
180 : * ShmemAllocRaw -- allocate align chunk and return allocated size
181 : *
182 : * Also sets *allocated_size to the number of bytes allocated, which will
183 : * be equal to the number requested plus any padding we choose to add.
184 : */
185 : static void *
186 805478 : ShmemAllocRaw(Size size, Size *allocated_size)
187 : {
188 : Size newStart;
189 : Size newFree;
190 : void *newSpace;
191 :
192 : /*
193 : * Ensure all space is adequately aligned. We used to only MAXALIGN this
194 : * space but experience has proved that on modern systems that is not good
195 : * enough. Many parts of the system are very sensitive to critical data
196 : * structures getting split across cache line boundaries. To avoid that,
197 : * attempt to align the beginning of the allocation to a cache line
198 : * boundary. The calling code will still need to be careful about how it
199 : * uses the allocated space - e.g. by padding each element in an array of
200 : * structures out to a power-of-two size - but without this, even that
201 : * won't be sufficient.
202 : */
203 805478 : size = CACHELINEALIGN(size);
204 805478 : *allocated_size = size;
205 :
206 : Assert(ShmemSegHdr != NULL);
207 :
208 805478 : SpinLockAcquire(ShmemLock);
209 :
210 805478 : newStart = ShmemSegHdr->freeoffset;
211 :
212 805478 : newFree = newStart + size;
213 805478 : if (newFree <= ShmemSegHdr->totalsize)
214 : {
215 805478 : newSpace = (void *) ((char *) ShmemBase + newStart);
216 805478 : ShmemSegHdr->freeoffset = newFree;
217 : }
218 : else
219 0 : newSpace = NULL;
220 :
221 805478 : SpinLockRelease(ShmemLock);
222 :
223 : /* note this assert is okay with newSpace == NULL */
224 : Assert(newSpace == (void *) CACHELINEALIGN(newSpace));
225 :
226 805478 : return newSpace;
227 : }
228 :
229 : /*
230 : * ShmemAllocUnlocked -- allocate max-aligned chunk from shared memory
231 : *
232 : * Allocate space without locking ShmemLock. This should be used for,
233 : * and only for, allocations that must happen before ShmemLock is ready.
234 : *
235 : * We consider maxalign, rather than cachealign, sufficient here.
236 : */
237 : void *
238 3804 : ShmemAllocUnlocked(Size size)
239 : {
240 : Size newStart;
241 : Size newFree;
242 : void *newSpace;
243 :
244 : /*
245 : * Ensure allocated space is adequately aligned.
246 : */
247 3804 : size = MAXALIGN(size);
248 :
249 : Assert(ShmemSegHdr != NULL);
250 :
251 3804 : newStart = ShmemSegHdr->freeoffset;
252 :
253 3804 : newFree = newStart + size;
254 3804 : if (newFree > ShmemSegHdr->totalsize)
255 0 : ereport(ERROR,
256 : (errcode(ERRCODE_OUT_OF_MEMORY),
257 : errmsg("out of shared memory (%zu bytes requested)",
258 : size)));
259 3804 : ShmemSegHdr->freeoffset = newFree;
260 :
261 3804 : newSpace = (void *) ((char *) ShmemBase + newStart);
262 :
263 : Assert(newSpace == (void *) MAXALIGN(newSpace));
264 :
265 3804 : return newSpace;
266 : }
267 :
268 : /*
269 : * ShmemAddrIsValid -- test if an address refers to shared memory
270 : *
271 : * Returns true if the pointer points within the shared memory segment.
272 : */
273 : bool
274 0 : ShmemAddrIsValid(const void *addr)
275 : {
276 0 : return (addr >= ShmemBase) && (addr < ShmemEnd);
277 : }
278 :
279 : /*
280 : * InitShmemIndex() --- set up or attach to shmem index table.
281 : */
282 : void
283 1902 : InitShmemIndex(void)
284 : {
285 : HASHCTL info;
286 :
287 : /*
288 : * Create the shared memory shmem index.
289 : *
290 : * Since ShmemInitHash calls ShmemInitStruct, which expects the ShmemIndex
291 : * hashtable to exist already, we have a bit of a circularity problem in
292 : * initializing the ShmemIndex itself. The special "ShmemIndex" hash
293 : * table name will tell ShmemInitStruct to fake it.
294 : */
295 1902 : info.keysize = SHMEM_INDEX_KEYSIZE;
296 1902 : info.entrysize = sizeof(ShmemIndexEnt);
297 :
298 1902 : ShmemIndex = ShmemInitHash("ShmemIndex",
299 : SHMEM_INDEX_SIZE, SHMEM_INDEX_SIZE,
300 : &info,
301 : HASH_ELEM | HASH_STRINGS);
302 1902 : }
303 :
304 : /*
305 : * ShmemInitHash -- Create and initialize, or attach to, a
306 : * shared memory hash table.
307 : *
308 : * We assume caller is doing some kind of synchronization
309 : * so that two processes don't try to create/initialize the same
310 : * table at once. (In practice, all creations are done in the postmaster
311 : * process; child processes should always be attaching to existing tables.)
312 : *
313 : * max_size is the estimated maximum number of hashtable entries. This is
314 : * not a hard limit, but the access efficiency will degrade if it is
315 : * exceeded substantially (since it's used to compute directory size and
316 : * the hash table buckets will get overfull).
317 : *
318 : * init_size is the number of hashtable entries to preallocate. For a table
319 : * whose maximum size is certain, this should be equal to max_size; that
320 : * ensures that no run-time out-of-shared-memory failures can occur.
321 : *
322 : * *infoP and hash_flags must specify at least the entry sizes and key
323 : * comparison semantics (see hash_create()). Flag bits and values specific
324 : * to shared-memory hash tables are added here, except that callers may
325 : * choose to specify HASH_PARTITION and/or HASH_FIXED_SIZE.
326 : *
327 : * Note: before Postgres 9.0, this function returned NULL for some failure
328 : * cases. Now, it always throws error instead, so callers need not check
329 : * for NULL.
330 : */
331 : HTAB *
332 17132 : ShmemInitHash(const char *name, /* table string name for shmem index */
333 : long init_size, /* initial table size */
334 : long max_size, /* max size of the table */
335 : HASHCTL *infoP, /* info about key and bucket size */
336 : int hash_flags) /* info about infoP */
337 : {
338 : bool found;
339 : void *location;
340 :
341 : /*
342 : * Hash tables allocated in shared memory have a fixed directory; it can't
343 : * grow or other backends wouldn't be able to find it. So, make sure we
344 : * make it big enough to start with.
345 : *
346 : * The shared memory allocator must be specified too.
347 : */
348 17132 : infoP->dsize = infoP->max_dsize = hash_select_dirsize(max_size);
349 17132 : infoP->alloc = ShmemAllocNoError;
350 17132 : hash_flags |= HASH_SHARED_MEM | HASH_ALLOC | HASH_DIRSIZE;
351 :
352 : /* look it up in the shmem index */
353 17132 : location = ShmemInitStruct(name,
354 : hash_get_shared_size(infoP, hash_flags),
355 : &found);
356 :
357 : /*
358 : * if it already exists, attach to it rather than allocate and initialize
359 : * new space
360 : */
361 17132 : if (found)
362 0 : hash_flags |= HASH_ATTACH;
363 :
364 : /* Pass location of hashtable header to hash_create */
365 17132 : infoP->hctl = (HASHHDR *) location;
366 :
367 17132 : return hash_create(name, init_size, infoP, hash_flags);
368 : }
369 :
370 : /*
371 : * ShmemInitStruct -- Create/attach to a structure in shared memory.
372 : *
373 : * This is called during initialization to find or allocate
374 : * a data structure in shared memory. If no other process
375 : * has created the structure, this routine allocates space
376 : * for it. If it exists already, a pointer to the existing
377 : * structure is returned.
378 : *
379 : * Returns: pointer to the object. *foundPtr is set true if the object was
380 : * already in the shmem index (hence, already initialized).
381 : *
382 : * Note: before Postgres 9.0, this function returned NULL for some failure
383 : * cases. Now, it always throws error instead, so callers need not check
384 : * for NULL.
385 : */
386 : void *
387 121744 : ShmemInitStruct(const char *name, Size size, bool *foundPtr)
388 : {
389 : ShmemIndexEnt *result;
390 : void *structPtr;
391 :
392 121744 : LWLockAcquire(ShmemIndexLock, LW_EXCLUSIVE);
393 :
394 121744 : if (!ShmemIndex)
395 : {
396 1902 : PGShmemHeader *shmemseghdr = ShmemSegHdr;
397 :
398 : /* Must be trying to create/attach to ShmemIndex itself */
399 : Assert(strcmp(name, "ShmemIndex") == 0);
400 :
401 1902 : if (IsUnderPostmaster)
402 : {
403 : /* Must be initializing a (non-standalone) backend */
404 : Assert(shmemseghdr->index != NULL);
405 0 : structPtr = shmemseghdr->index;
406 0 : *foundPtr = true;
407 : }
408 : else
409 : {
410 : /*
411 : * If the shmem index doesn't exist, we are bootstrapping: we must
412 : * be trying to init the shmem index itself.
413 : *
414 : * Notice that the ShmemIndexLock is released before the shmem
415 : * index has been initialized. This should be OK because no other
416 : * process can be accessing shared memory yet.
417 : */
418 : Assert(shmemseghdr->index == NULL);
419 1902 : structPtr = ShmemAlloc(size);
420 1902 : shmemseghdr->index = structPtr;
421 1902 : *foundPtr = false;
422 : }
423 1902 : LWLockRelease(ShmemIndexLock);
424 1902 : return structPtr;
425 : }
426 :
427 : /* look it up in the shmem index */
428 : result = (ShmemIndexEnt *)
429 119842 : hash_search(ShmemIndex, name, HASH_ENTER_NULL, foundPtr);
430 :
431 119842 : if (!result)
432 : {
433 0 : LWLockRelease(ShmemIndexLock);
434 0 : ereport(ERROR,
435 : (errcode(ERRCODE_OUT_OF_MEMORY),
436 : errmsg("could not create ShmemIndex entry for data structure \"%s\"",
437 : name)));
438 : }
439 :
440 119842 : if (*foundPtr)
441 : {
442 : /*
443 : * Structure is in the shmem index so someone else has allocated it
444 : * already. The size better be the same as the size we are trying to
445 : * initialize to, or there is a name conflict (or worse).
446 : */
447 0 : if (result->size != size)
448 : {
449 0 : LWLockRelease(ShmemIndexLock);
450 0 : ereport(ERROR,
451 : (errmsg("ShmemIndex entry size is wrong for data structure"
452 : " \"%s\": expected %zu, actual %zu",
453 : name, size, result->size)));
454 : }
455 0 : structPtr = result->location;
456 : }
457 : else
458 : {
459 : Size allocated_size;
460 :
461 : /* It isn't in the table yet. allocate and initialize it */
462 119842 : structPtr = ShmemAllocRaw(size, &allocated_size);
463 119842 : if (structPtr == NULL)
464 : {
465 : /* out of memory; remove the failed ShmemIndex entry */
466 0 : hash_search(ShmemIndex, name, HASH_REMOVE, NULL);
467 0 : LWLockRelease(ShmemIndexLock);
468 0 : ereport(ERROR,
469 : (errcode(ERRCODE_OUT_OF_MEMORY),
470 : errmsg("not enough shared memory for data structure"
471 : " \"%s\" (%zu bytes requested)",
472 : name, size)));
473 : }
474 119842 : result->size = size;
475 119842 : result->allocated_size = allocated_size;
476 119842 : result->location = structPtr;
477 : }
478 :
479 119842 : LWLockRelease(ShmemIndexLock);
480 :
481 : Assert(ShmemAddrIsValid(structPtr));
482 :
483 : Assert(structPtr == (void *) CACHELINEALIGN(structPtr));
484 :
485 119842 : return structPtr;
486 : }
487 :
488 :
489 : /*
490 : * Add two Size values, checking for overflow
491 : */
492 : Size
493 871932 : add_size(Size s1, Size s2)
494 : {
495 : Size result;
496 :
497 871932 : result = s1 + s2;
498 : /* We are assuming Size is an unsigned type here... */
499 871932 : if (result < s1 || result < s2)
500 0 : ereport(ERROR,
501 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
502 : errmsg("requested shared memory size overflows size_t")));
503 871932 : return result;
504 : }
505 :
506 : /*
507 : * Multiply two Size values, checking for overflow
508 : */
509 : Size
510 370538 : mul_size(Size s1, Size s2)
511 : {
512 : Size result;
513 :
514 370538 : if (s1 == 0 || s2 == 0)
515 19324 : return 0;
516 351214 : result = s1 * s2;
517 : /* We are assuming Size is an unsigned type here... */
518 351214 : if (result / s2 != s1)
519 0 : ereport(ERROR,
520 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
521 : errmsg("requested shared memory size overflows size_t")));
522 351214 : return result;
523 : }
524 :
525 : /* SQL SRF showing allocated shared memory */
526 : Datum
527 6 : pg_get_shmem_allocations(PG_FUNCTION_ARGS)
528 : {
529 : #define PG_GET_SHMEM_SIZES_COLS 4
530 6 : ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
531 : HASH_SEQ_STATUS hstat;
532 : ShmemIndexEnt *ent;
533 6 : Size named_allocated = 0;
534 : Datum values[PG_GET_SHMEM_SIZES_COLS];
535 : bool nulls[PG_GET_SHMEM_SIZES_COLS];
536 :
537 6 : InitMaterializedSRF(fcinfo, 0);
538 :
539 6 : LWLockAcquire(ShmemIndexLock, LW_SHARED);
540 :
541 6 : hash_seq_init(&hstat, ShmemIndex);
542 :
543 : /* output all allocated entries */
544 6 : memset(nulls, 0, sizeof(nulls));
545 388 : while ((ent = (ShmemIndexEnt *) hash_seq_search(&hstat)) != NULL)
546 : {
547 382 : values[0] = CStringGetTextDatum(ent->key);
548 382 : values[1] = Int64GetDatum((char *) ent->location - (char *) ShmemSegHdr);
549 382 : values[2] = Int64GetDatum(ent->size);
550 382 : values[3] = Int64GetDatum(ent->allocated_size);
551 382 : named_allocated += ent->allocated_size;
552 :
553 382 : tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
554 : values, nulls);
555 : }
556 :
557 : /* output shared memory allocated but not counted via the shmem index */
558 6 : values[0] = CStringGetTextDatum("<anonymous>");
559 6 : nulls[1] = true;
560 6 : values[2] = Int64GetDatum(ShmemSegHdr->freeoffset - named_allocated);
561 6 : values[3] = values[2];
562 6 : tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
563 :
564 : /* output as-of-yet unused shared memory */
565 6 : nulls[0] = true;
566 6 : values[1] = Int64GetDatum(ShmemSegHdr->freeoffset);
567 6 : nulls[1] = false;
568 6 : values[2] = Int64GetDatum(ShmemSegHdr->totalsize - ShmemSegHdr->freeoffset);
569 6 : values[3] = values[2];
570 6 : tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
571 :
572 6 : LWLockRelease(ShmemIndexLock);
573 :
574 6 : return (Datum) 0;
575 : }
|