Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * dsa.c
4 : * Dynamic shared memory areas.
5 : *
6 : * This module provides dynamic shared memory areas which are built on top of
7 : * DSM segments. While dsm.c allows segments of shared memory to be
8 : * created and shared between backends, it isn't designed to deal with small
9 : * objects. A DSA area is a shared memory heap usually backed by one or more
10 : * DSM segments which can allocate memory using dsa_allocate() and dsa_free().
11 : * Alternatively, it can be created in pre-existing shared memory, including a
12 : * DSM segment, and then create extra DSM segments as required. Unlike the
13 : * regular system heap, it deals in pseudo-pointers which must be converted to
14 : * backend-local pointers before they are dereferenced. These pseudo-pointers
15 : * can however be shared with other backends, and can be used to construct
16 : * shared data structures.
17 : *
18 : * Each DSA area manages a set of DSM segments, adding new segments as
19 : * required and detaching them when they are no longer needed. Each segment
20 : * contains a number of 4KB pages, a free page manager for tracking
21 : * consecutive runs of free pages, and a page map for tracking the source of
22 : * objects allocated on each page. Allocation requests above 8KB are handled
23 : * by choosing a segment and finding consecutive free pages in its free page
24 : * manager. Allocation requests for smaller sizes are handled using pools of
25 : * objects of a selection of sizes. Each pool consists of a number of 16 page
26 : * (64KB) superblocks allocated in the same way as large objects. Allocation
27 : * of large objects and new superblocks is serialized by a single LWLock, but
28 : * allocation of small objects from pre-existing superblocks uses one LWLock
29 : * per pool. Currently there is one pool, and therefore one lock, per size
30 : * class. Per-core pools to increase concurrency and strategies for reducing
31 : * the resulting fragmentation are areas for future research. Each superblock
32 : * is managed with a 'span', which tracks the superblock's freelist. Free
33 : * requests are handled by looking in the page map to find which span an
34 : * address was allocated from, so that small objects can be returned to the
35 : * appropriate free list, and large object pages can be returned directly to
36 : * the free page manager. When allocating, simple heuristics for selecting
37 : * segments and superblocks try to encourage occupied memory to be
38 : * concentrated, increasing the likelihood that whole superblocks can become
39 : * empty and be returned to the free page manager, and whole segments can
40 : * become empty and be returned to the operating system.
41 : *
42 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
43 : * Portions Copyright (c) 1994, Regents of the University of California
44 : *
45 : * IDENTIFICATION
46 : * src/backend/utils/mmgr/dsa.c
47 : *
48 : *-------------------------------------------------------------------------
49 : */
50 :
51 : #include "postgres.h"
52 :
53 : #include "port/atomics.h"
54 : #include "port/pg_bitutils.h"
55 : #include "storage/dsm.h"
56 : #include "storage/lwlock.h"
57 : #include "utils/dsa.h"
58 : #include "utils/freepage.h"
59 : #include "utils/memutils.h"
60 : #include "utils/resowner.h"
61 :
/*
 * Number of segments to create at a given size before the segment size is
 * doubled.  A low value tends to waste space in the largest segment; a high
 * value risks exhausting segment slots (see dsm.c's limits on the total
 * number of segments) or limiting the total size an area can manage when
 * small pointers are in use.
 */
#define DSA_NUM_SEGMENTS_AT_EACH_SIZE 2

/*
 * Upper bound on the number of DSM segments one area can own.  It is derived
 * from the dsa_pointer bits left over once the offset bits are accounted
 * for, and is never allowed to exceed 1024.
 */
#define DSA_MAX_SEGMENTS \
	Min(1024, (1 << ((SIZEOF_DSA_POINTER * 8) - DSA_OFFSET_WIDTH)))

/* Mask selecting the offset bits of a dsa_pointer. */
#define DSA_OFFSET_BITMASK (((dsa_pointer) 1 << DSA_OFFSET_WIDTH) - 1)

/* Pages (of FPM_PAGE_SIZE bytes each) making up a regular superblock. */
#define DSA_PAGES_PER_SUPERBLOCK		16

/*
 * Magic number for sanity-checking DSM segments that belong to a DSA area.
 * The stored value is this constant XORed with the area handle and the
 * segment index.
 */
#define DSA_SEGMENT_HEADER_MAGIC 0x0ce26608

/* Combine a segment number and an offset into a dsa_pointer. */
#define DSA_MAKE_POINTER(segment_number, offset) \
	(((dsa_pointer) (segment_number) << DSA_OFFSET_WIDTH) | (offset))

/* Recover the segment number encoded in a dsa_pointer. */
#define DSA_EXTRACT_SEGMENT_NUMBER(dp) ((dp) >> DSA_OFFSET_WIDTH)

/* Recover the offset encoded in a dsa_pointer. */
#define DSA_EXTRACT_OFFSET(dp) ((dp) & DSA_OFFSET_BITMASK)

/* The type used for segment indexes (zero based). */
typedef size_t dsa_segment_index;

/* Reserved dsa_segment_index value meaning 'none' or 'end of list'. */
#define DSA_SEGMENT_INDEX_NONE (~(dsa_segment_index)0)

/*
 * Number of segment bins.  Segments are sorted into bins according to their
 * largest contiguous run of free pages.
 */
#define DSA_NUM_SEGMENT_BINS 16
112 :
113 : /*
114 : * What is the lowest bin that holds segments that *might* have n contiguous
115 : * free pages? There is no point in looking in segments in lower bins; they
116 : * definitely can't service a request for n free pages.
117 : */
118 : static inline size_t
119 40128 : contiguous_pages_to_segment_bin(size_t n)
120 : {
121 : size_t bin;
122 :
123 40128 : if (n == 0)
124 1424 : bin = 0;
125 : else
126 38704 : bin = pg_leftmost_one_pos_size_t(n) + 1;
127 :
128 40128 : return Min(bin, DSA_NUM_SEGMENT_BINS - 1);
129 : }
130 :
/*
 * Macros for access to locks.
 *
 * DSA_AREA_LOCK yields the address of the area-wide lock stored in the
 * control object; DSA_SCLASS_LOCK yields the address of the lock embedded in
 * the pool for size class 'sclass'.
 *
 * The 'area' argument is parenthesized so that any pointer-valued expression
 * (not just a plain identifier) can be passed safely.
 */
#define DSA_AREA_LOCK(area) (&(area)->control->lock)
#define DSA_SCLASS_LOCK(area, sclass) (&(area)->control->pools[sclass].lock)
134 :
135 : /*
136 : * The header for an individual segment. This lives at the start of each DSM
137 : * segment owned by a DSA area including the first segment (where it appears
138 : * as part of the dsa_area_control struct).
139 : */
140 : typedef struct
141 : {
142 : /* Sanity check magic value. */
143 : uint32 magic;
144 : /* Total number of pages in this segment (excluding metadata area). */
145 : size_t usable_pages;
146 : /* Total size of this segment in bytes. */
147 : size_t size;
148 :
149 : /*
150 : * Index of the segment that precedes this one in the same segment bin, or
151 : * DSA_SEGMENT_INDEX_NONE if this is the first one.
152 : */
153 : dsa_segment_index prev;
154 :
155 : /*
156 : * Index of the segment that follows this one in the same segment bin, or
157 : * DSA_SEGMENT_INDEX_NONE if this is the last one.
158 : */
159 : dsa_segment_index next;
160 : /* The index of the bin that contains this segment. */
161 : size_t bin;
162 :
163 : /*
164 : * A flag raised to indicate that this segment is being returned to the
165 : * operating system and has been unpinned.
166 : */
167 : bool freed;
168 : } dsa_segment_header;
169 :
170 : /*
171 : * Metadata for one superblock.
172 : *
173 : * For most blocks, span objects are stored out-of-line; that is, the span
174 : * object is not stored within the block itself. But, as an exception, for a
175 : * "span of spans", the span object is stored "inline". The allocation is
176 : * always exactly one page, and the dsa_area_span object is located at
177 : * the beginning of that page. The size class is DSA_SCLASS_BLOCK_OF_SPANS,
178 : * and the remaining fields are used just as they would be in an ordinary
179 : * block. We can't allocate spans out of ordinary superblocks because
180 : * creating an ordinary superblock requires us to be able to allocate a span
181 : * *first*. Doing it this way avoids that circularity.
182 : */
183 : typedef struct
184 : {
185 : dsa_pointer pool; /* Containing pool. */
186 : dsa_pointer prevspan; /* Previous span. */
187 : dsa_pointer nextspan; /* Next span. */
188 : dsa_pointer start; /* Starting address. */
189 : size_t npages; /* Length of span in pages. */
190 : uint16 size_class; /* Size class. */
191 : uint16 ninitialized; /* Maximum number of objects ever allocated. */
192 : uint16 nallocatable; /* Number of objects currently allocatable. */
193 : uint16 firstfree; /* First object on free list. */
194 : uint16 nmax; /* Maximum number of objects ever possible. */
195 : uint16 fclass; /* Current fullness class. */
196 : } dsa_area_span;
197 :
/*
 * Freelist link accessor.  Reinterprets the start of 'object' (a pointer to
 * an object inside a span) as a uint16 holding the index of the next free
 * object in that span's freelist.  The expansion is an L-value, so it can be
 * both read and assigned.
 */
#define NextFreeObjectIndex(object) (*(uint16 *) (object))
203 :
204 : /*
205 : * Small allocations are handled by dividing a single block of memory into
206 : * many small objects of equal size. The possible allocation sizes are
207 : * defined by the following array. Larger size classes are spaced more widely
208 : * than smaller size classes. We fudge the spacing for size classes >1kB to
209 : * avoid space wastage: based on the knowledge that we plan to allocate 64kB
210 : * blocks, we bump the maximum object size up to the largest multiple of
211 : * 8 bytes that still lets us fit the same number of objects into one block.
212 : *
213 : * NB: Because of this fudging, if we were ever to use differently-sized blocks
214 : * for small allocations, these size classes would need to be reworked to be
215 : * optimal for the new size.
216 : *
217 : * NB: The optimal spacing for size classes, as well as the size of the blocks
218 : * out of which small objects are allocated, is not a question that has one
219 : * right answer. Some allocators (such as tcmalloc) use more closely-spaced
220 : * size classes than we do here, while others (like aset.c) use more
221 : * widely-spaced classes. Spacing the classes more closely avoids wasting
222 : * memory within individual chunks, but also means a larger number of
223 : * potentially-unfilled blocks.
224 : */
225 : static const uint16 dsa_size_classes[] = {
226 : sizeof(dsa_area_span), 0, /* special size classes */
227 : 8, 16, 24, 32, 40, 48, 56, 64, /* 8 classes separated by 8 bytes */
228 : 80, 96, 112, 128, /* 4 classes separated by 16 bytes */
229 : 160, 192, 224, 256, /* 4 classes separated by 32 bytes */
230 : 320, 384, 448, 512, /* 4 classes separated by 64 bytes */
231 : 640, 768, 896, 1024, /* 4 classes separated by 128 bytes */
232 : 1280, 1560, 1816, 2048, /* 4 classes separated by ~256 bytes */
233 : 2616, 3120, 3640, 4096, /* 4 classes separated by ~512 bytes */
234 : 5456, 6552, 7280, 8192 /* 4 classes separated by ~1024 bytes */
235 : };
236 : #define DSA_NUM_SIZE_CLASSES lengthof(dsa_size_classes)
237 :
238 : /* Special size classes. */
239 : #define DSA_SCLASS_BLOCK_OF_SPANS 0
240 : #define DSA_SCLASS_SPAN_LARGE 1
241 :
242 : /*
243 : * The following lookup table is used to map the size of small objects
244 : * (less than 1kB) onto the corresponding size class. To use this table,
245 : * round the size of the object up to the next multiple of 8 bytes, and then
246 : * index into this array.
247 : */
248 : static const uint8 dsa_size_class_map[] = {
249 : 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 11, 11, 12, 12, 13, 13,
250 : 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 17, 17, 17, 17,
251 : 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19,
252 : 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21,
253 : 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
254 : 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
255 : 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
256 : 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25
257 : };
258 : #define DSA_SIZE_CLASS_MAP_QUANTUM 8
259 :
260 : /*
261 : * Superblocks are binned by how full they are. Generally, each fullness
262 : * class corresponds to one quartile, but the block being used for
263 : * allocations is always at the head of the list for fullness class 1,
264 : * regardless of how full it really is.
265 : */
266 : #define DSA_FULLNESS_CLASSES 4
267 :
268 : /*
269 : * A dsa_area_pool represents a set of objects of a given size class.
270 : *
271 : * Perhaps there should be multiple pools for the same size class for
272 : * contention avoidance, but for now there is just one!
273 : */
274 : typedef struct
275 : {
276 : /* A lock protecting access to this pool. */
277 : LWLock lock;
278 : /* A set of linked lists of spans, arranged by fullness. */
279 : dsa_pointer spans[DSA_FULLNESS_CLASSES];
280 : /* Should we pad this out to a cacheline boundary? */
281 : } dsa_area_pool;
282 :
283 : /*
284 : * The control block for an area. This lives in shared memory, at the start of
285 : * the first DSM segment controlled by this area.
286 : */
287 : typedef struct
288 : {
289 : /* The segment header for the first segment. */
290 : dsa_segment_header segment_header;
291 : /* The handle for this area. */
292 : dsa_handle handle;
293 : /* The handles of the segments owned by this area. */
294 : dsm_handle segment_handles[DSA_MAX_SEGMENTS];
295 : /* Lists of segments, binned by maximum contiguous run of free pages. */
296 : dsa_segment_index segment_bins[DSA_NUM_SEGMENT_BINS];
297 : /* The object pools for each size class. */
298 : dsa_area_pool pools[DSA_NUM_SIZE_CLASSES];
299 : /* initial allocation segment size */
300 : size_t init_segment_size;
301 : /* maximum allocation segment size */
302 : size_t max_segment_size;
303 : /* The total size of all active segments. */
304 : size_t total_segment_size;
305 : /* The maximum total size of backing storage we are allowed. */
306 : size_t max_total_segment_size;
307 : /* Highest used segment index in the history of this area. */
308 : dsa_segment_index high_segment_index;
309 : /* The reference count for this area. */
310 : int refcnt;
311 : /* A flag indicating that this area has been pinned. */
312 : bool pinned;
313 : /* The number of times that segments have been freed. */
314 : size_t freed_segment_counter;
315 : /* The LWLock tranche ID. */
316 : int lwlock_tranche_id;
317 : /* The general lock (protects everything except object pools). */
318 : LWLock lock;
319 : } dsa_area_control;
320 :
/*
 * Given a pointer to a pool, find a dsa_pointer.
 *
 * The result is a pointer into segment 0 whose offset is the byte distance
 * from the start of the area's control object to 'p'.  Both arguments are
 * parenthesized: without that, the (char *) cast would bind only to the
 * first operand of an expression argument such as "pools + 1" and silently
 * compute the wrong offset.
 */
#define DsaAreaPoolToDsaPointer(area, p)	\
	DSA_MAKE_POINTER(0, (char *) (p) - (char *) (area)->control)
324 :
325 : /*
326 : * A dsa_segment_map is stored within the backend-private memory of each
327 : * individual backend. It holds the base address of the segment within that
328 : * backend, plus the addresses of key objects within the segment. Those
329 : * could instead be derived from the base address but it's handy to have them
330 : * around.
331 : */
332 : typedef struct
333 : {
334 : dsm_segment *segment; /* DSM segment */
335 : char *mapped_address; /* Address at which segment is mapped */
336 : dsa_segment_header *header; /* Header (same as mapped_address) */
337 : FreePageManager *fpm; /* Free page manager within segment. */
338 : dsa_pointer *pagemap; /* Page map within segment. */
339 : } dsa_segment_map;
340 :
341 : /*
342 : * Per-backend state for a storage area. Backends obtain one of these by
343 : * creating an area or attaching to an existing one using a handle. Each
344 : * process that needs to use an area uses its own object to track where the
345 : * segments are mapped.
346 : */
347 : struct dsa_area
348 : {
349 : /* Pointer to the control object in shared memory. */
350 : dsa_area_control *control;
351 :
352 : /*
353 : * All the mappings are owned by this. The dsa_area itself is not
354 : * directly tracked by the ResourceOwner, but the effect is the same. NULL
355 : * if the attachment has session lifespan, i.e if dsa_pin_mapping() has
356 : * been called.
357 : */
358 : ResourceOwner resowner;
359 :
360 : /*
361 : * This backend's array of segment maps, ordered by segment index
362 : * corresponding to control->segment_handles. Some of the area's segments
363 : * may not be mapped in this backend yet, and some slots may have been
364 : * freed and need to be detached; these operations happen on demand.
365 : */
366 : dsa_segment_map segment_maps[DSA_MAX_SEGMENTS];
367 :
368 : /* The highest segment index this backend has ever mapped. */
369 : dsa_segment_index high_segment_index;
370 :
371 : /* The last observed freed_segment_counter. */
372 : size_t freed_segment_counter;
373 : };
374 :
/* Value of a span's 'firstfree' field when its free list is empty. */
#define DSA_SPAN_NOTHING_FREE	((uint16) -1)

/* Size in bytes of a regular superblock. */
#define DSA_SUPERBLOCK_SIZE (DSA_PAGES_PER_SUPERBLOCK * FPM_PAGE_SIZE)

/*
 * Given a pointer to a segment_map, obtain a segment index number.
 *
 * The index is the distance, in elements, of 'segment_map_ptr' from the
 * start of the area's segment_maps array.  Both arguments and the whole
 * expansion are parenthesized so that expression arguments and uses inside
 * larger expressions expand correctly.
 */
#define get_segment_index(area, segment_map_ptr) \
	((segment_map_ptr) - &(area)->segment_maps[0])
381 :
382 : static void init_span(dsa_area *area, dsa_pointer span_pointer,
383 : dsa_area_pool *pool, dsa_pointer start, size_t npages,
384 : uint16 size_class);
385 : static bool transfer_first_span(dsa_area *area, dsa_area_pool *pool,
386 : int fromclass, int toclass);
387 : static inline dsa_pointer alloc_object(dsa_area *area, int size_class);
388 : static bool ensure_active_superblock(dsa_area *area, dsa_area_pool *pool,
389 : int size_class);
390 : static dsa_segment_map *get_segment_by_index(dsa_area *area,
391 : dsa_segment_index index);
392 : static void destroy_superblock(dsa_area *area, dsa_pointer span_pointer);
393 : static void unlink_span(dsa_area *area, dsa_area_span *span);
394 : static void add_span_to_fullness_class(dsa_area *area, dsa_area_span *span,
395 : dsa_pointer span_pointer, int fclass);
396 : static void unlink_segment(dsa_area *area, dsa_segment_map *segment_map);
397 : static dsa_segment_map *get_best_segment(dsa_area *area, size_t npages);
398 : static dsa_segment_map *make_new_segment(dsa_area *area, size_t requested_pages);
399 : static dsa_area *create_internal(void *place, size_t size,
400 : int tranche_id,
401 : dsm_handle control_handle,
402 : dsm_segment *control_segment,
403 : size_t init_segment_size,
404 : size_t max_segment_size);
405 : static dsa_area *attach_internal(void *place, dsm_segment *segment,
406 : dsa_handle handle);
407 : static void check_for_freed_segments(dsa_area *area);
408 : static void check_for_freed_segments_locked(dsa_area *area);
409 : static void rebin_segment(dsa_area *area, dsa_segment_map *segment_map);
410 :
411 : /*
412 : * Create a new shared area in a new DSM segment. Further DSM segments will
413 : * be allocated as required to extend the available space.
414 : *
415 : * We can't allocate a LWLock tranche_id within this function, because tranche
416 : * IDs are a scarce resource; there are only 64k available, using low numbers
417 : * when possible matters, and we have no provision for recycling them. So,
418 : * we require the caller to provide one.
419 : */
420 : dsa_area *
421 156 : dsa_create_ext(int tranche_id, size_t init_segment_size, size_t max_segment_size)
422 : {
423 : dsm_segment *segment;
424 : dsa_area *area;
425 :
426 : /*
427 : * Create the DSM segment that will hold the shared control object and the
428 : * first segment of usable space.
429 : */
430 156 : segment = dsm_create(init_segment_size, 0);
431 :
432 : /*
433 : * All segments backing this area are pinned, so that DSA can explicitly
434 : * control their lifetime (otherwise a newly created segment belonging to
435 : * this area might be freed when the only backend that happens to have it
436 : * mapped in ends, corrupting the area).
437 : */
438 156 : dsm_pin_segment(segment);
439 :
440 : /* Create a new DSA area with the control object in this segment. */
441 156 : area = create_internal(dsm_segment_address(segment),
442 : init_segment_size,
443 : tranche_id,
444 : dsm_segment_handle(segment), segment,
445 : init_segment_size, max_segment_size);
446 :
447 : /* Clean up when the control segment detaches. */
448 156 : on_dsm_detach(segment, &dsa_on_dsm_detach_release_in_place,
449 156 : PointerGetDatum(dsm_segment_address(segment)));
450 :
451 156 : return area;
452 : }
453 :
454 : /*
455 : * Create a new shared area in an existing shared memory space, which may be
456 : * either DSM or Postmaster-initialized memory. DSM segments will be
457 : * allocated as required to extend the available space, though that can be
458 : * prevented with dsa_set_size_limit(area, size) using the same size provided
459 : * to dsa_create_in_place.
460 : *
461 : * Areas created in-place must eventually be released by the backend that
462 : * created them and all backends that attach to them. This can be done
463 : * explicitly with dsa_release_in_place, or, in the special case that 'place'
464 : * happens to be in a pre-existing DSM segment, by passing in a pointer to the
465 : * segment so that a detach hook can be registered with the containing DSM
466 : * segment.
467 : *
468 : * See dsa_create() for a note about the tranche arguments.
469 : */
470 : dsa_area *
471 2768 : dsa_create_in_place_ext(void *place, size_t size,
472 : int tranche_id, dsm_segment *segment,
473 : size_t init_segment_size, size_t max_segment_size)
474 : {
475 : dsa_area *area;
476 :
477 2768 : area = create_internal(place, size, tranche_id,
478 : DSM_HANDLE_INVALID, NULL,
479 : init_segment_size, max_segment_size);
480 :
481 : /*
482 : * Clean up when the control segment detaches, if a containing DSM segment
483 : * was provided.
484 : */
485 2768 : if (segment != NULL)
486 850 : on_dsm_detach(segment, &dsa_on_dsm_detach_release_in_place,
487 : PointerGetDatum(place));
488 :
489 2768 : return area;
490 : }
491 :
492 : /*
493 : * Obtain a handle that can be passed to other processes so that they can
494 : * attach to the given area. Cannot be called for areas created with
495 : * dsa_create_in_place.
496 : */
497 : dsa_handle
498 150 : dsa_get_handle(dsa_area *area)
499 : {
500 : Assert(area->control->handle != DSA_HANDLE_INVALID);
501 150 : return area->control->handle;
502 : }
503 :
504 : /*
505 : * Attach to an area given a handle generated (possibly in another process) by
506 : * dsa_get_handle. The area must have been created with dsa_create (not
507 : * dsa_create_in_place).
508 : */
509 : dsa_area *
510 284 : dsa_attach(dsa_handle handle)
511 : {
512 : dsm_segment *segment;
513 : dsa_area *area;
514 :
515 : /*
516 : * An area handle is really a DSM segment handle for the first segment, so
517 : * we go ahead and attach to that.
518 : */
519 284 : segment = dsm_attach(handle);
520 284 : if (segment == NULL)
521 0 : ereport(ERROR,
522 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
523 : errmsg("could not attach to dynamic shared area")));
524 :
525 284 : area = attach_internal(dsm_segment_address(segment), segment, handle);
526 :
527 : /* Clean up when the control segment detaches. */
528 284 : on_dsm_detach(segment, &dsa_on_dsm_detach_release_in_place,
529 284 : PointerGetDatum(dsm_segment_address(segment)));
530 :
531 284 : return area;
532 : }
533 :
534 : /*
535 : * Attach to an area that was created with dsa_create_in_place. The caller
536 : * must somehow know the location in memory that was used when the area was
537 : * created, though it may be mapped at a different virtual address in this
538 : * process.
539 : *
540 : * See dsa_create_in_place for note about releasing in-place areas, and the
541 : * optional 'segment' argument which can be provided to allow automatic
542 : * release if the containing memory happens to be a DSM segment.
543 : */
544 : dsa_area *
545 40154 : dsa_attach_in_place(void *place, dsm_segment *segment)
546 : {
547 : dsa_area *area;
548 :
549 40154 : area = attach_internal(place, NULL, DSA_HANDLE_INVALID);
550 :
551 : /*
552 : * Clean up when the control segment detaches, if a containing DSM segment
553 : * was provided.
554 : */
555 40154 : if (segment != NULL)
556 5230 : on_dsm_detach(segment, &dsa_on_dsm_detach_release_in_place,
557 : PointerGetDatum(place));
558 :
559 40154 : return area;
560 : }
561 :
562 : /*
563 : * Release a DSA area that was produced by dsa_create_in_place or
564 : * dsa_attach_in_place. The 'segment' argument is ignored but provides an
565 : * interface suitable for on_dsm_detach, for the convenience of users who want
566 : * to create a DSA segment inside an existing DSM segment and have it
567 : * automatically released when the containing DSM segment is detached.
568 : * 'place' should be the address of the place where the area was created.
569 : *
570 : * This callback is automatically registered for the DSM segment containing
571 : * the control object of in-place areas when a segment is provided to
572 : * dsa_create_in_place or dsa_attach_in_place, and also for all areas created
573 : * with dsa_create.
574 : */
575 : void
576 6520 : dsa_on_dsm_detach_release_in_place(dsm_segment *segment, Datum place)
577 : {
578 6520 : dsa_release_in_place(DatumGetPointer(place));
579 6520 : }
580 :
581 : /*
582 : * Release a DSA area that was produced by dsa_create_in_place or
583 : * dsa_attach_in_place. The 'code' argument is ignored but provides an
584 : * interface suitable for on_shmem_exit or before_shmem_exit, for the
585 : * convenience of users who want to create a DSA segment inside shared memory
586 : * other than a DSM segment and have it automatically release at backend exit.
587 : * 'place' should be the address of the place where the area was created.
588 : */
589 : void
590 0 : dsa_on_shmem_exit_release_in_place(int code, Datum place)
591 : {
592 0 : dsa_release_in_place(DatumGetPointer(place));
593 0 : }
594 :
595 : /*
596 : * Release a DSA area that was produced by dsa_create_in_place or
597 : * dsa_attach_in_place. It is preferable to use one of the 'dsa_on_XXX'
598 : * callbacks so that this is managed automatically, because failure to release
599 : * an area created in-place leaks its segments permanently.
600 : *
601 : * This is also called automatically for areas produced by dsa_create or
602 : * dsa_attach as an implementation detail.
603 : */
604 : void
605 41444 : dsa_release_in_place(void *place)
606 : {
607 41444 : dsa_area_control *control = (dsa_area_control *) place;
608 : int i;
609 :
610 41444 : LWLockAcquire(&control->lock, LW_EXCLUSIVE);
611 : Assert(control->segment_header.magic ==
612 : (DSA_SEGMENT_HEADER_MAGIC ^ control->handle ^ 0));
613 : Assert(control->refcnt > 0);
614 41444 : if (--control->refcnt == 0)
615 : {
616 2034 : for (i = 0; i <= control->high_segment_index; ++i)
617 : {
618 : dsm_handle handle;
619 :
620 1146 : handle = control->segment_handles[i];
621 1146 : if (handle != DSM_HANDLE_INVALID)
622 296 : dsm_unpin_segment(handle);
623 : }
624 : }
625 41444 : LWLockRelease(&control->lock);
626 41444 : }
627 :
628 : /*
629 : * Keep a DSA area attached until end of session or explicit detach.
630 : *
631 : * By default, areas are owned by the current resource owner, which means they
632 : * are detached automatically when that scope ends.
633 : */
634 : void
635 38144 : dsa_pin_mapping(dsa_area *area)
636 : {
637 : int i;
638 :
639 38144 : if (area->resowner != NULL)
640 : {
641 3100 : area->resowner = NULL;
642 :
643 6226 : for (i = 0; i <= area->high_segment_index; ++i)
644 3126 : if (area->segment_maps[i].segment != NULL)
645 276 : dsm_pin_mapping(area->segment_maps[i].segment);
646 : }
647 38144 : }
648 :
649 : /*
650 : * Allocate memory in this storage area. The return value is a dsa_pointer
651 : * that can be passed to other processes, and converted to a local pointer
652 : * with dsa_get_address. 'flags' is a bitmap which should be constructed
653 : * from the following values:
654 : *
655 : * DSA_ALLOC_HUGE allows allocations >= 1GB. Otherwise, such allocations
656 : * will result in an ERROR.
657 : *
658 : * DSA_ALLOC_NO_OOM causes this function to return InvalidDsaPointer when
659 : * no memory is available or a size limit established by dsa_set_size_limit
660 : * would be exceeded. Otherwise, such allocations will result in an ERROR.
661 : *
662 : * DSA_ALLOC_ZERO causes the allocated memory to be zeroed. Otherwise, the
663 : * contents of newly-allocated memory are indeterminate.
664 : *
665 : * These flags correspond to similarly named flags used by
666 : * MemoryContextAllocExtended(). See also the macros dsa_allocate and
667 : * dsa_allocate0 which expand to a call to this function with commonly used
668 : * flags.
669 : */
670 : dsa_pointer
671 1112208 : dsa_allocate_extended(dsa_area *area, size_t size, int flags)
672 : {
673 : uint16 size_class;
674 : dsa_pointer start_pointer;
675 : dsa_segment_map *segment_map;
676 : dsa_pointer result;
677 :
678 : Assert(size > 0);
679 :
680 : /* Sanity check on huge individual allocation size. */
681 1112208 : if (((flags & DSA_ALLOC_HUGE) != 0 && !AllocHugeSizeIsValid(size)) ||
682 1112208 : ((flags & DSA_ALLOC_HUGE) == 0 && !AllocSizeIsValid(size)))
683 0 : elog(ERROR, "invalid DSA memory alloc request size %zu", size);
684 :
685 : /*
686 : * If bigger than the largest size class, just grab a run of pages from
687 : * the free page manager, instead of allocating an object from a pool.
688 : * There will still be a span, but it's a special class of span that
689 : * manages this whole allocation and simply gives all pages back to the
690 : * free page manager when dsa_free is called.
691 : */
692 1112208 : if (size > dsa_size_classes[lengthof(dsa_size_classes) - 1])
693 : {
694 5506 : size_t npages = fpm_size_to_pages(size);
695 : size_t first_page;
696 : dsa_pointer span_pointer;
697 5506 : dsa_area_pool *pool = &area->control->pools[DSA_SCLASS_SPAN_LARGE];
698 :
699 : /* Obtain a span object. */
700 5506 : span_pointer = alloc_object(area, DSA_SCLASS_BLOCK_OF_SPANS);
701 5506 : if (!DsaPointerIsValid(span_pointer))
702 : {
703 : /* Raise error unless asked not to. */
704 0 : if ((flags & DSA_ALLOC_NO_OOM) == 0)
705 0 : ereport(ERROR,
706 : (errcode(ERRCODE_OUT_OF_MEMORY),
707 : errmsg("out of memory"),
708 : errdetail("Failed on DSA request of size %zu.",
709 : size)));
710 0 : return InvalidDsaPointer;
711 : }
712 :
713 5506 : LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
714 :
715 : /* Find a segment from which to allocate. */
716 5506 : segment_map = get_best_segment(area, npages);
717 5506 : if (segment_map == NULL)
718 44 : segment_map = make_new_segment(area, npages);
719 5506 : if (segment_map == NULL)
720 : {
721 : /* Can't make any more segments: game over. */
722 0 : LWLockRelease(DSA_AREA_LOCK(area));
723 0 : dsa_free(area, span_pointer);
724 :
725 : /* Raise error unless asked not to. */
726 0 : if ((flags & DSA_ALLOC_NO_OOM) == 0)
727 0 : ereport(ERROR,
728 : (errcode(ERRCODE_OUT_OF_MEMORY),
729 : errmsg("out of memory"),
730 : errdetail("Failed on DSA request of size %zu.",
731 : size)));
732 0 : return InvalidDsaPointer;
733 : }
734 :
735 : /*
736 : * Ask the free page manager for a run of pages. This should always
737 : * succeed, since both get_best_segment and make_new_segment should
738 : * only return a non-NULL pointer if it actually contains enough
739 : * contiguous freespace. If it does fail, something in our backend
740 : * private state is out of whack, so use FATAL to kill the process.
741 : */
742 5506 : if (!FreePageManagerGet(segment_map->fpm, npages, &first_page))
743 0 : elog(FATAL,
744 : "dsa_allocate could not find %zu free pages", npages);
745 5506 : LWLockRelease(DSA_AREA_LOCK(area));
746 :
747 5506 : start_pointer = DSA_MAKE_POINTER(get_segment_index(area, segment_map),
748 : first_page * FPM_PAGE_SIZE);
749 :
750 : /* Initialize span and pagemap. */
751 5506 : LWLockAcquire(DSA_SCLASS_LOCK(area, DSA_SCLASS_SPAN_LARGE),
752 : LW_EXCLUSIVE);
753 5506 : init_span(area, span_pointer, pool, start_pointer, npages,
754 : DSA_SCLASS_SPAN_LARGE);
755 5506 : segment_map->pagemap[first_page] = span_pointer;
756 5506 : LWLockRelease(DSA_SCLASS_LOCK(area, DSA_SCLASS_SPAN_LARGE));
757 :
758 : /* Zero-initialize the memory if requested. */
759 5506 : if ((flags & DSA_ALLOC_ZERO) != 0)
760 1300 : memset(dsa_get_address(area, start_pointer), 0, size);
761 :
762 5506 : return start_pointer;
763 : }
764 :
765 : /* Map allocation to a size class. */
766 1106702 : if (size < lengthof(dsa_size_class_map) * DSA_SIZE_CLASS_MAP_QUANTUM)
767 : {
768 : int mapidx;
769 :
770 : /* For smaller sizes we have a lookup table... */
771 1067886 : mapidx = ((size + DSA_SIZE_CLASS_MAP_QUANTUM - 1) /
772 1067886 : DSA_SIZE_CLASS_MAP_QUANTUM) - 1;
773 1067886 : size_class = dsa_size_class_map[mapidx];
774 : }
775 : else
776 : {
777 : uint16 min;
778 : uint16 max;
779 :
780 : /* ... and for the rest we search by binary chop. */
781 38816 : min = dsa_size_class_map[lengthof(dsa_size_class_map) - 1];
782 38816 : max = lengthof(dsa_size_classes) - 1;
783 :
784 190500 : while (min < max)
785 : {
786 151684 : uint16 mid = (min + max) / 2;
787 151684 : uint16 class_size = dsa_size_classes[mid];
788 :
789 151684 : if (class_size < size)
790 72454 : min = mid + 1;
791 : else
792 79230 : max = mid;
793 : }
794 :
795 38816 : size_class = min;
796 : }
797 : Assert(size <= dsa_size_classes[size_class]);
798 : Assert(size_class == 0 || size > dsa_size_classes[size_class - 1]);
799 :
800 : /* Attempt to allocate an object from the appropriate pool. */
801 1106702 : result = alloc_object(area, size_class);
802 :
803 : /* Check for failure to allocate. */
804 1106702 : if (!DsaPointerIsValid(result))
805 : {
806 : /* Raise error unless asked not to. */
807 0 : if ((flags & DSA_ALLOC_NO_OOM) == 0)
808 0 : ereport(ERROR,
809 : (errcode(ERRCODE_OUT_OF_MEMORY),
810 : errmsg("out of memory"),
811 : errdetail("Failed on DSA request of size %zu.", size)));
812 0 : return InvalidDsaPointer;
813 : }
814 :
815 : /* Zero-initialize the memory if requested. */
816 1106702 : if ((flags & DSA_ALLOC_ZERO) != 0)
817 544364 : memset(dsa_get_address(area, result), 0, size);
818 :
819 1106702 : return result;
820 : }
821 :
822 : /*
823 : * Free memory obtained with dsa_allocate.
824 : */
825 : void
826 207368 : dsa_free(dsa_area *area, dsa_pointer dp)
827 : {
828 : dsa_segment_map *segment_map;
829 : int pageno;
830 : dsa_pointer span_pointer;
831 : dsa_area_span *span;
832 : char *superblock;
833 : char *object;
834 : size_t size;
835 : int size_class;
836 :
837 : /* Make sure we don't have a stale segment in the slot 'dp' refers to. */
838 207368 : check_for_freed_segments(area);
839 :
840 : /* Locate the object, span and pool. */
841 207368 : segment_map = get_segment_by_index(area, DSA_EXTRACT_SEGMENT_NUMBER(dp));
842 207368 : pageno = DSA_EXTRACT_OFFSET(dp) / FPM_PAGE_SIZE;
843 207368 : span_pointer = segment_map->pagemap[pageno];
844 207368 : span = dsa_get_address(area, span_pointer);
845 207368 : superblock = dsa_get_address(area, span->start);
846 207368 : object = dsa_get_address(area, dp);
847 207368 : size_class = span->size_class;
848 207368 : size = dsa_size_classes[size_class];
849 :
850 : /*
851 : * Special case for large objects that live in a special span: we return
852 : * those pages directly to the free page manager and free the span.
853 : */
854 207368 : if (span->size_class == DSA_SCLASS_SPAN_LARGE)
855 : {
856 :
857 : #ifdef CLOBBER_FREED_MEMORY
858 : memset(object, 0x7f, span->npages * FPM_PAGE_SIZE);
859 : #endif
860 :
861 : /* Give pages back to free page manager. */
862 4392 : LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
863 4392 : FreePageManagerPut(segment_map->fpm,
864 4392 : DSA_EXTRACT_OFFSET(span->start) / FPM_PAGE_SIZE,
865 : span->npages);
866 :
867 : /* Move segment to appropriate bin if necessary. */
868 4392 : rebin_segment(area, segment_map);
869 4392 : LWLockRelease(DSA_AREA_LOCK(area));
870 :
871 : /* Unlink span. */
872 4392 : LWLockAcquire(DSA_SCLASS_LOCK(area, DSA_SCLASS_SPAN_LARGE),
873 : LW_EXCLUSIVE);
874 4392 : unlink_span(area, span);
875 4392 : LWLockRelease(DSA_SCLASS_LOCK(area, DSA_SCLASS_SPAN_LARGE));
876 : /* Free the span object so it can be reused. */
877 4392 : dsa_free(area, span_pointer);
878 4392 : return;
879 : }
880 :
881 : #ifdef CLOBBER_FREED_MEMORY
882 : memset(object, 0x7f, size);
883 : #endif
884 :
885 202976 : LWLockAcquire(DSA_SCLASS_LOCK(area, size_class), LW_EXCLUSIVE);
886 :
887 : /* Put the object on the span's freelist. */
888 : Assert(object >= superblock);
889 : Assert(object < superblock + DSA_SUPERBLOCK_SIZE);
890 : Assert((object - superblock) % size == 0);
891 202976 : NextFreeObjectIndex(object) = span->firstfree;
892 202976 : span->firstfree = (object - superblock) / size;
893 202976 : ++span->nallocatable;
894 :
895 : /*
896 : * See if the span needs to moved to a different fullness class, or be
897 : * freed so its pages can be given back to the segment.
898 : */
899 202976 : if (span->nallocatable == 1 && span->fclass == DSA_FULLNESS_CLASSES - 1)
900 : {
901 : /*
902 : * The block was completely full and is located in the
903 : * highest-numbered fullness class, which is never scanned for free
904 : * chunks. We must move it to the next-lower fullness class.
905 : */
906 204 : unlink_span(area, span);
907 204 : add_span_to_fullness_class(area, span, span_pointer,
908 : DSA_FULLNESS_CLASSES - 2);
909 :
910 : /*
911 : * If this is the only span, and there is no active span, then we
912 : * should probably move this span to fullness class 1. (Otherwise if
913 : * you allocate exactly all the objects in the only span, it moves to
914 : * class 3, then you free them all, it moves to 2, and then is given
915 : * back, leaving no active span).
916 : */
917 : }
918 202772 : else if (span->nallocatable == span->nmax &&
919 9128 : (span->fclass != 1 || span->prevspan != InvalidDsaPointer))
920 : {
921 : /*
922 : * This entire block is free, and it's not the active block for this
923 : * size class. Return the memory to the free page manager. We don't
924 : * do this for the active block to prevent hysteresis: if we
925 : * repeatedly allocate and free the only chunk in the active block, it
926 : * will be very inefficient if we deallocate and reallocate the block
927 : * every time.
928 : */
929 16 : destroy_superblock(area, span_pointer);
930 : }
931 :
932 202976 : LWLockRelease(DSA_SCLASS_LOCK(area, size_class));
933 : }
934 :
935 : /*
936 : * Obtain a backend-local address for a dsa_pointer. 'dp' must point to
937 : * memory allocated by the given area (possibly in another process) that
938 : * hasn't yet been freed. This may cause a segment to be mapped into the
939 : * current process if required, and may cause freed segments to be unmapped.
940 : */
941 : void *
942 16722296 : dsa_get_address(dsa_area *area, dsa_pointer dp)
943 : {
944 : dsa_segment_index index;
945 : size_t offset;
946 :
947 : /* Convert InvalidDsaPointer to NULL. */
948 16722296 : if (!DsaPointerIsValid(dp))
949 2774562 : return NULL;
950 :
951 : /* Process any requests to detach from freed segments. */
952 13947734 : check_for_freed_segments(area);
953 :
954 : /* Break the dsa_pointer into its components. */
955 13947734 : index = DSA_EXTRACT_SEGMENT_NUMBER(dp);
956 13947734 : offset = DSA_EXTRACT_OFFSET(dp);
957 : Assert(index < DSA_MAX_SEGMENTS);
958 :
959 : /* Check if we need to cause this segment to be mapped in. */
960 13947734 : if (unlikely(area->segment_maps[index].mapped_address == NULL))
961 : {
962 : /* Call for effect (we don't need the result). */
963 31586 : get_segment_by_index(area, index);
964 : }
965 :
966 13947734 : return area->segment_maps[index].mapped_address + offset;
967 : }
968 :
969 : /*
970 : * Pin this area, so that it will continue to exist even if all backends
971 : * detach from it. In that case, the area can still be reattached to if a
972 : * handle has been recorded somewhere.
973 : */
974 : void
975 2036 : dsa_pin(dsa_area *area)
976 : {
977 2036 : LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
978 2036 : if (area->control->pinned)
979 : {
980 0 : LWLockRelease(DSA_AREA_LOCK(area));
981 0 : elog(ERROR, "dsa_area already pinned");
982 : }
983 2036 : area->control->pinned = true;
984 2036 : ++area->control->refcnt;
985 2036 : LWLockRelease(DSA_AREA_LOCK(area));
986 2036 : }
987 :
988 : /*
989 : * Undo the effects of dsa_pin, so that the given area can be freed when no
990 : * backends are attached to it. May be called only if dsa_pin has been
991 : * called.
992 : */
993 : void
994 0 : dsa_unpin(dsa_area *area)
995 : {
996 0 : LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
997 : Assert(area->control->refcnt > 1);
998 0 : if (!area->control->pinned)
999 : {
1000 0 : LWLockRelease(DSA_AREA_LOCK(area));
1001 0 : elog(ERROR, "dsa_area not pinned");
1002 : }
1003 0 : area->control->pinned = false;
1004 0 : --area->control->refcnt;
1005 0 : LWLockRelease(DSA_AREA_LOCK(area));
1006 0 : }
1007 :
1008 : /*
1009 : * Set the total size limit for this area. This limit is checked whenever new
1010 : * segments need to be allocated from the operating system. If the new size
1011 : * limit is already exceeded, this has no immediate effect.
1012 : *
1013 : * Note that the total virtual memory usage may be temporarily larger than
1014 : * this limit when segments have been freed, but not yet detached by all
1015 : * backends that have attached to them.
1016 : */
1017 : void
1018 3836 : dsa_set_size_limit(dsa_area *area, size_t limit)
1019 : {
1020 3836 : LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
1021 3836 : area->control->max_total_segment_size = limit;
1022 3836 : LWLockRelease(DSA_AREA_LOCK(area));
1023 3836 : }
1024 :
1025 : /* Return the total size of all active segments */
1026 : size_t
1027 2370 : dsa_get_total_size(dsa_area *area)
1028 : {
1029 : size_t size;
1030 :
1031 2370 : LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
1032 2370 : size = area->control->total_segment_size;
1033 2370 : LWLockRelease(DSA_AREA_LOCK(area));
1034 :
1035 2370 : return size;
1036 : }
1037 :
1038 : /*
1039 : * Aggressively free all spare memory in the hope of returning DSM segments to
1040 : * the operating system.
1041 : */
1042 : void
1043 0 : dsa_trim(dsa_area *area)
1044 : {
1045 : int size_class;
1046 :
1047 : /*
1048 : * Trim in reverse pool order so we get to the spans-of-spans last, just
1049 : * in case any become entirely free while processing all the other pools.
1050 : */
1051 0 : for (size_class = DSA_NUM_SIZE_CLASSES - 1; size_class >= 0; --size_class)
1052 : {
1053 0 : dsa_area_pool *pool = &area->control->pools[size_class];
1054 : dsa_pointer span_pointer;
1055 :
1056 0 : if (size_class == DSA_SCLASS_SPAN_LARGE)
1057 : {
1058 : /* Large object frees give back segments aggressively already. */
1059 0 : continue;
1060 : }
1061 :
1062 : /*
1063 : * Search fullness class 1 only. That is where we expect to find an
1064 : * entirely empty superblock (entirely empty superblocks in other
1065 : * fullness classes are returned to the free page map by dsa_free).
1066 : */
1067 0 : LWLockAcquire(DSA_SCLASS_LOCK(area, size_class), LW_EXCLUSIVE);
1068 0 : span_pointer = pool->spans[1];
1069 0 : while (DsaPointerIsValid(span_pointer))
1070 : {
1071 0 : dsa_area_span *span = dsa_get_address(area, span_pointer);
1072 0 : dsa_pointer next = span->nextspan;
1073 :
1074 0 : if (span->nallocatable == span->nmax)
1075 0 : destroy_superblock(area, span_pointer);
1076 :
1077 0 : span_pointer = next;
1078 : }
1079 0 : LWLockRelease(DSA_SCLASS_LOCK(area, size_class));
1080 : }
1081 0 : }
1082 :
1083 : /*
1084 : * Print out debugging information about the internal state of the shared
1085 : * memory area.
1086 : */
1087 : void
1088 0 : dsa_dump(dsa_area *area)
1089 : {
1090 : size_t i,
1091 : j;
1092 :
1093 : /*
1094 : * Note: This gives an inconsistent snapshot as it acquires and releases
1095 : * individual locks as it goes...
1096 : */
1097 :
1098 0 : LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
1099 0 : check_for_freed_segments_locked(area);
1100 0 : fprintf(stderr, "dsa_area handle %x:\n", area->control->handle);
1101 0 : fprintf(stderr, " max_total_segment_size: %zu\n",
1102 0 : area->control->max_total_segment_size);
1103 0 : fprintf(stderr, " total_segment_size: %zu\n",
1104 0 : area->control->total_segment_size);
1105 0 : fprintf(stderr, " refcnt: %d\n", area->control->refcnt);
1106 0 : fprintf(stderr, " pinned: %c\n", area->control->pinned ? 't' : 'f');
1107 0 : fprintf(stderr, " segment bins:\n");
1108 0 : for (i = 0; i < DSA_NUM_SEGMENT_BINS; ++i)
1109 : {
1110 0 : if (area->control->segment_bins[i] != DSA_SEGMENT_INDEX_NONE)
1111 : {
1112 : dsa_segment_index segment_index;
1113 :
1114 0 : if (i == 0)
1115 0 : fprintf(stderr,
1116 : " segment bin %zu (no contiguous free pages):\n", i);
1117 : else
1118 0 : fprintf(stderr,
1119 : " segment bin %zu (at least %d contiguous pages free):\n",
1120 0 : i, 1 << (i - 1));
1121 0 : segment_index = area->control->segment_bins[i];
1122 0 : while (segment_index != DSA_SEGMENT_INDEX_NONE)
1123 : {
1124 : dsa_segment_map *segment_map;
1125 :
1126 : segment_map =
1127 0 : get_segment_by_index(area, segment_index);
1128 :
1129 0 : fprintf(stderr,
1130 : " segment index %zu, usable_pages = %zu, "
1131 : "contiguous_pages = %zu, mapped at %p\n",
1132 : segment_index,
1133 0 : segment_map->header->usable_pages,
1134 0 : fpm_largest(segment_map->fpm),
1135 : segment_map->mapped_address);
1136 0 : segment_index = segment_map->header->next;
1137 : }
1138 : }
1139 : }
1140 0 : LWLockRelease(DSA_AREA_LOCK(area));
1141 :
1142 0 : fprintf(stderr, " pools:\n");
1143 0 : for (i = 0; i < DSA_NUM_SIZE_CLASSES; ++i)
1144 : {
1145 0 : bool found = false;
1146 :
1147 0 : LWLockAcquire(DSA_SCLASS_LOCK(area, i), LW_EXCLUSIVE);
1148 0 : for (j = 0; j < DSA_FULLNESS_CLASSES; ++j)
1149 0 : if (DsaPointerIsValid(area->control->pools[i].spans[j]))
1150 0 : found = true;
1151 0 : if (found)
1152 : {
1153 0 : if (i == DSA_SCLASS_BLOCK_OF_SPANS)
1154 0 : fprintf(stderr, " pool for blocks of span objects:\n");
1155 0 : else if (i == DSA_SCLASS_SPAN_LARGE)
1156 0 : fprintf(stderr, " pool for large object spans:\n");
1157 : else
1158 0 : fprintf(stderr,
1159 : " pool for size class %zu (object size %hu bytes):\n",
1160 0 : i, dsa_size_classes[i]);
1161 0 : for (j = 0; j < DSA_FULLNESS_CLASSES; ++j)
1162 : {
1163 0 : if (!DsaPointerIsValid(area->control->pools[i].spans[j]))
1164 0 : fprintf(stderr, " fullness class %zu is empty\n", j);
1165 : else
1166 : {
1167 0 : dsa_pointer span_pointer = area->control->pools[i].spans[j];
1168 :
1169 0 : fprintf(stderr, " fullness class %zu:\n", j);
1170 0 : while (DsaPointerIsValid(span_pointer))
1171 : {
1172 : dsa_area_span *span;
1173 :
1174 0 : span = dsa_get_address(area, span_pointer);
1175 0 : fprintf(stderr,
1176 : " span descriptor at "
1177 : DSA_POINTER_FORMAT ", superblock at "
1178 : DSA_POINTER_FORMAT
1179 : ", pages = %zu, objects free = %hu/%hu\n",
1180 : span_pointer, span->start, span->npages,
1181 0 : span->nallocatable, span->nmax);
1182 0 : span_pointer = span->nextspan;
1183 : }
1184 : }
1185 : }
1186 : }
1187 0 : LWLockRelease(DSA_SCLASS_LOCK(area, i));
1188 : }
1189 0 : }
1190 :
1191 : /*
1192 : * Return the smallest size that you can successfully provide to
1193 : * dsa_create_in_place.
1194 : */
1195 : size_t
1196 3636 : dsa_minimum_size(void)
1197 : {
1198 : size_t size;
1199 3636 : int pages = 0;
1200 :
1201 3636 : size = MAXALIGN(sizeof(dsa_area_control)) +
1202 : MAXALIGN(sizeof(FreePageManager));
1203 :
1204 : /* Figure out how many pages we need, including the page map... */
1205 10908 : while (((size + FPM_PAGE_SIZE - 1) / FPM_PAGE_SIZE) > pages)
1206 : {
1207 7272 : ++pages;
1208 7272 : size += sizeof(dsa_pointer);
1209 : }
1210 :
1211 3636 : return pages * FPM_PAGE_SIZE;
1212 : }
1213 :
1214 : /*
1215 : * Workhorse function for dsa_create and dsa_create_in_place.
1216 : */
1217 : static dsa_area *
1218 2924 : create_internal(void *place, size_t size,
1219 : int tranche_id,
1220 : dsm_handle control_handle,
1221 : dsm_segment *control_segment,
1222 : size_t init_segment_size, size_t max_segment_size)
1223 : {
1224 : dsa_area_control *control;
1225 : dsa_area *area;
1226 : dsa_segment_map *segment_map;
1227 : size_t usable_pages;
1228 : size_t total_pages;
1229 : size_t metadata_bytes;
1230 : int i;
1231 :
1232 : /* Check the initial and maximum block sizes */
1233 : Assert(init_segment_size >= DSA_MIN_SEGMENT_SIZE);
1234 : Assert(max_segment_size >= init_segment_size);
1235 : Assert(max_segment_size <= DSA_MAX_SEGMENT_SIZE);
1236 :
1237 : /* Sanity check on the space we have to work in. */
1238 2924 : if (size < dsa_minimum_size())
1239 0 : elog(ERROR, "dsa_area space must be at least %zu, but %zu provided",
1240 : dsa_minimum_size(), size);
1241 :
1242 : /* Now figure out how much space is usable */
1243 2924 : total_pages = size / FPM_PAGE_SIZE;
1244 2924 : metadata_bytes =
1245 : MAXALIGN(sizeof(dsa_area_control)) +
1246 2924 : MAXALIGN(sizeof(FreePageManager)) +
1247 : total_pages * sizeof(dsa_pointer);
1248 : /* Add padding up to next page boundary. */
1249 2924 : if (metadata_bytes % FPM_PAGE_SIZE != 0)
1250 2924 : metadata_bytes += FPM_PAGE_SIZE - (metadata_bytes % FPM_PAGE_SIZE);
1251 : Assert(metadata_bytes <= size);
1252 2924 : usable_pages = (size - metadata_bytes) / FPM_PAGE_SIZE;
1253 :
1254 : /*
1255 : * Initialize the dsa_area_control object located at the start of the
1256 : * space.
1257 : */
1258 2924 : control = (dsa_area_control *) place;
1259 2924 : memset(place, 0, sizeof(*control));
1260 2924 : control->segment_header.magic =
1261 2924 : DSA_SEGMENT_HEADER_MAGIC ^ control_handle ^ 0;
1262 2924 : control->segment_header.next = DSA_SEGMENT_INDEX_NONE;
1263 2924 : control->segment_header.prev = DSA_SEGMENT_INDEX_NONE;
1264 2924 : control->segment_header.usable_pages = usable_pages;
1265 2924 : control->segment_header.freed = false;
1266 2924 : control->segment_header.size = size;
1267 2924 : control->handle = control_handle;
1268 2924 : control->init_segment_size = init_segment_size;
1269 2924 : control->max_segment_size = max_segment_size;
1270 2924 : control->max_total_segment_size = (size_t) -1;
1271 2924 : control->total_segment_size = size;
1272 2924 : control->segment_handles[0] = control_handle;
1273 49708 : for (i = 0; i < DSA_NUM_SEGMENT_BINS; ++i)
1274 46784 : control->segment_bins[i] = DSA_SEGMENT_INDEX_NONE;
1275 2924 : control->refcnt = 1;
1276 2924 : control->lwlock_tranche_id = tranche_id;
1277 :
1278 : /*
1279 : * Create the dsa_area object that this backend will use to access the
1280 : * area. Other backends will need to obtain their own dsa_area object by
1281 : * attaching.
1282 : */
1283 2924 : area = palloc(sizeof(dsa_area));
1284 2924 : area->control = control;
1285 2924 : area->resowner = CurrentResourceOwner;
1286 2924 : memset(area->segment_maps, 0, sizeof(dsa_segment_map) * DSA_MAX_SEGMENTS);
1287 2924 : area->high_segment_index = 0;
1288 2924 : area->freed_segment_counter = 0;
1289 2924 : LWLockInitialize(&control->lock, control->lwlock_tranche_id);
1290 114036 : for (i = 0; i < DSA_NUM_SIZE_CLASSES; ++i)
1291 111112 : LWLockInitialize(DSA_SCLASS_LOCK(area, i),
1292 : control->lwlock_tranche_id);
1293 :
1294 : /* Set up the segment map for this process's mapping. */
1295 2924 : segment_map = &area->segment_maps[0];
1296 2924 : segment_map->segment = control_segment;
1297 2924 : segment_map->mapped_address = place;
1298 2924 : segment_map->header = (dsa_segment_header *) place;
1299 2924 : segment_map->fpm = (FreePageManager *)
1300 2924 : (segment_map->mapped_address +
1301 : MAXALIGN(sizeof(dsa_area_control)));
1302 2924 : segment_map->pagemap = (dsa_pointer *)
1303 2924 : (segment_map->mapped_address +
1304 2924 : MAXALIGN(sizeof(dsa_area_control)) +
1305 : MAXALIGN(sizeof(FreePageManager)));
1306 :
1307 : /* Set up the free page map. */
1308 2924 : FreePageManagerInitialize(segment_map->fpm, segment_map->mapped_address);
1309 : /* There can be 0 usable pages if size is dsa_minimum_size(). */
1310 :
1311 2924 : if (usable_pages > 0)
1312 2212 : FreePageManagerPut(segment_map->fpm, metadata_bytes / FPM_PAGE_SIZE,
1313 : usable_pages);
1314 :
1315 : /* Put this segment into the appropriate bin. */
1316 2924 : control->segment_bins[contiguous_pages_to_segment_bin(usable_pages)] = 0;
1317 2924 : segment_map->header->bin = contiguous_pages_to_segment_bin(usable_pages);
1318 :
1319 2924 : return area;
1320 : }
1321 :
1322 : /*
1323 : * Workhorse function for dsa_attach and dsa_attach_in_place.
1324 : */
1325 : static dsa_area *
1326 40438 : attach_internal(void *place, dsm_segment *segment, dsa_handle handle)
1327 : {
1328 : dsa_area_control *control;
1329 : dsa_area *area;
1330 : dsa_segment_map *segment_map;
1331 :
1332 40438 : control = (dsa_area_control *) place;
1333 : Assert(control->handle == handle);
1334 : Assert(control->segment_handles[0] == handle);
1335 : Assert(control->segment_header.magic ==
1336 : (DSA_SEGMENT_HEADER_MAGIC ^ handle ^ 0));
1337 :
1338 : /* Build the backend-local area object. */
1339 40438 : area = palloc(sizeof(dsa_area));
1340 40438 : area->control = control;
1341 40438 : area->resowner = CurrentResourceOwner;
1342 40438 : memset(&area->segment_maps[0], 0,
1343 : sizeof(dsa_segment_map) * DSA_MAX_SEGMENTS);
1344 40438 : area->high_segment_index = 0;
1345 :
1346 : /* Set up the segment map for this process's mapping. */
1347 40438 : segment_map = &area->segment_maps[0];
1348 40438 : segment_map->segment = segment; /* NULL for in-place */
1349 40438 : segment_map->mapped_address = place;
1350 40438 : segment_map->header = (dsa_segment_header *) segment_map->mapped_address;
1351 40438 : segment_map->fpm = (FreePageManager *)
1352 40438 : (segment_map->mapped_address + MAXALIGN(sizeof(dsa_area_control)));
1353 40438 : segment_map->pagemap = (dsa_pointer *)
1354 40438 : (segment_map->mapped_address + MAXALIGN(sizeof(dsa_area_control)) +
1355 : MAXALIGN(sizeof(FreePageManager)));
1356 :
1357 : /* Bump the reference count. */
1358 40438 : LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
1359 40438 : if (control->refcnt == 0)
1360 : {
1361 : /* We can't attach to a DSA area that has already been destroyed. */
1362 0 : ereport(ERROR,
1363 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1364 : errmsg("could not attach to dynamic shared area")));
1365 : }
1366 40438 : ++control->refcnt;
1367 40438 : area->freed_segment_counter = area->control->freed_segment_counter;
1368 40438 : LWLockRelease(DSA_AREA_LOCK(area));
1369 :
1370 40438 : return area;
1371 : }
1372 :
1373 : /*
1374 : * Add a new span to fullness class 1 of the indicated pool.
1375 : */
1376 : static void
1377 24548 : init_span(dsa_area *area,
1378 : dsa_pointer span_pointer,
1379 : dsa_area_pool *pool, dsa_pointer start, size_t npages,
1380 : uint16 size_class)
1381 : {
1382 24548 : dsa_area_span *span = dsa_get_address(area, span_pointer);
1383 24548 : size_t obsize = dsa_size_classes[size_class];
1384 :
1385 : /*
1386 : * The per-pool lock must be held because we manipulate the span list for
1387 : * this pool.
1388 : */
1389 : Assert(LWLockHeldByMe(DSA_SCLASS_LOCK(area, size_class)));
1390 :
1391 : /* Push this span onto the front of the span list for fullness class 1. */
1392 24548 : if (DsaPointerIsValid(pool->spans[1]))
1393 : {
1394 : dsa_area_span *head = (dsa_area_span *)
1395 4038 : dsa_get_address(area, pool->spans[1]);
1396 :
1397 4038 : head->prevspan = span_pointer;
1398 : }
1399 24548 : span->pool = DsaAreaPoolToDsaPointer(area, pool);
1400 24548 : span->nextspan = pool->spans[1];
1401 24548 : span->prevspan = InvalidDsaPointer;
1402 24548 : pool->spans[1] = span_pointer;
1403 :
1404 24548 : span->start = start;
1405 24548 : span->npages = npages;
1406 24548 : span->size_class = size_class;
1407 24548 : span->ninitialized = 0;
1408 24548 : if (size_class == DSA_SCLASS_BLOCK_OF_SPANS)
1409 : {
1410 : /*
1411 : * A block-of-spans contains its own descriptor, so mark one object as
1412 : * initialized and reduce the count of allocatable objects by one.
1413 : * Doing this here has the side effect of also reducing nmax by one,
1414 : * which is important to make sure we free this object at the correct
1415 : * time.
1416 : */
1417 2378 : span->ninitialized = 1;
1418 2378 : span->nallocatable = FPM_PAGE_SIZE / obsize - 1;
1419 : }
1420 22170 : else if (size_class != DSA_SCLASS_SPAN_LARGE)
1421 16664 : span->nallocatable = DSA_SUPERBLOCK_SIZE / obsize;
1422 24548 : span->firstfree = DSA_SPAN_NOTHING_FREE;
1423 24548 : span->nmax = span->nallocatable;
1424 24548 : span->fclass = 1;
1425 24548 : }
1426 :
1427 : /*
1428 : * Transfer the first span in one fullness class to the head of another
1429 : * fullness class.
1430 : */
1431 : static bool
1432 39704 : transfer_first_span(dsa_area *area,
1433 : dsa_area_pool *pool, int fromclass, int toclass)
1434 : {
1435 : dsa_pointer span_pointer;
1436 : dsa_area_span *span;
1437 : dsa_area_span *nextspan;
1438 :
1439 : /* Can't do it if source list is empty. */
1440 39704 : span_pointer = pool->spans[fromclass];
1441 39704 : if (!DsaPointerIsValid(span_pointer))
1442 38084 : return false;
1443 :
1444 : /* Remove span from head of source list. */
1445 1620 : span = dsa_get_address(area, span_pointer);
1446 1620 : pool->spans[fromclass] = span->nextspan;
1447 1620 : if (DsaPointerIsValid(span->nextspan))
1448 : {
1449 : nextspan = (dsa_area_span *)
1450 84 : dsa_get_address(area, span->nextspan);
1451 84 : nextspan->prevspan = InvalidDsaPointer;
1452 : }
1453 :
1454 : /* Add span to head of target list. */
1455 1620 : span->nextspan = pool->spans[toclass];
1456 1620 : pool->spans[toclass] = span_pointer;
1457 1620 : if (DsaPointerIsValid(span->nextspan))
1458 : {
1459 : nextspan = (dsa_area_span *)
1460 586 : dsa_get_address(area, span->nextspan);
1461 586 : nextspan->prevspan = span_pointer;
1462 : }
1463 1620 : span->fclass = toclass;
1464 :
1465 1620 : return true;
1466 : }
1467 :
1468 : /*
1469 : * Allocate one object of the requested size class from the given area.
1470 : */
1471 : static inline dsa_pointer
1472 1128872 : alloc_object(dsa_area *area, int size_class)
1473 : {
1474 1128872 : dsa_area_pool *pool = &area->control->pools[size_class];
1475 : dsa_area_span *span;
1476 : dsa_pointer block;
1477 : dsa_pointer result;
1478 : char *object;
1479 : size_t size;
1480 :
1481 : /*
1482 : * Even though ensure_active_superblock can in turn call alloc_object if
1483 : * it needs to allocate a new span, that's always from a different pool,
1484 : * and the order of lock acquisition is always the same, so it's OK that
1485 : * we hold this lock for the duration of this function.
1486 : */
1487 : Assert(!LWLockHeldByMe(DSA_SCLASS_LOCK(area, size_class)));
1488 1128872 : LWLockAcquire(DSA_SCLASS_LOCK(area, size_class), LW_EXCLUSIVE);
1489 :
1490 : /*
1491 : * If there's no active superblock, we must successfully obtain one or
1492 : * fail the request.
1493 : */
1494 1128872 : if (!DsaPointerIsValid(pool->spans[1]) &&
1495 19188 : !ensure_active_superblock(area, pool, size_class))
1496 : {
1497 0 : result = InvalidDsaPointer;
1498 : }
1499 : else
1500 : {
1501 : /*
1502 : * There should be a block in fullness class 1 at this point, and it
1503 : * should never be completely full. Thus we can either pop an object
1504 : * from the free list or, failing that, initialize a new object.
1505 : */
1506 : Assert(DsaPointerIsValid(pool->spans[1]));
1507 : span = (dsa_area_span *)
1508 1128872 : dsa_get_address(area, pool->spans[1]);
1509 : Assert(span->nallocatable > 0);
1510 1128872 : block = span->start;
1511 : Assert(size_class < DSA_NUM_SIZE_CLASSES);
1512 1128872 : size = dsa_size_classes[size_class];
1513 1128872 : if (span->firstfree != DSA_SPAN_NOTHING_FREE)
1514 : {
1515 181422 : result = block + span->firstfree * size;
1516 181422 : object = dsa_get_address(area, result);
1517 181422 : span->firstfree = NextFreeObjectIndex(object);
1518 : }
1519 : else
1520 : {
1521 947450 : result = block + span->ninitialized * size;
1522 947450 : ++span->ninitialized;
1523 : }
1524 1128872 : --span->nallocatable;
1525 :
1526 : /* If it's now full, move it to the highest-numbered fullness class. */
1527 1128872 : if (span->nallocatable == 0)
1528 1474 : transfer_first_span(area, pool, 1, DSA_FULLNESS_CLASSES - 1);
1529 : }
1530 :
1531 : Assert(LWLockHeldByMe(DSA_SCLASS_LOCK(area, size_class)));
1532 1128872 : LWLockRelease(DSA_SCLASS_LOCK(area, size_class));
1533 :
1534 1128872 : return result;
1535 : }
1536 :
1537 : /*
1538 : * Ensure an active (i.e. fullness class 1) superblock, unless all existing
1539 : * superblocks are completely full and no more can be allocated.
1540 : *
1541 : * Fullness classes K of 0..N are loosely intended to represent blocks whose
1542 : * utilization percentage is at least K/N, but we only enforce this rigorously
1543 : * for the highest-numbered fullness class, which always contains exactly
1544 : * those blocks that are completely full. It's otherwise acceptable for a
1545 : * block to be in a higher-numbered fullness class than the one to which it
1546 : * logically belongs. In addition, the active block, which is always the
1547 : * first block in fullness class 1, is permitted to have a higher allocation
1548 : * percentage than would normally be allowable for that fullness class; we
1549 : * don't move it until it's completely full, and then it goes to the
1550 : * highest-numbered fullness class.
1551 : *
1552 : * It might seem odd that the active block is the head of fullness class 1
1553 : * rather than fullness class 0, but experience with other allocators has
1554 : * shown that it's usually better to allocate from a block that's moderately
1555 : * full rather than one that's nearly empty. Insofar as is reasonably
1556 : * possible, we want to avoid performing new allocations in a block that would
1557 : * otherwise become empty soon.
1558 : */
1559 : static bool
1560 19188 : ensure_active_superblock(dsa_area *area, dsa_area_pool *pool,
1561 : int size_class)
1562 : {
1563 : dsa_pointer span_pointer;
1564 : dsa_pointer start_pointer;
1565 19188 : size_t obsize = dsa_size_classes[size_class];
1566 : size_t nmax;
1567 : int fclass;
1568 19188 : size_t npages = 1;
1569 : size_t first_page;
1570 : size_t i;
1571 : dsa_segment_map *segment_map;
1572 :
1573 : Assert(LWLockHeldByMe(DSA_SCLASS_LOCK(area, size_class)));
1574 :
1575 : /*
1576 : * Compute the number of objects that will fit in a block of this size
1577 : * class. Span-of-spans blocks are just a single page, and the first
1578 : * object isn't available for use because it describes the block-of-spans
1579 : * itself.
1580 : */
1581 19188 : if (size_class == DSA_SCLASS_BLOCK_OF_SPANS)
1582 2378 : nmax = FPM_PAGE_SIZE / obsize - 1;
1583 : else
1584 16810 : nmax = DSA_SUPERBLOCK_SIZE / obsize;
1585 :
1586 : /*
1587 : * If fullness class 1 is empty, try to find a span to put in it by
1588 : * scanning higher-numbered fullness classes (excluding the last one,
1589 : * whose blocks are certain to all be completely full).
1590 : */
1591 38376 : for (fclass = 2; fclass < DSA_FULLNESS_CLASSES - 1; ++fclass)
1592 : {
1593 19188 : span_pointer = pool->spans[fclass];
1594 :
1595 19438 : while (DsaPointerIsValid(span_pointer))
1596 : {
1597 : int tfclass;
1598 : dsa_area_span *span;
1599 : dsa_area_span *nextspan;
1600 : dsa_area_span *prevspan;
1601 : dsa_pointer next_span_pointer;
1602 :
1603 : span = (dsa_area_span *)
1604 250 : dsa_get_address(area, span_pointer);
1605 250 : next_span_pointer = span->nextspan;
1606 :
1607 : /* Figure out what fullness class should contain this span. */
1608 250 : tfclass = (nmax - span->nallocatable)
1609 250 : * (DSA_FULLNESS_CLASSES - 1) / nmax;
1610 :
1611 : /* Look up next span. */
1612 250 : if (DsaPointerIsValid(span->nextspan))
1613 : nextspan = (dsa_area_span *)
1614 104 : dsa_get_address(area, span->nextspan);
1615 : else
1616 146 : nextspan = NULL;
1617 :
1618 : /*
1619 : * If utilization has dropped enough that this now belongs in some
1620 : * other fullness class, move it there.
1621 : */
1622 250 : if (tfclass < fclass)
1623 : {
1624 : /* Remove from the current fullness class list. */
1625 0 : if (pool->spans[fclass] == span_pointer)
1626 : {
1627 : /* It was the head; remove it. */
1628 : Assert(!DsaPointerIsValid(span->prevspan));
1629 0 : pool->spans[fclass] = span->nextspan;
1630 0 : if (nextspan != NULL)
1631 0 : nextspan->prevspan = InvalidDsaPointer;
1632 : }
1633 : else
1634 : {
1635 : /* It was not the head. */
1636 : Assert(DsaPointerIsValid(span->prevspan));
1637 : prevspan = (dsa_area_span *)
1638 0 : dsa_get_address(area, span->prevspan);
1639 0 : prevspan->nextspan = span->nextspan;
1640 : }
1641 0 : if (nextspan != NULL)
1642 0 : nextspan->prevspan = span->prevspan;
1643 :
1644 : /* Push onto the head of the new fullness class list. */
1645 0 : span->nextspan = pool->spans[tfclass];
1646 0 : pool->spans[tfclass] = span_pointer;
1647 0 : span->prevspan = InvalidDsaPointer;
1648 0 : if (DsaPointerIsValid(span->nextspan))
1649 : {
1650 : nextspan = (dsa_area_span *)
1651 0 : dsa_get_address(area, span->nextspan);
1652 0 : nextspan->prevspan = span_pointer;
1653 : }
1654 0 : span->fclass = tfclass;
1655 : }
1656 :
1657 : /* Advance to next span on list. */
1658 250 : span_pointer = next_span_pointer;
1659 : }
1660 :
1661 : /* Stop now if we found a suitable block. */
1662 19188 : if (DsaPointerIsValid(pool->spans[1]))
1663 0 : return true;
1664 : }
1665 :
1666 : /*
1667 : * If there are no blocks that properly belong in fullness class 1, pick
1668 : * one from some other fullness class and move it there anyway, so that we
1669 : * have an allocation target. Our last choice is to transfer a block
1670 : * that's almost empty (and might become completely empty soon if left
1671 : * alone), but even that is better than failing, which is what we must do
1672 : * if there are no blocks at all with freespace.
1673 : */
1674 : Assert(!DsaPointerIsValid(pool->spans[1]));
1675 38230 : for (fclass = 2; fclass < DSA_FULLNESS_CLASSES - 1; ++fclass)
1676 19188 : if (transfer_first_span(area, pool, fclass, 1))
1677 146 : return true;
1678 38084 : if (!DsaPointerIsValid(pool->spans[1]) &&
1679 19042 : transfer_first_span(area, pool, 0, 1))
1680 0 : return true;
1681 :
1682 : /*
1683 : * We failed to find an existing span with free objects, so we need to
1684 : * allocate a new superblock and construct a new span to manage it.
1685 : *
1686 : * First, get a dsa_area_span object to describe the new superblock block
1687 : * ... unless this allocation is for a dsa_area_span object, in which case
1688 : * that's surely not going to work. We handle that case by storing the
1689 : * span describing a block-of-spans inline.
1690 : */
1691 19042 : if (size_class != DSA_SCLASS_BLOCK_OF_SPANS)
1692 : {
1693 16664 : span_pointer = alloc_object(area, DSA_SCLASS_BLOCK_OF_SPANS);
1694 16664 : if (!DsaPointerIsValid(span_pointer))
1695 0 : return false;
1696 16664 : npages = DSA_PAGES_PER_SUPERBLOCK;
1697 : }
1698 :
1699 : /* Find or create a segment and allocate the superblock. */
1700 19042 : LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
1701 19042 : segment_map = get_best_segment(area, npages);
1702 19042 : if (segment_map == NULL)
1703 : {
1704 1722 : segment_map = make_new_segment(area, npages);
1705 1722 : if (segment_map == NULL)
1706 : {
1707 0 : LWLockRelease(DSA_AREA_LOCK(area));
1708 0 : return false;
1709 : }
1710 : }
1711 :
1712 : /*
1713 : * This shouldn't happen: get_best_segment() or make_new_segment()
1714 : * promised that we can successfully allocate npages.
1715 : */
1716 19042 : if (!FreePageManagerGet(segment_map->fpm, npages, &first_page))
1717 0 : elog(FATAL,
1718 : "dsa_allocate could not find %zu free pages for superblock",
1719 : npages);
1720 19042 : LWLockRelease(DSA_AREA_LOCK(area));
1721 :
1722 : /* Compute the start of the superblock. */
1723 19042 : start_pointer =
1724 19042 : DSA_MAKE_POINTER(get_segment_index(area, segment_map),
1725 : first_page * FPM_PAGE_SIZE);
1726 :
1727 : /*
1728 : * If this is a block-of-spans, carve the descriptor right out of the
1729 : * allocated space.
1730 : */
1731 19042 : if (size_class == DSA_SCLASS_BLOCK_OF_SPANS)
1732 : {
1733 : /*
1734 : * We have a pointer into the segment. We need to build a dsa_pointer
1735 : * from the segment index and offset into the segment.
1736 : */
1737 2378 : span_pointer = start_pointer;
1738 : }
1739 :
1740 : /* Initialize span and pagemap. */
1741 19042 : init_span(area, span_pointer, pool, start_pointer, npages, size_class);
1742 288044 : for (i = 0; i < npages; ++i)
1743 269002 : segment_map->pagemap[first_page + i] = span_pointer;
1744 :
1745 19042 : return true;
1746 : }
1747 :
1748 : /*
1749 : * Return the segment map corresponding to a given segment index, mapping the
1750 : * segment in if necessary. For internal segment book-keeping, this is called
1751 : * with the area lock held. It is also called by dsa_free and dsa_get_address
1752 : * without any locking, relying on the fact they have a known live segment
1753 : * index and they always call check_for_freed_segments to ensures that any
1754 : * freed segment occupying the same slot is detached first.
1755 : */
1756 : static dsa_segment_map *
1757 263380 : get_segment_by_index(dsa_area *area, dsa_segment_index index)
1758 : {
1759 263380 : if (unlikely(area->segment_maps[index].mapped_address == NULL))
1760 : {
1761 : dsm_handle handle;
1762 : dsm_segment *segment;
1763 : dsa_segment_map *segment_map;
1764 : ResourceOwner oldowner;
1765 :
1766 : /*
1767 : * If we are reached by dsa_free or dsa_get_address, there must be at
1768 : * least one object allocated in the referenced segment. Otherwise,
1769 : * their caller has a double-free or access-after-free bug, which we
1770 : * have no hope of detecting. So we know it's safe to access this
1771 : * array slot without holding a lock; it won't change underneath us.
1772 : * Furthermore, we know that we can see the latest contents of the
1773 : * slot, as explained in check_for_freed_segments, which those
1774 : * functions call before arriving here.
1775 : */
1776 32286 : handle = area->control->segment_handles[index];
1777 :
1778 : /* It's an error to try to access an unused slot. */
1779 32286 : if (handle == DSM_HANDLE_INVALID)
1780 0 : elog(ERROR,
1781 : "dsa_area could not attach to a segment that has been freed");
1782 :
1783 32286 : oldowner = CurrentResourceOwner;
1784 32286 : CurrentResourceOwner = area->resowner;
1785 32286 : segment = dsm_attach(handle);
1786 32286 : CurrentResourceOwner = oldowner;
1787 32286 : if (segment == NULL)
1788 0 : elog(ERROR, "dsa_area could not attach to segment");
1789 32286 : segment_map = &area->segment_maps[index];
1790 32286 : segment_map->segment = segment;
1791 32286 : segment_map->mapped_address = dsm_segment_address(segment);
1792 32286 : segment_map->header =
1793 32286 : (dsa_segment_header *) segment_map->mapped_address;
1794 32286 : segment_map->fpm = (FreePageManager *)
1795 32286 : (segment_map->mapped_address +
1796 : MAXALIGN(sizeof(dsa_segment_header)));
1797 32286 : segment_map->pagemap = (dsa_pointer *)
1798 32286 : (segment_map->mapped_address +
1799 32286 : MAXALIGN(sizeof(dsa_segment_header)) +
1800 : MAXALIGN(sizeof(FreePageManager)));
1801 :
1802 : /* Remember the highest index this backend has ever mapped. */
1803 32286 : if (area->high_segment_index < index)
1804 32282 : area->high_segment_index = index;
1805 :
1806 : Assert(segment_map->header->magic ==
1807 : (DSA_SEGMENT_HEADER_MAGIC ^ area->control->handle ^ index));
1808 : }
1809 :
1810 : /*
1811 : * Callers of dsa_get_address() and dsa_free() don't hold the area lock,
1812 : * but it's a bug in the calling code and undefined behavior if the
1813 : * address is not live (ie if the segment might possibly have been freed,
1814 : * they're trying to use a dangling pointer).
1815 : *
1816 : * For dsa.c code that holds the area lock to manipulate segment_bins
1817 : * lists, it would be a bug if we ever reach a freed segment here. After
1818 : * it's marked as freed, the only thing any backend should do with it is
1819 : * unmap it, and it should always have done that in
1820 : * check_for_freed_segments_locked() before arriving here to resolve an
1821 : * index to a segment_map.
1822 : *
1823 : * Either way we can assert that we aren't returning a freed segment.
1824 : */
1825 : Assert(!area->segment_maps[index].header->freed);
1826 :
1827 263380 : return &area->segment_maps[index];
1828 : }
1829 :
1830 : /*
1831 : * Return a superblock to the free page manager. If the underlying segment
1832 : * has become entirely free, then return it to the operating system.
1833 : *
1834 : * The appropriate pool lock must be held.
1835 : */
1836 : static void
1837 16 : destroy_superblock(dsa_area *area, dsa_pointer span_pointer)
1838 : {
1839 16 : dsa_area_span *span = dsa_get_address(area, span_pointer);
1840 16 : int size_class = span->size_class;
1841 : dsa_segment_map *segment_map;
1842 :
1843 :
1844 : /* Remove it from its fullness class list. */
1845 16 : unlink_span(area, span);
1846 :
1847 : /*
1848 : * Note: Here we acquire the area lock while we already hold a per-pool
1849 : * lock. We never hold the area lock and then take a pool lock, or we
1850 : * could deadlock.
1851 : */
1852 16 : LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
1853 16 : check_for_freed_segments_locked(area);
1854 : segment_map =
1855 16 : get_segment_by_index(area, DSA_EXTRACT_SEGMENT_NUMBER(span->start));
1856 16 : FreePageManagerPut(segment_map->fpm,
1857 16 : DSA_EXTRACT_OFFSET(span->start) / FPM_PAGE_SIZE,
1858 : span->npages);
1859 : /* Check if the segment is now entirely free. */
1860 16 : if (fpm_largest(segment_map->fpm) == segment_map->header->usable_pages)
1861 : {
1862 0 : dsa_segment_index index = get_segment_index(area, segment_map);
1863 :
1864 : /* If it's not the segment with extra control data, free it. */
1865 0 : if (index != 0)
1866 : {
1867 : /*
1868 : * Give it back to the OS, and allow other backends to detect that
1869 : * they need to detach.
1870 : */
1871 0 : unlink_segment(area, segment_map);
1872 0 : segment_map->header->freed = true;
1873 : Assert(area->control->total_segment_size >=
1874 : segment_map->header->size);
1875 0 : area->control->total_segment_size -=
1876 0 : segment_map->header->size;
1877 0 : dsm_unpin_segment(dsm_segment_handle(segment_map->segment));
1878 0 : dsm_detach(segment_map->segment);
1879 0 : area->control->segment_handles[index] = DSM_HANDLE_INVALID;
1880 0 : ++area->control->freed_segment_counter;
1881 0 : segment_map->segment = NULL;
1882 0 : segment_map->header = NULL;
1883 0 : segment_map->mapped_address = NULL;
1884 : }
1885 : }
1886 :
1887 : /* Move segment to appropriate bin if necessary. */
1888 16 : if (segment_map->header != NULL)
1889 16 : rebin_segment(area, segment_map);
1890 :
1891 16 : LWLockRelease(DSA_AREA_LOCK(area));
1892 :
1893 : /*
1894 : * Span-of-spans blocks store the span which describes them within the
1895 : * block itself, so freeing the storage implicitly frees the descriptor
1896 : * also. If this is a block of any other type, we need to separately free
1897 : * the span object also. This recursive call to dsa_free will acquire the
1898 : * span pool's lock. We can't deadlock because the acquisition order is
1899 : * always some other pool and then the span pool.
1900 : */
1901 16 : if (size_class != DSA_SCLASS_BLOCK_OF_SPANS)
1902 16 : dsa_free(area, span_pointer);
1903 16 : }
1904 :
1905 : static void
1906 4612 : unlink_span(dsa_area *area, dsa_area_span *span)
1907 : {
1908 4612 : if (DsaPointerIsValid(span->nextspan))
1909 : {
1910 3750 : dsa_area_span *next = dsa_get_address(area, span->nextspan);
1911 :
1912 3750 : next->prevspan = span->prevspan;
1913 : }
1914 4612 : if (DsaPointerIsValid(span->prevspan))
1915 : {
1916 1882 : dsa_area_span *prev = dsa_get_address(area, span->prevspan);
1917 :
1918 1882 : prev->nextspan = span->nextspan;
1919 : }
1920 : else
1921 : {
1922 2730 : dsa_area_pool *pool = dsa_get_address(area, span->pool);
1923 :
1924 2730 : pool->spans[span->fclass] = span->nextspan;
1925 : }
1926 4612 : }
1927 :
1928 : static void
1929 204 : add_span_to_fullness_class(dsa_area *area, dsa_area_span *span,
1930 : dsa_pointer span_pointer,
1931 : int fclass)
1932 : {
1933 204 : dsa_area_pool *pool = dsa_get_address(area, span->pool);
1934 :
1935 204 : if (DsaPointerIsValid(pool->spans[fclass]))
1936 : {
1937 86 : dsa_area_span *head = dsa_get_address(area,
1938 : pool->spans[fclass]);
1939 :
1940 86 : head->prevspan = span_pointer;
1941 : }
1942 204 : span->prevspan = InvalidDsaPointer;
1943 204 : span->nextspan = pool->spans[fclass];
1944 204 : pool->spans[fclass] = span_pointer;
1945 204 : span->fclass = fclass;
1946 204 : }
1947 :
1948 : /*
1949 : * Detach from an area that was either created or attached to by this process.
1950 : */
1951 : void
1952 42820 : dsa_detach(dsa_area *area)
1953 : {
1954 : int i;
1955 :
1956 : /* Detach from all segments. */
1957 119656 : for (i = 0; i <= area->high_segment_index; ++i)
1958 76836 : if (area->segment_maps[i].segment != NULL)
1959 34084 : dsm_detach(area->segment_maps[i].segment);
1960 :
1961 : /*
1962 : * Note that 'detaching' (= detaching from DSM segments) doesn't include
1963 : * 'releasing' (= adjusting the reference count). It would be nice to
1964 : * combine these operations, but client code might never get around to
1965 : * calling dsa_detach because of an error path, and a detach hook on any
1966 : * particular segment is too late to detach other segments in the area
1967 : * without risking a 'leak' warning in the non-error path.
1968 : */
1969 :
1970 : /* Free the backend-local area object. */
1971 42820 : pfree(area);
1972 42820 : }
1973 :
1974 : /*
1975 : * Unlink a segment from the bin that contains it.
1976 : */
1977 : static void
1978 3876 : unlink_segment(dsa_area *area, dsa_segment_map *segment_map)
1979 : {
1980 3876 : if (segment_map->header->prev != DSA_SEGMENT_INDEX_NONE)
1981 : {
1982 : dsa_segment_map *prev;
1983 :
1984 2 : prev = get_segment_by_index(area, segment_map->header->prev);
1985 2 : prev->header->next = segment_map->header->next;
1986 : }
1987 : else
1988 : {
1989 : Assert(area->control->segment_bins[segment_map->header->bin] ==
1990 : get_segment_index(area, segment_map));
1991 3874 : area->control->segment_bins[segment_map->header->bin] =
1992 3874 : segment_map->header->next;
1993 : }
1994 3876 : if (segment_map->header->next != DSA_SEGMENT_INDEX_NONE)
1995 : {
1996 : dsa_segment_map *next;
1997 :
1998 0 : next = get_segment_by_index(area, segment_map->header->next);
1999 0 : next->header->prev = segment_map->header->prev;
2000 : }
2001 3876 : }
2002 :
2003 : /*
2004 : * Find a segment that could satisfy a request for 'npages' of contiguous
2005 : * memory, or return NULL if none can be found. This may involve attaching to
2006 : * segments that weren't previously attached so that we can query their free
2007 : * pages map.
2008 : */
2009 : static dsa_segment_map *
2010 24548 : get_best_segment(dsa_area *area, size_t npages)
2011 : {
2012 : size_t bin;
2013 :
2014 : Assert(LWLockHeldByMe(DSA_AREA_LOCK(area)));
2015 24548 : check_for_freed_segments_locked(area);
2016 :
2017 : /*
2018 : * Start searching from the first bin that *might* have enough contiguous
2019 : * pages.
2020 : */
2021 107214 : for (bin = contiguous_pages_to_segment_bin(npages);
2022 : bin < DSA_NUM_SEGMENT_BINS;
2023 82666 : ++bin)
2024 : {
2025 : /*
2026 : * The minimum contiguous size that any segment in this bin should
2027 : * have. We'll re-bin if we see segments with fewer.
2028 : */
2029 105448 : size_t threshold = (size_t) 1 << (bin - 1);
2030 : dsa_segment_index segment_index;
2031 :
2032 : /* Search this bin for a segment with enough contiguous space. */
2033 105448 : segment_index = area->control->segment_bins[bin];
2034 107052 : while (segment_index != DSA_SEGMENT_INDEX_NONE)
2035 : {
2036 : dsa_segment_map *segment_map;
2037 : dsa_segment_index next_segment_index;
2038 : size_t contiguous_pages;
2039 :
2040 24386 : segment_map = get_segment_by_index(area, segment_index);
2041 24386 : next_segment_index = segment_map->header->next;
2042 24386 : contiguous_pages = fpm_largest(segment_map->fpm);
2043 :
2044 : /* Not enough for the request, still enough for this bin. */
2045 24386 : if (contiguous_pages >= threshold && contiguous_pages < npages)
2046 : {
2047 0 : segment_index = next_segment_index;
2048 0 : continue;
2049 : }
2050 :
2051 : /* Re-bin it if it's no longer in the appropriate bin. */
2052 24386 : if (contiguous_pages < threshold)
2053 : {
2054 3558 : rebin_segment(area, segment_map);
2055 :
2056 : /*
2057 : * But fall through to see if it's enough to satisfy this
2058 : * request anyway....
2059 : */
2060 : }
2061 :
2062 : /* Check if we are done. */
2063 24386 : if (contiguous_pages >= npages)
2064 22782 : return segment_map;
2065 :
2066 : /* Continue searching the same bin. */
2067 1604 : segment_index = next_segment_index;
2068 : }
2069 : }
2070 :
2071 : /* Not found. */
2072 1766 : return NULL;
2073 : }
2074 :
2075 : /*
2076 : * Create a new segment that can handle at least requested_pages. Returns
2077 : * NULL if the requested total size limit or maximum allowed number of
2078 : * segments would be exceeded.
2079 : */
2080 : static dsa_segment_map *
2081 1766 : make_new_segment(dsa_area *area, size_t requested_pages)
2082 : {
2083 : dsa_segment_index new_index;
2084 : size_t metadata_bytes;
2085 : size_t total_size;
2086 : size_t total_pages;
2087 : size_t usable_pages;
2088 : dsa_segment_map *segment_map;
2089 : dsm_segment *segment;
2090 : ResourceOwner oldowner;
2091 :
2092 : Assert(LWLockHeldByMe(DSA_AREA_LOCK(area)));
2093 :
2094 : /* Find a segment slot that is not in use (linearly for now). */
2095 1842 : for (new_index = 1; new_index < DSA_MAX_SEGMENTS; ++new_index)
2096 : {
2097 1842 : if (area->control->segment_handles[new_index] == DSM_HANDLE_INVALID)
2098 1766 : break;
2099 : }
2100 1766 : if (new_index == DSA_MAX_SEGMENTS)
2101 0 : return NULL;
2102 :
2103 : /*
2104 : * If the total size limit is already exceeded, then we exit early and
2105 : * avoid arithmetic wraparound in the unsigned expressions below.
2106 : */
2107 1766 : if (area->control->total_segment_size >=
2108 1766 : area->control->max_total_segment_size)
2109 0 : return NULL;
2110 :
2111 : /*
2112 : * The size should be at least as big as requested, and at least big
2113 : * enough to follow a geometric series that approximately doubles the
2114 : * total storage each time we create a new segment. We use geometric
2115 : * growth because the underlying DSM system isn't designed for large
2116 : * numbers of segments (otherwise we might even consider just using one
2117 : * DSM segment for each large allocation and for each superblock, and then
2118 : * we wouldn't need to use FreePageManager).
2119 : *
2120 : * We decide on a total segment size first, so that we produce tidy
2121 : * power-of-two sized segments. This is a good property to have if we
2122 : * move to huge pages in the future. Then we work back to the number of
2123 : * pages we can fit.
2124 : */
2125 1766 : total_size = area->control->init_segment_size *
2126 1766 : ((size_t) 1 << (new_index / DSA_NUM_SEGMENTS_AT_EACH_SIZE));
2127 1766 : total_size = Min(total_size, area->control->max_segment_size);
2128 1766 : total_size = Min(total_size,
2129 : area->control->max_total_segment_size -
2130 : area->control->total_segment_size);
2131 :
2132 1766 : total_pages = total_size / FPM_PAGE_SIZE;
2133 1766 : metadata_bytes =
2134 : MAXALIGN(sizeof(dsa_segment_header)) +
2135 1766 : MAXALIGN(sizeof(FreePageManager)) +
2136 : sizeof(dsa_pointer) * total_pages;
2137 :
2138 : /* Add padding up to next page boundary. */
2139 1766 : if (metadata_bytes % FPM_PAGE_SIZE != 0)
2140 1766 : metadata_bytes += FPM_PAGE_SIZE - (metadata_bytes % FPM_PAGE_SIZE);
2141 1766 : if (total_size <= metadata_bytes)
2142 0 : return NULL;
2143 1766 : usable_pages = (total_size - metadata_bytes) / FPM_PAGE_SIZE;
2144 : Assert(metadata_bytes + usable_pages * FPM_PAGE_SIZE <= total_size);
2145 :
2146 : /* See if that is enough... */
2147 1766 : if (requested_pages > usable_pages)
2148 : {
2149 : /*
2150 : * We'll make an odd-sized segment, working forward from the requested
2151 : * number of pages.
2152 : */
2153 0 : usable_pages = requested_pages;
2154 0 : metadata_bytes =
2155 : MAXALIGN(sizeof(dsa_segment_header)) +
2156 0 : MAXALIGN(sizeof(FreePageManager)) +
2157 : usable_pages * sizeof(dsa_pointer);
2158 :
2159 : /* Add padding up to next page boundary. */
2160 0 : if (metadata_bytes % FPM_PAGE_SIZE != 0)
2161 0 : metadata_bytes += FPM_PAGE_SIZE - (metadata_bytes % FPM_PAGE_SIZE);
2162 0 : total_size = metadata_bytes + usable_pages * FPM_PAGE_SIZE;
2163 :
2164 : /* Is that too large for dsa_pointer's addressing scheme? */
2165 0 : if (total_size > DSA_MAX_SEGMENT_SIZE)
2166 0 : return NULL;
2167 :
2168 : /* Would that exceed the limit? */
2169 0 : if (total_size > area->control->max_total_segment_size -
2170 0 : area->control->total_segment_size)
2171 0 : return NULL;
2172 : }
2173 :
2174 : /* Create the segment. */
2175 1766 : oldowner = CurrentResourceOwner;
2176 1766 : CurrentResourceOwner = area->resowner;
2177 1766 : segment = dsm_create(total_size, 0);
2178 1766 : CurrentResourceOwner = oldowner;
2179 1766 : if (segment == NULL)
2180 0 : return NULL;
2181 1766 : dsm_pin_segment(segment);
2182 :
2183 : /* Store the handle in shared memory to be found by index. */
2184 3532 : area->control->segment_handles[new_index] =
2185 1766 : dsm_segment_handle(segment);
2186 : /* Track the highest segment index in the history of the area. */
2187 1766 : if (area->control->high_segment_index < new_index)
2188 1766 : area->control->high_segment_index = new_index;
2189 : /* Track the highest segment index this backend has ever mapped. */
2190 1766 : if (area->high_segment_index < new_index)
2191 1766 : area->high_segment_index = new_index;
2192 : /* Track total size of all segments. */
2193 1766 : area->control->total_segment_size += total_size;
2194 : Assert(area->control->total_segment_size <=
2195 : area->control->max_total_segment_size);
2196 :
2197 : /* Build a segment map for this segment in this backend. */
2198 1766 : segment_map = &area->segment_maps[new_index];
2199 1766 : segment_map->segment = segment;
2200 1766 : segment_map->mapped_address = dsm_segment_address(segment);
2201 1766 : segment_map->header = (dsa_segment_header *) segment_map->mapped_address;
2202 1766 : segment_map->fpm = (FreePageManager *)
2203 1766 : (segment_map->mapped_address +
2204 : MAXALIGN(sizeof(dsa_segment_header)));
2205 1766 : segment_map->pagemap = (dsa_pointer *)
2206 1766 : (segment_map->mapped_address +
2207 1766 : MAXALIGN(sizeof(dsa_segment_header)) +
2208 : MAXALIGN(sizeof(FreePageManager)));
2209 :
2210 : /* Set up the free page map. */
2211 1766 : FreePageManagerInitialize(segment_map->fpm, segment_map->mapped_address);
2212 1766 : FreePageManagerPut(segment_map->fpm, metadata_bytes / FPM_PAGE_SIZE,
2213 : usable_pages);
2214 :
2215 : /* Set up the segment header and put it in the appropriate bin. */
2216 1766 : segment_map->header->magic =
2217 1766 : DSA_SEGMENT_HEADER_MAGIC ^ area->control->handle ^ new_index;
2218 1766 : segment_map->header->usable_pages = usable_pages;
2219 1766 : segment_map->header->size = total_size;
2220 1766 : segment_map->header->bin = contiguous_pages_to_segment_bin(usable_pages);
2221 1766 : segment_map->header->prev = DSA_SEGMENT_INDEX_NONE;
2222 1766 : segment_map->header->next =
2223 1766 : area->control->segment_bins[segment_map->header->bin];
2224 1766 : segment_map->header->freed = false;
2225 1766 : area->control->segment_bins[segment_map->header->bin] = new_index;
2226 1766 : if (segment_map->header->next != DSA_SEGMENT_INDEX_NONE)
2227 : {
2228 : dsa_segment_map *next =
2229 0 : get_segment_by_index(area, segment_map->header->next);
2230 :
2231 : Assert(next->header->bin == segment_map->header->bin);
2232 0 : next->header->prev = new_index;
2233 : }
2234 :
2235 1766 : return segment_map;
2236 : }
2237 :
2238 : /*
2239 : * Check if any segments have been freed by destroy_superblock, so we can
2240 : * detach from them in this backend. This function is called by
2241 : * dsa_get_address and dsa_free to make sure that a dsa_pointer they have
2242 : * received can be resolved to the correct segment.
2243 : *
2244 : * The danger we want to defend against is that there could be an old segment
2245 : * mapped into a given slot in this backend, and the dsa_pointer they have
2246 : * might refer to some new segment in the same slot. So those functions must
2247 : * be sure to process all instructions to detach from a freed segment that had
2248 : * been generated by the time this process received the dsa_pointer, before
2249 : * they call get_segment_by_index.
2250 : */
2251 : static void
2252 14155102 : check_for_freed_segments(dsa_area *area)
2253 : {
2254 : size_t freed_segment_counter;
2255 :
2256 : /*
2257 : * Any other process that has freed a segment has incremented
2258 : * freed_segment_counter while holding an LWLock, and that must precede
2259 : * any backend creating a new segment in the same slot while holding an
2260 : * LWLock, and that must precede the creation of any dsa_pointer pointing
2261 : * into the new segment which might reach us here, and the caller must
2262 : * have sent the dsa_pointer to this process using appropriate memory
2263 : * synchronization (some kind of locking or atomic primitive or system
2264 : * call). So all we need to do on the reading side is ask for the load of
2265 : * freed_segment_counter to follow the caller's load of the dsa_pointer it
2266 : * has, and we can be sure to detect any segments that had been freed as
2267 : * of the time that the dsa_pointer reached this process.
2268 : */
2269 14155102 : pg_read_barrier();
2270 14155102 : freed_segment_counter = area->control->freed_segment_counter;
2271 14155102 : if (unlikely(area->freed_segment_counter != freed_segment_counter))
2272 : {
2273 : /* Check all currently mapped segments to find what's been freed. */
2274 0 : LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
2275 0 : check_for_freed_segments_locked(area);
2276 0 : LWLockRelease(DSA_AREA_LOCK(area));
2277 : }
2278 14155102 : }
2279 :
2280 : /*
2281 : * Workhorse for check_for_freed_segments(), and also used directly in path
2282 : * where the area lock is already held. This should be called after acquiring
2283 : * the lock but before looking up any segment by index number, to make sure we
2284 : * unmap any stale segments that might have previously had the same index as a
2285 : * current segment.
2286 : */
2287 : static void
2288 24564 : check_for_freed_segments_locked(dsa_area *area)
2289 : {
2290 : size_t freed_segment_counter;
2291 : int i;
2292 :
2293 : Assert(LWLockHeldByMe(DSA_AREA_LOCK(area)));
2294 24564 : freed_segment_counter = area->control->freed_segment_counter;
2295 24564 : if (unlikely(area->freed_segment_counter != freed_segment_counter))
2296 : {
2297 0 : for (i = 0; i <= area->high_segment_index; ++i)
2298 : {
2299 0 : if (area->segment_maps[i].header != NULL &&
2300 0 : area->segment_maps[i].header->freed)
2301 : {
2302 0 : dsm_detach(area->segment_maps[i].segment);
2303 0 : area->segment_maps[i].segment = NULL;
2304 0 : area->segment_maps[i].header = NULL;
2305 0 : area->segment_maps[i].mapped_address = NULL;
2306 : }
2307 : }
2308 0 : area->freed_segment_counter = freed_segment_counter;
2309 : }
2310 24564 : }
2311 :
2312 : /*
2313 : * Re-bin segment if it's no longer in the appropriate bin.
2314 : */
2315 : static void
2316 7966 : rebin_segment(dsa_area *area, dsa_segment_map *segment_map)
2317 : {
2318 : size_t new_bin;
2319 : dsa_segment_index segment_index;
2320 :
2321 7966 : new_bin = contiguous_pages_to_segment_bin(fpm_largest(segment_map->fpm));
2322 7966 : if (segment_map->header->bin == new_bin)
2323 4090 : return;
2324 :
2325 : /* Remove it from its current bin. */
2326 3876 : unlink_segment(area, segment_map);
2327 :
2328 : /* Push it onto the front of its new bin. */
2329 3876 : segment_index = get_segment_index(area, segment_map);
2330 3876 : segment_map->header->prev = DSA_SEGMENT_INDEX_NONE;
2331 3876 : segment_map->header->next = area->control->segment_bins[new_bin];
2332 3876 : segment_map->header->bin = new_bin;
2333 3876 : area->control->segment_bins[new_bin] = segment_index;
2334 3876 : if (segment_map->header->next != DSA_SEGMENT_INDEX_NONE)
2335 : {
2336 : dsa_segment_map *next;
2337 :
2338 22 : next = get_segment_by_index(area, segment_map->header->next);
2339 : Assert(next->header->bin == new_bin);
2340 22 : next->header->prev = segment_index;
2341 : }
2342 : }
|