LCOV - code coverage report
Current view: top level - src/backend/utils/mmgr - dsa.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 77.6 % 678 526
Test Date: 2026-02-17 17:20:33 Functions: 89.7 % 39 35
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * dsa.c
       4              :  *    Dynamic shared memory areas.
       5              :  *
       6              :  * This module provides dynamic shared memory areas which are built on top of
       7              :  * DSM segments.  While dsm.c allows segments of memory of shared memory to be
       8              :  * created and shared between backends, it isn't designed to deal with small
       9              :  * objects.  A DSA area is a shared memory heap usually backed by one or more
      10              :  * DSM segments which can allocate memory using dsa_allocate() and dsa_free().
      11              :  * Alternatively, it can be created in pre-existing shared memory, including a
      12              :  * DSM segment, and then create extra DSM segments as required.  Unlike the
      13              :  * regular system heap, it deals in pseudo-pointers which must be converted to
      14              :  * backend-local pointers before they are dereferenced.  These pseudo-pointers
      15              :  * can however be shared with other backends, and can be used to construct
      16              :  * shared data structures.
      17              :  *
      18              :  * Each DSA area manages a set of DSM segments, adding new segments as
      19              :  * required and detaching them when they are no longer needed.  Each segment
      20              :  * contains a number of 4KB pages, a free page manager for tracking
      21              :  * consecutive runs of free pages, and a page map for tracking the source of
      22              :  * objects allocated on each page.  Allocation requests above 8KB are handled
      23              :  * by choosing a segment and finding consecutive free pages in its free page
      24              :  * manager.  Allocation requests for smaller sizes are handled using pools of
      25              :  * objects of a selection of sizes.  Each pool consists of a number of 16 page
      26              :  * (64KB) superblocks allocated in the same way as large objects.  Allocation
      27              :  * of large objects and new superblocks is serialized by a single LWLock, but
      28              :  * allocation of small objects from pre-existing superblocks uses one LWLock
      29              :  * per pool.  Currently there is one pool, and therefore one lock, per size
      30              :  * class.  Per-core pools to increase concurrency and strategies for reducing
      31              :  * the resulting fragmentation are areas for future research.  Each superblock
      32              :  * is managed with a 'span', which tracks the superblock's freelist.  Free
      33              :  * requests are handled by looking in the page map to find which span an
      34              :  * address was allocated from, so that small objects can be returned to the
      35              :  * appropriate free list, and large object pages can be returned directly to
      36              :  * the free page map.  When allocating, simple heuristics for selecting
      37              :  * segments and superblocks try to encourage occupied memory to be
      38              :  * concentrated, increasing the likelihood that whole superblocks can become
      39              :  * empty and be returned to the free page manager, and whole segments can
      40              :  * become empty and be returned to the operating system.
      41              :  *
      42              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      43              :  * Portions Copyright (c) 1994, Regents of the University of California
      44              :  *
      45              :  * IDENTIFICATION
      46              :  *    src/backend/utils/mmgr/dsa.c
      47              :  *
      48              :  *-------------------------------------------------------------------------
      49              :  */
      50              : 
      51              : #include "postgres.h"
      52              : 
      53              : #include "port/atomics.h"
      54              : #include "port/pg_bitutils.h"
      55              : #include "storage/dsm.h"
      56              : #include "storage/lwlock.h"
      57              : #include "utils/dsa.h"
      58              : #include "utils/freepage.h"
      59              : #include "utils/memutils.h"
      60              : #include "utils/resowner.h"
      61              : 
      62              : /*
      63              :  * How many segments to create before we double the segment size.  If this is
      64              :  * low, then there is likely to be a lot of wasted space in the largest
      65              :  * segment.  If it is high, then we risk running out of segment slots (see
      66              :  * dsm.c's limits on total number of segments), or limiting the total size
      67              :  * an area can manage when using small pointers.
      68              :  */
      69              : #define DSA_NUM_SEGMENTS_AT_EACH_SIZE 2
      70              : 
      71              : /*
      72              :  * The maximum number of DSM segments that an area can own, determined by
      73              :  * the number of bits remaining (but capped at 1024).
      74              :  */
      75              : #define DSA_MAX_SEGMENTS \
      76              :     Min(1024, (1 << ((SIZEOF_DSA_POINTER * 8) - DSA_OFFSET_WIDTH)))
      77              : 
      78              : /* The bitmask for extracting the offset from a dsa_pointer. */
      79              : #define DSA_OFFSET_BITMASK (((dsa_pointer) 1 << DSA_OFFSET_WIDTH) - 1)
      80              : 
      81              : /* Number of pages (see FPM_PAGE_SIZE) per regular superblock. */
      82              : #define DSA_PAGES_PER_SUPERBLOCK        16
      83              : 
      84              : /*
      85              :  * A magic number used as a sanity check for following DSM segments belonging
      86              :  * to a DSA area (this number will be XORed with the area handle and
      87              :  * the segment index).
      88              :  */
      89              : #define DSA_SEGMENT_HEADER_MAGIC 0x0ce26608
      90              : 
      91              : /* Build a dsa_pointer given a segment number and offset. */
      92              : #define DSA_MAKE_POINTER(segment_number, offset) \
      93              :     (((dsa_pointer) (segment_number) << DSA_OFFSET_WIDTH) | (offset))
      94              : 
      95              : /* Extract the segment number from a dsa_pointer. */
      96              : #define DSA_EXTRACT_SEGMENT_NUMBER(dp) ((dp) >> DSA_OFFSET_WIDTH)
      97              : 
      98              : /* Extract the offset from a dsa_pointer. */
      99              : #define DSA_EXTRACT_OFFSET(dp) ((dp) & DSA_OFFSET_BITMASK)
     100              : 
     101              : /* The type used for index segment indexes (zero based). */
     102              : typedef size_t dsa_segment_index;
     103              : 
     104              : /* Sentinel value for dsa_segment_index indicating 'none' or 'end'. */
     105              : #define DSA_SEGMENT_INDEX_NONE (~(dsa_segment_index)0)
     106              : 
     107              : /*
     108              :  * How many bins of segments do we have?  The bins are used to categorize
     109              :  * segments by their largest contiguous run of free pages.
     110              :  */
     111              : #define DSA_NUM_SEGMENT_BINS 16
     112              : 
     113              : /*
     114              :  * What is the lowest bin that holds segments that *might* have n contiguous
     115              :  * free pages?  There is no point in looking in segments in lower bins; they
     116              :  * definitely can't service a request for n free pages.
     117              :  */
     118              : static inline size_t
     119        22928 : contiguous_pages_to_segment_bin(size_t n)
     120              : {
     121              :     size_t      bin;
     122              : 
     123        22928 :     if (n == 0)
     124          766 :         bin = 0;
     125              :     else
     126        22162 :         bin = pg_leftmost_one_pos_size_t(n) + 1;
     127              : 
     128        22928 :     return Min(bin, DSA_NUM_SEGMENT_BINS - 1);
     129              : }
     130              : 
     131              : /* Macros for access to locks. */
     132              : #define DSA_AREA_LOCK(area) (&area->control->lock)
     133              : #define DSA_SCLASS_LOCK(area, sclass) (&area->control->pools[sclass].lock)
     134              : 
     135              : /*
     136              :  * The header for an individual segment.  This lives at the start of each DSM
     137              :  * segment owned by a DSA area including the first segment (where it appears
     138              :  * as part of the dsa_area_control struct).
     139              :  */
     140              : typedef struct
     141              : {
     142              :     /* Sanity check magic value. */
     143              :     uint32      magic;
     144              :     /* Total number of pages in this segment (excluding metadata area). */
     145              :     size_t      usable_pages;
     146              :     /* Total size of this segment in bytes. */
     147              :     size_t      size;
     148              : 
     149              :     /*
     150              :      * Index of the segment that precedes this one in the same segment bin, or
     151              :      * DSA_SEGMENT_INDEX_NONE if this is the first one.
     152              :      */
     153              :     dsa_segment_index prev;
     154              : 
     155              :     /*
     156              :      * Index of the segment that follows this one in the same segment bin, or
     157              :      * DSA_SEGMENT_INDEX_NONE if this is the last one.
     158              :      */
     159              :     dsa_segment_index next;
     160              :     /* The index of the bin that contains this segment. */
     161              :     size_t      bin;
     162              : 
     163              :     /*
     164              :      * A flag raised to indicate that this segment is being returned to the
     165              :      * operating system and has been unpinned.
     166              :      */
     167              :     bool        freed;
     168              : } dsa_segment_header;
     169              : 
     170              : /*
     171              :  * Metadata for one superblock.
     172              :  *
     173              :  * For most blocks, span objects are stored out-of-line; that is, the span
     174              :  * object is not stored within the block itself.  But, as an exception, for a
     175              :  * "span of spans", the span object is stored "inline".  The allocation is
     176              :  * always exactly one page, and the dsa_area_span object is located at
     177              :  * the beginning of that page.  The size class is DSA_SCLASS_BLOCK_OF_SPANS,
     178              :  * and the remaining fields are used just as they would be in an ordinary
     179              :  * block.  We can't allocate spans out of ordinary superblocks because
     180              :  * creating an ordinary superblock requires us to be able to allocate a span
     181              :  * *first*.  Doing it this way avoids that circularity.
     182              :  */
     183              : typedef struct
     184              : {
     185              :     dsa_pointer pool;           /* Containing pool. */
     186              :     dsa_pointer prevspan;       /* Previous span. */
     187              :     dsa_pointer nextspan;       /* Next span. */
     188              :     dsa_pointer start;          /* Starting address. */
     189              :     size_t      npages;         /* Length of span in pages. */
     190              :     uint16      size_class;     /* Size class. */
     191              :     uint16      ninitialized;   /* Maximum number of objects ever allocated. */
     192              :     uint16      nallocatable;   /* Number of objects currently allocatable. */
     193              :     uint16      firstfree;      /* First object on free list. */
     194              :     uint16      nmax;           /* Maximum number of objects ever possible. */
     195              :     uint16      fclass;         /* Current fullness class. */
     196              : } dsa_area_span;
     197              : 
     198              : /*
     199              :  * Given a pointer to an object in a span, access the index of the next free
     200              :  * object in the same span (ie in the span's freelist) as an L-value.
     201              :  */
     202              : #define NextFreeObjectIndex(object) (* (uint16 *) (object))
     203              : 
     204              : /*
     205              :  * Small allocations are handled by dividing a single block of memory into
     206              :  * many small objects of equal size.  The possible allocation sizes are
     207              :  * defined by the following array.  Larger size classes are spaced more widely
     208              :  * than smaller size classes.  We fudge the spacing for size classes >1kB to
     209              :  * avoid space wastage: based on the knowledge that we plan to allocate 64kB
     210              :  * blocks, we bump the maximum object size up to the largest multiple of
     211              :  * 8 bytes that still lets us fit the same number of objects into one block.
     212              :  *
     213              :  * NB: Because of this fudging, if we were ever to use differently-sized blocks
     214              :  * for small allocations, these size classes would need to be reworked to be
     215              :  * optimal for the new size.
     216              :  *
     217              :  * NB: The optimal spacing for size classes, as well as the size of the blocks
     218              :  * out of which small objects are allocated, is not a question that has one
     219              :  * right answer.  Some allocators (such as tcmalloc) use more closely-spaced
     220              :  * size classes than we do here, while others (like aset.c) use more
     221              :  * widely-spaced classes.  Spacing the classes more closely avoids wasting
     222              :  * memory within individual chunks, but also means a larger number of
     223              :  * potentially-unfilled blocks.
     224              :  */
     225              : static const uint16 dsa_size_classes[] = {
     226              :     sizeof(dsa_area_span), 0,   /* special size classes */
     227              :     8, 16, 24, 32, 40, 48, 56, 64,  /* 8 classes separated by 8 bytes */
     228              :     80, 96, 112, 128,           /* 4 classes separated by 16 bytes */
     229              :     160, 192, 224, 256,         /* 4 classes separated by 32 bytes */
     230              :     320, 384, 448, 512,         /* 4 classes separated by 64 bytes */
     231              :     640, 768, 896, 1024,        /* 4 classes separated by 128 bytes */
     232              :     1280, 1560, 1816, 2048,     /* 4 classes separated by ~256 bytes */
     233              :     2616, 3120, 3640, 4096,     /* 4 classes separated by ~512 bytes */
     234              :     5456, 6552, 7280, 8192      /* 4 classes separated by ~1024 bytes */
     235              : };
     236              : #define DSA_NUM_SIZE_CLASSES                lengthof(dsa_size_classes)
     237              : 
     238              : /* Special size classes. */
     239              : #define DSA_SCLASS_BLOCK_OF_SPANS       0
     240              : #define DSA_SCLASS_SPAN_LARGE           1
     241              : 
     242              : /*
     243              :  * The following lookup table is used to map the size of small objects
     244              :  * (less than 1kB) onto the corresponding size class.  To use this table,
     245              :  * round the size of the object up to the next multiple of 8 bytes, and then
     246              :  * index into this array.
     247              :  */
     248              : static const uint8 dsa_size_class_map[] = {
     249              :     2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 11, 11, 12, 12, 13, 13,
     250              :     14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 17, 17, 17, 17,
     251              :     18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19,
     252              :     20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21,
     253              :     22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
     254              :     23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
     255              :     24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
     256              :     25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25
     257              : };
     258              : #define DSA_SIZE_CLASS_MAP_QUANTUM  8
     259              : 
     260              : /*
     261              :  * Superblocks are binned by how full they are.  Generally, each fullness
     262              :  * class corresponds to one quartile, but the block being used for
     263              :  * allocations is always at the head of the list for fullness class 1,
     264              :  * regardless of how full it really is.
     265              :  */
     266              : #define DSA_FULLNESS_CLASSES        4
     267              : 
     268              : /*
     269              :  * A dsa_area_pool represents a set of objects of a given size class.
     270              :  *
     271              :  * Perhaps there should be multiple pools for the same size class for
     272              :  * contention avoidance, but for now there is just one!
     273              :  */
     274              : typedef struct
     275              : {
     276              :     /* A lock protecting access to this pool. */
     277              :     LWLock      lock;
     278              :     /* A set of linked lists of spans, arranged by fullness. */
     279              :     dsa_pointer spans[DSA_FULLNESS_CLASSES];
     280              :     /* Should we pad this out to a cacheline boundary? */
     281              : } dsa_area_pool;
     282              : 
     283              : /*
     284              :  * The control block for an area.  This lives in shared memory, at the start of
     285              :  * the first DSM segment controlled by this area.
     286              :  */
     287              : typedef struct
     288              : {
     289              :     /* The segment header for the first segment. */
     290              :     dsa_segment_header segment_header;
     291              :     /* The handle for this area. */
     292              :     dsa_handle  handle;
     293              :     /* The handles of the segments owned by this area. */
     294              :     dsm_handle  segment_handles[DSA_MAX_SEGMENTS];
     295              :     /* Lists of segments, binned by maximum contiguous run of free pages. */
     296              :     dsa_segment_index segment_bins[DSA_NUM_SEGMENT_BINS];
     297              :     /* The object pools for each size class. */
     298              :     dsa_area_pool pools[DSA_NUM_SIZE_CLASSES];
     299              :     /* initial allocation segment size */
     300              :     size_t      init_segment_size;
     301              :     /* maximum allocation segment size */
     302              :     size_t      max_segment_size;
     303              :     /* The total size of all active segments. */
     304              :     size_t      total_segment_size;
     305              :     /* The maximum total size of backing storage we are allowed. */
     306              :     size_t      max_total_segment_size;
     307              :     /* Highest used segment index in the history of this area. */
     308              :     dsa_segment_index high_segment_index;
     309              :     /* The reference count for this area. */
     310              :     int         refcnt;
     311              :     /* A flag indicating that this area has been pinned. */
     312              :     bool        pinned;
     313              :     /* The number of times that segments have been freed. */
     314              :     size_t      freed_segment_counter;
     315              :     /* The LWLock tranche ID. */
     316              :     int         lwlock_tranche_id;
     317              :     /* The general lock (protects everything except object pools). */
     318              :     LWLock      lock;
     319              : } dsa_area_control;
     320              : 
     321              : /* Given a pointer to a pool, find a dsa_pointer. */
     322              : #define DsaAreaPoolToDsaPointer(area, p)    \
     323              :     DSA_MAKE_POINTER(0, (char *) p - (char *) area->control)
     324              : 
     325              : /*
     326              :  * A dsa_segment_map is stored within the backend-private memory of each
     327              :  * individual backend.  It holds the base address of the segment within that
     328              :  * backend, plus the addresses of key objects within the segment.  Those
     329              :  * could instead be derived from the base address but it's handy to have them
     330              :  * around.
     331              :  */
     332              : typedef struct
     333              : {
     334              :     dsm_segment *segment;       /* DSM segment */
     335              :     char       *mapped_address; /* Address at which segment is mapped */
     336              :     dsa_segment_header *header; /* Header (same as mapped_address) */
     337              :     FreePageManager *fpm;       /* Free page manager within segment. */
     338              :     dsa_pointer *pagemap;       /* Page map within segment. */
     339              : } dsa_segment_map;
     340              : 
     341              : /*
     342              :  * Per-backend state for a storage area.  Backends obtain one of these by
     343              :  * creating an area or attaching to an existing one using a handle.  Each
     344              :  * process that needs to use an area uses its own object to track where the
     345              :  * segments are mapped.
     346              :  */
     347              : struct dsa_area
     348              : {
     349              :     /* Pointer to the control object in shared memory. */
     350              :     dsa_area_control *control;
     351              : 
     352              :     /*
     353              :      * All the mappings are owned by this.  The dsa_area itself is not
     354              :      * directly tracked by the ResourceOwner, but the effect is the same. NULL
     355              :      * if the attachment has session lifespan, i.e if dsa_pin_mapping() has
     356              :      * been called.
     357              :      */
     358              :     ResourceOwner resowner;
     359              : 
     360              :     /*
     361              :      * This backend's array of segment maps, ordered by segment index
     362              :      * corresponding to control->segment_handles.  Some of the area's segments
     363              :      * may not be mapped in this backend yet, and some slots may have been
     364              :      * freed and need to be detached; these operations happen on demand.
     365              :      */
     366              :     dsa_segment_map segment_maps[DSA_MAX_SEGMENTS];
     367              : 
     368              :     /* The highest segment index this backend has ever mapped. */
     369              :     dsa_segment_index high_segment_index;
     370              : 
     371              :     /* The last observed freed_segment_counter. */
     372              :     size_t      freed_segment_counter;
     373              : };
     374              : 
     375              : #define DSA_SPAN_NOTHING_FREE   ((uint16) -1)
     376              : #define DSA_SUPERBLOCK_SIZE (DSA_PAGES_PER_SUPERBLOCK * FPM_PAGE_SIZE)
     377              : 
     378              : /* Given a pointer to a segment_map, obtain a segment index number. */
     379              : #define get_segment_index(area, segment_map_ptr) \
     380              :     (segment_map_ptr - &area->segment_maps[0])
     381              : 
     382              : static void init_span(dsa_area *area, dsa_pointer span_pointer,
     383              :                       dsa_area_pool *pool, dsa_pointer start, size_t npages,
     384              :                       uint16 size_class);
     385              : static bool transfer_first_span(dsa_area *area, dsa_area_pool *pool,
     386              :                                 int fromclass, int toclass);
     387              : static inline dsa_pointer alloc_object(dsa_area *area, int size_class);
     388              : static bool ensure_active_superblock(dsa_area *area, dsa_area_pool *pool,
     389              :                                      int size_class);
     390              : static dsa_segment_map *get_segment_by_index(dsa_area *area,
     391              :                                              dsa_segment_index index);
     392              : static void destroy_superblock(dsa_area *area, dsa_pointer span_pointer);
     393              : static void unlink_span(dsa_area *area, dsa_area_span *span);
     394              : static void add_span_to_fullness_class(dsa_area *area, dsa_area_span *span,
     395              :                                        dsa_pointer span_pointer, int fclass);
     396              : static void unlink_segment(dsa_area *area, dsa_segment_map *segment_map);
     397              : static dsa_segment_map *get_best_segment(dsa_area *area, size_t npages);
     398              : static dsa_segment_map *make_new_segment(dsa_area *area, size_t requested_pages);
     399              : static dsa_area *create_internal(void *place, size_t size,
     400              :                                  int tranche_id,
     401              :                                  dsm_handle control_handle,
     402              :                                  dsm_segment *control_segment,
     403              :                                  size_t init_segment_size,
     404              :                                  size_t max_segment_size);
     405              : static dsa_area *attach_internal(void *place, dsm_segment *segment,
     406              :                                  dsa_handle handle);
     407              : static void check_for_freed_segments(dsa_area *area);
     408              : static void check_for_freed_segments_locked(dsa_area *area);
     409              : static void rebin_segment(dsa_area *area, dsa_segment_map *segment_map);
     410              : 
     411              : /*
     412              :  * Create a new shared area in a new DSM segment.  Further DSM segments will
     413              :  * be allocated as required to extend the available space.
     414              :  *
     415              :  * We can't allocate a LWLock tranche_id within this function, because tranche
     416              :  * IDs are a scarce resource; there are only 64k available, using low numbers
     417              :  * when possible matters, and we have no provision for recycling them.  So,
     418              :  * we require the caller to provide one.
     419              :  */
     420              : dsa_area *
     421          115 : dsa_create_ext(int tranche_id, size_t init_segment_size, size_t max_segment_size)
     422              : {
     423              :     dsm_segment *segment;
     424              :     dsa_area   *area;
     425              : 
     426              :     /*
     427              :      * Create the DSM segment that will hold the shared control object and the
     428              :      * first segment of usable space.
     429              :      */
     430          115 :     segment = dsm_create(init_segment_size, 0);
     431              : 
     432              :     /*
     433              :      * All segments backing this area are pinned, so that DSA can explicitly
     434              :      * control their lifetime (otherwise a newly created segment belonging to
     435              :      * this area might be freed when the only backend that happens to have it
     436              :      * mapped in ends, corrupting the area).
     437              :      */
     438          115 :     dsm_pin_segment(segment);
     439              : 
     440              :     /* Create a new DSA area with the control object in this segment. */
     441          115 :     area = create_internal(dsm_segment_address(segment),
     442              :                            init_segment_size,
     443              :                            tranche_id,
     444              :                            dsm_segment_handle(segment), segment,
     445              :                            init_segment_size, max_segment_size);
     446              : 
     447              :     /* Clean up when the control segment detaches. */
     448          115 :     on_dsm_detach(segment, &dsa_on_dsm_detach_release_in_place,
     449          115 :                   PointerGetDatum(dsm_segment_address(segment)));
     450              : 
     451          115 :     return area;
     452              : }
     453              : 
     454              : /*
     455              :  * Create a new shared area in an existing shared memory space, which may be
     456              :  * either DSM or Postmaster-initialized memory.  DSM segments will be
     457              :  * allocated as required to extend the available space, though that can be
     458              :  * prevented with dsa_set_size_limit(area, size) using the same size provided
     459              :  * to dsa_create_in_place.
     460              :  *
     461              :  * Areas created in-place must eventually be released by the backend that
     462              :  * created them and all backends that attach to them.  This can be done
     463              :  * explicitly with dsa_release_in_place, or, in the special case that 'place'
     464              :  * happens to be in a pre-existing DSM segment, by passing in a pointer to the
     465              :  * segment so that a detach hook can be registered with the containing DSM
     466              :  * segment.
     467              :  *
     468              :  * See dsa_create() for a note about the tranche arguments.
     469              :  */
     470              : dsa_area *
     471         1605 : dsa_create_in_place_ext(void *place, size_t size,
     472              :                         int tranche_id, dsm_segment *segment,
     473              :                         size_t init_segment_size, size_t max_segment_size)
     474              : {
     475              :     dsa_area   *area;
     476              : 
     477         1605 :     area = create_internal(place, size, tranche_id,
     478              :                            DSM_HANDLE_INVALID, NULL,
     479              :                            init_segment_size, max_segment_size);
     480              : 
     481              :     /*
     482              :      * Clean up when the control segment detaches, if a containing DSM segment
     483              :      * was provided.
     484              :      */
     485         1605 :     if (segment != NULL)
     486          465 :         on_dsm_detach(segment, &dsa_on_dsm_detach_release_in_place,
     487              :                       PointerGetDatum(place));
     488              : 
     489         1605 :     return area;
     490              : }
     491              : 
     492              : /*
     493              :  * Obtain a handle that can be passed to other processes so that they can
     494              :  * attach to the given area.  Cannot be called for areas created with
     495              :  * dsa_create_in_place.
     496              :  */
     497              : dsa_handle
     498          112 : dsa_get_handle(dsa_area *area)
     499              : {
     500              :     Assert(area->control->handle != DSA_HANDLE_INVALID);
     501          112 :     return area->control->handle;
     502              : }
     503              : 
     504              : /*
     505              :  * Attach to an area given a handle generated (possibly in another process) by
     506              :  * dsa_get_handle.  The area must have been created with dsa_create (not
     507              :  * dsa_create_in_place).
     508              :  */
     509              : dsa_area *
     510          369 : dsa_attach(dsa_handle handle)
     511              : {
     512              :     dsm_segment *segment;
     513              :     dsa_area   *area;
     514              : 
     515              :     /*
     516              :      * An area handle is really a DSM segment handle for the first segment, so
     517              :      * we go ahead and attach to that.
     518              :      */
     519          369 :     segment = dsm_attach(handle);
     520          369 :     if (segment == NULL)
     521            0 :         ereport(ERROR,
     522              :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     523              :                  errmsg("could not attach to dynamic shared area")));
     524              : 
     525          369 :     area = attach_internal(dsm_segment_address(segment), segment, handle);
     526              : 
     527              :     /* Clean up when the control segment detaches. */
     528          369 :     on_dsm_detach(segment, &dsa_on_dsm_detach_release_in_place,
     529          369 :                   PointerGetDatum(dsm_segment_address(segment)));
     530              : 
     531          369 :     return area;
     532              : }
     533              : 
     534              : /*
     535              :  * Returns whether the area with the given handle was already attached by the
     536              :  * current process.  The area must have been created with dsa_create (not
     537              :  * dsa_create_in_place).
     538              :  */
     539              : bool
     540           14 : dsa_is_attached(dsa_handle handle)
     541              : {
     542              :     /*
     543              :      * An area handle is really a DSM segment handle for the first segment, so
     544              :      * we can just search for that.
     545              :      */
     546           14 :     return dsm_find_mapping(handle) != NULL;
     547              : }
     548              : 
     549              : /*
     550              :  * Attach to an area that was created with dsa_create_in_place.  The caller
     551              :  * must somehow know the location in memory that was used when the area was
     552              :  * created, though it may be mapped at a different virtual address in this
     553              :  * process.
     554              :  *
     555              :  * See dsa_create_in_place for note about releasing in-place areas, and the
     556              :  * optional 'segment' argument which can be provided to allow automatic
     557              :  * release if the containing memory happens to be a DSM segment.
     558              :  */
     559              : dsa_area *
     560        25961 : dsa_attach_in_place(void *place, dsm_segment *segment)
     561              : {
     562              :     dsa_area   *area;
     563              : 
     564        25961 :     area = attach_internal(place, NULL, DSA_HANDLE_INVALID);
     565              : 
     566              :     /*
     567              :      * Clean up when the control segment detaches, if a containing DSM segment
     568              :      * was provided.
     569              :      */
     570        25961 :     if (segment != NULL)
     571         2825 :         on_dsm_detach(segment, &dsa_on_dsm_detach_release_in_place,
     572              :                       PointerGetDatum(place));
     573              : 
     574        25961 :     return area;
     575              : }
     576              : 
     577              : /*
     578              :  * Release a DSA area that was produced by dsa_create_in_place or
     579              :  * dsa_attach_in_place.  The 'segment' argument is ignored but provides an
     580              :  * interface suitable for on_dsm_detach, for the convenience of users who want
     581              :  * to create a DSA segment inside an existing DSM segment and have it
     582              :  * automatically released when the containing DSM segment is detached.
     583              :  * 'place' should be the address of the place where the area was created.
     584              :  *
     585              :  * This callback is automatically registered for the DSM segment containing
     586              :  * the control object of in-place areas when a segment is provided to
     587              :  * dsa_create_in_place or dsa_attach_in_place, and also for all areas created
     588              :  * with dsa_create.
     589              :  */
     590              : void
     591         3774 : dsa_on_dsm_detach_release_in_place(dsm_segment *segment, Datum place)
     592              : {
     593         3774 :     dsa_release_in_place(DatumGetPointer(place));
     594         3774 : }
     595              : 
     596              : /*
     597              :  * Release a DSA area that was produced by dsa_create_in_place or
     598              :  * dsa_attach_in_place.  The 'code' argument is ignored but provides an
     599              :  * interface suitable for on_shmem_exit or before_shmem_exit, for the
     600              :  * convenience of users who want to create a DSA segment inside shared memory
     601              :  * other than a DSM segment and have it automatically release at backend exit.
     602              :  * 'place' should be the address of the place where the area was created.
     603              :  */
     604              : void
     605            0 : dsa_on_shmem_exit_release_in_place(int code, Datum place)
     606              : {
     607            0 :     dsa_release_in_place(DatumGetPointer(place));
     608            0 : }
     609              : 
     610              : /*
     611              :  * Release a DSA area that was produced by dsa_create_in_place or
     612              :  * dsa_attach_in_place.  It is preferable to use one of the 'dsa_on_XXX'
     613              :  * callbacks so that this is managed automatically, because failure to release
     614              :  * an area created in-place leaks its segments permanently.
     615              :  *
     616              :  * This is also called automatically for areas produced by dsa_create or
     617              :  * dsa_attach as an implementation detail.
     618              :  */
     619              : void
     620        26910 : dsa_release_in_place(void *place)
     621              : {
     622        26910 :     dsa_area_control *control = (dsa_area_control *) place;
     623              :     int         i;
     624              : 
     625        26910 :     LWLockAcquire(&control->lock, LW_EXCLUSIVE);
     626              :     Assert(control->segment_header.magic ==
     627              :            (DSA_SEGMENT_HEADER_MAGIC ^ control->handle ^ 0));
     628              :     Assert(control->refcnt > 0);
     629        26910 :     if (--control->refcnt == 0)
     630              :     {
     631         1131 :         for (i = 0; i <= control->high_segment_index; ++i)
     632              :         {
     633              :             dsm_handle  handle;
     634              : 
     635          635 :             handle = control->segment_handles[i];
     636          635 :             if (handle != DSM_HANDLE_INVALID)
     637          170 :                 dsm_unpin_segment(handle);
     638              :         }
     639              :     }
     640        26910 :     LWLockRelease(&control->lock);
     641        26910 : }
     642              : 
     643              : /*
     644              :  * Keep a DSA area attached until end of session or explicit detach.
     645              :  *
     646              :  * By default, areas are owned by the current resource owner, which means they
     647              :  * are detached automatically when that scope ends.
     648              :  */
     649              : void
     650        25128 : dsa_pin_mapping(dsa_area *area)
     651              : {
     652              :     int         i;
     653              : 
     654        25128 :     if (area->resowner != NULL)
     655              :     {
     656         1923 :         area->resowner = NULL;
     657              : 
     658         3866 :         for (i = 0; i <= area->high_segment_index; ++i)
     659         1943 :             if (area->segment_maps[i].segment != NULL)
     660          377 :                 dsm_pin_mapping(area->segment_maps[i].segment);
     661              :     }
     662        25128 : }
     663              : 
     664              : /*
     665              :  * Allocate memory in this storage area.  The return value is a dsa_pointer
     666              :  * that can be passed to other processes, and converted to a local pointer
     667              :  * with dsa_get_address.  'flags' is a bitmap which should be constructed
     668              :  * from the following values:
     669              :  *
     670              :  * DSA_ALLOC_HUGE allows allocations >= 1GB.  Otherwise, such allocations
     671              :  * will result in an ERROR.
     672              :  *
     673              :  * DSA_ALLOC_NO_OOM causes this function to return InvalidDsaPointer when
     674              :  * no memory is available or a size limit established by dsa_set_size_limit
     675              :  * would be exceeded.  Otherwise, such allocations will result in an ERROR.
     676              :  *
     677              :  * DSA_ALLOC_ZERO causes the allocated memory to be zeroed.  Otherwise, the
     678              :  * contents of newly-allocated memory are indeterminate.
     679              :  *
     680              :  * These flags correspond to similarly named flags used by
     681              :  * MemoryContextAllocExtended().  See also the macros dsa_allocate and
     682              :  * dsa_allocate0 which expand to a call to this function with commonly used
     683              :  * flags.
     684              :  */
     685              : dsa_pointer
     686       690587 : dsa_allocate_extended(dsa_area *area, size_t size, int flags)
     687              : {
     688              :     uint16      size_class;
     689              :     dsa_pointer start_pointer;
     690              :     dsa_segment_map *segment_map;
     691              :     dsa_pointer result;
     692              : 
     693              :     Assert(size > 0);
     694              : 
     695              :     /* Sanity check on huge individual allocation size. */
     696       690587 :     if (((flags & DSA_ALLOC_HUGE) != 0 && !AllocHugeSizeIsValid(size)) ||
     697       690587 :         ((flags & DSA_ALLOC_HUGE) == 0 && !AllocSizeIsValid(size)))
     698            0 :         elog(ERROR, "invalid DSA memory alloc request size %zu", size);
     699              : 
     700              :     /*
     701              :      * If bigger than the largest size class, just grab a run of pages from
     702              :      * the free page manager, instead of allocating an object from a pool.
     703              :      * There will still be a span, but it's a special class of span that
     704              :      * manages this whole allocation and simply gives all pages back to the
     705              :      * free page manager when dsa_free is called.
     706              :      */
     707       690587 :     if (size > dsa_size_classes[lengthof(dsa_size_classes) - 1])
     708              :     {
     709         2883 :         size_t      npages = fpm_size_to_pages(size);
     710              :         size_t      first_page;
     711              :         dsa_pointer span_pointer;
     712         2883 :         dsa_area_pool *pool = &area->control->pools[DSA_SCLASS_SPAN_LARGE];
     713              : 
     714              :         /* Obtain a span object. */
     715         2883 :         span_pointer = alloc_object(area, DSA_SCLASS_BLOCK_OF_SPANS);
     716         2883 :         if (!DsaPointerIsValid(span_pointer))
     717              :         {
     718              :             /* Raise error unless asked not to. */
     719            0 :             if ((flags & DSA_ALLOC_NO_OOM) == 0)
     720            0 :                 ereport(ERROR,
     721              :                         (errcode(ERRCODE_OUT_OF_MEMORY),
     722              :                          errmsg("out of memory"),
     723              :                          errdetail("Failed on DSA request of size %zu.",
     724              :                                    size)));
     725            0 :             return InvalidDsaPointer;
     726              :         }
     727              : 
     728         2883 :         LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
     729              : 
     730              :         /* Find a segment from which to allocate. */
     731         2883 :         segment_map = get_best_segment(area, npages);
     732         2883 :         if (segment_map == NULL)
     733           21 :             segment_map = make_new_segment(area, npages);
     734         2883 :         if (segment_map == NULL)
     735              :         {
     736              :             /* Can't make any more segments: game over. */
     737            0 :             LWLockRelease(DSA_AREA_LOCK(area));
     738            0 :             dsa_free(area, span_pointer);
     739              : 
     740              :             /* Raise error unless asked not to. */
     741            0 :             if ((flags & DSA_ALLOC_NO_OOM) == 0)
     742            0 :                 ereport(ERROR,
     743              :                         (errcode(ERRCODE_OUT_OF_MEMORY),
     744              :                          errmsg("out of memory"),
     745              :                          errdetail("Failed on DSA request of size %zu.",
     746              :                                    size)));
     747            0 :             return InvalidDsaPointer;
     748              :         }
     749              : 
     750              :         /*
     751              :          * Ask the free page manager for a run of pages.  This should always
     752              :          * succeed, since both get_best_segment and make_new_segment should
     753              :          * only return a non-NULL pointer if it actually contains enough
     754              :          * contiguous freespace.  If it does fail, something in our backend
     755              :          * private state is out of whack, so use FATAL to kill the process.
     756              :          */
     757         2883 :         if (!FreePageManagerGet(segment_map->fpm, npages, &first_page))
     758            0 :             elog(FATAL,
     759              :                  "dsa_allocate could not find %zu free pages", npages);
     760         2883 :         LWLockRelease(DSA_AREA_LOCK(area));
     761              : 
     762         2883 :         start_pointer = DSA_MAKE_POINTER(get_segment_index(area, segment_map),
     763              :                                          first_page * FPM_PAGE_SIZE);
     764              : 
     765              :         /* Initialize span and pagemap. */
     766         2883 :         LWLockAcquire(DSA_SCLASS_LOCK(area, DSA_SCLASS_SPAN_LARGE),
     767              :                       LW_EXCLUSIVE);
     768         2883 :         init_span(area, span_pointer, pool, start_pointer, npages,
     769              :                   DSA_SCLASS_SPAN_LARGE);
     770         2883 :         segment_map->pagemap[first_page] = span_pointer;
     771         2883 :         LWLockRelease(DSA_SCLASS_LOCK(area, DSA_SCLASS_SPAN_LARGE));
     772              : 
     773              :         /* Zero-initialize the memory if requested. */
     774         2883 :         if ((flags & DSA_ALLOC_ZERO) != 0)
     775          793 :             memset(dsa_get_address(area, start_pointer), 0, size);
     776              : 
     777         2883 :         return start_pointer;
     778              :     }
     779              : 
     780              :     /* Map allocation to a size class. */
     781       687704 :     if (size < lengthof(dsa_size_class_map) * DSA_SIZE_CLASS_MAP_QUANTUM)
     782              :     {
     783              :         int         mapidx;
     784              : 
     785              :         /* For smaller sizes we have a lookup table... */
     786       663266 :         mapidx = ((size + DSA_SIZE_CLASS_MAP_QUANTUM - 1) /
     787       663266 :                   DSA_SIZE_CLASS_MAP_QUANTUM) - 1;
     788       663266 :         size_class = dsa_size_class_map[mapidx];
     789              :     }
     790              :     else
     791              :     {
     792              :         uint16      min;
     793              :         uint16      max;
     794              : 
     795              :         /* ... and for the rest we search by binary chop. */
     796        24438 :         min = dsa_size_class_map[lengthof(dsa_size_class_map) - 1];
     797        24438 :         max = lengthof(dsa_size_classes) - 1;
     798              : 
     799       101083 :         while (min < max)
     800              :         {
     801        76645 :             uint16      mid = (min + max) / 2;
     802        76645 :             uint16      class_size = dsa_size_classes[mid];
     803              : 
     804        76645 :             if (class_size < size)
     805        45812 :                 min = mid + 1;
     806              :             else
     807        30833 :                 max = mid;
     808              :         }
     809              : 
     810        24438 :         size_class = min;
     811              :     }
     812              :     Assert(size <= dsa_size_classes[size_class]);
     813              :     Assert(size_class == 0 || size > dsa_size_classes[size_class - 1]);
     814              : 
     815              :     /* Attempt to allocate an object from the appropriate pool. */
     816       687704 :     result = alloc_object(area, size_class);
     817              : 
     818              :     /* Check for failure to allocate. */
     819       687704 :     if (!DsaPointerIsValid(result))
     820              :     {
     821              :         /* Raise error unless asked not to. */
     822            0 :         if ((flags & DSA_ALLOC_NO_OOM) == 0)
     823            0 :             ereport(ERROR,
     824              :                     (errcode(ERRCODE_OUT_OF_MEMORY),
     825              :                      errmsg("out of memory"),
     826              :                      errdetail("Failed on DSA request of size %zu.", size)));
     827            0 :         return InvalidDsaPointer;
     828              :     }
     829              : 
     830              :     /* Zero-initialize the memory if requested. */
     831       687704 :     if ((flags & DSA_ALLOC_ZERO) != 0)
     832       339392 :         memset(dsa_get_address(area, result), 0, size);
     833              : 
     834       687704 :     return result;
     835              : }
     836              : 
     837              : /*
     838              :  * Free memory obtained with dsa_allocate.
     839              :  */
     840              : void
     841       128178 : dsa_free(dsa_area *area, dsa_pointer dp)
     842              : {
     843              :     dsa_segment_map *segment_map;
     844              :     int         pageno;
     845              :     dsa_pointer span_pointer;
     846              :     dsa_area_span *span;
     847              :     char       *superblock;
     848              :     char       *object;
     849              :     size_t      size;
     850              :     int         size_class;
     851              : 
     852              :     /* Make sure we don't have a stale segment in the slot 'dp' refers to. */
     853       128178 :     check_for_freed_segments(area);
     854              : 
     855              :     /* Locate the object, span and pool. */
     856       128178 :     segment_map = get_segment_by_index(area, DSA_EXTRACT_SEGMENT_NUMBER(dp));
     857       128178 :     pageno = DSA_EXTRACT_OFFSET(dp) / FPM_PAGE_SIZE;
     858       128178 :     span_pointer = segment_map->pagemap[pageno];
     859       128178 :     span = dsa_get_address(area, span_pointer);
     860       128178 :     superblock = dsa_get_address(area, span->start);
     861       128178 :     object = dsa_get_address(area, dp);
     862       128178 :     size_class = span->size_class;
     863       128178 :     size = dsa_size_classes[size_class];
     864              : 
     865              :     /*
     866              :      * Special case for large objects that live in a special span: we return
     867              :      * those pages directly to the free page manager and free the span.
     868              :      */
     869       128178 :     if (span->size_class == DSA_SCLASS_SPAN_LARGE)
     870              :     {
     871              : 
     872              : #ifdef CLOBBER_FREED_MEMORY
     873              :         memset(object, 0x7f, span->npages * FPM_PAGE_SIZE);
     874              : #endif
     875              : 
     876              :         /* Give pages back to free page manager. */
     877         2184 :         LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
     878         2184 :         FreePageManagerPut(segment_map->fpm,
     879         2184 :                            DSA_EXTRACT_OFFSET(span->start) / FPM_PAGE_SIZE,
     880              :                            span->npages);
     881              : 
     882              :         /* Move segment to appropriate bin if necessary. */
     883         2184 :         rebin_segment(area, segment_map);
     884         2184 :         LWLockRelease(DSA_AREA_LOCK(area));
     885              : 
     886              :         /* Unlink span. */
     887         2184 :         LWLockAcquire(DSA_SCLASS_LOCK(area, DSA_SCLASS_SPAN_LARGE),
     888              :                       LW_EXCLUSIVE);
     889         2184 :         unlink_span(area, span);
     890         2184 :         LWLockRelease(DSA_SCLASS_LOCK(area, DSA_SCLASS_SPAN_LARGE));
     891              :         /* Free the span object so it can be reused. */
     892         2184 :         dsa_free(area, span_pointer);
     893         2184 :         return;
     894              :     }
     895              : 
     896              : #ifdef CLOBBER_FREED_MEMORY
     897              :     memset(object, 0x7f, size);
     898              : #endif
     899              : 
     900       125994 :     LWLockAcquire(DSA_SCLASS_LOCK(area, size_class), LW_EXCLUSIVE);
     901              : 
     902              :     /* Put the object on the span's freelist. */
     903              :     Assert(object >= superblock);
     904              :     Assert(object < superblock + DSA_SUPERBLOCK_SIZE);
     905              :     Assert((object - superblock) % size == 0);
     906       125994 :     NextFreeObjectIndex(object) = span->firstfree;
     907       125994 :     span->firstfree = (object - superblock) / size;
     908       125994 :     ++span->nallocatable;
     909              : 
     910              :     /*
     911              :      * See if the span needs to moved to a different fullness class, or be
     912              :      * freed so its pages can be given back to the segment.
     913              :      */
     914       125994 :     if (span->nallocatable == 1 && span->fclass == DSA_FULLNESS_CLASSES - 1)
     915              :     {
     916              :         /*
     917              :          * The block was completely full and is located in the
     918              :          * highest-numbered fullness class, which is never scanned for free
     919              :          * chunks.  We must move it to the next-lower fullness class.
     920              :          */
     921          156 :         unlink_span(area, span);
     922          156 :         add_span_to_fullness_class(area, span, span_pointer,
     923              :                                    DSA_FULLNESS_CLASSES - 2);
     924              : 
     925              :         /*
     926              :          * If this is the only span, and there is no active span, then we
     927              :          * should probably move this span to fullness class 1.  (Otherwise if
     928              :          * you allocate exactly all the objects in the only span, it moves to
     929              :          * class 3, then you free them all, it moves to 2, and then is given
     930              :          * back, leaving no active span).
     931              :          */
     932              :     }
     933       125838 :     else if (span->nallocatable == span->nmax &&
     934         4191 :              (span->fclass != 1 || span->prevspan != InvalidDsaPointer))
     935              :     {
     936              :         /*
     937              :          * This entire block is free, and it's not the active block for this
     938              :          * size class.  Return the memory to the free page manager. We don't
     939              :          * do this for the active block to prevent hysteresis: if we
     940              :          * repeatedly allocate and free the only chunk in the active block, it
     941              :          * will be very inefficient if we deallocate and reallocate the block
     942              :          * every time.
     943              :          */
     944            8 :         destroy_superblock(area, span_pointer);
     945              :     }
     946              : 
     947       125994 :     LWLockRelease(DSA_SCLASS_LOCK(area, size_class));
     948              : }
     949              : 
     950              : /*
     951              :  * Obtain a backend-local address for a dsa_pointer.  'dp' must point to
     952              :  * memory allocated by the given area (possibly in another process) that
     953              :  * hasn't yet been freed.  This may cause a segment to be mapped into the
     954              :  * current process if required, and may cause freed segments to be unmapped.
     955              :  */
     956              : void *
     957      9490244 : dsa_get_address(dsa_area *area, dsa_pointer dp)
     958              : {
     959              :     dsa_segment_index index;
     960              :     size_t      offset;
     961              : 
     962              :     /* Convert InvalidDsaPointer to NULL. */
     963      9490244 :     if (!DsaPointerIsValid(dp))
     964      1390248 :         return NULL;
     965              : 
     966              :     /* Process any requests to detach from freed segments. */
     967      8099996 :     check_for_freed_segments(area);
     968              : 
     969              :     /* Break the dsa_pointer into its components. */
     970      8099996 :     index = DSA_EXTRACT_SEGMENT_NUMBER(dp);
     971      8099996 :     offset = DSA_EXTRACT_OFFSET(dp);
     972              :     Assert(index < DSA_MAX_SEGMENTS);
     973              : 
     974              :     /* Check if we need to cause this segment to be mapped in. */
     975      8099996 :     if (unlikely(area->segment_maps[index].mapped_address == NULL))
     976              :     {
     977              :         /* Call for effect (we don't need the result). */
     978        20108 :         get_segment_by_index(area, index);
     979              :     }
     980              : 
     981      8099996 :     return area->segment_maps[index].mapped_address + offset;
     982              : }
     983              : 
     984              : /*
     985              :  * Pin this area, so that it will continue to exist even if all backends
     986              :  * detach from it.  In that case, the area can still be reattached to if a
     987              :  * handle has been recorded somewhere.
     988              :  */
     989              : void
     990         1224 : dsa_pin(dsa_area *area)
     991              : {
     992         1224 :     LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
     993         1224 :     if (area->control->pinned)
     994              :     {
     995            0 :         LWLockRelease(DSA_AREA_LOCK(area));
     996            0 :         elog(ERROR, "dsa_area already pinned");
     997              :     }
     998         1224 :     area->control->pinned = true;
     999         1224 :     ++area->control->refcnt;
    1000         1224 :     LWLockRelease(DSA_AREA_LOCK(area));
    1001         1224 : }
    1002              : 
    1003              : /*
    1004              :  * Undo the effects of dsa_pin, so that the given area can be freed when no
    1005              :  * backends are attached to it.  May be called only if dsa_pin has been
    1006              :  * called.
    1007              :  */
    1008              : void
    1009            0 : dsa_unpin(dsa_area *area)
    1010              : {
    1011            0 :     LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
    1012              :     Assert(area->control->refcnt > 1);
    1013            0 :     if (!area->control->pinned)
    1014              :     {
    1015            0 :         LWLockRelease(DSA_AREA_LOCK(area));
    1016            0 :         elog(ERROR, "dsa_area not pinned");
    1017              :     }
    1018            0 :     area->control->pinned = false;
    1019            0 :     --area->control->refcnt;
    1020            0 :     LWLockRelease(DSA_AREA_LOCK(area));
    1021            0 : }
    1022              : 
    1023              : /*
    1024              :  * Set the total size limit for this area.  This limit is checked whenever new
    1025              :  * segments need to be allocated from the operating system.  If the new size
    1026              :  * limit is already exceeded, this has no immediate effect.
    1027              :  *
    1028              :  * Note that the total virtual memory usage may be temporarily larger than
    1029              :  * this limit when segments have been freed, but not yet detached by all
    1030              :  * backends that have attached to them.
    1031              :  */
    1032              : void
    1033         2280 : dsa_set_size_limit(dsa_area *area, size_t limit)
    1034              : {
    1035         2280 :     LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
    1036         2280 :     area->control->max_total_segment_size = limit;
    1037         2280 :     LWLockRelease(DSA_AREA_LOCK(area));
    1038         2280 : }
    1039              : 
    1040              : /* Return the total size of all active segments */
    1041              : size_t
    1042         1192 : dsa_get_total_size(dsa_area *area)
    1043              : {
    1044              :     size_t      size;
    1045              : 
    1046         1192 :     LWLockAcquire(DSA_AREA_LOCK(area), LW_SHARED);
    1047         1192 :     size = area->control->total_segment_size;
    1048         1192 :     LWLockRelease(DSA_AREA_LOCK(area));
    1049              : 
    1050         1192 :     return size;
    1051              : }
    1052              : 
    1053              : /*
    1054              :  * Same as dsa_get_total_size(), but accepts a DSA handle.  The area must have
    1055              :  * been created with dsa_create (not dsa_create_in_place).
    1056              :  */
    1057              : size_t
    1058            2 : dsa_get_total_size_from_handle(dsa_handle handle)
    1059              : {
    1060              :     size_t      size;
    1061              :     bool        already_attached;
    1062              :     dsm_segment *segment;
    1063              :     dsa_area_control *control;
    1064              : 
    1065            2 :     already_attached = dsa_is_attached(handle);
    1066            2 :     if (already_attached)
    1067            0 :         segment = dsm_find_mapping(handle);
    1068              :     else
    1069            2 :         segment = dsm_attach(handle);
    1070              : 
    1071            2 :     if (segment == NULL)
    1072            0 :         ereport(ERROR,
    1073              :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
    1074              :                  errmsg("could not attach to dynamic shared area")));
    1075              : 
    1076            2 :     control = (dsa_area_control *) dsm_segment_address(segment);
    1077              : 
    1078            2 :     LWLockAcquire(&control->lock, LW_SHARED);
    1079            2 :     size = control->total_segment_size;
    1080            2 :     LWLockRelease(&control->lock);
    1081              : 
    1082            2 :     if (!already_attached)
    1083            2 :         dsm_detach(segment);
    1084              : 
    1085            2 :     return size;
    1086              : }
    1087              : 
    1088              : /*
    1089              :  * Aggressively free all spare memory in the hope of returning DSM segments to
    1090              :  * the operating system.
    1091              :  */
    1092              : void
    1093            0 : dsa_trim(dsa_area *area)
    1094              : {
    1095              :     int         size_class;
    1096              : 
    1097              :     /*
    1098              :      * Trim in reverse pool order so we get to the spans-of-spans last, just
    1099              :      * in case any become entirely free while processing all the other pools.
    1100              :      */
    1101            0 :     for (size_class = DSA_NUM_SIZE_CLASSES - 1; size_class >= 0; --size_class)
    1102              :     {
    1103            0 :         dsa_area_pool *pool = &area->control->pools[size_class];
    1104              :         dsa_pointer span_pointer;
    1105              : 
    1106            0 :         if (size_class == DSA_SCLASS_SPAN_LARGE)
    1107              :         {
    1108              :             /* Large object frees give back segments aggressively already. */
    1109            0 :             continue;
    1110              :         }
    1111              : 
    1112              :         /*
    1113              :          * Search fullness class 1 only.  That is where we expect to find an
    1114              :          * entirely empty superblock (entirely empty superblocks in other
    1115              :          * fullness classes are returned to the free page map by dsa_free).
    1116              :          */
    1117            0 :         LWLockAcquire(DSA_SCLASS_LOCK(area, size_class), LW_EXCLUSIVE);
    1118            0 :         span_pointer = pool->spans[1];
    1119            0 :         while (DsaPointerIsValid(span_pointer))
    1120              :         {
    1121            0 :             dsa_area_span *span = dsa_get_address(area, span_pointer);
    1122            0 :             dsa_pointer next = span->nextspan;
    1123              : 
    1124            0 :             if (span->nallocatable == span->nmax)
    1125            0 :                 destroy_superblock(area, span_pointer);
    1126              : 
    1127            0 :             span_pointer = next;
    1128              :         }
    1129            0 :         LWLockRelease(DSA_SCLASS_LOCK(area, size_class));
    1130              :     }
    1131            0 : }
    1132              : 
    1133              : /*
    1134              :  * Print out debugging information about the internal state of the shared
    1135              :  * memory area.
    1136              :  */
    1137              : void
    1138            0 : dsa_dump(dsa_area *area)
    1139              : {
    1140              :     size_t      i,
    1141              :                 j;
    1142              : 
    1143              :     /*
    1144              :      * Note: This gives an inconsistent snapshot as it acquires and releases
    1145              :      * individual locks as it goes...
    1146              :      */
    1147              : 
    1148            0 :     LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
    1149            0 :     check_for_freed_segments_locked(area);
    1150            0 :     fprintf(stderr, "dsa_area handle %x:\n", area->control->handle);
    1151            0 :     fprintf(stderr, "  max_total_segment_size: %zu\n",
    1152            0 :             area->control->max_total_segment_size);
    1153            0 :     fprintf(stderr, "  total_segment_size: %zu\n",
    1154            0 :             area->control->total_segment_size);
    1155            0 :     fprintf(stderr, "  refcnt: %d\n", area->control->refcnt);
    1156            0 :     fprintf(stderr, "  pinned: %c\n", area->control->pinned ? 't' : 'f');
    1157            0 :     fprintf(stderr, "  segment bins:\n");
    1158            0 :     for (i = 0; i < DSA_NUM_SEGMENT_BINS; ++i)
    1159              :     {
    1160            0 :         if (area->control->segment_bins[i] != DSA_SEGMENT_INDEX_NONE)
    1161              :         {
    1162              :             dsa_segment_index segment_index;
    1163              : 
    1164            0 :             if (i == 0)
    1165            0 :                 fprintf(stderr,
    1166              :                         "    segment bin %zu (no contiguous free pages):\n", i);
    1167              :             else
    1168            0 :                 fprintf(stderr,
    1169              :                         "    segment bin %zu (at least %d contiguous pages free):\n",
    1170            0 :                         i, 1 << (i - 1));
    1171            0 :             segment_index = area->control->segment_bins[i];
    1172            0 :             while (segment_index != DSA_SEGMENT_INDEX_NONE)
    1173              :             {
    1174              :                 dsa_segment_map *segment_map;
    1175              : 
    1176              :                 segment_map =
    1177            0 :                     get_segment_by_index(area, segment_index);
    1178              : 
    1179            0 :                 fprintf(stderr,
    1180              :                         "      segment index %zu, usable_pages = %zu, "
    1181              :                         "contiguous_pages = %zu, mapped at %p\n",
    1182              :                         segment_index,
    1183            0 :                         segment_map->header->usable_pages,
    1184            0 :                         fpm_largest(segment_map->fpm),
    1185              :                         segment_map->mapped_address);
    1186            0 :                 segment_index = segment_map->header->next;
    1187              :             }
    1188              :         }
    1189              :     }
    1190            0 :     LWLockRelease(DSA_AREA_LOCK(area));
    1191              : 
    1192            0 :     fprintf(stderr, "  pools:\n");
    1193            0 :     for (i = 0; i < DSA_NUM_SIZE_CLASSES; ++i)
    1194              :     {
    1195            0 :         bool        found = false;
    1196              : 
    1197            0 :         LWLockAcquire(DSA_SCLASS_LOCK(area, i), LW_EXCLUSIVE);
    1198            0 :         for (j = 0; j < DSA_FULLNESS_CLASSES; ++j)
    1199            0 :             if (DsaPointerIsValid(area->control->pools[i].spans[j]))
    1200            0 :                 found = true;
    1201            0 :         if (found)
    1202              :         {
    1203            0 :             if (i == DSA_SCLASS_BLOCK_OF_SPANS)
    1204            0 :                 fprintf(stderr, "    pool for blocks of span objects:\n");
    1205            0 :             else if (i == DSA_SCLASS_SPAN_LARGE)
    1206            0 :                 fprintf(stderr, "    pool for large object spans:\n");
    1207              :             else
    1208            0 :                 fprintf(stderr,
    1209              :                         "    pool for size class %zu (object size %hu bytes):\n",
    1210            0 :                         i, dsa_size_classes[i]);
    1211            0 :             for (j = 0; j < DSA_FULLNESS_CLASSES; ++j)
    1212              :             {
    1213            0 :                 if (!DsaPointerIsValid(area->control->pools[i].spans[j]))
    1214            0 :                     fprintf(stderr, "      fullness class %zu is empty\n", j);
    1215              :                 else
    1216              :                 {
    1217            0 :                     dsa_pointer span_pointer = area->control->pools[i].spans[j];
    1218              : 
    1219            0 :                     fprintf(stderr, "      fullness class %zu:\n", j);
    1220            0 :                     while (DsaPointerIsValid(span_pointer))
    1221              :                     {
    1222              :                         dsa_area_span *span;
    1223              : 
    1224            0 :                         span = dsa_get_address(area, span_pointer);
    1225            0 :                         fprintf(stderr,
    1226              :                                 "        span descriptor at "
    1227              :                                 DSA_POINTER_FORMAT ", superblock at "
    1228              :                                 DSA_POINTER_FORMAT
    1229              :                                 ", pages = %zu, objects free = %hu/%hu\n",
    1230              :                                 span_pointer, span->start, span->npages,
    1231            0 :                                 span->nallocatable, span->nmax);
    1232            0 :                         span_pointer = span->nextspan;
    1233              :                     }
    1234              :                 }
    1235              :             }
    1236              :         }
    1237            0 :         LWLockRelease(DSA_SCLASS_LOCK(area, i));
    1238              :     }
    1239            0 : }
    1240              : 
    1241              : /*
    1242              :  * Return the smallest size that you can successfully provide to
    1243              :  * dsa_create_in_place.
    1244              :  */
    1245              : size_t
    1246         2103 : dsa_minimum_size(void)
    1247              : {
    1248              :     size_t      size;
    1249         2103 :     int         pages = 0;
    1250              : 
    1251         2103 :     size = MAXALIGN(sizeof(dsa_area_control)) +
    1252              :         MAXALIGN(sizeof(FreePageManager));
    1253              : 
    1254              :     /* Figure out how many pages we need, including the page map... */
    1255         6309 :     while (((size + FPM_PAGE_SIZE - 1) / FPM_PAGE_SIZE) > pages)
    1256              :     {
    1257         4206 :         ++pages;
    1258         4206 :         size += sizeof(dsa_pointer);
    1259              :     }
    1260              : 
    1261         2103 :     return pages * FPM_PAGE_SIZE;
    1262              : }
    1263              : 
    1264              : /*
    1265              :  * Workhorse function for dsa_create and dsa_create_in_place.
    1266              :  */
    1267              : static dsa_area *
    1268         1720 : create_internal(void *place, size_t size,
    1269              :                 int tranche_id,
    1270              :                 dsm_handle control_handle,
    1271              :                 dsm_segment *control_segment,
    1272              :                 size_t init_segment_size, size_t max_segment_size)
    1273              : {
    1274              :     dsa_area_control *control;
    1275              :     dsa_area   *area;
    1276              :     dsa_segment_map *segment_map;
    1277              :     size_t      usable_pages;
    1278              :     size_t      total_pages;
    1279              :     size_t      metadata_bytes;
    1280              :     int         i;
    1281              : 
    1282              :     /* Check the initial and maximum block sizes */
    1283              :     Assert(init_segment_size >= DSA_MIN_SEGMENT_SIZE);
    1284              :     Assert(max_segment_size >= init_segment_size);
    1285              :     Assert(max_segment_size <= DSA_MAX_SEGMENT_SIZE);
    1286              : 
    1287              :     /* Sanity check on the space we have to work in. */
    1288         1720 :     if (size < dsa_minimum_size())
    1289            0 :         elog(ERROR, "dsa_area space must be at least %zu, but %zu provided",
    1290              :              dsa_minimum_size(), size);
    1291              : 
    1292              :     /* Now figure out how much space is usable */
    1293         1720 :     total_pages = size / FPM_PAGE_SIZE;
    1294         1720 :     metadata_bytes =
    1295              :         MAXALIGN(sizeof(dsa_area_control)) +
    1296         1720 :         MAXALIGN(sizeof(FreePageManager)) +
    1297              :         total_pages * sizeof(dsa_pointer);
    1298              :     /* Add padding up to next page boundary. */
    1299         1720 :     if (metadata_bytes % FPM_PAGE_SIZE != 0)
    1300         1720 :         metadata_bytes += FPM_PAGE_SIZE - (metadata_bytes % FPM_PAGE_SIZE);
    1301              :     Assert(metadata_bytes <= size);
    1302         1720 :     usable_pages = (size - metadata_bytes) / FPM_PAGE_SIZE;
    1303              : 
    1304              :     /*
    1305              :      * Initialize the dsa_area_control object located at the start of the
    1306              :      * space.
    1307              :      */
    1308         1720 :     control = (dsa_area_control *) place;
    1309         1720 :     memset(place, 0, sizeof(*control));
    1310         1720 :     control->segment_header.magic =
    1311         1720 :         DSA_SEGMENT_HEADER_MAGIC ^ control_handle ^ 0;
    1312         1720 :     control->segment_header.next = DSA_SEGMENT_INDEX_NONE;
    1313         1720 :     control->segment_header.prev = DSA_SEGMENT_INDEX_NONE;
    1314         1720 :     control->segment_header.usable_pages = usable_pages;
    1315         1720 :     control->segment_header.freed = false;
    1316         1720 :     control->segment_header.size = size;
    1317         1720 :     control->handle = control_handle;
    1318         1720 :     control->init_segment_size = init_segment_size;
    1319         1720 :     control->max_segment_size = max_segment_size;
    1320         1720 :     control->max_total_segment_size = (size_t) -1;
    1321         1720 :     control->total_segment_size = size;
    1322         1720 :     control->segment_handles[0] = control_handle;
    1323        29240 :     for (i = 0; i < DSA_NUM_SEGMENT_BINS; ++i)
    1324        27520 :         control->segment_bins[i] = DSA_SEGMENT_INDEX_NONE;
    1325         1720 :     control->refcnt = 1;
    1326         1720 :     control->lwlock_tranche_id = tranche_id;
    1327              : 
    1328              :     /*
    1329              :      * Create the dsa_area object that this backend will use to access the
    1330              :      * area.  Other backends will need to obtain their own dsa_area object by
    1331              :      * attaching.
    1332              :      */
    1333         1720 :     area = palloc_object(dsa_area);
    1334         1720 :     area->control = control;
    1335         1720 :     area->resowner = CurrentResourceOwner;
    1336         1720 :     memset(area->segment_maps, 0, sizeof(dsa_segment_map) * DSA_MAX_SEGMENTS);
    1337         1720 :     area->high_segment_index = 0;
    1338         1720 :     area->freed_segment_counter = 0;
    1339         1720 :     LWLockInitialize(&control->lock, control->lwlock_tranche_id);
    1340        67080 :     for (i = 0; i < DSA_NUM_SIZE_CLASSES; ++i)
    1341        65360 :         LWLockInitialize(DSA_SCLASS_LOCK(area, i),
    1342              :                          control->lwlock_tranche_id);
    1343              : 
    1344              :     /* Set up the segment map for this process's mapping. */
    1345         1720 :     segment_map = &area->segment_maps[0];
    1346         1720 :     segment_map->segment = control_segment;
    1347         1720 :     segment_map->mapped_address = place;
    1348         1720 :     segment_map->header = (dsa_segment_header *) place;
    1349         1720 :     segment_map->fpm = (FreePageManager *)
    1350         1720 :         (segment_map->mapped_address +
    1351              :          MAXALIGN(sizeof(dsa_area_control)));
    1352         1720 :     segment_map->pagemap = (dsa_pointer *)
    1353         1720 :         (segment_map->mapped_address +
    1354         1720 :          MAXALIGN(sizeof(dsa_area_control)) +
    1355              :          MAXALIGN(sizeof(FreePageManager)));
    1356              : 
    1357              :     /* Set up the free page map. */
    1358         1720 :     FreePageManagerInitialize(segment_map->fpm, segment_map->mapped_address);
    1359              :     /* There can be 0 usable pages if size is dsa_minimum_size(). */
    1360              : 
    1361         1720 :     if (usable_pages > 0)
    1362         1337 :         FreePageManagerPut(segment_map->fpm, metadata_bytes / FPM_PAGE_SIZE,
    1363              :                            usable_pages);
    1364              : 
    1365              :     /* Put this segment into the appropriate bin. */
    1366         1720 :     control->segment_bins[contiguous_pages_to_segment_bin(usable_pages)] = 0;
    1367         1720 :     segment_map->header->bin = contiguous_pages_to_segment_bin(usable_pages);
    1368              : 
    1369         1720 :     return area;
    1370              : }
    1371              : 
    1372              : /*
    1373              :  * Workhorse function for dsa_attach and dsa_attach_in_place.
    1374              :  */
    1375              : static dsa_area *
    1376        26330 : attach_internal(void *place, dsm_segment *segment, dsa_handle handle)
    1377              : {
    1378              :     dsa_area_control *control;
    1379              :     dsa_area   *area;
    1380              :     dsa_segment_map *segment_map;
    1381              : 
    1382        26330 :     control = (dsa_area_control *) place;
    1383              :     Assert(control->handle == handle);
    1384              :     Assert(control->segment_handles[0] == handle);
    1385              :     Assert(control->segment_header.magic ==
    1386              :            (DSA_SEGMENT_HEADER_MAGIC ^ handle ^ 0));
    1387              : 
    1388              :     /* Build the backend-local area object. */
    1389        26330 :     area = palloc_object(dsa_area);
    1390        26330 :     area->control = control;
    1391        26330 :     area->resowner = CurrentResourceOwner;
    1392        26330 :     memset(&area->segment_maps[0], 0,
    1393              :            sizeof(dsa_segment_map) * DSA_MAX_SEGMENTS);
    1394        26330 :     area->high_segment_index = 0;
    1395              : 
    1396              :     /* Set up the segment map for this process's mapping. */
    1397        26330 :     segment_map = &area->segment_maps[0];
    1398        26330 :     segment_map->segment = segment; /* NULL for in-place */
    1399        26330 :     segment_map->mapped_address = place;
    1400        26330 :     segment_map->header = (dsa_segment_header *) segment_map->mapped_address;
    1401        26330 :     segment_map->fpm = (FreePageManager *)
    1402        26330 :         (segment_map->mapped_address + MAXALIGN(sizeof(dsa_area_control)));
    1403        26330 :     segment_map->pagemap = (dsa_pointer *)
    1404        26330 :         (segment_map->mapped_address + MAXALIGN(sizeof(dsa_area_control)) +
    1405              :          MAXALIGN(sizeof(FreePageManager)));
    1406              : 
    1407              :     /* Bump the reference count. */
    1408        26330 :     LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
    1409        26330 :     if (control->refcnt == 0)
    1410              :     {
    1411              :         /* We can't attach to a DSA area that has already been destroyed. */
    1412            0 :         ereport(ERROR,
    1413              :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
    1414              :                  errmsg("could not attach to dynamic shared area")));
    1415              :     }
    1416        26330 :     ++control->refcnt;
    1417        26330 :     area->freed_segment_counter = area->control->freed_segment_counter;
    1418        26330 :     LWLockRelease(DSA_AREA_LOCK(area));
    1419              : 
    1420        26330 :     return area;
    1421              : }
    1422              : 
    1423              : /*
    1424              :  * Add a new span to fullness class 1 of the indicated pool.
    1425              :  */
    1426              : static void
    1427        13957 : init_span(dsa_area *area,
    1428              :           dsa_pointer span_pointer,
    1429              :           dsa_area_pool *pool, dsa_pointer start, size_t npages,
    1430              :           uint16 size_class)
    1431              : {
    1432        13957 :     dsa_area_span *span = dsa_get_address(area, span_pointer);
    1433        13957 :     size_t      obsize = dsa_size_classes[size_class];
    1434              : 
    1435              :     /*
    1436              :      * The per-pool lock must be held because we manipulate the span list for
    1437              :      * this pool.
    1438              :      */
    1439              :     Assert(LWLockHeldByMe(DSA_SCLASS_LOCK(area, size_class)));
    1440              : 
    1441              :     /* Push this span onto the front of the span list for fullness class 1. */
    1442        13957 :     if (DsaPointerIsValid(pool->spans[1]))
    1443              :     {
    1444              :         dsa_area_span *head = (dsa_area_span *)
    1445         2048 :             dsa_get_address(area, pool->spans[1]);
    1446              : 
    1447         2048 :         head->prevspan = span_pointer;
    1448              :     }
    1449        13957 :     span->pool = DsaAreaPoolToDsaPointer(area, pool);
    1450        13957 :     span->nextspan = pool->spans[1];
    1451        13957 :     span->prevspan = InvalidDsaPointer;
    1452        13957 :     pool->spans[1] = span_pointer;
    1453              : 
    1454        13957 :     span->start = start;
    1455        13957 :     span->npages = npages;
    1456        13957 :     span->size_class = size_class;
    1457        13957 :     span->ninitialized = 0;
    1458        13957 :     if (size_class == DSA_SCLASS_BLOCK_OF_SPANS)
    1459              :     {
    1460              :         /*
    1461              :          * A block-of-spans contains its own descriptor, so mark one object as
    1462              :          * initialized and reduce the count of allocatable objects by one.
    1463              :          * Doing this here has the side effect of also reducing nmax by one,
    1464              :          * which is important to make sure we free this object at the correct
    1465              :          * time.
    1466              :          */
    1467         1423 :         span->ninitialized = 1;
    1468         1423 :         span->nallocatable = FPM_PAGE_SIZE / obsize - 1;
    1469              :     }
    1470        12534 :     else if (size_class != DSA_SCLASS_SPAN_LARGE)
    1471         9651 :         span->nallocatable = DSA_SUPERBLOCK_SIZE / obsize;
    1472        13957 :     span->firstfree = DSA_SPAN_NOTHING_FREE;
    1473        13957 :     span->nmax = span->nallocatable;
    1474        13957 :     span->fclass = 1;
    1475        13957 : }
    1476              : 
    1477              : /*
    1478              :  * Transfer the first span in one fullness class to the head of another
    1479              :  * fullness class.
    1480              :  */
    1481              : static bool
    1482        23324 : transfer_first_span(dsa_area *area,
    1483              :                     dsa_area_pool *pool, int fromclass, int toclass)
    1484              : {
    1485              :     dsa_pointer span_pointer;
    1486              :     dsa_area_span *span;
    1487              :     dsa_area_span *nextspan;
    1488              : 
    1489              :     /* Can't do it if source list is empty. */
    1490        23324 :     span_pointer = pool->spans[fromclass];
    1491        23324 :     if (!DsaPointerIsValid(span_pointer))
    1492        22150 :         return false;
    1493              : 
    1494              :     /* Remove span from head of source list. */
    1495         1174 :     span = dsa_get_address(area, span_pointer);
    1496         1174 :     pool->spans[fromclass] = span->nextspan;
    1497         1174 :     if (DsaPointerIsValid(span->nextspan))
    1498              :     {
    1499              :         nextspan = (dsa_area_span *)
    1500           67 :             dsa_get_address(area, span->nextspan);
    1501           67 :         nextspan->prevspan = InvalidDsaPointer;
    1502              :     }
    1503              : 
    1504              :     /* Add span to head of target list. */
    1505         1174 :     span->nextspan = pool->spans[toclass];
    1506         1174 :     pool->spans[toclass] = span_pointer;
    1507         1174 :     if (DsaPointerIsValid(span->nextspan))
    1508              :     {
    1509              :         nextspan = (dsa_area_span *)
    1510          368 :             dsa_get_address(area, span->nextspan);
    1511          368 :         nextspan->prevspan = span_pointer;
    1512              :     }
    1513         1174 :     span->fclass = toclass;
    1514              : 
    1515         1174 :     return true;
    1516              : }
    1517              : 
    1518              : /*
    1519              :  * Allocate one object of the requested size class from the given area.
    1520              :  */
    1521              : static inline dsa_pointer
    1522       700238 : alloc_object(dsa_area *area, int size_class)
    1523              : {
    1524       700238 :     dsa_area_pool *pool = &area->control->pools[size_class];
    1525              :     dsa_area_span *span;
    1526              :     dsa_pointer block;
    1527              :     dsa_pointer result;
    1528              :     char       *object;
    1529              :     size_t      size;
    1530              : 
    1531              :     /*
    1532              :      * Even though ensure_active_superblock can in turn call alloc_object if
    1533              :      * it needs to allocate a new span, that's always from a different pool,
    1534              :      * and the order of lock acquisition is always the same, so it's OK that
    1535              :      * we hold this lock for the duration of this function.
    1536              :      */
    1537              :     Assert(!LWLockHeldByMe(DSA_SCLASS_LOCK(area, size_class)));
    1538       700238 :     LWLockAcquire(DSA_SCLASS_LOCK(area, size_class), LW_EXCLUSIVE);
    1539              : 
    1540              :     /*
    1541              :      * If there's no active superblock, we must successfully obtain one or
    1542              :      * fail the request.
    1543              :      */
    1544       700238 :     if (!DsaPointerIsValid(pool->spans[1]) &&
    1545        11168 :         !ensure_active_superblock(area, pool, size_class))
    1546              :     {
    1547            0 :         result = InvalidDsaPointer;
    1548              :     }
    1549              :     else
    1550              :     {
    1551              :         /*
    1552              :          * There should be a block in fullness class 1 at this point, and it
    1553              :          * should never be completely full.  Thus we can either pop an object
    1554              :          * from the free list or, failing that, initialize a new object.
    1555              :          */
    1556              :         Assert(DsaPointerIsValid(pool->spans[1]));
    1557              :         span = (dsa_area_span *)
    1558       700238 :             dsa_get_address(area, pool->spans[1]);
    1559              :         Assert(span->nallocatable > 0);
    1560       700238 :         block = span->start;
    1561              :         Assert(size_class < DSA_NUM_SIZE_CLASSES);
    1562       700238 :         size = dsa_size_classes[size_class];
    1563       700238 :         if (span->firstfree != DSA_SPAN_NOTHING_FREE)
    1564              :         {
    1565       105657 :             result = block + span->firstfree * size;
    1566       105657 :             object = dsa_get_address(area, result);
    1567       105657 :             span->firstfree = NextFreeObjectIndex(object);
    1568              :         }
    1569              :         else
    1570              :         {
    1571       594581 :             result = block + span->ninitialized * size;
    1572       594581 :             ++span->ninitialized;
    1573              :         }
    1574       700238 :         --span->nallocatable;
    1575              : 
    1576              :         /* If it's now full, move it to the highest-numbered fullness class. */
    1577       700238 :         if (span->nallocatable == 0)
    1578         1081 :             transfer_first_span(area, pool, 1, DSA_FULLNESS_CLASSES - 1);
    1579              :     }
    1580              : 
    1581              :     Assert(LWLockHeldByMe(DSA_SCLASS_LOCK(area, size_class)));
    1582       700238 :     LWLockRelease(DSA_SCLASS_LOCK(area, size_class));
    1583              : 
    1584       700238 :     return result;
    1585              : }
    1586              : 
    1587              : /*
    1588              :  * Ensure an active (i.e. fullness class 1) superblock, unless all existing
    1589              :  * superblocks are completely full and no more can be allocated.
    1590              :  *
    1591              :  * Fullness classes K of 0..N are loosely intended to represent blocks whose
    1592              :  * utilization percentage is at least K/N, but we only enforce this rigorously
    1593              :  * for the highest-numbered fullness class, which always contains exactly
    1594              :  * those blocks that are completely full.  It's otherwise acceptable for a
    1595              :  * block to be in a higher-numbered fullness class than the one to which it
    1596              :  * logically belongs.  In addition, the active block, which is always the
    1597              :  * first block in fullness class 1, is permitted to have a higher allocation
    1598              :  * percentage than would normally be allowable for that fullness class; we
    1599              :  * don't move it until it's completely full, and then it goes to the
    1600              :  * highest-numbered fullness class.
    1601              :  *
    1602              :  * It might seem odd that the active block is the head of fullness class 1
    1603              :  * rather than fullness class 0, but experience with other allocators has
    1604              :  * shown that it's usually better to allocate from a block that's moderately
    1605              :  * full rather than one that's nearly empty.  Insofar as is reasonably
    1606              :  * possible, we want to avoid performing new allocations in a block that would
    1607              :  * otherwise become empty soon.
    1608              :  */
    1609              : static bool
    1610        11168 : ensure_active_superblock(dsa_area *area, dsa_area_pool *pool,
    1611              :                          int size_class)
    1612              : {
    1613              :     dsa_pointer span_pointer;
    1614              :     dsa_pointer start_pointer;
    1615        11168 :     size_t      obsize = dsa_size_classes[size_class];
    1616              :     size_t      nmax;
    1617              :     int         fclass;
    1618        11168 :     size_t      npages = 1;
    1619              :     size_t      first_page;
    1620              :     size_t      i;
    1621              :     dsa_segment_map *segment_map;
    1622              : 
    1623              :     Assert(LWLockHeldByMe(DSA_SCLASS_LOCK(area, size_class)));
    1624              : 
    1625              :     /*
    1626              :      * Compute the number of objects that will fit in a block of this size
    1627              :      * class.  Span-of-spans blocks are just a single page, and the first
    1628              :      * object isn't available for use because it describes the block-of-spans
    1629              :      * itself.
    1630              :      */
    1631        11168 :     if (size_class == DSA_SCLASS_BLOCK_OF_SPANS)
    1632         1423 :         nmax = FPM_PAGE_SIZE / obsize - 1;
    1633              :     else
    1634         9745 :         nmax = DSA_SUPERBLOCK_SIZE / obsize;
    1635              : 
    1636              :     /*
    1637              :      * If fullness class 1 is empty, try to find a span to put in it by
    1638              :      * scanning higher-numbered fullness classes (excluding the last one,
    1639              :      * whose blocks are certain to all be completely full).
    1640              :      */
    1641        22335 :     for (fclass = 2; fclass < DSA_FULLNESS_CLASSES - 1; ++fclass)
    1642              :     {
    1643        11168 :         span_pointer = pool->spans[fclass];
    1644              : 
    1645        11357 :         while (DsaPointerIsValid(span_pointer))
    1646              :         {
    1647              :             int         tfclass;
    1648              :             dsa_area_span *span;
    1649              :             dsa_area_span *nextspan;
    1650              :             dsa_area_span *prevspan;
    1651              :             dsa_pointer next_span_pointer;
    1652              : 
    1653              :             span = (dsa_area_span *)
    1654          189 :                 dsa_get_address(area, span_pointer);
    1655          189 :             next_span_pointer = span->nextspan;
    1656              : 
    1657              :             /* Figure out what fullness class should contain this span. */
    1658          189 :             tfclass = (nmax - span->nallocatable)
    1659          189 :                 * (DSA_FULLNESS_CLASSES - 1) / nmax;
    1660              : 
    1661              :             /* Look up next span. */
    1662          189 :             if (DsaPointerIsValid(span->nextspan))
    1663              :                 nextspan = (dsa_area_span *)
    1664           96 :                     dsa_get_address(area, span->nextspan);
    1665              :             else
    1666           93 :                 nextspan = NULL;
    1667              : 
    1668              :             /*
    1669              :              * If utilization has dropped enough that this now belongs in some
    1670              :              * other fullness class, move it there.
    1671              :              */
    1672          189 :             if (tfclass < fclass)
    1673              :             {
    1674              :                 /* Remove from the current fullness class list. */
    1675            3 :                 if (pool->spans[fclass] == span_pointer)
    1676              :                 {
    1677              :                     /* It was the head; remove it. */
    1678              :                     Assert(!DsaPointerIsValid(span->prevspan));
    1679            3 :                     pool->spans[fclass] = span->nextspan;
    1680            3 :                     if (nextspan != NULL)
    1681            1 :                         nextspan->prevspan = InvalidDsaPointer;
    1682              :                 }
    1683              :                 else
    1684              :                 {
    1685              :                     /* It was not the head. */
    1686              :                     Assert(DsaPointerIsValid(span->prevspan));
    1687              :                     prevspan = (dsa_area_span *)
    1688            0 :                         dsa_get_address(area, span->prevspan);
    1689            0 :                     prevspan->nextspan = span->nextspan;
    1690              :                 }
    1691            3 :                 if (nextspan != NULL)
    1692            1 :                     nextspan->prevspan = span->prevspan;
    1693              : 
    1694              :                 /* Push onto the head of the new fullness class list. */
    1695            3 :                 span->nextspan = pool->spans[tfclass];
    1696            3 :                 pool->spans[tfclass] = span_pointer;
    1697            3 :                 span->prevspan = InvalidDsaPointer;
    1698            3 :                 if (DsaPointerIsValid(span->nextspan))
    1699              :                 {
    1700              :                     nextspan = (dsa_area_span *)
    1701            1 :                         dsa_get_address(area, span->nextspan);
    1702            1 :                     nextspan->prevspan = span_pointer;
    1703              :                 }
    1704            3 :                 span->fclass = tfclass;
    1705              :             }
    1706              : 
    1707              :             /* Advance to next span on list. */
    1708          189 :             span_pointer = next_span_pointer;
    1709              :         }
    1710              : 
    1711              :         /* Stop now if we found a suitable block. */
    1712        11168 :         if (DsaPointerIsValid(pool->spans[1]))
    1713            1 :             return true;
    1714              :     }
    1715              : 
    1716              :     /*
    1717              :      * If there are no blocks that properly belong in fullness class 1, pick
    1718              :      * one from some other fullness class and move it there anyway, so that we
    1719              :      * have an allocation target.  Our last choice is to transfer a block
    1720              :      * that's almost empty (and might become completely empty soon if left
    1721              :      * alone), but even that is better than failing, which is what we must do
    1722              :      * if there are no blocks at all with freespace.
    1723              :      */
    1724              :     Assert(!DsaPointerIsValid(pool->spans[1]));
    1725        22243 :     for (fclass = 2; fclass < DSA_FULLNESS_CLASSES - 1; ++fclass)
    1726        11167 :         if (transfer_first_span(area, pool, fclass, 1))
    1727           91 :             return true;
    1728        22152 :     if (!DsaPointerIsValid(pool->spans[1]) &&
    1729        11076 :         transfer_first_span(area, pool, 0, 1))
    1730            2 :         return true;
    1731              : 
    1732              :     /*
    1733              :      * We failed to find an existing span with free objects, so we need to
    1734              :      * allocate a new superblock and construct a new span to manage it.
    1735              :      *
    1736              :      * First, get a dsa_area_span object to describe the new superblock block
    1737              :      * ... unless this allocation is for a dsa_area_span object, in which case
    1738              :      * that's surely not going to work.  We handle that case by storing the
    1739              :      * span describing a block-of-spans inline.
    1740              :      */
    1741        11074 :     if (size_class != DSA_SCLASS_BLOCK_OF_SPANS)
    1742              :     {
    1743         9651 :         span_pointer = alloc_object(area, DSA_SCLASS_BLOCK_OF_SPANS);
    1744         9651 :         if (!DsaPointerIsValid(span_pointer))
    1745            0 :             return false;
    1746         9651 :         npages = DSA_PAGES_PER_SUPERBLOCK;
    1747              :     }
    1748              : 
    1749              :     /* Find or create a segment and allocate the superblock. */
    1750        11074 :     LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
    1751        11074 :     segment_map = get_best_segment(area, npages);
    1752        11074 :     if (segment_map == NULL)
    1753              :     {
    1754         1075 :         segment_map = make_new_segment(area, npages);
    1755         1075 :         if (segment_map == NULL)
    1756              :         {
    1757            0 :             LWLockRelease(DSA_AREA_LOCK(area));
    1758            0 :             return false;
    1759              :         }
    1760              :     }
    1761              : 
    1762              :     /*
    1763              :      * This shouldn't happen: get_best_segment() or make_new_segment()
    1764              :      * promised that we can successfully allocate npages.
    1765              :      */
    1766        11074 :     if (!FreePageManagerGet(segment_map->fpm, npages, &first_page))
    1767            0 :         elog(FATAL,
    1768              :              "dsa_allocate could not find %zu free pages for superblock",
    1769              :              npages);
    1770        11074 :     LWLockRelease(DSA_AREA_LOCK(area));
    1771              : 
    1772              :     /* Compute the start of the superblock. */
    1773        11074 :     start_pointer =
    1774        11074 :         DSA_MAKE_POINTER(get_segment_index(area, segment_map),
    1775              :                          first_page * FPM_PAGE_SIZE);
    1776              : 
    1777              :     /*
    1778              :      * If this is a block-of-spans, carve the descriptor right out of the
    1779              :      * allocated space.
    1780              :      */
    1781        11074 :     if (size_class == DSA_SCLASS_BLOCK_OF_SPANS)
    1782              :     {
    1783              :         /*
    1784              :          * We have a pointer into the segment.  We need to build a dsa_pointer
    1785              :          * from the segment index and offset into the segment.
    1786              :          */
    1787         1423 :         span_pointer = start_pointer;
    1788              :     }
    1789              : 
    1790              :     /* Initialize span and pagemap. */
    1791        11074 :     init_span(area, span_pointer, pool, start_pointer, npages, size_class);
    1792       166913 :     for (i = 0; i < npages; ++i)
    1793       155839 :         segment_map->pagemap[first_page + i] = span_pointer;
    1794              : 
    1795        11074 :     return true;
    1796              : }
    1797              : 
    1798              : /*
    1799              :  * Return the segment map corresponding to a given segment index, mapping the
    1800              :  * segment in if necessary.  For internal segment book-keeping, this is called
    1801              :  * with the area lock held.  It is also called by dsa_free and dsa_get_address
    1802              :  * without any locking, relying on the fact they have a known live segment
    1803              :  * index and they always call check_for_freed_segments to ensures that any
    1804              :  * freed segment occupying the same slot is detached first.
    1805              :  */
    1806              : static dsa_segment_map *
    1807       162179 : get_segment_by_index(dsa_area *area, dsa_segment_index index)
    1808              : {
    1809       162179 :     if (unlikely(area->segment_maps[index].mapped_address == NULL))
    1810              :     {
    1811              :         dsm_handle  handle;
    1812              :         dsm_segment *segment;
    1813              :         dsa_segment_map *segment_map;
    1814              :         ResourceOwner oldowner;
    1815              : 
    1816              :         /*
    1817              :          * If we are reached by dsa_free or dsa_get_address, there must be at
    1818              :          * least one object allocated in the referenced segment.  Otherwise,
    1819              :          * their caller has a double-free or access-after-free bug, which we
    1820              :          * have no hope of detecting.  So we know it's safe to access this
    1821              :          * array slot without holding a lock; it won't change underneath us.
    1822              :          * Furthermore, we know that we can see the latest contents of the
    1823              :          * slot, as explained in check_for_freed_segments, which those
    1824              :          * functions call before arriving here.
    1825              :          */
    1826        20175 :         handle = area->control->segment_handles[index];
    1827              : 
    1828              :         /* It's an error to try to access an unused slot. */
    1829        20175 :         if (handle == DSM_HANDLE_INVALID)
    1830            0 :             elog(ERROR,
    1831              :                  "dsa_area could not attach to a segment that has been freed");
    1832              : 
    1833        20175 :         oldowner = CurrentResourceOwner;
    1834        20175 :         CurrentResourceOwner = area->resowner;
    1835        20175 :         segment = dsm_attach(handle);
    1836        20175 :         CurrentResourceOwner = oldowner;
    1837        20175 :         if (segment == NULL)
    1838            0 :             elog(ERROR, "dsa_area could not attach to segment");
    1839        20175 :         segment_map = &area->segment_maps[index];
    1840        20175 :         segment_map->segment = segment;
    1841        20175 :         segment_map->mapped_address = dsm_segment_address(segment);
    1842        20175 :         segment_map->header =
    1843        20175 :             (dsa_segment_header *) segment_map->mapped_address;
    1844        20175 :         segment_map->fpm = (FreePageManager *)
    1845        20175 :             (segment_map->mapped_address +
    1846              :              MAXALIGN(sizeof(dsa_segment_header)));
    1847        20175 :         segment_map->pagemap = (dsa_pointer *)
    1848        20175 :             (segment_map->mapped_address +
    1849        20175 :              MAXALIGN(sizeof(dsa_segment_header)) +
    1850              :              MAXALIGN(sizeof(FreePageManager)));
    1851              : 
    1852              :         /* Remember the highest index this backend has ever mapped. */
    1853        20175 :         if (area->high_segment_index < index)
    1854        20175 :             area->high_segment_index = index;
    1855              : 
    1856              :         Assert(segment_map->header->magic ==
    1857              :                (DSA_SEGMENT_HEADER_MAGIC ^ area->control->handle ^ index));
    1858              :     }
    1859              : 
    1860              :     /*
    1861              :      * Callers of dsa_get_address() and dsa_free() don't hold the area lock,
    1862              :      * but it's a bug in the calling code and undefined behavior if the
    1863              :      * address is not live (ie if the segment might possibly have been freed,
    1864              :      * they're trying to use a dangling pointer).
    1865              :      *
    1866              :      * For dsa.c code that holds the area lock to manipulate segment_bins
    1867              :      * lists, it would be a bug if we ever reach a freed segment here.  After
    1868              :      * it's marked as freed, the only thing any backend should do with it is
    1869              :      * unmap it, and it should always have done that in
    1870              :      * check_for_freed_segments_locked() before arriving here to resolve an
    1871              :      * index to a segment_map.
    1872              :      *
    1873              :      * Either way we can assert that we aren't returning a freed segment.
    1874              :      */
    1875              :     Assert(!area->segment_maps[index].header->freed);
    1876              : 
    1877       162179 :     return &area->segment_maps[index];
    1878              : }
    1879              : 
    1880              : /*
    1881              :  * Return a superblock to the free page manager.  If the underlying segment
    1882              :  * has become entirely free, then return it to the operating system.
    1883              :  *
    1884              :  * The appropriate pool lock must be held.
    1885              :  */
    1886              : static void
    1887            8 : destroy_superblock(dsa_area *area, dsa_pointer span_pointer)
    1888              : {
    1889            8 :     dsa_area_span *span = dsa_get_address(area, span_pointer);
    1890            8 :     int         size_class = span->size_class;
    1891              :     dsa_segment_map *segment_map;
    1892              : 
    1893              : 
    1894              :     /* Remove it from its fullness class list. */
    1895            8 :     unlink_span(area, span);
    1896              : 
    1897              :     /*
    1898              :      * Note: Here we acquire the area lock while we already hold a per-pool
    1899              :      * lock.  We never hold the area lock and then take a pool lock, or we
    1900              :      * could deadlock.
    1901              :      */
    1902            8 :     LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
    1903            8 :     check_for_freed_segments_locked(area);
    1904              :     segment_map =
    1905            8 :         get_segment_by_index(area, DSA_EXTRACT_SEGMENT_NUMBER(span->start));
    1906            8 :     FreePageManagerPut(segment_map->fpm,
    1907            8 :                        DSA_EXTRACT_OFFSET(span->start) / FPM_PAGE_SIZE,
    1908              :                        span->npages);
    1909              :     /* Check if the segment is now entirely free. */
    1910            8 :     if (fpm_largest(segment_map->fpm) == segment_map->header->usable_pages)
    1911              :     {
    1912            0 :         dsa_segment_index index = get_segment_index(area, segment_map);
    1913              : 
    1914              :         /* If it's not the segment with extra control data, free it. */
    1915            0 :         if (index != 0)
    1916              :         {
    1917              :             /*
    1918              :              * Give it back to the OS, and allow other backends to detect that
    1919              :              * they need to detach.
    1920              :              */
    1921            0 :             unlink_segment(area, segment_map);
    1922            0 :             segment_map->header->freed = true;
    1923              :             Assert(area->control->total_segment_size >=
    1924              :                    segment_map->header->size);
    1925            0 :             area->control->total_segment_size -=
    1926            0 :                 segment_map->header->size;
    1927            0 :             dsm_unpin_segment(dsm_segment_handle(segment_map->segment));
    1928            0 :             dsm_detach(segment_map->segment);
    1929            0 :             area->control->segment_handles[index] = DSM_HANDLE_INVALID;
    1930            0 :             ++area->control->freed_segment_counter;
    1931            0 :             segment_map->segment = NULL;
    1932            0 :             segment_map->header = NULL;
    1933            0 :             segment_map->mapped_address = NULL;
    1934              :         }
    1935              :     }
    1936              : 
    1937              :     /* Move segment to appropriate bin if necessary. */
    1938            8 :     if (segment_map->header != NULL)
    1939            8 :         rebin_segment(area, segment_map);
    1940              : 
    1941            8 :     LWLockRelease(DSA_AREA_LOCK(area));
    1942              : 
    1943              :     /*
    1944              :      * Span-of-spans blocks store the span which describes them within the
    1945              :      * block itself, so freeing the storage implicitly frees the descriptor
    1946              :      * also.  If this is a block of any other type, we need to separately free
    1947              :      * the span object also.  This recursive call to dsa_free will acquire the
    1948              :      * span pool's lock.  We can't deadlock because the acquisition order is
    1949              :      * always some other pool and then the span pool.
    1950              :      */
    1951            8 :     if (size_class != DSA_SCLASS_BLOCK_OF_SPANS)
    1952            8 :         dsa_free(area, span_pointer);
    1953            8 : }
    1954              : 
    1955              : static void
    1956         2348 : unlink_span(dsa_area *area, dsa_area_span *span)
    1957              : {
    1958         2348 :     if (DsaPointerIsValid(span->nextspan))
    1959              :     {
    1960         1935 :         dsa_area_span *next = dsa_get_address(area, span->nextspan);
    1961              : 
    1962         1935 :         next->prevspan = span->prevspan;
    1963              :     }
    1964         2348 :     if (DsaPointerIsValid(span->prevspan))
    1965              :     {
    1966          967 :         dsa_area_span *prev = dsa_get_address(area, span->prevspan);
    1967              : 
    1968          967 :         prev->nextspan = span->nextspan;
    1969              :     }
    1970              :     else
    1971              :     {
    1972         1381 :         dsa_area_pool *pool = dsa_get_address(area, span->pool);
    1973              : 
    1974         1381 :         pool->spans[span->fclass] = span->nextspan;
    1975              :     }
    1976         2348 : }
    1977              : 
    1978              : static void
    1979          156 : add_span_to_fullness_class(dsa_area *area, dsa_area_span *span,
    1980              :                            dsa_pointer span_pointer,
    1981              :                            int fclass)
    1982              : {
    1983          156 :     dsa_area_pool *pool = dsa_get_address(area, span->pool);
    1984              : 
    1985          156 :     if (DsaPointerIsValid(pool->spans[fclass]))
    1986              :     {
    1987           81 :         dsa_area_span *head = dsa_get_address(area,
    1988              :                                               pool->spans[fclass]);
    1989              : 
    1990           81 :         head->prevspan = span_pointer;
    1991              :     }
    1992          156 :     span->prevspan = InvalidDsaPointer;
    1993          156 :     span->nextspan = pool->spans[fclass];
    1994          156 :     pool->spans[fclass] = span_pointer;
    1995          156 :     span->fclass = fclass;
    1996          156 : }
    1997              : 
    1998              : /*
    1999              :  * Detach from an area that was either created or attached to by this process.
    2000              :  */
    2001              : void
    2002        27525 : dsa_detach(dsa_area *area)
    2003              : {
    2004              :     int         i;
    2005              : 
    2006              :     /* Detach from all segments. */
    2007        76294 :     for (i = 0; i <= area->high_segment_index; ++i)
    2008        48769 :         if (area->segment_maps[i].segment != NULL)
    2009        21303 :             dsm_detach(area->segment_maps[i].segment);
    2010              : 
    2011              :     /*
    2012              :      * Note that 'detaching' (= detaching from DSM segments) doesn't include
    2013              :      * 'releasing' (= adjusting the reference count).  It would be nice to
    2014              :      * combine these operations, but client code might never get around to
    2015              :      * calling dsa_detach because of an error path, and a detach hook on any
    2016              :      * particular segment is too late to detach other segments in the area
    2017              :      * without risking a 'leak' warning in the non-error path.
    2018              :      */
    2019              : 
    2020              :     /* Free the backend-local area object. */
    2021        27525 :     pfree(area);
    2022        27525 : }
    2023              : 
    2024              : /*
    2025              :  * Unlink a segment from the bin that contains it.
    2026              :  */
    2027              : static void
    2028         2416 : unlink_segment(dsa_area *area, dsa_segment_map *segment_map)
    2029              : {
    2030         2416 :     if (segment_map->header->prev != DSA_SEGMENT_INDEX_NONE)
    2031              :     {
    2032              :         dsa_segment_map *prev;
    2033              : 
    2034            1 :         prev = get_segment_by_index(area, segment_map->header->prev);
    2035            1 :         prev->header->next = segment_map->header->next;
    2036              :     }
    2037              :     else
    2038              :     {
    2039              :         Assert(area->control->segment_bins[segment_map->header->bin] ==
    2040              :                get_segment_index(area, segment_map));
    2041         2415 :         area->control->segment_bins[segment_map->header->bin] =
    2042         2415 :             segment_map->header->next;
    2043              :     }
    2044         2416 :     if (segment_map->header->next != DSA_SEGMENT_INDEX_NONE)
    2045              :     {
    2046              :         dsa_segment_map *next;
    2047              : 
    2048            0 :         next = get_segment_by_index(area, segment_map->header->next);
    2049            0 :         next->header->prev = segment_map->header->prev;
    2050              :     }
    2051         2416 : }
    2052              : 
    2053              : /*
    2054              :  * Find a segment that could satisfy a request for 'npages' of contiguous
    2055              :  * memory, or return NULL if none can be found.  This may involve attaching to
    2056              :  * segments that weren't previously attached so that we can query their free
    2057              :  * pages map.
    2058              :  */
    2059              : static dsa_segment_map *
    2060        13957 : get_best_segment(dsa_area *area, size_t npages)
    2061              : {
    2062              :     size_t      bin;
    2063              : 
    2064              :     Assert(LWLockHeldByMe(DSA_AREA_LOCK(area)));
    2065        13957 :     check_for_freed_segments_locked(area);
    2066              : 
    2067              :     /*
    2068              :      * Start searching from the first bin that *might* have enough contiguous
    2069              :      * pages.
    2070              :      */
    2071        13957 :     for (bin = contiguous_pages_to_segment_bin(npages);
    2072        60750 :          bin < DSA_NUM_SEGMENT_BINS;
    2073        46793 :          ++bin)
    2074              :     {
    2075              :         /*
    2076              :          * The minimum contiguous size that any segment in this bin should
    2077              :          * have.  We'll re-bin if we see segments with fewer.
    2078              :          */
    2079        59654 :         size_t      threshold = (size_t) 1 << (bin - 1);
    2080              :         dsa_segment_index segment_index;
    2081              : 
    2082              :         /* Search this bin for a segment with enough contiguous space. */
    2083        59654 :         segment_index = area->control->segment_bins[bin];
    2084        60666 :         while (segment_index != DSA_SEGMENT_INDEX_NONE)
    2085              :         {
    2086              :             dsa_segment_map *segment_map;
    2087              :             dsa_segment_index next_segment_index;
    2088              :             size_t      contiguous_pages;
    2089              : 
    2090        13873 :             segment_map = get_segment_by_index(area, segment_index);
    2091        13873 :             next_segment_index = segment_map->header->next;
    2092        13873 :             contiguous_pages = fpm_largest(segment_map->fpm);
    2093              : 
    2094              :             /* Not enough for the request, still enough for this bin. */
    2095        13873 :             if (contiguous_pages >= threshold && contiguous_pages < npages)
    2096              :             {
    2097            0 :                 segment_index = next_segment_index;
    2098            0 :                 continue;
    2099              :             }
    2100              : 
    2101              :             /* Re-bin it if it's no longer in the appropriate bin. */
    2102        13873 :             if (contiguous_pages < threshold)
    2103              :             {
    2104         2243 :                 rebin_segment(area, segment_map);
    2105              : 
    2106              :                 /*
    2107              :                  * But fall through to see if it's enough to satisfy this
    2108              :                  * request anyway....
    2109              :                  */
    2110              :             }
    2111              : 
    2112              :             /* Check if we are done. */
    2113        13873 :             if (contiguous_pages >= npages)
    2114        12861 :                 return segment_map;
    2115              : 
    2116              :             /* Continue searching the same bin. */
    2117         1012 :             segment_index = next_segment_index;
    2118              :         }
    2119              :     }
    2120              : 
    2121              :     /* Not found. */
    2122         1096 :     return NULL;
    2123              : }
    2124              : 
    2125              : /*
    2126              :  * Create a new segment that can handle at least requested_pages.  Returns
    2127              :  * NULL if the requested total size limit or maximum allowed number of
    2128              :  * segments would be exceeded.
    2129              :  */
    2130              : static dsa_segment_map *
    2131         1096 : make_new_segment(dsa_area *area, size_t requested_pages)
    2132              : {
    2133              :     dsa_segment_index new_index;
    2134              :     size_t      metadata_bytes;
    2135              :     size_t      total_size;
    2136              :     size_t      total_pages;
    2137              :     size_t      usable_pages;
    2138              :     dsa_segment_map *segment_map;
    2139              :     dsm_segment *segment;
    2140              :     ResourceOwner oldowner;
    2141              : 
    2142              :     Assert(LWLockHeldByMe(DSA_AREA_LOCK(area)));
    2143              : 
    2144              :     /* Find a segment slot that is not in use (linearly for now). */
    2145         1134 :     for (new_index = 1; new_index < DSA_MAX_SEGMENTS; ++new_index)
    2146              :     {
    2147         1134 :         if (area->control->segment_handles[new_index] == DSM_HANDLE_INVALID)
    2148         1096 :             break;
    2149              :     }
    2150         1096 :     if (new_index == DSA_MAX_SEGMENTS)
    2151            0 :         return NULL;
    2152              : 
    2153              :     /*
    2154              :      * If the total size limit is already exceeded, then we exit early and
    2155              :      * avoid arithmetic wraparound in the unsigned expressions below.
    2156              :      */
    2157         1096 :     if (area->control->total_segment_size >=
    2158         1096 :         area->control->max_total_segment_size)
    2159            0 :         return NULL;
    2160              : 
    2161              :     /*
    2162              :      * The size should be at least as big as requested, and at least big
    2163              :      * enough to follow a geometric series that approximately doubles the
    2164              :      * total storage each time we create a new segment.  We use geometric
    2165              :      * growth because the underlying DSM system isn't designed for large
    2166              :      * numbers of segments (otherwise we might even consider just using one
    2167              :      * DSM segment for each large allocation and for each superblock, and then
    2168              :      * we wouldn't need to use FreePageManager).
    2169              :      *
    2170              :      * We decide on a total segment size first, so that we produce tidy
    2171              :      * power-of-two sized segments.  This is a good property to have if we
    2172              :      * move to huge pages in the future.  Then we work back to the number of
    2173              :      * pages we can fit.
    2174              :      */
    2175         1096 :     total_size = area->control->init_segment_size *
    2176         1096 :         ((size_t) 1 << (new_index / DSA_NUM_SEGMENTS_AT_EACH_SIZE));
    2177         1096 :     total_size = Min(total_size, area->control->max_segment_size);
    2178         1096 :     total_size = Min(total_size,
    2179              :                      area->control->max_total_segment_size -
    2180              :                      area->control->total_segment_size);
    2181              : 
    2182         1096 :     total_pages = total_size / FPM_PAGE_SIZE;
    2183         1096 :     metadata_bytes =
    2184              :         MAXALIGN(sizeof(dsa_segment_header)) +
    2185         1096 :         MAXALIGN(sizeof(FreePageManager)) +
    2186              :         sizeof(dsa_pointer) * total_pages;
    2187              : 
    2188              :     /* Add padding up to next page boundary. */
    2189         1096 :     if (metadata_bytes % FPM_PAGE_SIZE != 0)
    2190         1096 :         metadata_bytes += FPM_PAGE_SIZE - (metadata_bytes % FPM_PAGE_SIZE);
    2191         1096 :     if (total_size <= metadata_bytes)
    2192            0 :         return NULL;
    2193         1096 :     usable_pages = (total_size - metadata_bytes) / FPM_PAGE_SIZE;
    2194              :     Assert(metadata_bytes + usable_pages * FPM_PAGE_SIZE <= total_size);
    2195              : 
    2196              :     /* See if that is enough... */
    2197         1096 :     if (requested_pages > usable_pages)
    2198              :     {
    2199              :         /*
    2200              :          * We'll make an odd-sized segment, working forward from the requested
    2201              :          * number of pages.
    2202              :          */
    2203            0 :         usable_pages = requested_pages;
    2204            0 :         metadata_bytes =
    2205              :             MAXALIGN(sizeof(dsa_segment_header)) +
    2206            0 :             MAXALIGN(sizeof(FreePageManager)) +
    2207              :             usable_pages * sizeof(dsa_pointer);
    2208              : 
    2209              :         /* Add padding up to next page boundary. */
    2210            0 :         if (metadata_bytes % FPM_PAGE_SIZE != 0)
    2211            0 :             metadata_bytes += FPM_PAGE_SIZE - (metadata_bytes % FPM_PAGE_SIZE);
    2212            0 :         total_size = metadata_bytes + usable_pages * FPM_PAGE_SIZE;
    2213              : 
    2214              :         /* Is that too large for dsa_pointer's addressing scheme? */
    2215            0 :         if (total_size > DSA_MAX_SEGMENT_SIZE)
    2216            0 :             return NULL;
    2217              : 
    2218              :         /* Would that exceed the limit? */
    2219            0 :         if (total_size > area->control->max_total_segment_size -
    2220            0 :             area->control->total_segment_size)
    2221            0 :             return NULL;
    2222              :     }
    2223              : 
    2224              :     /* Create the segment. */
    2225         1096 :     oldowner = CurrentResourceOwner;
    2226         1096 :     CurrentResourceOwner = area->resowner;
    2227         1096 :     segment = dsm_create(total_size, 0);
    2228         1096 :     CurrentResourceOwner = oldowner;
    2229         1096 :     if (segment == NULL)
    2230            0 :         return NULL;
    2231         1096 :     dsm_pin_segment(segment);
    2232              : 
    2233              :     /* Store the handle in shared memory to be found by index. */
    2234         2192 :     area->control->segment_handles[new_index] =
    2235         1096 :         dsm_segment_handle(segment);
    2236              :     /* Track the highest segment index in the history of the area. */
    2237         1096 :     if (area->control->high_segment_index < new_index)
    2238         1096 :         area->control->high_segment_index = new_index;
    2239              :     /* Track the highest segment index this backend has ever mapped. */
    2240         1096 :     if (area->high_segment_index < new_index)
    2241         1096 :         area->high_segment_index = new_index;
    2242              :     /* Track total size of all segments. */
    2243         1096 :     area->control->total_segment_size += total_size;
    2244              :     Assert(area->control->total_segment_size <=
    2245              :            area->control->max_total_segment_size);
    2246              : 
    2247              :     /* Build a segment map for this segment in this backend. */
    2248         1096 :     segment_map = &area->segment_maps[new_index];
    2249         1096 :     segment_map->segment = segment;
    2250         1096 :     segment_map->mapped_address = dsm_segment_address(segment);
    2251         1096 :     segment_map->header = (dsa_segment_header *) segment_map->mapped_address;
    2252         1096 :     segment_map->fpm = (FreePageManager *)
    2253         1096 :         (segment_map->mapped_address +
    2254              :          MAXALIGN(sizeof(dsa_segment_header)));
    2255         1096 :     segment_map->pagemap = (dsa_pointer *)
    2256         1096 :         (segment_map->mapped_address +
    2257         1096 :          MAXALIGN(sizeof(dsa_segment_header)) +
    2258              :          MAXALIGN(sizeof(FreePageManager)));
    2259              : 
    2260              :     /* Set up the free page map. */
    2261         1096 :     FreePageManagerInitialize(segment_map->fpm, segment_map->mapped_address);
    2262         1096 :     FreePageManagerPut(segment_map->fpm, metadata_bytes / FPM_PAGE_SIZE,
    2263              :                        usable_pages);
    2264              : 
    2265              :     /* Set up the segment header and put it in the appropriate bin. */
    2266         1096 :     segment_map->header->magic =
    2267         1096 :         DSA_SEGMENT_HEADER_MAGIC ^ area->control->handle ^ new_index;
    2268         1096 :     segment_map->header->usable_pages = usable_pages;
    2269         1096 :     segment_map->header->size = total_size;
    2270         1096 :     segment_map->header->bin = contiguous_pages_to_segment_bin(usable_pages);
    2271         1096 :     segment_map->header->prev = DSA_SEGMENT_INDEX_NONE;
    2272         1096 :     segment_map->header->next =
    2273         1096 :         area->control->segment_bins[segment_map->header->bin];
    2274         1096 :     segment_map->header->freed = false;
    2275         1096 :     area->control->segment_bins[segment_map->header->bin] = new_index;
    2276         1096 :     if (segment_map->header->next != DSA_SEGMENT_INDEX_NONE)
    2277              :     {
    2278              :         dsa_segment_map *next =
    2279            0 :             get_segment_by_index(area, segment_map->header->next);
    2280              : 
    2281              :         Assert(next->header->bin == segment_map->header->bin);
    2282            0 :         next->header->prev = new_index;
    2283              :     }
    2284              : 
    2285         1096 :     return segment_map;
    2286              : }
    2287              : 
    2288              : /*
    2289              :  * Check if any segments have been freed by destroy_superblock, so we can
    2290              :  * detach from them in this backend.  This function is called by
    2291              :  * dsa_get_address and dsa_free to make sure that a dsa_pointer they have
    2292              :  * received can be resolved to the correct segment.
    2293              :  *
    2294              :  * The danger we want to defend against is that there could be an old segment
    2295              :  * mapped into a given slot in this backend, and the dsa_pointer they have
    2296              :  * might refer to some new segment in the same slot.  So those functions must
    2297              :  * be sure to process all instructions to detach from a freed segment that had
    2298              :  * been generated by the time this process received the dsa_pointer, before
    2299              :  * they call get_segment_by_index.
    2300              :  */
    2301              : static void
    2302      8228174 : check_for_freed_segments(dsa_area *area)
    2303              : {
    2304              :     size_t      freed_segment_counter;
    2305              : 
    2306              :     /*
    2307              :      * Any other process that has freed a segment has incremented
    2308              :      * freed_segment_counter while holding an LWLock, and that must precede
    2309              :      * any backend creating a new segment in the same slot while holding an
    2310              :      * LWLock, and that must precede the creation of any dsa_pointer pointing
    2311              :      * into the new segment which might reach us here, and the caller must
    2312              :      * have sent the dsa_pointer to this process using appropriate memory
    2313              :      * synchronization (some kind of locking or atomic primitive or system
    2314              :      * call).  So all we need to do on the reading side is ask for the load of
    2315              :      * freed_segment_counter to follow the caller's load of the dsa_pointer it
    2316              :      * has, and we can be sure to detect any segments that had been freed as
    2317              :      * of the time that the dsa_pointer reached this process.
    2318              :      */
    2319      8228174 :     pg_read_barrier();
    2320      8228174 :     freed_segment_counter = area->control->freed_segment_counter;
    2321      8228174 :     if (unlikely(area->freed_segment_counter != freed_segment_counter))
    2322              :     {
    2323              :         /* Check all currently mapped segments to find what's been freed. */
    2324            0 :         LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
    2325            0 :         check_for_freed_segments_locked(area);
    2326            0 :         LWLockRelease(DSA_AREA_LOCK(area));
    2327              :     }
    2328      8228174 : }
    2329              : 
    2330              : /*
    2331              :  * Workhorse for check_for_freed_segments(), and also used directly in path
    2332              :  * where the area lock is already held.  This should be called after acquiring
    2333              :  * the lock but before looking up any segment by index number, to make sure we
    2334              :  * unmap any stale segments that might have previously had the same index as a
    2335              :  * current segment.
    2336              :  */
    2337              : static void
    2338        13965 : check_for_freed_segments_locked(dsa_area *area)
    2339              : {
    2340              :     size_t      freed_segment_counter;
    2341              :     int         i;
    2342              : 
    2343              :     Assert(LWLockHeldByMe(DSA_AREA_LOCK(area)));
    2344        13965 :     freed_segment_counter = area->control->freed_segment_counter;
    2345        13965 :     if (unlikely(area->freed_segment_counter != freed_segment_counter))
    2346              :     {
    2347            0 :         for (i = 0; i <= area->high_segment_index; ++i)
    2348              :         {
    2349            0 :             if (area->segment_maps[i].header != NULL &&
    2350            0 :                 area->segment_maps[i].header->freed)
    2351              :             {
    2352            0 :                 dsm_detach(area->segment_maps[i].segment);
    2353            0 :                 area->segment_maps[i].segment = NULL;
    2354            0 :                 area->segment_maps[i].header = NULL;
    2355            0 :                 area->segment_maps[i].mapped_address = NULL;
    2356              :             }
    2357              :         }
    2358            0 :         area->freed_segment_counter = freed_segment_counter;
    2359              :     }
    2360        13965 : }
    2361              : 
    2362              : /*
    2363              :  * Re-bin segment if it's no longer in the appropriate bin.
    2364              :  */
    2365              : static void
    2366         4435 : rebin_segment(dsa_area *area, dsa_segment_map *segment_map)
    2367              : {
    2368              :     size_t      new_bin;
    2369              :     dsa_segment_index segment_index;
    2370              : 
    2371         4435 :     new_bin = contiguous_pages_to_segment_bin(fpm_largest(segment_map->fpm));
    2372         4435 :     if (segment_map->header->bin == new_bin)
    2373         2019 :         return;
    2374              : 
    2375              :     /* Remove it from its current bin. */
    2376         2416 :     unlink_segment(area, segment_map);
    2377              : 
    2378              :     /* Push it onto the front of its new bin. */
    2379         2416 :     segment_index = get_segment_index(area, segment_map);
    2380         2416 :     segment_map->header->prev = DSA_SEGMENT_INDEX_NONE;
    2381         2416 :     segment_map->header->next = area->control->segment_bins[new_bin];
    2382         2416 :     segment_map->header->bin = new_bin;
    2383         2416 :     area->control->segment_bins[new_bin] = segment_index;
    2384         2416 :     if (segment_map->header->next != DSA_SEGMENT_INDEX_NONE)
    2385              :     {
    2386              :         dsa_segment_map *next;
    2387              : 
    2388           11 :         next = get_segment_by_index(area, segment_map->header->next);
    2389              :         Assert(next->header->bin == new_bin);
    2390           11 :         next->header->prev = segment_index;
    2391              :     }
    2392              : }
        

Generated by: LCOV version 2.0-1