Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * slab.c
4 : * SLAB allocator definitions.
5 : *
6 : * SLAB is a MemoryContext implementation designed for cases where large
7 : * numbers of equally-sized objects can be allocated and freed efficiently
8 : * with minimal memory wastage and fragmentation.
9 : *
10 : *
11 : * Portions Copyright (c) 2017-2025, PostgreSQL Global Development Group
12 : *
13 : * IDENTIFICATION
14 : * src/backend/utils/mmgr/slab.c
15 : *
16 : *
17 : * NOTE:
18 : * The constant allocation size allows significant simplification and various
19 : * optimizations over more general purpose allocators. The blocks are carved
20 : * into chunks of exactly the right size, wasting only the space required to
21 : * MAXALIGN the allocated chunks.
22 : *
23 : * Slab can also help reduce memory fragmentation in cases where longer-lived
24 : * chunks remain stored on blocks while most of the other chunks have already
25 : * been pfree'd. We give priority to putting new allocations into the
26 : * "fullest" block. This help avoid having too many sparsely used blocks
27 : * around and allows blocks to more easily become completely unused which
28 : * allows them to be eventually free'd.
29 : *
30 : * We identify the "fullest" block to put new allocations on by using a block
31 : * from the lowest populated element of the context's "blocklist" array.
32 : * This is an array of dlists containing blocks which we partition by the
33 : * number of free chunks which block has. Blocks with fewer free chunks are
34 : * stored in a lower indexed dlist array slot. Full blocks go on the 0th
35 : * element of the blocklist array. So that we don't have to have too many
36 : * elements in the array, each dlist in the array is responsible for a range
37 : * of free chunks. When a chunk is palloc'd or pfree'd we may need to move
38 : * the block onto another dlist if the number of free chunks crosses the
39 : * range boundary that the current list is responsible for. Having just a
40 : * few blocklist elements reduces the number of times we must move the block
41 : * onto another dlist element.
42 : *
43 : * We keep track of free chunks within each block by using a block-level free
44 : * list. We consult this list when we allocate a new chunk in the block.
45 : * The free list is a linked list, the head of which is pointed to with
46 : * SlabBlock's freehead field. Each subsequent list item is stored in the
47 : * free chunk's memory. We ensure chunks are large enough to store this
48 : * address.
49 : *
50 : * When we allocate a new block, technically all chunks are free, however, to
51 : * avoid having to write out the entire block to set the linked list for the
52 : * free chunks for every chunk in the block, we instead store a pointer to
53 : * the next "unused" chunk on the block and keep track of how many of these
54 : * unused chunks there are. When a new block is malloc'd, all chunks are
55 : * unused. The unused pointer starts with the first chunk on the block and
56 : * as chunks are allocated, the unused pointer is incremented. As chunks are
57 : * pfree'd, the unused pointer never goes backwards. The unused pointer can
58 : * be thought of as a high watermark for the maximum number of chunks in the
59 : * block which have been in use concurrently. When a chunk is pfree'd the
60 : * chunk is put onto the head of the free list and the unused pointer is not
61 : * changed. We only consume more unused chunks if we run out of free chunks
62 : * on the free list. This method effectively gives priority to using
63 : * previously used chunks over previously unused chunks, which should perform
64 : * better due to CPU caching effects.
65 : *
66 : *-------------------------------------------------------------------------
67 : */
68 :
69 : #include "postgres.h"
70 :
71 : #include "lib/ilist.h"
72 : #include "utils/memdebug.h"
73 : #include "utils/memutils.h"
74 : #include "utils/memutils_internal.h"
75 : #include "utils/memutils_memorychunk.h"
76 :
77 : #define Slab_BLOCKHDRSZ MAXALIGN(sizeof(SlabBlock))
78 :
79 : #ifdef MEMORY_CONTEXT_CHECKING
80 : /*
81 : * Size of the memory required to store the SlabContext.
82 : * MEMORY_CONTEXT_CHECKING builds need some extra memory for the isChunkFree
83 : * array.
84 : */
85 : #define Slab_CONTEXT_HDRSZ(chunksPerBlock) \
86 : (sizeof(SlabContext) + ((chunksPerBlock) * sizeof(bool)))
87 : #else
88 : #define Slab_CONTEXT_HDRSZ(chunksPerBlock) sizeof(SlabContext)
89 : #endif
90 :
91 : /*
92 : * The number of partitions to divide the blocklist into based their number of
93 : * free chunks. There must be at least 2.
94 : */
95 : #define SLAB_BLOCKLIST_COUNT 3
96 :
97 : /* The maximum number of completely empty blocks to keep around for reuse. */
98 : #define SLAB_MAXIMUM_EMPTY_BLOCKS 10
99 :
100 : /*
101 : * SlabContext is a specialized implementation of MemoryContext.
102 : */
103 : typedef struct SlabContext
104 : {
105 : MemoryContextData header; /* Standard memory-context fields */
106 : /* Allocation parameters for this context: */
107 : uint32 chunkSize; /* the requested (non-aligned) chunk size */
108 : uint32 fullChunkSize; /* chunk size with chunk header and alignment */
109 : uint32 blockSize; /* the size to make each block of chunks */
110 : int32 chunksPerBlock; /* number of chunks that fit in 1 block */
111 : int32 curBlocklistIndex; /* index into the blocklist[] element
112 : * containing the fullest, blocks */
113 : #ifdef MEMORY_CONTEXT_CHECKING
114 : bool *isChunkFree; /* array to mark free chunks in a block during
115 : * SlabCheck */
116 : #endif
117 :
118 : int32 blocklist_shift; /* number of bits to shift the nfree count
119 : * by to get the index into blocklist[] */
120 : dclist_head emptyblocks; /* empty blocks to use up first instead of
121 : * mallocing new blocks */
122 :
123 : /*
124 : * Blocks with free space, grouped by the number of free chunks they
125 : * contain. Completely full blocks are stored in the 0th element.
126 : * Completely empty blocks are stored in emptyblocks or free'd if we have
127 : * enough empty blocks already.
128 : */
129 : dlist_head blocklist[SLAB_BLOCKLIST_COUNT];
130 : } SlabContext;
131 :
132 : /*
133 : * SlabBlock
134 : * Structure of a single slab block.
135 : *
136 : * slab: pointer back to the owning MemoryContext
137 : * nfree: number of chunks on the block which are unallocated
138 : * nunused: number of chunks on the block unallocated and not on the block's
139 : * freelist.
140 : * freehead: linked-list header storing a pointer to the first free chunk on
141 : * the block. Subsequent pointers are stored in the chunk's memory. NULL
142 : * indicates the end of the list.
143 : * unused: pointer to the next chunk which has yet to be used.
144 : * node: doubly-linked list node for the context's blocklist
145 : */
146 : typedef struct SlabBlock
147 : {
148 : SlabContext *slab; /* owning context */
149 : int32 nfree; /* number of chunks on free + unused chunks */
150 : int32 nunused; /* number of unused chunks */
151 : MemoryChunk *freehead; /* pointer to the first free chunk */
152 : MemoryChunk *unused; /* pointer to the next unused chunk */
153 : dlist_node node; /* doubly-linked list for blocklist[] */
154 : } SlabBlock;
155 :
156 :
157 : #define Slab_CHUNKHDRSZ sizeof(MemoryChunk)
158 : #define SlabChunkGetPointer(chk) \
159 : ((void *) (((char *) (chk)) + sizeof(MemoryChunk)))
160 :
161 : /*
162 : * SlabBlockGetChunk
163 : * Obtain a pointer to the nth (0-based) chunk in the block
164 : */
165 : #define SlabBlockGetChunk(slab, block, n) \
166 : ((MemoryChunk *) ((char *) (block) + Slab_BLOCKHDRSZ \
167 : + ((n) * (slab)->fullChunkSize)))
168 :
169 : #if defined(MEMORY_CONTEXT_CHECKING) || defined(USE_ASSERT_CHECKING)
170 :
171 : /*
172 : * SlabChunkIndex
173 : * Get the 0-based index of how many chunks into the block the given
174 : * chunk is.
175 : */
176 : #define SlabChunkIndex(slab, block, chunk) \
177 : (((char *) (chunk) - (char *) SlabBlockGetChunk(slab, block, 0)) / \
178 : (slab)->fullChunkSize)
179 :
180 : /*
181 : * SlabChunkMod
182 : * A MemoryChunk should always be at an address which is a multiple of
183 : * fullChunkSize starting from the 0th chunk position. This will return
184 : * non-zero if it's not.
185 : */
186 : #define SlabChunkMod(slab, block, chunk) \
187 : (((char *) (chunk) - (char *) SlabBlockGetChunk(slab, block, 0)) % \
188 : (slab)->fullChunkSize)
189 :
190 : #endif
191 :
192 : /*
193 : * SlabIsValid
194 : * True iff set is a valid slab allocation set.
195 : */
196 : #define SlabIsValid(set) (PointerIsValid(set) && IsA(set, SlabContext))
197 :
198 : /*
199 : * SlabBlockIsValid
200 : * True iff block is a valid block of slab allocation set.
201 : */
202 : #define SlabBlockIsValid(block) \
203 : (PointerIsValid(block) && SlabIsValid((block)->slab))
204 :
205 : /*
206 : * SlabBlocklistIndex
207 : * Determine the blocklist index that a block should be in for the given
208 : * number of free chunks.
209 : */
210 : static inline int32
211 11803590 : SlabBlocklistIndex(SlabContext *slab, int nfree)
212 : {
213 : int32 index;
214 11803590 : int32 blocklist_shift = slab->blocklist_shift;
215 :
216 : Assert(nfree >= 0 && nfree <= slab->chunksPerBlock);
217 :
218 : /*
219 : * Determine the blocklist index based on the number of free chunks. We
220 : * must ensure that 0 free chunks is dedicated to index 0. Everything
221 : * else must be >= 1 and < SLAB_BLOCKLIST_COUNT.
222 : *
223 : * To make this as efficient as possible, we exploit some two's complement
224 : * arithmetic where we reverse the sign before bit shifting. This results
225 : * in an nfree of 0 using index 0 and anything non-zero staying non-zero.
226 : * This is exploiting 0 and -0 being the same in two's complement. When
227 : * we're done, we just need to flip the sign back over again for a
228 : * positive index.
229 : */
230 11803590 : index = -((-nfree) >> blocklist_shift);
231 :
232 : if (nfree == 0)
233 : Assert(index == 0);
234 : else
235 : Assert(index >= 1 && index < SLAB_BLOCKLIST_COUNT);
236 :
237 11803590 : return index;
238 : }
239 :
240 : /*
241 : * SlabFindNextBlockListIndex
242 : * Search blocklist for blocks which have free chunks and return the
243 : * index of the blocklist found containing at least 1 block with free
244 : * chunks. If no block can be found we return 0.
245 : *
246 : * Note: We give priority to fuller blocks so that these are filled before
247 : * emptier blocks. This is done to increase the chances that mostly-empty
248 : * blocks will eventually become completely empty so they can be free'd.
249 : */
250 : static int32
251 189902 : SlabFindNextBlockListIndex(SlabContext *slab)
252 : {
253 : /* start at 1 as blocklist[0] is for full blocks. */
254 313788 : for (int i = 1; i < SLAB_BLOCKLIST_COUNT; i++)
255 : {
256 : /* return the first found non-empty index */
257 257420 : if (!dlist_is_empty(&slab->blocklist[i]))
258 133534 : return i;
259 : }
260 :
261 : /* no blocks with free space */
262 56368 : return 0;
263 : }
264 :
265 : /*
266 : * SlabGetNextFreeChunk
267 : * Return the next free chunk in block and update the block to account
268 : * for the returned chunk now being used.
269 : */
270 : static inline MemoryChunk *
271 3889906 : SlabGetNextFreeChunk(SlabContext *slab, SlabBlock *block)
272 : {
273 : MemoryChunk *chunk;
274 :
275 : Assert(block->nfree > 0);
276 :
277 3889906 : if (block->freehead != NULL)
278 : {
279 3500012 : chunk = block->freehead;
280 :
281 : /*
282 : * Pop the chunk from the linked list of free chunks. The pointer to
283 : * the next free chunk is stored in the chunk itself.
284 : */
285 : VALGRIND_MAKE_MEM_DEFINED(SlabChunkGetPointer(chunk), sizeof(MemoryChunk *));
286 3500012 : block->freehead = *(MemoryChunk **) SlabChunkGetPointer(chunk);
287 :
288 : /* check nothing stomped on the free chunk's memory */
289 : Assert(block->freehead == NULL ||
290 : (block->freehead >= SlabBlockGetChunk(slab, block, 0) &&
291 : block->freehead <= SlabBlockGetChunk(slab, block, slab->chunksPerBlock - 1) &&
292 : SlabChunkMod(slab, block, block->freehead) == 0));
293 : }
294 : else
295 : {
296 : Assert(block->nunused > 0);
297 :
298 389894 : chunk = block->unused;
299 389894 : block->unused = (MemoryChunk *) (((char *) block->unused) + slab->fullChunkSize);
300 389894 : block->nunused--;
301 : }
302 :
303 3889906 : block->nfree--;
304 :
305 3889906 : return chunk;
306 : }
307 :
308 : /*
309 : * SlabContextCreate
310 : * Create a new Slab context.
311 : *
312 : * parent: parent context, or NULL if top-level context
313 : * name: name of context (must be statically allocated)
314 : * blockSize: allocation block size
315 : * chunkSize: allocation chunk size
316 : *
317 : * The Slab_CHUNKHDRSZ + MAXALIGN(chunkSize + 1) may not exceed
318 : * MEMORYCHUNK_MAX_VALUE.
319 : * 'blockSize' may not exceed MEMORYCHUNK_MAX_BLOCKOFFSET.
320 : */
321 : MemoryContext
322 581670 : SlabContextCreate(MemoryContext parent,
323 : const char *name,
324 : Size blockSize,
325 : Size chunkSize)
326 : {
327 : int chunksPerBlock;
328 : Size fullChunkSize;
329 : SlabContext *slab;
330 : int i;
331 :
332 : /* ensure MemoryChunk's size is properly maxaligned */
333 : StaticAssertDecl(Slab_CHUNKHDRSZ == MAXALIGN(Slab_CHUNKHDRSZ),
334 : "sizeof(MemoryChunk) is not maxaligned");
335 : Assert(blockSize <= MEMORYCHUNK_MAX_BLOCKOFFSET);
336 :
337 : /*
338 : * Ensure there's enough space to store the pointer to the next free chunk
339 : * in the memory of the (otherwise) unused allocation.
340 : */
341 581670 : if (chunkSize < sizeof(MemoryChunk *))
342 0 : chunkSize = sizeof(MemoryChunk *);
343 :
344 : /* length of the maxaligned chunk including the chunk header */
345 : #ifdef MEMORY_CONTEXT_CHECKING
346 : /* ensure there's always space for the sentinel byte */
347 : fullChunkSize = Slab_CHUNKHDRSZ + MAXALIGN(chunkSize + 1);
348 : #else
349 581670 : fullChunkSize = Slab_CHUNKHDRSZ + MAXALIGN(chunkSize);
350 : #endif
351 :
352 : Assert(fullChunkSize <= MEMORYCHUNK_MAX_VALUE);
353 :
354 : /* compute the number of chunks that will fit on each block */
355 581670 : chunksPerBlock = (blockSize - Slab_BLOCKHDRSZ) / fullChunkSize;
356 :
357 : /* Make sure the block can store at least one chunk. */
358 581670 : if (chunksPerBlock == 0)
359 0 : elog(ERROR, "block size %zu for slab is too small for %zu-byte chunks",
360 : blockSize, chunkSize);
361 :
362 :
363 :
364 581670 : slab = (SlabContext *) malloc(Slab_CONTEXT_HDRSZ(chunksPerBlock));
365 581670 : if (slab == NULL)
366 : {
367 0 : MemoryContextStats(TopMemoryContext);
368 0 : ereport(ERROR,
369 : (errcode(ERRCODE_OUT_OF_MEMORY),
370 : errmsg("out of memory"),
371 : errdetail("Failed while creating memory context \"%s\".",
372 : name)));
373 : }
374 :
375 : /*
376 : * Avoid writing code that can fail between here and MemoryContextCreate;
377 : * we'd leak the header if we ereport in this stretch.
378 : */
379 :
380 : /* Fill in SlabContext-specific header fields */
381 581670 : slab->chunkSize = (uint32) chunkSize;
382 581670 : slab->fullChunkSize = (uint32) fullChunkSize;
383 581670 : slab->blockSize = (uint32) blockSize;
384 581670 : slab->chunksPerBlock = chunksPerBlock;
385 581670 : slab->curBlocklistIndex = 0;
386 :
387 : /*
388 : * Compute a shift that guarantees that shifting chunksPerBlock with it is
389 : * < SLAB_BLOCKLIST_COUNT - 1. The reason that we subtract 1 from
390 : * SLAB_BLOCKLIST_COUNT in this calculation is that we reserve the 0th
391 : * blocklist element for blocks which have no free chunks.
392 : *
393 : * We calculate the number of bits to shift by rather than a divisor to
394 : * divide by as performing division each time we need to find the
395 : * blocklist index would be much slower.
396 : */
397 581670 : slab->blocklist_shift = 0;
398 3721082 : while ((slab->chunksPerBlock >> slab->blocklist_shift) >= (SLAB_BLOCKLIST_COUNT - 1))
399 3139412 : slab->blocklist_shift++;
400 :
401 : /* initialize the list to store empty blocks to be reused */
402 581670 : dclist_init(&slab->emptyblocks);
403 :
404 : /* initialize each blocklist slot */
405 2326680 : for (i = 0; i < SLAB_BLOCKLIST_COUNT; i++)
406 1745010 : dlist_init(&slab->blocklist[i]);
407 :
408 : #ifdef MEMORY_CONTEXT_CHECKING
409 : /* set the isChunkFree pointer right after the end of the context */
410 : slab->isChunkFree = (bool *) ((char *) slab + sizeof(SlabContext));
411 : #endif
412 :
413 : /* Finally, do the type-independent part of context creation */
414 581670 : MemoryContextCreate((MemoryContext) slab,
415 : T_SlabContext,
416 : MCTX_SLAB_ID,
417 : parent,
418 : name);
419 :
420 581670 : return (MemoryContext) slab;
421 : }
422 :
423 : /*
424 : * SlabReset
425 : * Frees all memory which is allocated in the given set.
426 : *
427 : * The code simply frees all the blocks in the context - we don't keep any
428 : * keeper blocks or anything like that.
429 : */
430 : void
431 580998 : SlabReset(MemoryContext context)
432 : {
433 580998 : SlabContext *slab = (SlabContext *) context;
434 : dlist_mutable_iter miter;
435 : int i;
436 :
437 : Assert(SlabIsValid(slab));
438 :
439 : #ifdef MEMORY_CONTEXT_CHECKING
440 : /* Check for corruption and leaks before freeing */
441 : SlabCheck(context);
442 : #endif
443 :
444 : /* release any retained empty blocks */
445 583596 : dclist_foreach_modify(miter, &slab->emptyblocks)
446 : {
447 2598 : SlabBlock *block = dlist_container(SlabBlock, node, miter.cur);
448 :
449 2598 : dclist_delete_from(&slab->emptyblocks, miter.cur);
450 :
451 : #ifdef CLOBBER_FREED_MEMORY
452 : wipe_mem(block, slab->blockSize);
453 : #endif
454 2598 : free(block);
455 2598 : context->mem_allocated -= slab->blockSize;
456 : }
457 :
458 : /* walk over blocklist and free the blocks */
459 2323992 : for (i = 0; i < SLAB_BLOCKLIST_COUNT; i++)
460 : {
461 1858682 : dlist_foreach_modify(miter, &slab->blocklist[i])
462 : {
463 115688 : SlabBlock *block = dlist_container(SlabBlock, node, miter.cur);
464 :
465 115688 : dlist_delete(miter.cur);
466 :
467 : #ifdef CLOBBER_FREED_MEMORY
468 : wipe_mem(block, slab->blockSize);
469 : #endif
470 115688 : free(block);
471 115688 : context->mem_allocated -= slab->blockSize;
472 : }
473 : }
474 :
475 580998 : slab->curBlocklistIndex = 0;
476 :
477 : Assert(context->mem_allocated == 0);
478 580998 : }
479 :
480 : /*
481 : * SlabDelete
482 : * Free all memory which is allocated in the given context.
483 : */
484 : void
485 580998 : SlabDelete(MemoryContext context)
486 : {
487 : /* Reset to release all the SlabBlocks */
488 580998 : SlabReset(context);
489 : /* And free the context header */
490 580998 : free(context);
491 580998 : }
492 :
493 : /*
494 : * Small helper for allocating a new chunk from a chunk, to avoid duplicating
495 : * the code between SlabAlloc() and SlabAllocFromNewBlock().
496 : */
497 : static inline void *
498 4011982 : SlabAllocSetupNewChunk(MemoryContext context, SlabBlock *block,
499 : MemoryChunk *chunk, Size size)
500 : {
501 4011982 : SlabContext *slab = (SlabContext *) context;
502 :
503 : /*
504 : * Check that the chunk pointer is actually somewhere on the block and is
505 : * aligned as expected.
506 : */
507 : Assert(chunk >= SlabBlockGetChunk(slab, block, 0));
508 : Assert(chunk <= SlabBlockGetChunk(slab, block, slab->chunksPerBlock - 1));
509 : Assert(SlabChunkMod(slab, block, chunk) == 0);
510 :
511 : /* Prepare to initialize the chunk header. */
512 : VALGRIND_MAKE_MEM_UNDEFINED(chunk, Slab_CHUNKHDRSZ);
513 :
514 4011982 : MemoryChunkSetHdrMask(chunk, block, MAXALIGN(slab->chunkSize), MCTX_SLAB_ID);
515 :
516 : #ifdef MEMORY_CONTEXT_CHECKING
517 : /* slab mark to catch clobber of "unused" space */
518 : Assert(slab->chunkSize < (slab->fullChunkSize - Slab_CHUNKHDRSZ));
519 : set_sentinel(MemoryChunkGetPointer(chunk), size);
520 : VALGRIND_MAKE_MEM_NOACCESS(((char *) chunk) + Slab_CHUNKHDRSZ +
521 : slab->chunkSize,
522 : slab->fullChunkSize -
523 : (slab->chunkSize + Slab_CHUNKHDRSZ));
524 : #endif
525 :
526 : #ifdef RANDOMIZE_ALLOCATED_MEMORY
527 : /* fill the allocated space with junk */
528 : randomize_mem((char *) MemoryChunkGetPointer(chunk), size);
529 : #endif
530 :
531 : /* Disallow access to the chunk header. */
532 : VALGRIND_MAKE_MEM_NOACCESS(chunk, Slab_CHUNKHDRSZ);
533 :
534 4011982 : return MemoryChunkGetPointer(chunk);
535 : }
536 :
537 : pg_noinline
538 : static void *
539 172022 : SlabAllocFromNewBlock(MemoryContext context, Size size, int flags)
540 : {
541 172022 : SlabContext *slab = (SlabContext *) context;
542 : SlabBlock *block;
543 : MemoryChunk *chunk;
544 : dlist_head *blocklist;
545 : int blocklist_idx;
546 :
547 : /* to save allocating a new one, first check the empty blocks list */
548 172022 : if (dclist_count(&slab->emptyblocks) > 0)
549 : {
550 49946 : dlist_node *node = dclist_pop_head_node(&slab->emptyblocks);
551 :
552 49946 : block = dlist_container(SlabBlock, node, node);
553 :
554 : /*
555 : * SlabFree() should have left this block in a valid state with all
556 : * chunks free. Ensure that's the case.
557 : */
558 : Assert(block->nfree == slab->chunksPerBlock);
559 :
560 : /* fetch the next chunk from this block */
561 49946 : chunk = SlabGetNextFreeChunk(slab, block);
562 : }
563 : else
564 : {
565 122076 : block = (SlabBlock *) malloc(slab->blockSize);
566 :
567 122076 : if (unlikely(block == NULL))
568 0 : return MemoryContextAllocationFailure(context, size, flags);
569 :
570 122076 : block->slab = slab;
571 122076 : context->mem_allocated += slab->blockSize;
572 :
573 : /* use the first chunk in the new block */
574 122076 : chunk = SlabBlockGetChunk(slab, block, 0);
575 :
576 122076 : block->nfree = slab->chunksPerBlock - 1;
577 122076 : block->unused = SlabBlockGetChunk(slab, block, 1);
578 122076 : block->freehead = NULL;
579 122076 : block->nunused = slab->chunksPerBlock - 1;
580 : }
581 :
582 : /* find the blocklist element for storing blocks with 1 used chunk */
583 172022 : blocklist_idx = SlabBlocklistIndex(slab, block->nfree);
584 172022 : blocklist = &slab->blocklist[blocklist_idx];
585 :
586 : /* this better be empty. We just added a block thinking it was */
587 : Assert(dlist_is_empty(blocklist));
588 :
589 172022 : dlist_push_head(blocklist, &block->node);
590 :
591 172022 : slab->curBlocklistIndex = blocklist_idx;
592 :
593 172022 : return SlabAllocSetupNewChunk(context, block, chunk, size);
594 : }
595 :
596 : /*
597 : * SlabAllocInvalidSize
598 : * Handle raising an ERROR for an invalid size request. We don't do this
599 : * in slab alloc as calling the elog functions would force the compiler
600 : * to setup the stack frame in SlabAlloc. For performance reasons, we
601 : * want to avoid that.
602 : */
603 : pg_noinline
604 : static void
605 : pg_attribute_noreturn()
606 0 : SlabAllocInvalidSize(MemoryContext context, Size size)
607 : {
608 0 : SlabContext *slab = (SlabContext *) context;
609 :
610 0 : elog(ERROR, "unexpected alloc chunk size %zu (expected %u)", size,
611 : slab->chunkSize);
612 : }
613 :
614 : /*
615 : * SlabAlloc
616 : * Returns a pointer to a newly allocated memory chunk or raises an ERROR
617 : * on allocation failure, or returns NULL when flags contains
618 : * MCXT_ALLOC_NO_OOM. 'size' must be the same size as was specified
619 : * during SlabContextCreate().
620 : *
621 : * This function should only contain the most common code paths. Everything
622 : * else should be in pg_noinline helper functions, thus avoiding the overhead
623 : * of creating a stack frame for the common cases. Allocating memory is often
624 : * a bottleneck in many workloads, so avoiding stack frame setup is
625 : * worthwhile. Helper functions should always directly return the newly
626 : * allocated memory so that we can just return that address directly as a tail
627 : * call.
628 : */
629 : void *
630 4011982 : SlabAlloc(MemoryContext context, Size size, int flags)
631 : {
632 4011982 : SlabContext *slab = (SlabContext *) context;
633 : SlabBlock *block;
634 : MemoryChunk *chunk;
635 :
636 : Assert(SlabIsValid(slab));
637 :
638 : /* sanity check that this is pointing to a valid blocklist */
639 : Assert(slab->curBlocklistIndex >= 0);
640 : Assert(slab->curBlocklistIndex <= SlabBlocklistIndex(slab, slab->chunksPerBlock));
641 :
642 : /*
643 : * Make sure we only allow correct request size. This doubles as the
644 : * MemoryContextCheckSize check.
645 : */
646 4011982 : if (unlikely(size != slab->chunkSize))
647 0 : SlabAllocInvalidSize(context, size);
648 :
649 4011982 : if (unlikely(slab->curBlocklistIndex == 0))
650 : {
651 : /*
652 : * Handle the case when there are no partially filled blocks
653 : * available. This happens either when the last allocation took the
654 : * last chunk in the block, or when SlabFree() free'd the final block.
655 : */
656 172022 : return SlabAllocFromNewBlock(context, size, flags);
657 : }
658 : else
659 : {
660 3839960 : dlist_head *blocklist = &slab->blocklist[slab->curBlocklistIndex];
661 : int new_blocklist_idx;
662 :
663 : Assert(!dlist_is_empty(blocklist));
664 :
665 : /* grab the block from the blocklist */
666 3839960 : block = dlist_head_element(SlabBlock, node, blocklist);
667 :
668 : /* make sure we actually got a valid block, with matching nfree */
669 : Assert(block != NULL);
670 : Assert(slab->curBlocklistIndex == SlabBlocklistIndex(slab, block->nfree));
671 : Assert(block->nfree > 0);
672 :
673 : /* fetch the next chunk from this block */
674 3839960 : chunk = SlabGetNextFreeChunk(slab, block);
675 :
676 : /* get the new blocklist index based on the new free chunk count */
677 3839960 : new_blocklist_idx = SlabBlocklistIndex(slab, block->nfree);
678 :
679 : /*
680 : * Handle the case where the blocklist index changes. This also deals
681 : * with blocks becoming full as only full blocks go at index 0.
682 : */
683 3839960 : if (unlikely(slab->curBlocklistIndex != new_blocklist_idx))
684 : {
685 88280 : dlist_delete_from(blocklist, &block->node);
686 88280 : dlist_push_head(&slab->blocklist[new_blocklist_idx], &block->node);
687 :
688 88280 : if (dlist_is_empty(blocklist))
689 83610 : slab->curBlocklistIndex = SlabFindNextBlockListIndex(slab);
690 : }
691 : }
692 :
693 3839960 : return SlabAllocSetupNewChunk(context, block, chunk, size);
694 : }
695 :
696 : /*
697 : * SlabFree
698 : * Frees allocated memory; memory is removed from the slab.
699 : */
700 : void
701 3895804 : SlabFree(void *pointer)
702 : {
703 3895804 : MemoryChunk *chunk = PointerGetMemoryChunk(pointer);
704 : SlabBlock *block;
705 : SlabContext *slab;
706 : int curBlocklistIdx;
707 : int newBlocklistIdx;
708 :
709 : /* Allow access to the chunk header. */
710 : VALGRIND_MAKE_MEM_DEFINED(chunk, Slab_CHUNKHDRSZ);
711 :
712 3895804 : block = MemoryChunkGetBlock(chunk);
713 :
714 : /*
715 : * For speed reasons we just Assert that the referenced block is good.
716 : * Future field experience may show that this Assert had better become a
717 : * regular runtime test-and-elog check.
718 : */
719 : Assert(SlabBlockIsValid(block));
720 3895804 : slab = block->slab;
721 :
722 : #ifdef MEMORY_CONTEXT_CHECKING
723 : /* Test for someone scribbling on unused space in chunk */
724 : Assert(slab->chunkSize < (slab->fullChunkSize - Slab_CHUNKHDRSZ));
725 : if (!sentinel_ok(pointer, slab->chunkSize))
726 : elog(WARNING, "detected write past chunk end in %s %p",
727 : slab->header.name, chunk);
728 : #endif
729 :
730 : /* push this chunk onto the head of the block's free list */
731 3895804 : *(MemoryChunk **) pointer = block->freehead;
732 3895804 : block->freehead = chunk;
733 :
734 3895804 : block->nfree++;
735 :
736 : Assert(block->nfree > 0);
737 : Assert(block->nfree <= slab->chunksPerBlock);
738 :
739 : #ifdef CLOBBER_FREED_MEMORY
740 : /* don't wipe the free list MemoryChunk pointer stored in the chunk */
741 : wipe_mem((char *) pointer + sizeof(MemoryChunk *),
742 : slab->chunkSize - sizeof(MemoryChunk *));
743 : #endif
744 :
745 3895804 : curBlocklistIdx = SlabBlocklistIndex(slab, block->nfree - 1);
746 3895804 : newBlocklistIdx = SlabBlocklistIndex(slab, block->nfree);
747 :
748 : /*
749 : * Check if the block needs to be moved to another element on the
750 : * blocklist based on it now having 1 more free chunk.
751 : */
752 3895804 : if (unlikely(curBlocklistIdx != newBlocklistIdx))
753 : {
754 : /* do the move */
755 88274 : dlist_delete_from(&slab->blocklist[curBlocklistIdx], &block->node);
756 88274 : dlist_push_head(&slab->blocklist[newBlocklistIdx], &block->node);
757 :
758 : /*
759 : * The blocklist[curBlocklistIdx] may now be empty or we may now be
760 : * able to use a lower-element blocklist. We'll need to redetermine
761 : * what the slab->curBlocklistIndex is if the current blocklist was
762 : * changed or if a lower element one was changed. We must ensure we
763 : * use the list with the fullest block(s).
764 : */
765 88274 : if (slab->curBlocklistIndex >= curBlocklistIdx)
766 : {
767 88274 : slab->curBlocklistIndex = SlabFindNextBlockListIndex(slab);
768 :
769 : /*
770 : * We know there must be a block with at least 1 unused chunk as
771 : * we just pfree'd one. Ensure curBlocklistIndex reflects this.
772 : */
773 : Assert(slab->curBlocklistIndex > 0);
774 : }
775 : }
776 :
777 : /* Handle when a block becomes completely empty */
778 3895804 : if (unlikely(block->nfree == slab->chunksPerBlock))
779 : {
780 : /* remove the block */
781 56310 : dlist_delete_from(&slab->blocklist[newBlocklistIdx], &block->node);
782 :
783 : /*
784 : * To avoid thrashing malloc/free, we keep a list of empty blocks that
785 : * we can reuse again instead of having to malloc a new one.
786 : */
787 56310 : if (dclist_count(&slab->emptyblocks) < SLAB_MAXIMUM_EMPTY_BLOCKS)
788 53318 : dclist_push_head(&slab->emptyblocks, &block->node);
789 : else
790 : {
791 : /*
792 : * When we have enough empty blocks stored already, we actually
793 : * free the block.
794 : */
795 : #ifdef CLOBBER_FREED_MEMORY
796 : wipe_mem(block, slab->blockSize);
797 : #endif
798 2992 : free(block);
799 2992 : slab->header.mem_allocated -= slab->blockSize;
800 : }
801 :
802 : /*
803 : * Check if we need to reset the blocklist index. This is required
804 : * when the blocklist this block is on has become completely empty.
805 : */
806 79970 : if (slab->curBlocklistIndex == newBlocklistIdx &&
807 23660 : dlist_is_empty(&slab->blocklist[newBlocklistIdx]))
808 18018 : slab->curBlocklistIndex = SlabFindNextBlockListIndex(slab);
809 : }
810 3895804 : }
811 :
812 : /*
813 : * SlabRealloc
814 : * Change the allocated size of a chunk.
815 : *
816 : * As Slab is designed for allocating equally-sized chunks of memory, it can't
817 : * do an actual chunk size change. We try to be gentle and allow calls with
818 : * exactly the same size, as in that case we can simply return the same
819 : * chunk. When the size differs, we throw an error.
820 : *
821 : * We could also allow requests with size < chunkSize. That however seems
822 : * rather pointless - Slab is meant for chunks of constant size, and moreover
823 : * realloc is usually used to enlarge the chunk.
824 : */
825 : void *
826 0 : SlabRealloc(void *pointer, Size size, int flags)
827 : {
828 0 : MemoryChunk *chunk = PointerGetMemoryChunk(pointer);
829 : SlabBlock *block;
830 : SlabContext *slab;
831 :
832 : /* Allow access to the chunk header. */
833 : VALGRIND_MAKE_MEM_DEFINED(chunk, Slab_CHUNKHDRSZ);
834 :
835 0 : block = MemoryChunkGetBlock(chunk);
836 :
837 : /* Disallow access to the chunk header. */
838 : VALGRIND_MAKE_MEM_NOACCESS(chunk, Slab_CHUNKHDRSZ);
839 :
840 : /*
841 : * Try to verify that we have a sane block pointer: the block header
842 : * should reference a slab context. (We use a test-and-elog, not just
843 : * Assert, because it seems highly likely that we're here in error in the
844 : * first place.)
845 : */
846 0 : if (!SlabBlockIsValid(block))
847 0 : elog(ERROR, "could not find block containing chunk %p", chunk);
848 0 : slab = block->slab;
849 :
850 : /* can't do actual realloc with slab, but let's try to be gentle */
851 0 : if (size == slab->chunkSize)
852 0 : return pointer;
853 :
854 0 : elog(ERROR, "slab allocator does not support realloc()");
855 : return NULL; /* keep compiler quiet */
856 : }
857 :
858 : /*
859 : * SlabGetChunkContext
860 : * Return the MemoryContext that 'pointer' belongs to.
861 : */
862 : MemoryContext
863 0 : SlabGetChunkContext(void *pointer)
864 : {
865 0 : MemoryChunk *chunk = PointerGetMemoryChunk(pointer);
866 : SlabBlock *block;
867 :
868 : /* Allow access to the chunk header. */
869 : VALGRIND_MAKE_MEM_DEFINED(chunk, Slab_CHUNKHDRSZ);
870 :
871 0 : block = MemoryChunkGetBlock(chunk);
872 :
873 : /* Disallow access to the chunk header. */
874 : VALGRIND_MAKE_MEM_NOACCESS(chunk, Slab_CHUNKHDRSZ);
875 :
876 : Assert(SlabBlockIsValid(block));
877 :
878 0 : return &block->slab->header;
879 : }
880 :
881 : /*
882 : * SlabGetChunkSpace
883 : * Given a currently-allocated chunk, determine the total space
884 : * it occupies (including all memory-allocation overhead).
885 : */
886 : Size
887 0 : SlabGetChunkSpace(void *pointer)
888 : {
889 0 : MemoryChunk *chunk = PointerGetMemoryChunk(pointer);
890 : SlabBlock *block;
891 : SlabContext *slab;
892 :
893 : /* Allow access to the chunk header. */
894 : VALGRIND_MAKE_MEM_DEFINED(chunk, Slab_CHUNKHDRSZ);
895 :
896 0 : block = MemoryChunkGetBlock(chunk);
897 :
898 : /* Disallow access to the chunk header. */
899 : VALGRIND_MAKE_MEM_NOACCESS(chunk, Slab_CHUNKHDRSZ);
900 :
901 : Assert(SlabBlockIsValid(block));
902 0 : slab = block->slab;
903 :
904 0 : return slab->fullChunkSize;
905 : }
906 :
907 : /*
908 : * SlabIsEmpty
909 : * Is the slab empty of any allocated space?
910 : */
911 : bool
912 0 : SlabIsEmpty(MemoryContext context)
913 : {
914 : Assert(SlabIsValid((SlabContext *) context));
915 :
916 0 : return (context->mem_allocated == 0);
917 : }
918 :
919 : /*
920 : * SlabStats
921 : * Compute stats about memory consumption of a Slab context.
922 : *
923 : * printfunc: if not NULL, pass a human-readable stats string to this.
924 : * passthru: pass this pointer through to printfunc.
925 : * totals: if not NULL, add stats about this context into *totals.
926 : * print_to_stderr: print stats to stderr if true, elog otherwise.
927 : */
928 : void
929 0 : SlabStats(MemoryContext context,
930 : MemoryStatsPrintFunc printfunc, void *passthru,
931 : MemoryContextCounters *totals,
932 : bool print_to_stderr)
933 : {
934 0 : SlabContext *slab = (SlabContext *) context;
935 0 : Size nblocks = 0;
936 0 : Size freechunks = 0;
937 : Size totalspace;
938 0 : Size freespace = 0;
939 : int i;
940 :
941 : Assert(SlabIsValid(slab));
942 :
943 : /* Include context header in totalspace */
944 0 : totalspace = Slab_CONTEXT_HDRSZ(slab->chunksPerBlock);
945 :
946 : /* Add the space consumed by blocks in the emptyblocks list */
947 0 : totalspace += dclist_count(&slab->emptyblocks) * slab->blockSize;
948 :
949 0 : for (i = 0; i < SLAB_BLOCKLIST_COUNT; i++)
950 : {
951 : dlist_iter iter;
952 :
953 0 : dlist_foreach(iter, &slab->blocklist[i])
954 : {
955 0 : SlabBlock *block = dlist_container(SlabBlock, node, iter.cur);
956 :
957 0 : nblocks++;
958 0 : totalspace += slab->blockSize;
959 0 : freespace += slab->fullChunkSize * block->nfree;
960 0 : freechunks += block->nfree;
961 : }
962 : }
963 :
964 0 : if (printfunc)
965 : {
966 : char stats_string[200];
967 :
968 : /* XXX should we include free chunks on empty blocks? */
969 0 : snprintf(stats_string, sizeof(stats_string),
970 : "%zu total in %zu blocks; %u empty blocks; %zu free (%zu chunks); %zu used",
971 0 : totalspace, nblocks, dclist_count(&slab->emptyblocks),
972 : freespace, freechunks, totalspace - freespace);
973 0 : printfunc(context, passthru, stats_string, print_to_stderr);
974 : }
975 :
976 0 : if (totals)
977 : {
978 0 : totals->nblocks += nblocks;
979 0 : totals->freechunks += freechunks;
980 0 : totals->totalspace += totalspace;
981 0 : totals->freespace += freespace;
982 : }
983 0 : }
984 :
985 :
986 : #ifdef MEMORY_CONTEXT_CHECKING
987 :
988 : /*
989 : * SlabCheck
990 : * Walk through all blocks looking for inconsistencies.
991 : *
992 : * NOTE: report errors as WARNING, *not* ERROR or FATAL. Otherwise you'll
993 : * find yourself in an infinite loop when trouble occurs, because this
994 : * routine will be entered again when elog cleanup tries to release memory!
995 : */
996 : void
997 : SlabCheck(MemoryContext context)
998 : {
999 : SlabContext *slab = (SlabContext *) context;
1000 : int i;
1001 : int nblocks = 0;
1002 : const char *name = slab->header.name;
1003 : dlist_iter iter;
1004 :
1005 : Assert(SlabIsValid(slab));
1006 : Assert(slab->chunksPerBlock > 0);
1007 :
1008 : /*
1009 : * Have a look at the empty blocks. These should have all their chunks
1010 : * marked as free. Ensure that's the case.
1011 : */
1012 : dclist_foreach(iter, &slab->emptyblocks)
1013 : {
1014 : SlabBlock *block = dlist_container(SlabBlock, node, iter.cur);
1015 :
1016 : if (block->nfree != slab->chunksPerBlock)
1017 : elog(WARNING, "problem in slab %s: empty block %p should have %d free chunks but has %d chunks free",
1018 : name, block, slab->chunksPerBlock, block->nfree);
1019 : }
1020 :
1021 : /* walk the non-empty block lists */
1022 : for (i = 0; i < SLAB_BLOCKLIST_COUNT; i++)
1023 : {
1024 : int j,
1025 : nfree;
1026 :
1027 : /* walk all blocks on this blocklist */
1028 : dlist_foreach(iter, &slab->blocklist[i])
1029 : {
1030 : SlabBlock *block = dlist_container(SlabBlock, node, iter.cur);
1031 : MemoryChunk *cur_chunk;
1032 :
1033 : /*
1034 : * Make sure the number of free chunks (in the block header)
1035 : * matches the position in the blocklist.
1036 : */
1037 : if (SlabBlocklistIndex(slab, block->nfree) != i)
1038 : elog(WARNING, "problem in slab %s: block %p is on blocklist %d but should be on blocklist %d",
1039 : name, block, i, SlabBlocklistIndex(slab, block->nfree));
1040 :
1041 : /* make sure the block is not empty */
1042 : if (block->nfree >= slab->chunksPerBlock)
1043 : elog(WARNING, "problem in slab %s: empty block %p incorrectly stored on blocklist element %d",
1044 : name, block, i);
1045 :
1046 : /* make sure the slab pointer correctly points to this context */
1047 : if (block->slab != slab)
1048 : elog(WARNING, "problem in slab %s: bogus slab link in block %p",
1049 : name, block);
1050 :
1051 : /* reset the array of free chunks for this block */
1052 : memset(slab->isChunkFree, 0, (slab->chunksPerBlock * sizeof(bool)));
1053 : nfree = 0;
1054 :
1055 : /* walk through the block's free list chunks */
1056 : cur_chunk = block->freehead;
1057 : while (cur_chunk != NULL)
1058 : {
1059 : int chunkidx = SlabChunkIndex(slab, block, cur_chunk);
1060 :
1061 : /*
1062 : * Ensure the free list link points to something on the block
1063 : * at an address aligned according to the full chunk size.
1064 : */
1065 : if (cur_chunk < SlabBlockGetChunk(slab, block, 0) ||
1066 : cur_chunk > SlabBlockGetChunk(slab, block, slab->chunksPerBlock - 1) ||
1067 : SlabChunkMod(slab, block, cur_chunk) != 0)
1068 : elog(WARNING, "problem in slab %s: bogus free list link %p in block %p",
1069 : name, cur_chunk, block);
1070 :
1071 : /* count the chunk and mark it free on the free chunk array */
1072 : nfree++;
1073 : slab->isChunkFree[chunkidx] = true;
1074 :
1075 : /* read pointer of the next free chunk */
1076 : VALGRIND_MAKE_MEM_DEFINED(MemoryChunkGetPointer(cur_chunk), sizeof(MemoryChunk *));
1077 : cur_chunk = *(MemoryChunk **) SlabChunkGetPointer(cur_chunk);
1078 : }
1079 :
1080 : /* check that the unused pointer matches what nunused claims */
1081 : if (SlabBlockGetChunk(slab, block, slab->chunksPerBlock - block->nunused) !=
1082 : block->unused)
1083 : elog(WARNING, "problem in slab %s: mismatch detected between nunused chunks and unused pointer in block %p",
1084 : name, block);
1085 :
1086 : /*
1087 : * count the remaining free chunks that have yet to make it onto
1088 : * the block's free list.
1089 : */
1090 : cur_chunk = block->unused;
1091 : for (j = 0; j < block->nunused; j++)
1092 : {
1093 : int chunkidx = SlabChunkIndex(slab, block, cur_chunk);
1094 :
1095 :
1096 : /* count the chunk as free and mark it as so in the array */
1097 : nfree++;
1098 : if (chunkidx < slab->chunksPerBlock)
1099 : slab->isChunkFree[chunkidx] = true;
1100 :
1101 : /* move forward 1 chunk */
1102 : cur_chunk = (MemoryChunk *) (((char *) cur_chunk) + slab->fullChunkSize);
1103 : }
1104 :
1105 : for (j = 0; j < slab->chunksPerBlock; j++)
1106 : {
1107 : if (!slab->isChunkFree[j])
1108 : {
1109 : MemoryChunk *chunk = SlabBlockGetChunk(slab, block, j);
1110 : SlabBlock *chunkblock;
1111 :
1112 : /* Allow access to the chunk header. */
1113 : VALGRIND_MAKE_MEM_DEFINED(chunk, Slab_CHUNKHDRSZ);
1114 :
1115 : chunkblock = (SlabBlock *) MemoryChunkGetBlock(chunk);
1116 :
1117 : /* Disallow access to the chunk header. */
1118 : VALGRIND_MAKE_MEM_NOACCESS(chunk, Slab_CHUNKHDRSZ);
1119 :
1120 : /*
1121 : * check the chunk's blockoffset correctly points back to
1122 : * the block
1123 : */
1124 : if (chunkblock != block)
1125 : elog(WARNING, "problem in slab %s: bogus block link in block %p, chunk %p",
1126 : name, block, chunk);
1127 :
1128 : /* check the sentinel byte is intact */
1129 : Assert(slab->chunkSize < (slab->fullChunkSize - Slab_CHUNKHDRSZ));
1130 : if (!sentinel_ok(chunk, Slab_CHUNKHDRSZ + slab->chunkSize))
1131 : elog(WARNING, "problem in slab %s: detected write past chunk end in block %p, chunk %p",
1132 : name, block, chunk);
1133 : }
1134 : }
1135 :
1136 : /*
1137 : * Make sure we got the expected number of free chunks (as tracked
1138 : * in the block header).
1139 : */
1140 : if (nfree != block->nfree)
1141 : elog(WARNING, "problem in slab %s: nfree in block %p is %d but %d chunk were found as free",
1142 : name, block, block->nfree, nfree);
1143 :
1144 : nblocks++;
1145 : }
1146 : }
1147 :
1148 : /* the stored empty blocks are tracked in mem_allocated too */
1149 : nblocks += dclist_count(&slab->emptyblocks);
1150 :
1151 : Assert(nblocks * slab->blockSize == context->mem_allocated);
1152 : }
1153 :
1154 : #endif /* MEMORY_CONTEXT_CHECKING */
|