Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * localbuf.c
4 : * local buffer manager. Fast buffer manager for temporary tables,
5 : * which never need to be WAL-logged or checkpointed, etc.
6 : *
7 : * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994-5, Regents of the University of California
9 : *
10 : *
11 : * IDENTIFICATION
12 : * src/backend/storage/buffer/localbuf.c
13 : *
14 : *-------------------------------------------------------------------------
15 : */
16 : #include "postgres.h"
17 :
18 : #include "access/parallel.h"
19 : #include "catalog/catalog.h"
20 : #include "executor/instrument.h"
21 : #include "storage/buf_internals.h"
22 : #include "storage/bufmgr.h"
23 : #include "utils/guc.h"
24 : #include "utils/memutils.h"
25 : #include "utils/resowner_private.h"
26 :
27 :
28 : /*#define LBDEBUG*/
29 :
30 : /* entry for buffer lookup hashtable */
31 : typedef struct
32 : {
33 : BufferTag key; /* Tag of a disk page */
34 : int id; /* Associated local buffer's index */
35 : } LocalBufferLookupEnt;
36 :
37 : /* Note: this macro only works on local buffers, not shared ones! */
38 : #define LocalBufHdrGetBlock(bufHdr) \
39 : LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]
40 :
41 : int NLocBuffer = 0; /* until buffers are initialized */
42 :
43 : BufferDesc *LocalBufferDescriptors = NULL;
44 : Block *LocalBufferBlockPointers = NULL;
45 : int32 *LocalRefCount = NULL;
46 :
47 : static int nextFreeLocalBuf = 0;
48 :
49 : static HTAB *LocalBufHash = NULL;
50 :
51 :
52 : static void InitLocalBuffers(void);
53 : static Block GetLocalBufferStorage(void);
54 :
55 :
56 : /*
57 : * PrefetchLocalBuffer -
58 : * initiate asynchronous read of a block of a relation
59 : *
60 : * Do PrefetchBuffer's work for temporary relations.
61 : * No-op if prefetching isn't compiled in.
62 : */
63 : PrefetchBufferResult
64 5058 : PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum,
65 : BlockNumber blockNum)
66 : {
67 5058 : PrefetchBufferResult result = {InvalidBuffer, false};
68 : BufferTag newTag; /* identity of requested block */
69 : LocalBufferLookupEnt *hresult;
70 :
71 5058 : InitBufferTag(&newTag, &smgr->smgr_rlocator.locator, forkNum, blockNum);
72 :
73 : /* Initialize local buffers if first request in this session */
74 5058 : if (LocalBufHash == NULL)
75 0 : InitLocalBuffers();
76 :
77 : /* See if the desired buffer already exists */
78 : hresult = (LocalBufferLookupEnt *)
79 5058 : hash_search(LocalBufHash, (void *) &newTag, HASH_FIND, NULL);
80 :
81 5058 : if (hresult)
82 : {
83 : /* Yes, so nothing to do */
84 5058 : result.recent_buffer = -hresult->id - 1;
85 : }
86 : else
87 : {
88 : #ifdef USE_PREFETCH
89 : /* Not in buffers, so initiate prefetch */
90 0 : smgrprefetch(smgr, forkNum, blockNum);
91 0 : result.initiated_io = true;
92 : #endif /* USE_PREFETCH */
93 : }
94 :
95 5058 : return result;
96 : }
97 :
98 :
/*
 * LocalBufferAlloc -
 *	  Find or create a local buffer for the given page of the given relation.
 *
 * API is similar to bufmgr.c's BufferAlloc, except that we do not need
 * to do any locking since this is all local.  Also, IO_IN_PROGRESS
 * does not get set.  Lastly, we support only default access strategy
 * (hence, usage_count is always advanced).
 *
 * On return, *foundPtr is true if the buffer already held a valid copy of
 * the page; false means the caller must read the page in.  In either case
 * the buffer is pinned (LocalRefCount bumped and registered with the
 * current resource owner).
 */
BufferDesc *
LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
				 bool *foundPtr)
{
	BufferTag	newTag;			/* identity of requested block */
	LocalBufferLookupEnt *hresult;
	BufferDesc *bufHdr;
	int			b;
	int			trycounter;
	bool		found;
	uint32		buf_state;

	InitBufferTag(&newTag, &smgr->smgr_rlocator.locator, forkNum, blockNum);

	/* Initialize local buffers if first request in this session */
	if (LocalBufHash == NULL)
		InitLocalBuffers();

	/* See if the desired buffer already exists */
	hresult = (LocalBufferLookupEnt *)
		hash_search(LocalBufHash, (void *) &newTag, HASH_FIND, NULL);

	if (hresult)
	{
		b = hresult->id;
		bufHdr = GetLocalBufferDescriptor(b);
		Assert(BufferTagsEqual(&bufHdr->tag, &newTag));
#ifdef LBDEBUG
		fprintf(stderr, "LB ALLOC (%u,%d,%d) %d\n",
				smgr->smgr_rlocator.locator.relNumber, forkNum, blockNum, -b - 1);
#endif
		buf_state = pg_atomic_read_u32(&bufHdr->state);

		/* this part is equivalent to PinBuffer for a shared buffer */
		if (LocalRefCount[b] == 0)
		{
			/* first pin of this buffer here: advance usage count, capped */
			if (BUF_STATE_GET_USAGECOUNT(buf_state) < BM_MAX_USAGE_COUNT)
			{
				buf_state += BUF_USAGECOUNT_ONE;
				pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
			}
		}
		LocalRefCount[b]++;
		ResourceOwnerRememberBuffer(CurrentResourceOwner,
									BufferDescriptorGetBuffer(bufHdr));
		if (buf_state & BM_VALID)
			*foundPtr = true;
		else
		{
			/* Previous read attempt must have failed; try again */
			*foundPtr = false;
		}
		return bufHdr;
	}

#ifdef LBDEBUG
	fprintf(stderr, "LB ALLOC (%u,%d,%d) %d\n",
			smgr->smgr_rlocator.locator.relNumber, forkNum, blockNum,
			-nextFreeLocalBuf - 1);
#endif

	/*
	 * Need to get a new buffer.  We use a clock sweep algorithm (essentially
	 * the same as what freelist.c does now...)
	 */
	trycounter = NLocBuffer;
	for (;;)
	{
		b = nextFreeLocalBuf;

		/* advance the clock hand, wrapping around at the end of the pool */
		if (++nextFreeLocalBuf >= NLocBuffer)
			nextFreeLocalBuf = 0;

		bufHdr = GetLocalBufferDescriptor(b);

		if (LocalRefCount[b] == 0)
		{
			buf_state = pg_atomic_read_u32(&bufHdr->state);

			if (BUF_STATE_GET_USAGECOUNT(buf_state) > 0)
			{
				/* recently used: decay its usage count and keep sweeping */
				buf_state -= BUF_USAGECOUNT_ONE;
				pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
				trycounter = NLocBuffer;
			}
			else
			{
				/* Found a usable buffer */
				LocalRefCount[b]++;
				ResourceOwnerRememberBuffer(CurrentResourceOwner,
											BufferDescriptorGetBuffer(bufHdr));
				break;
			}
		}
		else if (--trycounter == 0)
			ereport(ERROR,
					(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
					 errmsg("no empty local buffer available")));
	}

	/*
	 * this buffer is not referenced but it might still be dirty. if that's
	 * the case, write it out before reusing it!
	 */
	if (buf_state & BM_DIRTY)
	{
		SMgrRelation oreln;
		Page		localpage = (char *) LocalBufHdrGetBlock(bufHdr);

		/* Find smgr relation for buffer */
		oreln = smgropen(bufHdr->tag.rlocator, MyBackendId);

		PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);

		/* And write... */
		smgrwrite(oreln,
				  bufHdr->tag.forkNum,
				  bufHdr->tag.blockNum,
				  localpage,
				  false);

		/* Mark not-dirty now in case we error out below */
		buf_state &= ~BM_DIRTY;
		pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);

		pgBufferUsage.local_blks_written++;
	}

	/*
	 * lazy memory allocation: allocate space on first use of a buffer.
	 */
	if (LocalBufHdrGetBlock(bufHdr) == NULL)
	{
		/* Set pointer for use by BufferGetBlock() macro */
		LocalBufHdrGetBlock(bufHdr) = GetLocalBufferStorage();
	}

	/*
	 * Update the hash table: remove old entry, if any, and make new one.
	 */
	if (buf_state & BM_TAG_VALID)
	{
		hresult = (LocalBufferLookupEnt *)
			hash_search(LocalBufHash, (void *) &bufHdr->tag,
						HASH_REMOVE, NULL);
		if (!hresult)			/* shouldn't happen */
			elog(ERROR, "local buffer hash table corrupted");
		/* mark buffer invalid just in case hash insert fails */
		ClearBufferTag(&bufHdr->tag);
		buf_state &= ~(BM_VALID | BM_TAG_VALID);
		pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
	}

	hresult = (LocalBufferLookupEnt *)
		hash_search(LocalBufHash, (void *) &newTag, HASH_ENTER, &found);
	if (found)					/* shouldn't happen */
		elog(ERROR, "local buffer hash table corrupted");
	hresult->id = b;

	/*
	 * it's all ours now.
	 */
	bufHdr->tag = newTag;
	/* clear validity/dirty flags and reset usage count to exactly one */
	buf_state &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_IO_ERROR);
	buf_state |= BM_TAG_VALID;
	buf_state &= ~BUF_USAGECOUNT_MASK;
	buf_state += BUF_USAGECOUNT_ONE;
	pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);

	*foundPtr = false;
	return bufHdr;
}
280 :
281 : /*
282 : * MarkLocalBufferDirty -
283 : * mark a local buffer dirty
284 : */
285 : void
286 2105340 : MarkLocalBufferDirty(Buffer buffer)
287 : {
288 : int bufid;
289 : BufferDesc *bufHdr;
290 : uint32 buf_state;
291 :
292 : Assert(BufferIsLocal(buffer));
293 :
294 : #ifdef LBDEBUG
295 : fprintf(stderr, "LB DIRTY %d\n", buffer);
296 : #endif
297 :
298 2105340 : bufid = -(buffer + 1);
299 :
300 : Assert(LocalRefCount[bufid] > 0);
301 :
302 2105340 : bufHdr = GetLocalBufferDescriptor(bufid);
303 :
304 2105340 : buf_state = pg_atomic_read_u32(&bufHdr->state);
305 :
306 2105340 : if (!(buf_state & BM_DIRTY))
307 15098 : pgBufferUsage.local_blks_dirtied++;
308 :
309 2105340 : buf_state |= BM_DIRTY;
310 :
311 2105340 : pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
312 2105340 : }
313 :
/*
 * DropRelationLocalBuffers
 *		This function removes from the buffer pool all the pages of the
 *		specified relation that have block numbers >= firstDelBlock.
 *		(In particular, with firstDelBlock = 0, all pages are removed.)
 *		Dirty pages are simply dropped, without bothering to write them
 *		out first.  Therefore, this is NOT rollback-able, and so should be
 *		used only with extreme caution!
 *
 * See DropRelationBuffers in bufmgr.c for more notes.
 */
void
DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber forkNum,
						 BlockNumber firstDelBlock)
{
	int			i;

	/* Scan the whole local buffer pool for matching pages */
	for (i = 0; i < NLocBuffer; i++)
	{
		BufferDesc *bufHdr = GetLocalBufferDescriptor(i);
		LocalBufferLookupEnt *hresult;
		uint32		buf_state;

		buf_state = pg_atomic_read_u32(&bufHdr->state);

		/* check tag validity first so the tag fields are safe to inspect */
		if ((buf_state & BM_TAG_VALID) &&
			RelFileLocatorEquals(bufHdr->tag.rlocator, rlocator) &&
			bufHdr->tag.forkNum == forkNum &&
			bufHdr->tag.blockNum >= firstDelBlock)
		{
			/* a pinned page can't safely be dropped */
			if (LocalRefCount[i] != 0)
				elog(ERROR, "block %u of %s is still referenced (local %u)",
					 bufHdr->tag.blockNum,
					 relpathbackend(bufHdr->tag.rlocator, MyBackendId,
									bufHdr->tag.forkNum),
					 LocalRefCount[i]);
			/* Remove entry from hashtable */
			hresult = (LocalBufferLookupEnt *)
				hash_search(LocalBufHash, (void *) &bufHdr->tag,
							HASH_REMOVE, NULL);
			if (!hresult)		/* shouldn't happen */
				elog(ERROR, "local buffer hash table corrupted");
			/* Mark buffer invalid */
			ClearBufferTag(&bufHdr->tag);
			buf_state &= ~BUF_FLAG_MASK;
			buf_state &= ~BUF_USAGECOUNT_MASK;
			pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
		}
	}
}
364 :
365 : /*
366 : * DropRelationAllLocalBuffers
367 : * This function removes from the buffer pool all pages of all forks
368 : * of the specified relation.
369 : *
370 : * See DropRelationsAllBuffers in bufmgr.c for more notes.
371 : */
372 : void
373 5100 : DropRelationAllLocalBuffers(RelFileLocator rlocator)
374 : {
375 : int i;
376 :
377 4961260 : for (i = 0; i < NLocBuffer; i++)
378 : {
379 4956160 : BufferDesc *bufHdr = GetLocalBufferDescriptor(i);
380 : LocalBufferLookupEnt *hresult;
381 : uint32 buf_state;
382 :
383 4956160 : buf_state = pg_atomic_read_u32(&bufHdr->state);
384 :
385 4956160 : if ((buf_state & BM_TAG_VALID) &&
386 316874 : RelFileLocatorEquals(bufHdr->tag.rlocator, rlocator))
387 : {
388 21906 : if (LocalRefCount[i] != 0)
389 0 : elog(ERROR, "block %u of %s is still referenced (local %u)",
390 : bufHdr->tag.blockNum,
391 : relpathbackend(bufHdr->tag.rlocator, MyBackendId,
392 : bufHdr->tag.forkNum),
393 : LocalRefCount[i]);
394 : /* Remove entry from hashtable */
395 : hresult = (LocalBufferLookupEnt *)
396 21906 : hash_search(LocalBufHash, (void *) &bufHdr->tag,
397 : HASH_REMOVE, NULL);
398 21906 : if (!hresult) /* shouldn't happen */
399 0 : elog(ERROR, "local buffer hash table corrupted");
400 : /* Mark buffer invalid */
401 21906 : ClearBufferTag(&bufHdr->tag);
402 21906 : buf_state &= ~BUF_FLAG_MASK;
403 21906 : buf_state &= ~BUF_USAGECOUNT_MASK;
404 21906 : pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
405 : }
406 : }
407 5100 : }
408 :
409 : /*
410 : * InitLocalBuffers -
411 : * init the local buffer cache. Since most queries (esp. multi-user ones)
412 : * don't involve local buffers, we delay allocating actual memory for the
413 : * buffers until we need them; just make the buffer headers here.
414 : */
415 : static void
416 436 : InitLocalBuffers(void)
417 : {
418 436 : int nbufs = num_temp_buffers;
419 : HASHCTL info;
420 : int i;
421 :
422 : /*
423 : * Parallel workers can't access data in temporary tables, because they
424 : * have no visibility into the local buffers of their leader. This is a
425 : * convenient, low-cost place to provide a backstop check for that. Note
426 : * that we don't wish to prevent a parallel worker from accessing catalog
427 : * metadata about a temp table, so checks at higher levels would be
428 : * inappropriate.
429 : */
430 436 : if (IsParallelWorker())
431 0 : ereport(ERROR,
432 : (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
433 : errmsg("cannot access temporary tables during a parallel operation")));
434 :
435 : /* Allocate and zero buffer headers and auxiliary arrays */
436 436 : LocalBufferDescriptors = (BufferDesc *) calloc(nbufs, sizeof(BufferDesc));
437 436 : LocalBufferBlockPointers = (Block *) calloc(nbufs, sizeof(Block));
438 436 : LocalRefCount = (int32 *) calloc(nbufs, sizeof(int32));
439 436 : if (!LocalBufferDescriptors || !LocalBufferBlockPointers || !LocalRefCount)
440 0 : ereport(FATAL,
441 : (errcode(ERRCODE_OUT_OF_MEMORY),
442 : errmsg("out of memory")));
443 :
444 436 : nextFreeLocalBuf = 0;
445 :
446 : /* initialize fields that need to start off nonzero */
447 446900 : for (i = 0; i < nbufs; i++)
448 : {
449 446464 : BufferDesc *buf = GetLocalBufferDescriptor(i);
450 :
451 : /*
452 : * negative to indicate local buffer. This is tricky: shared buffers
453 : * start with 0. We have to start with -2. (Note that the routine
454 : * BufferDescriptorGetBuffer adds 1 to buf_id so our first buffer id
455 : * is -1.)
456 : */
457 446464 : buf->buf_id = -i - 2;
458 :
459 : /*
460 : * Intentionally do not initialize the buffer's atomic variable
461 : * (besides zeroing the underlying memory above). That way we get
462 : * errors on platforms without atomics, if somebody (re-)introduces
463 : * atomic operations for local buffers.
464 : */
465 : }
466 :
467 : /* Create the lookup hash table */
468 436 : info.keysize = sizeof(BufferTag);
469 436 : info.entrysize = sizeof(LocalBufferLookupEnt);
470 :
471 436 : LocalBufHash = hash_create("Local Buffer Lookup Table",
472 : nbufs,
473 : &info,
474 : HASH_ELEM | HASH_BLOBS);
475 :
476 436 : if (!LocalBufHash)
477 0 : elog(ERROR, "could not initialize local buffer hash table");
478 :
479 : /* Initialization done, mark buffers allocated */
480 436 : NLocBuffer = nbufs;
481 436 : }
482 :
/*
 * GetLocalBufferStorage - allocate memory for a local buffer
 *
 * The idea of this function is to aggregate our requests for storage
 * so that the memory manager doesn't see a whole lot of relatively small
 * requests.  Since we'll never give back a local buffer once it's created
 * within a particular process, no point in burdening memmgr with separately
 * managed chunks.
 *
 * Returns a BLCKSZ-sized chunk carved out of a larger block that was
 * obtained from LocalBufferContext; chunks are handed out sequentially.
 */
static Block
GetLocalBufferStorage(void)
{
	/* position within, and size of, the current bulk allocation */
	static char *cur_block = NULL;
	static int	next_buf_in_block = 0;
	static int	num_bufs_in_block = 0;
	/* lifetime total, bounded by NLocBuffer */
	static int	total_bufs_allocated = 0;
	static MemoryContext LocalBufferContext = NULL;

	char	   *this_buf;

	Assert(total_bufs_allocated < NLocBuffer);

	/* current bulk block exhausted (or first call)? */
	if (next_buf_in_block >= num_bufs_in_block)
	{
		/* Need to make a new request to memmgr */
		int			num_bufs;

		/*
		 * We allocate local buffers in a context of their own, so that the
		 * space eaten for them is easily recognizable in MemoryContextStats
		 * output.  Create the context on first use.
		 */
		if (LocalBufferContext == NULL)
			LocalBufferContext =
				AllocSetContextCreate(TopMemoryContext,
									  "LocalBufferContext",
									  ALLOCSET_DEFAULT_SIZES);

		/* Start with a 16-buffer request; subsequent ones double each time */
		num_bufs = Max(num_bufs_in_block * 2, 16);
		/* But not more than what we need for all remaining local bufs */
		num_bufs = Min(num_bufs, NLocBuffer - total_bufs_allocated);
		/* And don't overflow MaxAllocSize, either */
		num_bufs = Min(num_bufs, MaxAllocSize / BLCKSZ);

		cur_block = (char *) MemoryContextAlloc(LocalBufferContext,
												num_bufs * BLCKSZ);
		next_buf_in_block = 0;
		num_bufs_in_block = num_bufs;
	}

	/* Allocate next buffer in current memory block */
	this_buf = cur_block + next_buf_in_block * BLCKSZ;
	next_buf_in_block++;
	total_bufs_allocated++;

	return (Block) this_buf;
}
541 :
/*
 * CheckForLocalBufferLeaks - ensure this backend holds no local buffer pins
 *
 * This is just like CheckForBufferLeaks(), but for local buffers.
 */
static void
CheckForLocalBufferLeaks(void)
{
#ifdef USE_ASSERT_CHECKING
	if (LocalRefCount)
	{
		int			leaked = 0;
		int			i;

		for (i = 0; i < NLocBuffer; i++)
		{
			if (LocalRefCount[i] == 0)
				continue;

			/* Buffer numbers for local buffers are the negated index - 1 */
			PrintBufferLeakWarning(-i - 1);
			leaked++;
		}
		Assert(leaked == 0);
	}
#endif
}
570 :
/*
 * AtEOXact_LocalBuffers - clean up at end of transaction.
 *
 * This is just like AtEOXact_Buffers, but for local buffers.
 *
 * There is no commit/abort-specific work to do here; both paths simply
 * verify (in assert-enabled builds) that no local buffer pins remain.
 */
void
AtEOXact_LocalBuffers(bool isCommit)
{
	CheckForLocalBufferLeaks();
}
581 :
/*
 * AtProcExit_LocalBuffers - ensure we have dropped pins during backend exit.
 *
 * This is just like AtProcExit_Buffers, but for local buffers.
 */
void
AtProcExit_LocalBuffers(void)
{
	/*
	 * We shouldn't be holding any remaining pins; if we are, and assertions
	 * aren't enabled, we'll fail later in DropRelationBuffers while
	 * trying to drop the temp rels.
	 */
	CheckForLocalBufferLeaks();
}
|