Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * smgr.c
4 : * public interface routines to storage manager switch.
5 : *
6 : * All file system operations on relations dispatch through these routines.
7 : * An SMgrRelation represents physical on-disk relation files that are open
8 : * for reading and writing.
9 : *
10 : * When a relation is first accessed through the relation cache, the
11 : * corresponding SMgrRelation entry is opened by calling smgropen(), and the
12 : * reference is stored in the relation cache entry.
13 : *
14 : * Accesses that don't go through the relation cache open the SMgrRelation
15 : * directly. That includes flushing buffers from the buffer cache, as well as
16 : * all accesses in auxiliary processes like the checkpointer or the WAL redo
17 : * in the startup process.
18 : *
19 : * Operations like CREATE, DROP, ALTER TABLE also hold SMgrRelation references
20 : * independent of the relation cache. They need to prepare the physical files
21 : * before updating the relation cache.
22 : *
23 : * There is a hash table that holds all the SMgrRelation entries in the
24 : * backend. If you call smgropen() twice for the same rel locator, you get a
25 : * reference to the same SMgrRelation. The reference is valid until the end of
26 : * transaction. This makes repeated access to the same relation efficient,
27 : * and allows caching things like the relation size in the SMgrRelation entry.
28 : *
29 : * At end of transaction, all SMgrRelation entries that haven't been pinned
30 : * are removed. An SMgrRelation can hold kernel file system descriptors for
31 : * the underlying files, and we'd like to close those reasonably soon if the
32 : * file gets deleted. The SMgrRelation references held by the relcache are
33 : * pinned to prevent them from being closed.
34 : *
35 : * There is another mechanism to close file descriptors early:
36 : * PROCSIGNAL_BARRIER_SMGRRELEASE. It is a request to immediately close all
37 : * file descriptors. Upon receiving that signal, the backend closes all file
38 : * descriptors held open by SMgrRelations, but because it can happen in the
39 : * middle of a transaction, we cannot destroy the SMgrRelation objects
40 : * themselves, as there could be pointers to them in active use. See
41 : * smgrrelease() and smgrreleaseall().
42 : *
43 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
44 : * Portions Copyright (c) 1994, Regents of the University of California
45 : *
46 : *
47 : * IDENTIFICATION
48 : * src/backend/storage/smgr/smgr.c
49 : *
50 : *-------------------------------------------------------------------------
51 : */
52 : #include "postgres.h"
53 :
54 : #include "access/xlogutils.h"
55 : #include "lib/ilist.h"
56 : #include "storage/bufmgr.h"
57 : #include "storage/ipc.h"
58 : #include "storage/md.h"
59 : #include "storage/smgr.h"
60 : #include "utils/hsearch.h"
61 : #include "utils/inval.h"
62 :
63 :
64 : /*
65 : * This struct of function pointers defines the API between smgr.c and
66 : * any individual storage manager module. Note that smgr subfunctions are
67 : * generally expected to report problems via elog(ERROR). An exception is
68 : * that smgr_unlink should use elog(WARNING), rather than erroring out,
69 : * because we normally unlink relations during post-commit/abort cleanup,
70 : * and so it's too late to raise an error. Also, various conditions that
71 : * would normally be errors should be allowed during bootstrap and/or WAL
72 : * recovery --- see comments in md.c for details.
73 : */
74 : typedef struct f_smgr
75 : {
76 : void (*smgr_init) (void); /* may be NULL; smgrinit() skips NULL hooks */
77 : void (*smgr_shutdown) (void); /* may be NULL; smgrshutdown() skips NULL hooks */
78 : void (*smgr_open) (SMgrRelation reln); /* called by smgropen() on a newly created entry */
79 : void (*smgr_close) (SMgrRelation reln, ForkNumber forknum); /* called once per fork by smgrrelease() and smgrdestroy() */
80 : void (*smgr_create) (SMgrRelation reln, ForkNumber forknum,
81 : bool isRedo);
82 : bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum);
83 : void (*smgr_unlink) (RelFileLocatorBackend rlocator, ForkNumber forknum,
84 : bool isRedo); /* takes a locator rather than an SMgrRelation */
85 : void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum,
86 : BlockNumber blocknum, const void *buffer, bool skipFsync);
87 : void (*smgr_zeroextend) (SMgrRelation reln, ForkNumber forknum,
88 : BlockNumber blocknum, int nblocks, bool skipFsync);
89 : bool (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
90 : BlockNumber blocknum, int nblocks);
91 : void (*smgr_readv) (SMgrRelation reln, ForkNumber forknum,
92 : BlockNumber blocknum,
93 : void **buffers, BlockNumber nblocks);
94 : void (*smgr_writev) (SMgrRelation reln, ForkNumber forknum,
95 : BlockNumber blocknum,
96 : const void **buffers, BlockNumber nblocks,
97 : bool skipFsync);
98 : void (*smgr_writeback) (SMgrRelation reln, ForkNumber forknum,
99 : BlockNumber blocknum, BlockNumber nblocks);
100 : BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum); /* result is cached by smgrnblocks() */
101 : void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum,
102 : BlockNumber nblocks);
103 : void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum);
104 : void (*smgr_registersync) (SMgrRelation reln, ForkNumber forknum);
105 : } f_smgr;
106 :
107 : static const f_smgr smgrsw[] = {
108 : /* magnetic disk (md.c); this is entry 0, the one smgropen() selects */
109 : {
110 : .smgr_init = mdinit,
111 : .smgr_shutdown = NULL, /* no shutdown hook; smgrshutdown() skips NULL */
112 : .smgr_open = mdopen,
113 : .smgr_close = mdclose,
114 : .smgr_create = mdcreate,
115 : .smgr_exists = mdexists,
116 : .smgr_unlink = mdunlink,
117 : .smgr_extend = mdextend,
118 : .smgr_zeroextend = mdzeroextend,
119 : .smgr_prefetch = mdprefetch,
120 : .smgr_readv = mdreadv,
121 : .smgr_writev = mdwritev,
122 : .smgr_writeback = mdwriteback,
123 : .smgr_nblocks = mdnblocks,
124 : .smgr_truncate = mdtruncate,
125 : .smgr_immedsync = mdimmedsync,
126 : .smgr_registersync = mdregistersync,
127 : }
128 : };
129 :
130 : static const int NSmgr = lengthof(smgrsw); /* number of entries in smgrsw[] */
131 :
132 : /*
133 : * Each backend has a hashtable that stores all extant SMgrRelation objects.
134 : * In addition, "unpinned" SMgrRelation objects are chained together in a list.
135 : */
136 : static HTAB *SMgrRelationHash = NULL; /* created lazily by the first smgropen() call */
137 :
138 : static dlist_head unpinned_relns; /* dlist_init'ed in smgropen() together with the hash table */
139 :
140 : /* local function prototypes */
141 : static void smgrshutdown(int code, Datum arg);
142 : static void smgrdestroy(SMgrRelation reln);
143 :
144 :
145 : /*
146 : * smgrinit(), smgrshutdown() -- Initialize or shut down storage
147 : * managers.
148 : *
149 : * Note: smgrinit is called during backend startup (normal or standalone
150 : * case), *not* during postmaster start. Therefore, any resources created
151 : * here or destroyed in smgrshutdown are backend-local.
152 : */
153 : void
154 29800 : smgrinit(void)
155 : {
156 : int i;
157 :
158 59600 : for (i = 0; i < NSmgr; i++)
159 : {
160 29800 : if (smgrsw[i].smgr_init)
161 29800 : smgrsw[i].smgr_init();
162 : }
163 :
164 : /* register the shutdown proc */
165 29800 : on_proc_exit(smgrshutdown, 0);
166 29800 : }
167 :
168 : /*
169 : * on_proc_exit hook for smgr cleanup during backend shutdown
170 : */
171 : static void
172 29800 : smgrshutdown(int code, Datum arg)
173 : {
174 : int i;
175 :
176 59600 : for (i = 0; i < NSmgr; i++)
177 : {
178 29800 : if (smgrsw[i].smgr_shutdown)
179 0 : smgrsw[i].smgr_shutdown();
180 : }
181 29800 : }
182 :
183 : /*
184 : * smgropen() -- Return an SMgrRelation object, creating it if need be.
185 : *
186 : * In versions of PostgreSQL prior to 17, this function returned an object
187 : * with no defined lifetime. Now, however, the object remains valid for the
188 : * lifetime of the transaction, up to the point where AtEOXact_SMgr() is
189 : * called, making it much easier for callers to know for how long they can
190 : * hold on to a pointer to the returned object. If this function is called
191 : * outside of a transaction, the object remains valid until smgrdestroy() or
192 : * smgrdestroyall() is called. Background processes that use smgr but not
193 : * transactions typically do this once per checkpoint cycle.
194 : *
195 : * This does not attempt to actually open the underlying files.
196 : */
197 : SMgrRelation
198 15319280 : smgropen(RelFileLocator rlocator, ProcNumber backend)
199 : {
200 : RelFileLocatorBackend brlocator;
201 : SMgrRelation reln;
202 : bool found;
203 :
204 : Assert(RelFileNumberIsValid(rlocator.relNumber));
205 :
206 15319280 : if (SMgrRelationHash == NULL)
207 : {
208 : /* First time through: initialize the hash table */
209 : HASHCTL ctl;
210 :
211 26638 : ctl.keysize = sizeof(RelFileLocatorBackend);
212 26638 : ctl.entrysize = sizeof(SMgrRelationData);
213 26638 : SMgrRelationHash = hash_create("smgr relation table", 400,
214 : &ctl, HASH_ELEM | HASH_BLOBS);
215 26638 : dlist_init(&unpinned_relns);
216 : }
217 :
218 : /* Look up or create an entry */
219 15319280 : brlocator.locator = rlocator;
220 15319280 : brlocator.backend = backend;
221 15319280 : reln = (SMgrRelation) hash_search(SMgrRelationHash,
222 : &brlocator,
223 : HASH_ENTER, &found);
224 :
225 : /* Initialize it if not present before */
226 15319280 : if (!found)
227 : {
228 : /* hash_search already filled in the lookup key */
229 1539032 : reln->smgr_targblock = InvalidBlockNumber;
230 7695160 : for (int i = 0; i <= MAX_FORKNUM; ++i)
231 6156128 : reln->smgr_cached_nblocks[i] = InvalidBlockNumber;
232 1539032 : reln->smgr_which = 0; /* we only have md.c at present */
233 :
234 : /* implementation-specific initialization */
235 1539032 : smgrsw[reln->smgr_which].smgr_open(reln);
236 :
237 : /* it is not pinned yet */
238 1539032 : reln->pincount = 0;
239 1539032 : dlist_push_tail(&unpinned_relns, &reln->node);
240 : }
241 :
242 15319280 : return reln;
243 : }
244 :
245 : /*
246 : * smgrpin() -- Prevent an SMgrRelation object from being destroyed at end of
247 : * of transaction
248 : */
249 : void
250 1299344 : smgrpin(SMgrRelation reln)
251 : {
252 1299344 : if (reln->pincount == 0)
253 1299344 : dlist_delete(&reln->node);
254 1299344 : reln->pincount++;
255 1299344 : }
256 :
257 : /*
258 : * smgrunpin() -- Allow an SMgrRelation object to be destroyed at end of
259 : * transaction
260 : *
261 : * The object remains valid, but if there are no other pins on it, it is moved
262 : * to the unpinned list where it will be destroyed by AtEOXact_SMgr().
263 : */
264 : void
265 361682 : smgrunpin(SMgrRelation reln)
266 : {
267 : Assert(reln->pincount > 0);
268 361682 : reln->pincount--;
269 361682 : if (reln->pincount == 0)
270 361682 : dlist_push_tail(&unpinned_relns, &reln->node);
271 361682 : }
272 :
273 : /*
274 : * smgrdestroy() -- Delete an SMgrRelation object.
275 : */
276 : static void
277 524528 : smgrdestroy(SMgrRelation reln)
278 : {
279 : ForkNumber forknum;
280 :
281 : Assert(reln->pincount == 0);
282 :
283 2622640 : for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
284 2098112 : smgrsw[reln->smgr_which].smgr_close(reln, forknum);
285 :
286 524528 : dlist_delete(&reln->node);
287 :
288 524528 : if (hash_search(SMgrRelationHash,
289 524528 : &(reln->smgr_rlocator),
290 : HASH_REMOVE, NULL) == NULL)
291 0 : elog(ERROR, "SMgrRelation hashtable corrupted");
292 524528 : }
293 :
294 : /*
295 : * smgrrelease() -- Release all resources used by this object.
296 : *
297 : * The object remains valid.
298 : */
299 : void
300 694688 : smgrrelease(SMgrRelation reln)
301 : {
302 3473440 : for (ForkNumber forknum = 0; forknum <= MAX_FORKNUM; forknum++)
303 : {
304 2778752 : smgrsw[reln->smgr_which].smgr_close(reln, forknum);
305 2778752 : reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber;
306 : }
307 694688 : reln->smgr_targblock = InvalidBlockNumber;
308 694688 : }
309 :
310 : /*
311 : * smgrclose() -- Close an SMgrRelation object.
312 : *
313 : * The SMgrRelation reference should not be used after this call. However,
314 : * because we don't keep track of the references returned by smgropen(), we
315 : * don't know if there are other references still pointing to the same object,
316 : * so we cannot remove the SMgrRelation object yet. Therefore, this is just a
317 : * synonym for smgrrelease() at the moment.
318 : */
319 : void
320 509922 : smgrclose(SMgrRelation reln)
321 : {
322 509922 : smgrrelease(reln);
323 509922 : }
324 :
325 : /*
326 : * smgrdestroyall() -- Release resources used by all unpinned objects.
327 : *
328 : * It must be known that there are no pointers to SMgrRelations, other than
329 : * those pinned with smgrpin().
330 : */
331 : void
332 566264 : smgrdestroyall(void)
333 : {
334 : dlist_mutable_iter iter;
335 :
336 : /*
337 : * Zap all unpinned SMgrRelations. We rely on smgrdestroy() to remove
338 : * each one from the list.
339 : */
340 1090792 : dlist_foreach_modify(iter, &unpinned_relns)
341 : {
342 524528 : SMgrRelation rel = dlist_container(SMgrRelationData, node,
343 : iter.cur);
344 :
345 524528 : smgrdestroy(rel);
346 : }
347 566264 : }
348 :
349 : /*
350 : * smgrreleaseall() -- Release resources used by all objects.
351 : */
352 : void
353 4696 : smgrreleaseall(void)
354 : {
355 : HASH_SEQ_STATUS status;
356 : SMgrRelation reln;
357 :
358 : /* Nothing to do if hashtable not set up */
359 4696 : if (SMgrRelationHash == NULL)
360 162 : return;
361 :
362 4534 : hash_seq_init(&status, SMgrRelationHash);
363 :
364 166928 : while ((reln = (SMgrRelation) hash_seq_search(&status)) != NULL)
365 : {
366 162394 : smgrrelease(reln);
367 : }
368 : }
369 :
370 : /*
371 : * smgrreleaserellocator() -- Release resources for given RelFileLocator, if
372 : * it's open.
373 : *
374 : * This has the same effects as smgrrelease(smgropen(rlocator)), but avoids
375 : * uselessly creating a hashtable entry only to drop it again when no
376 : * such entry exists already.
377 : */
378 : void
379 385284 : smgrreleaserellocator(RelFileLocatorBackend rlocator)
380 : {
381 : SMgrRelation reln;
382 :
383 : /* Nothing to do if hashtable not set up */
384 385284 : if (SMgrRelationHash == NULL)
385 252 : return;
386 :
387 385032 : reln = (SMgrRelation) hash_search(SMgrRelationHash,
388 : &rlocator,
389 : HASH_FIND, NULL);
390 385032 : if (reln != NULL)
391 22372 : smgrrelease(reln);
392 : }
393 :
394 : /*
395 : * smgrexists() -- Does the underlying file for a fork exist?
396 : */
397 : bool
398 1845920 : smgrexists(SMgrRelation reln, ForkNumber forknum)
399 : {
400 1845920 : return smgrsw[reln->smgr_which].smgr_exists(reln, forknum);
401 : }
402 :
403 : /*
404 : * smgrcreate() -- Create a new relation.
405 : *
406 : * Given an already-created (but presumably unused) SMgrRelation,
407 : * cause the underlying disk file or other storage for the fork
408 : * to be created.
409 : */
410 : void
411 5476914 : smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
412 : {
413 5476914 : smgrsw[reln->smgr_which].smgr_create(reln, forknum, isRedo);
414 5476914 : }
415 :
416 : /*
417 : * smgrdosyncall() -- Immediately sync all forks of all given relations
418 : *
419 : * All forks of all given relations are synced out to the store.
420 : *
421 : * This is equivalent to FlushRelationBuffers() for each smgr relation,
422 : * then calling smgrimmedsync() for all forks of each relation, but it's
423 : * significantly quicker so should be preferred when possible.
424 : */
425 : void
426 20 : smgrdosyncall(SMgrRelation *rels, int nrels)
427 : {
428 20 : int i = 0;
429 : ForkNumber forknum;
430 :
431 20 : if (nrels == 0)
432 0 : return;
433 :
434 20 : FlushRelationsAllBuffers(rels, nrels);
435 :
436 : /*
437 : * Sync the physical file(s).
438 : */
439 44 : for (i = 0; i < nrels; i++)
440 : {
441 24 : int which = rels[i]->smgr_which;
442 :
443 120 : for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
444 : {
445 96 : if (smgrsw[which].smgr_exists(rels[i], forknum))
446 26 : smgrsw[which].smgr_immedsync(rels[i], forknum);
447 : }
448 : }
449 : }
450 :
451 : /*
452 : * smgrdounlinkall() -- Immediately unlink all forks of all given relations
453 : *
454 : * All forks of all given relations are removed from the store. This
455 : * should not be used during transactional operations, since it can't be
456 : * undone.
457 : *
458 : * If isRedo is true, it is okay for the underlying file(s) to be gone
459 : * already.
460 : */
461 : void
462 24590 : smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
463 : {
464 24590 : int i = 0;
465 : RelFileLocatorBackend *rlocators;
466 : ForkNumber forknum;
467 :
468 24590 : if (nrels == 0)
469 762 : return;
470 :
471 : /*
472 : * Get rid of any remaining buffers for the relations. bufmgr will just
473 : * drop them without bothering to write the contents.
474 : */
475 23828 : DropRelationsAllBuffers(rels, nrels);
476 :
477 : /*
478 : * create an array which contains all relations to be dropped, and close
479 : * each relation's forks at the smgr level while at it
480 : */
481 23828 : rlocators = palloc(sizeof(RelFileLocatorBackend) * nrels);
482 106522 : for (i = 0; i < nrels; i++)
483 : {
484 82694 : RelFileLocatorBackend rlocator = rels[i]->smgr_rlocator;
485 82694 : int which = rels[i]->smgr_which;
486 :
487 82694 : rlocators[i] = rlocator;
488 :
489 : /* Close the forks at smgr level */
490 413470 : for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
491 330776 : smgrsw[which].smgr_close(rels[i], forknum);
492 : }
493 :
494 : /*
495 : * Send a shared-inval message to force other backends to close any
496 : * dangling smgr references they may have for these rels. We should do
497 : * this before starting the actual unlinking, in case we fail partway
498 : * through that step. Note that the sinval messages will eventually come
499 : * back to this backend, too, and thereby provide a backstop that we
500 : * closed our own smgr rel.
501 : */
502 106522 : for (i = 0; i < nrels; i++)
503 82694 : CacheInvalidateSmgr(rlocators[i]);
504 :
505 : /*
506 : * Delete the physical file(s).
507 : *
508 : * Note: smgr_unlink must treat deletion failure as a WARNING, not an
509 : * ERROR, because we've already decided to commit or abort the current
510 : * xact.
511 : */
512 :
513 106522 : for (i = 0; i < nrels; i++)
514 : {
515 82694 : int which = rels[i]->smgr_which;
516 :
517 413470 : for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
518 330776 : smgrsw[which].smgr_unlink(rlocators[i], forknum, isRedo);
519 : }
520 :
521 23828 : pfree(rlocators);
522 : }
523 :
524 :
525 : /*
526 : * smgrextend() -- Add a new block to a file.
527 : *
528 : * The semantics are nearly the same as smgrwrite(): write at the
529 : * specified position. However, this is to be used for the case of
530 : * extending a relation (i.e., blocknum is at or beyond the current
531 : * EOF). Note that we assume writing a block beyond current EOF
532 : * causes intervening file space to become filled with zeroes.
533 : */
534 : void
535 213316 : smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
536 : const void *buffer, bool skipFsync)
537 : {
538 213316 : smgrsw[reln->smgr_which].smgr_extend(reln, forknum, blocknum,
539 : buffer, skipFsync);
540 :
541 : /*
542 : * Normally we expect this to increase nblocks by one, but if the cached
543 : * value isn't as expected, just invalidate it so the next call asks the
544 : * kernel.
545 : */
546 213316 : if (reln->smgr_cached_nblocks[forknum] == blocknum)
547 106010 : reln->smgr_cached_nblocks[forknum] = blocknum + 1;
548 : else
549 107306 : reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber;
550 213316 : }
551 :
552 : /*
553 : * smgrzeroextend() -- Add new zeroed out blocks to a file.
554 : *
555 : * Similar to smgrextend(), except the relation can be extended by
556 : * multiple blocks at once and the added blocks will be filled with
557 : * zeroes.
558 : */
559 : void
560 368476 : smgrzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
561 : int nblocks, bool skipFsync)
562 : {
563 368476 : smgrsw[reln->smgr_which].smgr_zeroextend(reln, forknum, blocknum,
564 : nblocks, skipFsync);
565 :
566 : /*
567 : * Normally we expect this to increase the fork size by nblocks, but if
568 : * the cached value isn't as expected, just invalidate it so the next call
569 : * asks the kernel.
570 : */
571 368476 : if (reln->smgr_cached_nblocks[forknum] == blocknum)
572 368476 : reln->smgr_cached_nblocks[forknum] = blocknum + nblocks;
573 : else
574 0 : reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber;
575 368476 : }
576 :
577 : /*
578 : * smgrprefetch() -- Initiate asynchronous read of the specified block of a relation.
579 : *
580 : * In recovery only, this can return false to indicate that a file
581 : * doesn't exist (presumably it has been dropped by a later WAL
582 : * record).
583 : */
584 : bool
585 252300 : smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
586 : int nblocks)
587 : {
588 252300 : return smgrsw[reln->smgr_which].smgr_prefetch(reln, forknum, blocknum, nblocks);
589 : }
590 :
591 : /*
592 : * smgrreadv() -- read a particular block range from a relation into the
593 : * supplied buffers.
594 : *
595 : * This routine is called from the buffer manager in order to
596 : * instantiate pages in the shared buffer cache. All storage managers
597 : * return pages in the format that POSTGRES expects.
598 : */
599 : void
600 2136586 : smgrreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
601 : void **buffers, BlockNumber nblocks)
602 : {
603 2136586 : smgrsw[reln->smgr_which].smgr_readv(reln, forknum, blocknum, buffers,
604 : nblocks);
605 2136556 : }
606 :
607 : /*
608 : * smgrwritev() -- Write the supplied buffers out.
609 : *
610 : * This is to be used only for updating already-existing blocks of a
611 : * relation (ie, those before the current EOF). To extend a relation,
612 : * use smgrextend().
613 : *
614 : * This is not a synchronous write -- the block is not necessarily
615 : * on disk at return, only dumped out to the kernel. However,
616 : * provisions will be made to fsync the write before the next checkpoint.
617 : *
618 : * NB: The mechanism to ensure fsync at next checkpoint assumes that there is
619 : * something that prevents a concurrent checkpoint from "racing ahead" of the
620 : * write. One way to prevent that is by holding a lock on the buffer; the
621 : * buffer manager's writes are protected by that. The bulk writer facility
622 : * in bulk_write.c checks the redo pointer and calls smgrimmedsync() if a
623 : * checkpoint happened; that relies on the fact that no other backend can be
624 : * concurrently modifying the page.
625 : *
626 : * skipFsync indicates that the caller will make other provisions to
627 : * fsync the relation, so we needn't bother. Temporary relations also
628 : * do not require fsync.
629 : */
630 : void
631 884672 : smgrwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
632 : const void **buffers, BlockNumber nblocks, bool skipFsync)
633 : {
634 884672 : smgrsw[reln->smgr_which].smgr_writev(reln, forknum, blocknum,
635 : buffers, nblocks, skipFsync);
636 884672 : }
637 :
638 : /*
639 : * smgrwriteback() -- Trigger kernel writeback for the supplied range of
640 : * blocks.
641 : */
642 : void
643 133720 : smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
644 : BlockNumber nblocks)
645 : {
646 133720 : smgrsw[reln->smgr_which].smgr_writeback(reln, forknum, blocknum,
647 : nblocks);
648 133720 : }
649 :
650 : /*
651 : * smgrnblocks() -- Calculate the number of blocks in the
652 : * supplied relation.
653 : */
654 : BlockNumber
655 9808982 : smgrnblocks(SMgrRelation reln, ForkNumber forknum)
656 : {
657 : BlockNumber result;
658 :
659 : /* Check and return if we get the cached value for the number of blocks. */
660 9808982 : result = smgrnblocks_cached(reln, forknum);
661 9808982 : if (result != InvalidBlockNumber)
662 6327832 : return result;
663 :
664 3481150 : result = smgrsw[reln->smgr_which].smgr_nblocks(reln, forknum);
665 :
666 3481112 : reln->smgr_cached_nblocks[forknum] = result;
667 :
668 3481112 : return result;
669 : }
670 :
671 : /*
672 : * smgrnblocks_cached() -- Get the cached number of blocks in the supplied
673 : * relation.
674 : *
675 : * Returns an InvalidBlockNumber when not in recovery and when the relation
676 : * fork size is not cached.
677 : */
678 : BlockNumber
679 9846182 : smgrnblocks_cached(SMgrRelation reln, ForkNumber forknum)
680 : {
681 : /*
682 : * For now, we only use cached values in recovery due to lack of a shared
683 : * invalidation mechanism for changes in file size.
684 : */
685 9846182 : if (InRecovery && reln->smgr_cached_nblocks[forknum] != InvalidBlockNumber)
686 6332270 : return reln->smgr_cached_nblocks[forknum];
687 :
688 3513912 : return InvalidBlockNumber;
689 : }
690 :
691 : /*
692 : * smgrtruncate() -- Truncate the given forks of supplied relation to
693 : * each specified numbers of blocks
694 : *
695 : * The truncation is done immediately, so this can't be rolled back.
696 : *
697 : * The caller must hold AccessExclusiveLock on the relation, to ensure that
698 : * other backends receive the smgr invalidation event that this function sends
699 : * before they access any forks of the relation again.
700 : */
701 : void
702 1122 : smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nblocks)
703 : {
704 : int i;
705 :
706 : /*
707 : * Get rid of any buffers for the about-to-be-deleted blocks. bufmgr will
708 : * just drop them without bothering to write the contents.
709 : */
710 1122 : DropRelationBuffers(reln, forknum, nforks, nblocks);
711 :
712 : /*
713 : * Send a shared-inval message to force other backends to close any smgr
714 : * references they may have for this rel. This is useful because they
715 : * might have open file pointers to segments that got removed, and/or
716 : * smgr_targblock variables pointing past the new rel end. (The inval
717 : * message will come back to our backend, too, causing a
718 : * probably-unnecessary local smgr flush. But we don't expect that this
719 : * is a performance-critical path.) As in the unlink code, we want to be
720 : * sure the message is sent before we start changing things on-disk.
721 : */
722 1122 : CacheInvalidateSmgr(reln->smgr_rlocator);
723 :
724 : /* Do the truncation */
725 2656 : for (i = 0; i < nforks; i++)
726 : {
727 : /* Make the cached size is invalid if we encounter an error. */
728 1534 : reln->smgr_cached_nblocks[forknum[i]] = InvalidBlockNumber;
729 :
730 1534 : smgrsw[reln->smgr_which].smgr_truncate(reln, forknum[i], nblocks[i]);
731 :
732 : /*
733 : * We might as well update the local smgr_cached_nblocks values. The
734 : * smgr cache inval message that this function sent will cause other
735 : * backends to invalidate their copies of smgr_fsm_nblocks and
736 : * smgr_vm_nblocks, and these ones too at the next command boundary.
737 : * But these ensure they aren't outright wrong until then.
738 : */
739 1534 : reln->smgr_cached_nblocks[forknum[i]] = nblocks[i];
740 : }
741 1122 : }
742 :
743 : /*
744 : * smgrregistersync() -- Request a relation to be sync'd at next checkpoint
745 : *
746 : * This can be used after calling smgrwrite() or smgrextend() with skipFsync =
747 : * true, to register the fsyncs that were skipped earlier.
748 : *
749 : * Note: be mindful that a checkpoint could already have happened between the
750 : * smgrwrite or smgrextend calls and this! In that case, the checkpoint
751 : * already missed fsyncing this relation, and you should use smgrimmedsync
752 : * instead. Most callers should use the bulk loading facility in bulk_write.c
753 : * which handles all that.
754 : */
755 : void
756 43874 : smgrregistersync(SMgrRelation reln, ForkNumber forknum)
757 : {
758 43874 : smgrsw[reln->smgr_which].smgr_registersync(reln, forknum);
759 43874 : }
760 :
761 : /*
762 : * smgrimmedsync() -- Force the specified relation to stable storage.
763 : *
764 : * Synchronously force all previous writes to the specified relation
765 : * down to disk.
766 : *
767 : * This is useful for building completely new relations (eg, new
768 : * indexes). Instead of incrementally WAL-logging the index build
769 : * steps, we can just write completed index pages to disk with smgrwrite
770 : * or smgrextend, and then fsync the completed index file before
771 : * committing the transaction. (This is sufficient for purposes of
772 : * crash recovery, since it effectively duplicates forcing a checkpoint
773 : * for the completed index. But it is *not* sufficient if one wishes
774 : * to use the WAL log for PITR or replication purposes: in that case
775 : * we have to make WAL entries as well.)
776 : *
777 : * The preceding writes should specify skipFsync = true to avoid
778 : * duplicative fsyncs.
779 : *
780 : * Note that you need to do FlushRelationBuffers() first if there is
781 : * any possibility that there are dirty buffers for the relation;
782 : * otherwise the sync is not very meaningful.
783 : *
784 : * Most callers should use the bulk loading facility in bulk_write.c
785 : * instead of calling this directly.
786 : */
787 : void
788 0 : smgrimmedsync(SMgrRelation reln, ForkNumber forknum)
789 : {
790 0 : smgrsw[reln->smgr_which].smgr_immedsync(reln, forknum);
791 0 : }
792 :
/*
 * AtEOXact_SMgr
 *
 * Called during transaction commit or abort (it makes no difference
 * which).  Destroys all unpinned SMgrRelation objects.
 *
 * This is a compromise.  We want transient SMgrRelations to live for a
 * while, to amortize the costs of blind writes of multiple blocks.  But
 * they must not live forever: we are probably holding open a kernel file
 * descriptor for the underlying file, and that has to get closed reasonably
 * soon if the file gets deleted.
 */
void
AtEOXact_SMgr(void)
{
	/* pinned objects are not on the unpinned list and so survive this */
	smgrdestroyall();
}
810 :
811 : /*
812 : * This routine is called when we are ordered to release all open files by a
813 : * ProcSignalBarrier.
814 : */
815 : bool
816 572 : ProcessBarrierSmgrRelease(void)
817 : {
818 572 : smgrreleaseall();
819 572 : return true;
820 : }
|