Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * smgr.c
4 : * public interface routines to storage manager switch.
5 : *
6 : * All file system operations in POSTGRES dispatch through these
7 : * routines.
8 : *
9 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
10 : * Portions Copyright (c) 1994, Regents of the University of California
11 : *
12 : *
13 : * IDENTIFICATION
14 : * src/backend/storage/smgr/smgr.c
15 : *
16 : *-------------------------------------------------------------------------
17 : */
18 : #include "postgres.h"
19 :
20 : #include "access/xlogutils.h"
21 : #include "lib/ilist.h"
22 : #include "storage/bufmgr.h"
23 : #include "storage/fd.h"
24 : #include "storage/ipc.h"
25 : #include "storage/md.h"
26 : #include "storage/smgr.h"
27 : #include "utils/hsearch.h"
28 : #include "utils/inval.h"
29 :
30 :
31 : /*
32 : * This struct of function pointers defines the API between smgr.c and
33 : * any individual storage manager module. Note that smgr subfunctions are
34 : * generally expected to report problems via elog(ERROR). An exception is
35 : * that smgr_unlink should use elog(WARNING), rather than erroring out,
36 : * because we normally unlink relations during post-commit/abort cleanup,
37 : * and so it's too late to raise an error. Also, various conditions that
38 : * would normally be errors should be allowed during bootstrap and/or WAL
39 : * recovery --- see comments in md.c for details.
40 : */
41 : typedef struct f_smgr
42 : {
43 : void (*smgr_init) (void); /* may be NULL */
44 : void (*smgr_shutdown) (void); /* may be NULL */
45 : void (*smgr_open) (SMgrRelation reln);
46 : void (*smgr_close) (SMgrRelation reln, ForkNumber forknum);
47 : void (*smgr_create) (SMgrRelation reln, ForkNumber forknum,
48 : bool isRedo);
49 : bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum);
50 : void (*smgr_unlink) (RelFileLocatorBackend rlocator, ForkNumber forknum,
51 : bool isRedo);
52 : void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum,
53 : BlockNumber blocknum, const void *buffer, bool skipFsync);
54 : void (*smgr_zeroextend) (SMgrRelation reln, ForkNumber forknum,
55 : BlockNumber blocknum, int nblocks, bool skipFsync);
56 : bool (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
57 : BlockNumber blocknum);
58 : void (*smgr_read) (SMgrRelation reln, ForkNumber forknum,
59 : BlockNumber blocknum, void *buffer);
60 : void (*smgr_write) (SMgrRelation reln, ForkNumber forknum,
61 : BlockNumber blocknum, const void *buffer, bool skipFsync);
62 : void (*smgr_writeback) (SMgrRelation reln, ForkNumber forknum,
63 : BlockNumber blocknum, BlockNumber nblocks);
64 : BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum);
65 : void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum,
66 : BlockNumber nblocks);
67 : void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum);
68 : } f_smgr;
69 :
70 : static const f_smgr smgrsw[] = {
71 : /* magnetic disk */
72 : {
73 : .smgr_init = mdinit,
74 : .smgr_shutdown = NULL,
75 : .smgr_open = mdopen,
76 : .smgr_close = mdclose,
77 : .smgr_create = mdcreate,
78 : .smgr_exists = mdexists,
79 : .smgr_unlink = mdunlink,
80 : .smgr_extend = mdextend,
81 : .smgr_zeroextend = mdzeroextend,
82 : .smgr_prefetch = mdprefetch,
83 : .smgr_read = mdread,
84 : .smgr_write = mdwrite,
85 : .smgr_writeback = mdwriteback,
86 : .smgr_nblocks = mdnblocks,
87 : .smgr_truncate = mdtruncate,
88 : .smgr_immedsync = mdimmedsync,
89 : }
90 : };
91 :
92 : static const int NSmgr = lengthof(smgrsw);
93 :
94 : /*
95 : * Each backend has a hashtable that stores all extant SMgrRelation objects.
96 : * In addition, "unowned" SMgrRelation objects are chained together in a list.
97 : */
98 : static HTAB *SMgrRelationHash = NULL;
99 :
100 : static dlist_head unowned_relns;
101 :
102 : /* local function prototypes */
103 : static void smgrshutdown(int code, Datum arg);
104 :
105 :
106 : /*
107 : * smgrinit(), smgrshutdown() -- Initialize or shut down storage
108 : * managers.
109 : *
110 : * Note: smgrinit is called during backend startup (normal or standalone
111 : * case), *not* during postmaster start. Therefore, any resources created
112 : * here or destroyed in smgrshutdown are backend-local.
113 : */
114 : void
115 27812 : smgrinit(void)
116 : {
117 : int i;
118 :
119 55624 : for (i = 0; i < NSmgr; i++)
120 : {
121 27812 : if (smgrsw[i].smgr_init)
122 27812 : smgrsw[i].smgr_init();
123 : }
124 :
125 : /* register the shutdown proc */
126 27812 : on_proc_exit(smgrshutdown, 0);
127 27812 : }
128 :
129 : /*
130 : * on_proc_exit hook for smgr cleanup during backend shutdown
131 : */
132 : static void
133 27812 : smgrshutdown(int code, Datum arg)
134 : {
135 : int i;
136 :
137 55624 : for (i = 0; i < NSmgr; i++)
138 : {
139 27812 : if (smgrsw[i].smgr_shutdown)
140 0 : smgrsw[i].smgr_shutdown();
141 : }
142 27812 : }
143 :
144 : /*
145 : * smgropen() -- Return an SMgrRelation object, creating it if need be.
146 : *
147 : * This does not attempt to actually open the underlying file.
148 : */
149 : SMgrRelation
150 14965292 : smgropen(RelFileLocator rlocator, BackendId backend)
151 : {
152 : RelFileLocatorBackend brlocator;
153 : SMgrRelation reln;
154 : bool found;
155 :
156 14965292 : if (SMgrRelationHash == NULL)
157 : {
158 : /* First time through: initialize the hash table */
159 : HASHCTL ctl;
160 :
161 25028 : ctl.keysize = sizeof(RelFileLocatorBackend);
162 25028 : ctl.entrysize = sizeof(SMgrRelationData);
163 25028 : SMgrRelationHash = hash_create("smgr relation table", 400,
164 : &ctl, HASH_ELEM | HASH_BLOBS);
165 25028 : dlist_init(&unowned_relns);
166 : }
167 :
168 : /* Look up or create an entry */
169 14965292 : brlocator.locator = rlocator;
170 14965292 : brlocator.backend = backend;
171 14965292 : reln = (SMgrRelation) hash_search(SMgrRelationHash,
172 : &brlocator,
173 : HASH_ENTER, &found);
174 :
175 : /* Initialize it if not present before */
176 14965292 : if (!found)
177 : {
178 : /* hash_search already filled in the lookup key */
179 1709164 : reln->smgr_owner = NULL;
180 1709164 : reln->smgr_targblock = InvalidBlockNumber;
181 8545820 : for (int i = 0; i <= MAX_FORKNUM; ++i)
182 6836656 : reln->smgr_cached_nblocks[i] = InvalidBlockNumber;
183 1709164 : reln->smgr_which = 0; /* we only have md.c at present */
184 :
185 : /* implementation-specific initialization */
186 1709164 : smgrsw[reln->smgr_which].smgr_open(reln);
187 :
188 : /* it has no owner yet */
189 1709164 : dlist_push_tail(&unowned_relns, &reln->node);
190 : }
191 :
192 14965292 : return reln;
193 : }
194 :
195 : /*
196 : * smgrsetowner() -- Establish a long-lived reference to an SMgrRelation object
197 : *
198 : * There can be only one owner at a time; this is sufficient since currently
199 : * the only such owners exist in the relcache.
200 : */
201 : void
202 1266934 : smgrsetowner(SMgrRelation *owner, SMgrRelation reln)
203 : {
204 : /* We don't support "disowning" an SMgrRelation here, use smgrclearowner */
205 : Assert(owner != NULL);
206 :
207 : /*
208 : * First, unhook any old owner. (Normally there shouldn't be any, but it
209 : * seems possible that this can happen during swap_relation_files()
210 : * depending on the order of processing. It's ok to close the old
211 : * relcache entry early in that case.)
212 : *
213 : * If there isn't an old owner, then the reln should be in the unowned
214 : * list, and we need to remove it.
215 : */
216 1266934 : if (reln->smgr_owner)
217 982 : *(reln->smgr_owner) = NULL;
218 : else
219 1265952 : dlist_delete(&reln->node);
220 :
221 : /* Now establish the ownership relationship. */
222 1266934 : reln->smgr_owner = owner;
223 1266934 : *owner = reln;
224 1266934 : }
225 :
226 : /*
227 : * smgrclearowner() -- Remove long-lived reference to an SMgrRelation object
228 : * if one exists
229 : */
230 : void
231 55838 : smgrclearowner(SMgrRelation *owner, SMgrRelation reln)
232 : {
233 : /* Do nothing if the SMgrRelation object is not owned by the owner */
234 55838 : if (reln->smgr_owner != owner)
235 0 : return;
236 :
237 : /* unset the owner's reference */
238 55838 : *owner = NULL;
239 :
240 : /* unset our reference to the owner */
241 55838 : reln->smgr_owner = NULL;
242 :
243 : /* add to list of unowned relations */
244 55838 : dlist_push_tail(&unowned_relns, &reln->node);
245 : }
246 :
247 : /*
248 : * smgrexists() -- Does the underlying file for a fork exist?
249 : */
250 : bool
251 1750390 : smgrexists(SMgrRelation reln, ForkNumber forknum)
252 : {
253 1750390 : return smgrsw[reln->smgr_which].smgr_exists(reln, forknum);
254 : }
255 :
256 : /*
257 : * smgrclose() -- Close and delete an SMgrRelation object.
258 : */
259 : void
260 778092 : smgrclose(SMgrRelation reln)
261 : {
262 : SMgrRelation *owner;
263 : ForkNumber forknum;
264 :
265 3890460 : for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
266 3112368 : smgrsw[reln->smgr_which].smgr_close(reln, forknum);
267 :
268 778092 : owner = reln->smgr_owner;
269 :
270 778092 : if (!owner)
271 439286 : dlist_delete(&reln->node);
272 :
273 778092 : if (hash_search(SMgrRelationHash,
274 778092 : &(reln->smgr_rlocator),
275 : HASH_REMOVE, NULL) == NULL)
276 0 : elog(ERROR, "SMgrRelation hashtable corrupted");
277 :
278 : /*
279 : * Unhook the owner pointer, if any. We do this last since in the remote
280 : * possibility of failure above, the SMgrRelation object will still exist.
281 : */
282 778092 : if (owner)
283 338806 : *owner = NULL;
284 778092 : }
285 :
286 : /*
287 : * smgrrelease() -- Release all resources used by this object.
288 : *
289 : * The object remains valid.
290 : */
291 : void
292 22386 : smgrrelease(SMgrRelation reln)
293 : {
294 111930 : for (ForkNumber forknum = 0; forknum <= MAX_FORKNUM; forknum++)
295 : {
296 89544 : smgrsw[reln->smgr_which].smgr_close(reln, forknum);
297 89544 : reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber;
298 : }
299 22386 : reln->smgr_targblock = InvalidBlockNumber;
300 22386 : }
301 :
302 : /*
303 : * smgrreleaseall() -- Release resources used by all objects.
304 : *
305 : * This is called for PROCSIGNAL_BARRIER_SMGRRELEASE.
306 : */
307 : void
308 534 : smgrreleaseall(void)
309 : {
310 : HASH_SEQ_STATUS status;
311 : SMgrRelation reln;
312 :
313 : /* Nothing to do if hashtable not set up */
314 534 : if (SMgrRelationHash == NULL)
315 150 : return;
316 :
317 384 : hash_seq_init(&status, SMgrRelationHash);
318 :
319 22770 : while ((reln = (SMgrRelation) hash_seq_search(&status)) != NULL)
320 22386 : smgrrelease(reln);
321 : }
322 :
323 : /*
324 : * smgrcloseall() -- Close all existing SMgrRelation objects.
325 : */
326 : void
327 5170 : smgrcloseall(void)
328 : {
329 : HASH_SEQ_STATUS status;
330 : SMgrRelation reln;
331 :
332 : /* Nothing to do if hashtable not set up */
333 5170 : if (SMgrRelationHash == NULL)
334 290 : return;
335 :
336 4880 : hash_seq_init(&status, SMgrRelationHash);
337 :
338 39802 : while ((reln = (SMgrRelation) hash_seq_search(&status)) != NULL)
339 34922 : smgrclose(reln);
340 : }
341 :
342 : /*
343 : * smgrcloserellocator() -- Close SMgrRelation object for given RelFileLocator,
344 : * if one exists.
345 : *
346 : * This has the same effects as smgrclose(smgropen(rlocator)), but it avoids
347 : * uselessly creating a hashtable entry only to drop it again when no
348 : * such entry exists already.
349 : */
350 : void
351 538038 : smgrcloserellocator(RelFileLocatorBackend rlocator)
352 : {
353 : SMgrRelation reln;
354 :
355 : /* Nothing to do if hashtable not set up */
356 538038 : if (SMgrRelationHash == NULL)
357 192 : return;
358 :
359 537846 : reln = (SMgrRelation) hash_search(SMgrRelationHash,
360 : &rlocator,
361 : HASH_FIND, NULL);
362 537846 : if (reln != NULL)
363 184974 : smgrclose(reln);
364 : }
365 :
366 : /*
367 : * smgrcreate() -- Create a new relation.
368 : *
369 : * Given an already-created (but presumably unused) SMgrRelation,
370 : * cause the underlying disk file or other storage for the fork
371 : * to be created.
372 : */
373 : void
374 5369172 : smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
375 : {
376 5369172 : smgrsw[reln->smgr_which].smgr_create(reln, forknum, isRedo);
377 5369172 : }
378 :
379 : /*
380 : * smgrdosyncall() -- Immediately sync all forks of all given relations
381 : *
382 : * All forks of all given relations are synced out to the store.
383 : *
384 : * This is equivalent to FlushRelationBuffers() for each smgr relation,
385 : * then calling smgrimmedsync() for all forks of each relation, but it's
386 : * significantly quicker so should be preferred when possible.
387 : */
388 : void
389 18 : smgrdosyncall(SMgrRelation *rels, int nrels)
390 : {
391 18 : int i = 0;
392 : ForkNumber forknum;
393 :
394 18 : if (nrels == 0)
395 0 : return;
396 :
397 18 : FlushRelationsAllBuffers(rels, nrels);
398 :
399 : /*
400 : * Sync the physical file(s).
401 : */
402 36 : for (i = 0; i < nrels; i++)
403 : {
404 18 : int which = rels[i]->smgr_which;
405 :
406 90 : for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
407 : {
408 72 : if (smgrsw[which].smgr_exists(rels[i], forknum))
409 20 : smgrsw[which].smgr_immedsync(rels[i], forknum);
410 : }
411 : }
412 : }
413 :
414 : /*
415 : * smgrdounlinkall() -- Immediately unlink all forks of all given relations
416 : *
417 : * All forks of all given relations are removed from the store. This
418 : * should not be used during transactional operations, since it can't be
419 : * undone.
420 : *
421 : * If isRedo is true, it is okay for the underlying file(s) to be gone
422 : * already.
423 : */
424 : void
425 22664 : smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
426 : {
427 22664 : int i = 0;
428 : RelFileLocatorBackend *rlocators;
429 : ForkNumber forknum;
430 :
431 22664 : if (nrels == 0)
432 722 : return;
433 :
434 : /*
435 : * Get rid of any remaining buffers for the relations. bufmgr will just
436 : * drop them without bothering to write the contents.
437 : */
438 21942 : DropRelationsAllBuffers(rels, nrels);
439 :
440 : /*
441 : * create an array which contains all relations to be dropped, and close
442 : * each relation's forks at the smgr level while at it
443 : */
444 21942 : rlocators = palloc(sizeof(RelFileLocatorBackend) * nrels);
445 97460 : for (i = 0; i < nrels; i++)
446 : {
447 75518 : RelFileLocatorBackend rlocator = rels[i]->smgr_rlocator;
448 75518 : int which = rels[i]->smgr_which;
449 :
450 75518 : rlocators[i] = rlocator;
451 :
452 : /* Close the forks at smgr level */
453 377590 : for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
454 302072 : smgrsw[which].smgr_close(rels[i], forknum);
455 : }
456 :
457 : /*
458 : * Send a shared-inval message to force other backends to close any
459 : * dangling smgr references they may have for these rels. We should do
460 : * this before starting the actual unlinking, in case we fail partway
461 : * through that step. Note that the sinval messages will eventually come
462 : * back to this backend, too, and thereby provide a backstop that we
463 : * closed our own smgr rel.
464 : */
465 97460 : for (i = 0; i < nrels; i++)
466 75518 : CacheInvalidateSmgr(rlocators[i]);
467 :
468 : /*
469 : * Delete the physical file(s).
470 : *
471 : * Note: smgr_unlink must treat deletion failure as a WARNING, not an
472 : * ERROR, because we've already decided to commit or abort the current
473 : * xact.
474 : */
475 :
476 97460 : for (i = 0; i < nrels; i++)
477 : {
478 75518 : int which = rels[i]->smgr_which;
479 :
480 377590 : for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
481 302072 : smgrsw[which].smgr_unlink(rlocators[i], forknum, isRedo);
482 : }
483 :
484 21942 : pfree(rlocators);
485 : }
486 :
487 :
488 : /*
489 : * smgrextend() -- Add a new block to a file.
490 : *
491 : * The semantics are nearly the same as smgrwrite(): write at the
492 : * specified position. However, this is to be used for the case of
493 : * extending a relation (i.e., blocknum is at or beyond the current
494 : * EOF). Note that we assume writing a block beyond current EOF
495 : * causes intervening file space to become filled with zeroes.
496 : */
497 : void
498 195260 : smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
499 : const void *buffer, bool skipFsync)
500 : {
501 195260 : smgrsw[reln->smgr_which].smgr_extend(reln, forknum, blocknum,
502 : buffer, skipFsync);
503 :
504 : /*
505 : * Normally we expect this to increase nblocks by one, but if the cached
506 : * value isn't as expected, just invalidate it so the next call asks the
507 : * kernel.
508 : */
509 195260 : if (reln->smgr_cached_nblocks[forknum] == blocknum)
510 95000 : reln->smgr_cached_nblocks[forknum] = blocknum + 1;
511 : else
512 100260 : reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber;
513 195260 : }
514 :
515 : /*
516 : * smgrzeroextend() -- Add new zeroed out blocks to a file.
517 : *
518 : * Similar to smgrextend(), except the relation can be extended by
519 : * multiple blocks at once and the added blocks will be filled with
520 : * zeroes.
521 : */
522 : void
523 349514 : smgrzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
524 : int nblocks, bool skipFsync)
525 : {
526 349514 : smgrsw[reln->smgr_which].smgr_zeroextend(reln, forknum, blocknum,
527 : nblocks, skipFsync);
528 :
529 : /*
530 : * Normally we expect this to increase the fork size by nblocks, but if
531 : * the cached value isn't as expected, just invalidate it so the next call
532 : * asks the kernel.
533 : */
534 349514 : if (reln->smgr_cached_nblocks[forknum] == blocknum)
535 349514 : reln->smgr_cached_nblocks[forknum] = blocknum + nblocks;
536 : else
537 0 : reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber;
538 349514 : }
539 :
540 : /*
541 : * smgrprefetch() -- Initiate asynchronous read of the specified block of a relation.
542 : *
543 : * In recovery only, this can return false to indicate that a file
544 : * doesn't exist (presumably it has been dropped by a later WAL
545 : * record).
546 : */
547 : bool
548 247452 : smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
549 : {
550 247452 : return smgrsw[reln->smgr_which].smgr_prefetch(reln, forknum, blocknum);
551 : }
552 :
553 : /*
554 : * smgrread() -- read a particular block from a relation into the supplied
555 : * buffer.
556 : *
557 : * This routine is called from the buffer manager in order to
558 : * instantiate pages in the shared buffer cache. All storage managers
559 : * return pages in the format that POSTGRES expects.
560 : */
561 : void
562 2028038 : smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
563 : void *buffer)
564 : {
565 2028038 : smgrsw[reln->smgr_which].smgr_read(reln, forknum, blocknum, buffer);
566 2028008 : }
567 :
568 : /*
569 : * smgrwrite() -- Write the supplied buffer out.
570 : *
571 : * This is to be used only for updating already-existing blocks of a
572 : * relation (ie, those before the current EOF). To extend a relation,
573 : * use smgrextend().
574 : *
575 : * This is not a synchronous write -- the block is not necessarily
576 : * on disk at return, only dumped out to the kernel. However,
577 : * provisions will be made to fsync the write before the next checkpoint.
578 : *
579 : * skipFsync indicates that the caller will make other provisions to
580 : * fsync the relation, so we needn't bother. Temporary relations also
581 : * do not require fsync.
582 : */
583 : void
584 847690 : smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
585 : const void *buffer, bool skipFsync)
586 : {
587 847690 : smgrsw[reln->smgr_which].smgr_write(reln, forknum, blocknum,
588 : buffer, skipFsync);
589 847690 : }
590 :
591 :
592 : /*
593 : * smgrwriteback() -- Trigger kernel writeback for the supplied range of
594 : * blocks.
595 : */
596 : void
597 125726 : smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
598 : BlockNumber nblocks)
599 : {
600 125726 : smgrsw[reln->smgr_which].smgr_writeback(reln, forknum, blocknum,
601 : nblocks);
602 125726 : }
603 :
604 : /*
605 : * smgrnblocks() -- Calculate the number of blocks in the
606 : * supplied relation.
607 : */
608 : BlockNumber
609 9425202 : smgrnblocks(SMgrRelation reln, ForkNumber forknum)
610 : {
611 : BlockNumber result;
612 :
613 : /* Check and return if we get the cached value for the number of blocks. */
614 9425202 : result = smgrnblocks_cached(reln, forknum);
615 9425202 : if (result != InvalidBlockNumber)
616 6206646 : return result;
617 :
618 3218556 : result = smgrsw[reln->smgr_which].smgr_nblocks(reln, forknum);
619 :
620 3218518 : reln->smgr_cached_nblocks[forknum] = result;
621 :
622 3218518 : return result;
623 : }
624 :
625 : /*
626 : * smgrnblocks_cached() -- Get the cached number of blocks in the supplied
627 : * relation.
628 : *
629 : * Returns an InvalidBlockNumber when not in recovery and when the relation
630 : * fork size is not cached.
631 : */
632 : BlockNumber
633 9459344 : smgrnblocks_cached(SMgrRelation reln, ForkNumber forknum)
634 : {
635 : /*
636 : * For now, we only use cached values in recovery due to lack of a shared
637 : * invalidation mechanism for changes in file size.
638 : */
639 9459344 : if (InRecovery && reln->smgr_cached_nblocks[forknum] != InvalidBlockNumber)
640 6210746 : return reln->smgr_cached_nblocks[forknum];
641 :
642 3248598 : return InvalidBlockNumber;
643 : }
644 :
645 : /*
646 : * smgrtruncate() -- Truncate the given forks of supplied relation to
647 : * each specified numbers of blocks
648 : *
649 : * The truncation is done immediately, so this can't be rolled back.
650 : *
651 : * The caller must hold AccessExclusiveLock on the relation, to ensure that
652 : * other backends receive the smgr invalidation event that this function sends
653 : * before they access any forks of the relation again.
654 : */
655 : void
656 1164 : smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nblocks)
657 : {
658 : int i;
659 :
660 : /*
661 : * Get rid of any buffers for the about-to-be-deleted blocks. bufmgr will
662 : * just drop them without bothering to write the contents.
663 : */
664 1164 : DropRelationBuffers(reln, forknum, nforks, nblocks);
665 :
666 : /*
667 : * Send a shared-inval message to force other backends to close any smgr
668 : * references they may have for this rel. This is useful because they
669 : * might have open file pointers to segments that got removed, and/or
670 : * smgr_targblock variables pointing past the new rel end. (The inval
671 : * message will come back to our backend, too, causing a
672 : * probably-unnecessary local smgr flush. But we don't expect that this
673 : * is a performance-critical path.) As in the unlink code, we want to be
674 : * sure the message is sent before we start changing things on-disk.
675 : */
676 1164 : CacheInvalidateSmgr(reln->smgr_rlocator);
677 :
678 : /* Do the truncation */
679 2776 : for (i = 0; i < nforks; i++)
680 : {
681 : /* Make the cached size is invalid if we encounter an error. */
682 1612 : reln->smgr_cached_nblocks[forknum[i]] = InvalidBlockNumber;
683 :
684 1612 : smgrsw[reln->smgr_which].smgr_truncate(reln, forknum[i], nblocks[i]);
685 :
686 : /*
687 : * We might as well update the local smgr_cached_nblocks values. The
688 : * smgr cache inval message that this function sent will cause other
689 : * backends to invalidate their copies of smgr_fsm_nblocks and
690 : * smgr_vm_nblocks, and these ones too at the next command boundary.
691 : * But these ensure they aren't outright wrong until then.
692 : */
693 1612 : reln->smgr_cached_nblocks[forknum[i]] = nblocks[i];
694 : }
695 1164 : }
696 :
697 : /*
698 : * smgrimmedsync() -- Force the specified relation to stable storage.
699 : *
700 : * Synchronously force all previous writes to the specified relation
701 : * down to disk.
702 : *
703 : * This is useful for building completely new relations (eg, new
704 : * indexes). Instead of incrementally WAL-logging the index build
705 : * steps, we can just write completed index pages to disk with smgrwrite
706 : * or smgrextend, and then fsync the completed index file before
707 : * committing the transaction. (This is sufficient for purposes of
708 : * crash recovery, since it effectively duplicates forcing a checkpoint
709 : * for the completed index. But it is *not* sufficient if one wishes
710 : * to use the WAL log for PITR or replication purposes: in that case
711 : * we have to make WAL entries as well.)
712 : *
713 : * The preceding writes should specify skipFsync = true to avoid
714 : * duplicative fsyncs.
715 : *
716 : * Note that you need to do FlushRelationBuffers() first if there is
717 : * any possibility that there are dirty buffers for the relation;
718 : * otherwise the sync is not very meaningful.
719 : */
720 : void
721 29076 : smgrimmedsync(SMgrRelation reln, ForkNumber forknum)
722 : {
723 29076 : smgrsw[reln->smgr_which].smgr_immedsync(reln, forknum);
724 29076 : }
725 :
726 : /*
727 : * AtEOXact_SMgr
728 : *
729 : * This routine is called during transaction commit or abort (it doesn't
730 : * particularly care which). All transient SMgrRelation objects are closed.
731 : *
732 : * We do this as a compromise between wanting transient SMgrRelations to
733 : * live awhile (to amortize the costs of blind writes of multiple blocks)
734 : * and needing them to not live forever (since we're probably holding open
735 : * a kernel file descriptor for the underlying file, and we need to ensure
736 : * that gets closed reasonably soon if the file gets deleted).
737 : */
738 : void
739 515564 : AtEOXact_SMgr(void)
740 : {
741 : dlist_mutable_iter iter;
742 :
743 : /*
744 : * Zap all unowned SMgrRelations. We rely on smgrclose() to remove each
745 : * one from the list.
746 : */
747 617338 : dlist_foreach_modify(iter, &unowned_relns)
748 : {
749 101774 : SMgrRelation rel = dlist_container(SMgrRelationData, node,
750 : iter.cur);
751 :
752 : Assert(rel->smgr_owner == NULL);
753 :
754 101774 : smgrclose(rel);
755 : }
756 515564 : }
757 :
758 : /*
759 : * This routine is called when we are ordered to release all open files by a
760 : * ProcSignalBarrier.
761 : */
762 : bool
763 534 : ProcessBarrierSmgrRelease(void)
764 : {
765 534 : smgrreleaseall();
766 534 : return true;
767 : }
|