Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * autoprewarm.c
4 : * Periodically dump information about the blocks present in
5 : * shared_buffers, and reload them on server restart.
6 : *
7 : * Due to locking considerations, we can't actually begin prewarming
8 : * until the server reaches a consistent state. We need the catalogs
9 : * to be consistent so that we can figure out which relation to lock,
10 : * and we need to lock the relations so that we don't try to prewarm
11 : * pages from a relation that is in the process of being dropped.
12 : *
13 : * While prewarming, autoprewarm will use two workers. There's a
14 : * leader worker that reads and sorts the list of blocks to be
15 : * prewarmed and then launches a per-database worker for each
16 : * relevant database in turn. The former keeps running after the
17 : * initial prewarm is complete to update the dump file periodically.
18 : *
19 : * Copyright (c) 2016-2025, PostgreSQL Global Development Group
20 : *
21 : * IDENTIFICATION
22 : * contrib/pg_prewarm/autoprewarm.c
23 : *
24 : *-------------------------------------------------------------------------
25 : */
26 :
27 : #include "postgres.h"
28 :
29 : #include <unistd.h>
30 :
31 : #include "access/relation.h"
32 : #include "access/xact.h"
33 : #include "pgstat.h"
34 : #include "postmaster/bgworker.h"
35 : #include "postmaster/interrupt.h"
36 : #include "storage/buf_internals.h"
37 : #include "storage/dsm.h"
38 : #include "storage/dsm_registry.h"
39 : #include "storage/fd.h"
40 : #include "storage/ipc.h"
41 : #include "storage/latch.h"
42 : #include "storage/lwlock.h"
43 : #include "storage/procsignal.h"
44 : #include "storage/smgr.h"
45 : #include "tcop/tcopprot.h"
46 : #include "utils/guc.h"
47 : #include "utils/rel.h"
48 : #include "utils/relfilenumbermap.h"
49 : #include "utils/timestamp.h"
50 :
/*
 * Name of the block dump file.  It is opened by this (relative) name for
 * prewarming, and a dump is first written to "<name>.tmp" and then renamed
 * onto it.
 */
#define AUTOPREWARM_FILE "autoprewarm.blocks"
52 :
53 : /* Metadata for each block we dump. */
54 : typedef struct BlockInfoRecord
55 : {
56 : Oid database;
57 : Oid tablespace;
58 : RelFileNumber filenumber;
59 : ForkNumber forknum;
60 : BlockNumber blocknum;
61 : } BlockInfoRecord;
62 :
63 : /* Shared state information for autoprewarm bgworker. */
64 : typedef struct AutoPrewarmSharedState
65 : {
66 : LWLock lock; /* mutual exclusion */
67 : pid_t bgworker_pid; /* for main bgworker */
68 : pid_t pid_using_dumpfile; /* for autoprewarm or block dump */
69 :
70 : /* Following items are for communication with per-database worker */
71 : dsm_handle block_info_handle;
72 : Oid database;
73 : int prewarm_start_idx;
74 : int prewarm_stop_idx;
75 : int prewarmed_blocks;
76 : } AutoPrewarmSharedState;
77 :
78 : PGDLLEXPORT void autoprewarm_main(Datum main_arg);
79 : PGDLLEXPORT void autoprewarm_database_main(Datum main_arg);
80 :
81 4 : PG_FUNCTION_INFO_V1(autoprewarm_start_worker);
82 6 : PG_FUNCTION_INFO_V1(autoprewarm_dump_now);
83 :
84 : static void apw_load_buffers(void);
85 : static int apw_dump_now(bool is_bgworker, bool dump_unlogged);
86 : static void apw_start_leader_worker(void);
87 : static void apw_start_database_worker(void);
88 : static bool apw_init_shmem(void);
89 : static void apw_detach_shmem(int code, Datum arg);
90 : static int apw_compare_blockinfo(const void *p, const void *q);
91 :
92 : /* Pointer to shared-memory state. */
93 : static AutoPrewarmSharedState *apw_state = NULL;
94 :
95 : /* GUC variables. */
96 : static bool autoprewarm = true; /* start worker? */
97 : static int autoprewarm_interval = 300; /* dump interval */
98 :
99 : /*
100 : * Module load callback.
101 : */
102 : void
103 10 : _PG_init(void)
104 : {
105 10 : DefineCustomIntVariable("pg_prewarm.autoprewarm_interval",
106 : "Sets the interval between dumps of shared buffers",
107 : "If set to zero, time-based dumping is disabled.",
108 : &autoprewarm_interval,
109 : 300,
110 : 0, INT_MAX / 1000,
111 : PGC_SIGHUP,
112 : GUC_UNIT_S,
113 : NULL,
114 : NULL,
115 : NULL);
116 :
117 10 : if (!process_shared_preload_libraries_in_progress)
118 6 : return;
119 :
120 : /* can't define PGC_POSTMASTER variable after startup */
121 4 : DefineCustomBoolVariable("pg_prewarm.autoprewarm",
122 : "Starts the autoprewarm worker.",
123 : NULL,
124 : &autoprewarm,
125 : true,
126 : PGC_POSTMASTER,
127 : 0,
128 : NULL,
129 : NULL,
130 : NULL);
131 :
132 4 : MarkGUCPrefixReserved("pg_prewarm");
133 :
134 : /* Register autoprewarm worker, if enabled. */
135 4 : if (autoprewarm)
136 4 : apw_start_leader_worker();
137 : }
138 :
139 : /*
140 : * Main entry point for the leader autoprewarm process. Per-database workers
141 : * have a separate entry point.
142 : */
143 : void
144 4 : autoprewarm_main(Datum main_arg)
145 : {
146 4 : bool first_time = true;
147 4 : bool final_dump_allowed = true;
148 4 : TimestampTz last_dump_time = 0;
149 :
150 : /* Establish signal handlers; once that's done, unblock signals. */
151 4 : pqsignal(SIGTERM, SignalHandlerForShutdownRequest);
152 4 : pqsignal(SIGHUP, SignalHandlerForConfigReload);
153 4 : pqsignal(SIGUSR1, procsignal_sigusr1_handler);
154 4 : BackgroundWorkerUnblockSignals();
155 :
156 : /* Create (if necessary) and attach to our shared memory area. */
157 4 : if (apw_init_shmem())
158 0 : first_time = false;
159 :
160 : /*
161 : * Set on-detach hook so that our PID will be cleared on exit.
162 : *
163 : * NB: Autoprewarm's state is stored in a DSM segment, and DSM segments
164 : * are detached before calling the on_shmem_exit callbacks, so we must put
165 : * apw_detach_shmem in the before_shmem_exit callback list.
166 : */
167 4 : before_shmem_exit(apw_detach_shmem, 0);
168 :
169 : /*
170 : * Store our PID in the shared memory area --- unless there's already
171 : * another worker running, in which case just exit.
172 : */
173 4 : LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
174 4 : if (apw_state->bgworker_pid != InvalidPid)
175 : {
176 0 : LWLockRelease(&apw_state->lock);
177 0 : ereport(LOG,
178 : (errmsg("autoprewarm worker is already running under PID %d",
179 : (int) apw_state->bgworker_pid)));
180 0 : return;
181 : }
182 4 : apw_state->bgworker_pid = MyProcPid;
183 4 : LWLockRelease(&apw_state->lock);
184 :
185 : /*
186 : * Preload buffers from the dump file only if we just created the shared
187 : * memory region. Otherwise, it's either already been done or shouldn't
188 : * be done - e.g. because the old dump file has been overwritten since the
189 : * server was started.
190 : *
191 : * There's not much point in performing a dump immediately after we finish
192 : * preloading; so, if we do end up preloading, consider the last dump time
193 : * to be equal to the current time.
194 : *
195 : * If apw_load_buffers() is terminated early by a shutdown request,
196 : * prevent dumping out our state below the loop, because we'd effectively
197 : * just truncate the saved state to however much we'd managed to preload.
198 : */
199 4 : if (first_time)
200 : {
201 4 : apw_load_buffers();
202 4 : final_dump_allowed = !ShutdownRequestPending;
203 4 : last_dump_time = GetCurrentTimestamp();
204 : }
205 :
206 : /* Periodically dump buffers until terminated. */
207 10 : while (!ShutdownRequestPending)
208 : {
209 : /* In case of a SIGHUP, just reload the configuration. */
210 6 : if (ConfigReloadPending)
211 : {
212 0 : ConfigReloadPending = false;
213 0 : ProcessConfigFile(PGC_SIGHUP);
214 : }
215 :
216 6 : if (autoprewarm_interval <= 0)
217 : {
218 : /* We're only dumping at shutdown, so just wait forever. */
219 6 : (void) WaitLatch(MyLatch,
220 : WL_LATCH_SET | WL_EXIT_ON_PM_DEATH,
221 : -1L,
222 : PG_WAIT_EXTENSION);
223 : }
224 : else
225 : {
226 : TimestampTz next_dump_time;
227 : long delay_in_ms;
228 :
229 : /* Compute the next dump time. */
230 0 : next_dump_time =
231 0 : TimestampTzPlusMilliseconds(last_dump_time,
232 : autoprewarm_interval * 1000);
233 : delay_in_ms =
234 0 : TimestampDifferenceMilliseconds(GetCurrentTimestamp(),
235 : next_dump_time);
236 :
237 : /* Perform a dump if it's time. */
238 0 : if (delay_in_ms <= 0)
239 : {
240 0 : last_dump_time = GetCurrentTimestamp();
241 0 : apw_dump_now(true, false);
242 0 : continue;
243 : }
244 :
245 : /* Sleep until the next dump time. */
246 0 : (void) WaitLatch(MyLatch,
247 : WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
248 : delay_in_ms,
249 : PG_WAIT_EXTENSION);
250 : }
251 :
252 : /* Reset the latch, loop. */
253 6 : ResetLatch(MyLatch);
254 : }
255 :
256 : /*
257 : * Dump one last time. We assume this is probably the result of a system
258 : * shutdown, although it's possible that we've merely been terminated.
259 : */
260 4 : if (final_dump_allowed)
261 4 : apw_dump_now(true, true);
262 : }
263 :
264 : /*
265 : * Read the dump file and launch per-database workers one at a time to
266 : * prewarm the buffers found there.
267 : */
268 : static void
269 4 : apw_load_buffers(void)
270 : {
271 4 : FILE *file = NULL;
272 : int num_elements,
273 : i;
274 : BlockInfoRecord *blkinfo;
275 : dsm_segment *seg;
276 :
277 : /*
278 : * Skip the prewarm if the dump file is in use; otherwise, prevent any
279 : * other process from writing it while we're using it.
280 : */
281 4 : LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
282 4 : if (apw_state->pid_using_dumpfile == InvalidPid)
283 4 : apw_state->pid_using_dumpfile = MyProcPid;
284 : else
285 : {
286 0 : LWLockRelease(&apw_state->lock);
287 0 : ereport(LOG,
288 : (errmsg("skipping prewarm because block dump file is being written by PID %d",
289 : (int) apw_state->pid_using_dumpfile)));
290 2 : return;
291 : }
292 4 : LWLockRelease(&apw_state->lock);
293 :
294 : /*
295 : * Open the block dump file. Exit quietly if it doesn't exist, but report
296 : * any other error.
297 : */
298 4 : file = AllocateFile(AUTOPREWARM_FILE, "r");
299 4 : if (!file)
300 : {
301 2 : if (errno == ENOENT)
302 : {
303 2 : LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
304 2 : apw_state->pid_using_dumpfile = InvalidPid;
305 2 : LWLockRelease(&apw_state->lock);
306 2 : return; /* No file to load. */
307 : }
308 0 : ereport(ERROR,
309 : (errcode_for_file_access(),
310 : errmsg("could not read file \"%s\": %m",
311 : AUTOPREWARM_FILE)));
312 : }
313 :
314 : /* First line of the file is a record count. */
315 2 : if (fscanf(file, "<<%d>>\n", &num_elements) != 1)
316 0 : ereport(ERROR,
317 : (errcode_for_file_access(),
318 : errmsg("could not read from file \"%s\": %m",
319 : AUTOPREWARM_FILE)));
320 :
321 : /* Allocate a dynamic shared memory segment to store the record data. */
322 2 : seg = dsm_create(sizeof(BlockInfoRecord) * num_elements, 0);
323 2 : blkinfo = (BlockInfoRecord *) dsm_segment_address(seg);
324 :
325 : /* Read records, one per line. */
326 398 : for (i = 0; i < num_elements; i++)
327 : {
328 : unsigned forknum;
329 :
330 396 : if (fscanf(file, "%u,%u,%u,%u,%u\n", &blkinfo[i].database,
331 396 : &blkinfo[i].tablespace, &blkinfo[i].filenumber,
332 396 : &forknum, &blkinfo[i].blocknum) != 5)
333 0 : ereport(ERROR,
334 : (errmsg("autoprewarm block dump file is corrupted at line %d",
335 : i + 1)));
336 396 : blkinfo[i].forknum = forknum;
337 : }
338 :
339 2 : FreeFile(file);
340 :
341 : /* Sort the blocks to be loaded. */
342 2 : qsort(blkinfo, num_elements, sizeof(BlockInfoRecord),
343 : apw_compare_blockinfo);
344 :
345 : /* Populate shared memory state. */
346 2 : apw_state->block_info_handle = dsm_segment_handle(seg);
347 2 : apw_state->prewarm_start_idx = apw_state->prewarm_stop_idx = 0;
348 2 : apw_state->prewarmed_blocks = 0;
349 :
350 : /* Get the info position of the first block of the next database. */
351 4 : while (apw_state->prewarm_start_idx < num_elements)
352 : {
353 2 : int j = apw_state->prewarm_start_idx;
354 2 : Oid current_db = blkinfo[j].database;
355 :
356 : /*
357 : * Advance the prewarm_stop_idx to the first BlockInfoRecord that does
358 : * not belong to this database.
359 : */
360 2 : j++;
361 396 : while (j < num_elements)
362 : {
363 394 : if (current_db != blkinfo[j].database)
364 : {
365 : /*
366 : * Combine BlockInfoRecords for global objects with those of
367 : * the database.
368 : */
369 2 : if (current_db != InvalidOid)
370 0 : break;
371 2 : current_db = blkinfo[j].database;
372 : }
373 :
374 394 : j++;
375 : }
376 :
377 : /*
378 : * If we reach this point with current_db == InvalidOid, then only
379 : * BlockInfoRecords belonging to global objects exist. We can't
380 : * prewarm without a database connection, so just bail out.
381 : */
382 2 : if (current_db == InvalidOid)
383 0 : break;
384 :
385 : /* Configure stop point and database for next per-database worker. */
386 2 : apw_state->prewarm_stop_idx = j;
387 2 : apw_state->database = current_db;
388 : Assert(apw_state->prewarm_start_idx < apw_state->prewarm_stop_idx);
389 :
390 : /* If we've run out of free buffers, don't launch another worker. */
391 2 : if (!have_free_buffer())
392 0 : break;
393 :
394 : /*
395 : * Likewise, don't launch if we've already been told to shut down.
396 : * (The launch would fail anyway, but we might as well skip it.)
397 : */
398 2 : if (ShutdownRequestPending)
399 0 : break;
400 :
401 : /*
402 : * Start a per-database worker to load blocks for this database; this
403 : * function will return once the per-database worker exits.
404 : */
405 2 : apw_start_database_worker();
406 :
407 : /* Prepare for next database. */
408 2 : apw_state->prewarm_start_idx = apw_state->prewarm_stop_idx;
409 : }
410 :
411 : /* Clean up. */
412 2 : dsm_detach(seg);
413 2 : LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
414 2 : apw_state->block_info_handle = DSM_HANDLE_INVALID;
415 2 : apw_state->pid_using_dumpfile = InvalidPid;
416 2 : LWLockRelease(&apw_state->lock);
417 :
418 : /* Report our success, if we were able to finish. */
419 2 : if (!ShutdownRequestPending)
420 2 : ereport(LOG,
421 : (errmsg("autoprewarm successfully prewarmed %d of %d previously-loaded blocks",
422 : apw_state->prewarmed_blocks, num_elements)));
423 : }
424 :
425 : /*
426 : * Prewarm all blocks for one database (and possibly also global objects, if
427 : * those got grouped with this database).
428 : */
429 : void
430 2 : autoprewarm_database_main(Datum main_arg)
431 : {
432 : int pos;
433 : BlockInfoRecord *block_info;
434 2 : Relation rel = NULL;
435 2 : BlockNumber nblocks = 0;
436 2 : BlockInfoRecord *old_blk = NULL;
437 : dsm_segment *seg;
438 :
439 : /* Establish signal handlers; once that's done, unblock signals. */
440 2 : pqsignal(SIGTERM, die);
441 2 : BackgroundWorkerUnblockSignals();
442 :
443 : /* Connect to correct database and get block information. */
444 2 : apw_init_shmem();
445 2 : seg = dsm_attach(apw_state->block_info_handle);
446 2 : if (seg == NULL)
447 0 : ereport(ERROR,
448 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
449 : errmsg("could not map dynamic shared memory segment")));
450 2 : BackgroundWorkerInitializeConnectionByOid(apw_state->database, InvalidOid, 0);
451 2 : block_info = (BlockInfoRecord *) dsm_segment_address(seg);
452 2 : pos = apw_state->prewarm_start_idx;
453 :
454 : /*
455 : * Loop until we run out of blocks to prewarm or until we run out of free
456 : * buffers.
457 : */
458 398 : while (pos < apw_state->prewarm_stop_idx && have_free_buffer())
459 : {
460 396 : BlockInfoRecord *blk = &block_info[pos++];
461 : Buffer buf;
462 :
463 396 : CHECK_FOR_INTERRUPTS();
464 :
465 : /*
466 : * Quit if we've reached records for another database. If previous
467 : * blocks are of some global objects, then continue pre-warming.
468 : */
469 396 : if (old_blk != NULL && old_blk->database != blk->database &&
470 2 : old_blk->database != 0)
471 0 : break;
472 :
473 : /*
474 : * As soon as we encounter a block of a new relation, close the old
475 : * relation. Note that rel will be NULL if try_relation_open failed
476 : * previously; in that case, there is nothing to close.
477 : */
478 396 : if (old_blk != NULL && old_blk->filenumber != blk->filenumber &&
479 : rel != NULL)
480 : {
481 98 : relation_close(rel, AccessShareLock);
482 98 : rel = NULL;
483 98 : CommitTransactionCommand();
484 : }
485 :
486 : /*
487 : * Try to open each new relation, but only once, when we first
488 : * encounter it. If it's been dropped, skip the associated blocks.
489 : */
490 396 : if (old_blk == NULL || old_blk->filenumber != blk->filenumber)
491 : {
492 : Oid reloid;
493 :
494 : Assert(rel == NULL);
495 100 : StartTransactionCommand();
496 100 : reloid = RelidByRelfilenumber(blk->tablespace, blk->filenumber);
497 100 : if (OidIsValid(reloid))
498 100 : rel = try_relation_open(reloid, AccessShareLock);
499 :
500 100 : if (!rel)
501 0 : CommitTransactionCommand();
502 : }
503 396 : if (!rel)
504 : {
505 0 : old_blk = blk;
506 0 : continue;
507 : }
508 :
509 : /* Once per fork, check for fork existence and size. */
510 396 : if (old_blk == NULL ||
511 394 : old_blk->filenumber != blk->filenumber ||
512 296 : old_blk->forknum != blk->forknum)
513 : {
514 : /*
515 : * smgrexists is not safe for illegal forknum, hence check whether
516 : * the passed forknum is valid before using it in smgrexists.
517 : */
518 128 : if (blk->forknum > InvalidForkNumber &&
519 256 : blk->forknum <= MAX_FORKNUM &&
520 128 : smgrexists(RelationGetSmgr(rel), blk->forknum))
521 128 : nblocks = RelationGetNumberOfBlocksInFork(rel, blk->forknum);
522 : else
523 0 : nblocks = 0;
524 : }
525 :
526 : /* Check whether blocknum is valid and within fork file size. */
527 396 : if (blk->blocknum >= nblocks)
528 : {
529 : /* Move to next forknum. */
530 0 : old_blk = blk;
531 0 : continue;
532 : }
533 :
534 : /* Prewarm buffer. */
535 396 : buf = ReadBufferExtended(rel, blk->forknum, blk->blocknum, RBM_NORMAL,
536 : NULL);
537 396 : if (BufferIsValid(buf))
538 : {
539 396 : apw_state->prewarmed_blocks++;
540 396 : ReleaseBuffer(buf);
541 : }
542 :
543 396 : old_blk = blk;
544 : }
545 :
546 2 : dsm_detach(seg);
547 :
548 : /* Release lock on previous relation. */
549 2 : if (rel)
550 : {
551 2 : relation_close(rel, AccessShareLock);
552 2 : CommitTransactionCommand();
553 : }
554 2 : }
555 :
556 : /*
557 : * Dump information on blocks in shared buffers. We use a text format here
558 : * so that it's easy to understand and even change the file contents if
559 : * necessary.
560 : * Returns the number of blocks dumped.
561 : */
562 : static int
563 6 : apw_dump_now(bool is_bgworker, bool dump_unlogged)
564 : {
565 : int num_blocks;
566 : int i;
567 : int ret;
568 : BlockInfoRecord *block_info_array;
569 : BufferDesc *bufHdr;
570 : FILE *file;
571 : char transient_dump_file_path[MAXPGPATH];
572 : pid_t pid;
573 :
574 6 : LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
575 6 : pid = apw_state->pid_using_dumpfile;
576 6 : if (apw_state->pid_using_dumpfile == InvalidPid)
577 6 : apw_state->pid_using_dumpfile = MyProcPid;
578 6 : LWLockRelease(&apw_state->lock);
579 :
580 6 : if (pid != InvalidPid)
581 : {
582 0 : if (!is_bgworker)
583 0 : ereport(ERROR,
584 : (errmsg("could not perform block dump because dump file is being used by PID %d",
585 : (int) apw_state->pid_using_dumpfile)));
586 :
587 0 : ereport(LOG,
588 : (errmsg("skipping block dump because it is already being performed by PID %d",
589 : (int) apw_state->pid_using_dumpfile)));
590 0 : return 0;
591 : }
592 :
593 : block_info_array =
594 6 : (BlockInfoRecord *) palloc(sizeof(BlockInfoRecord) * NBuffers);
595 :
596 98310 : for (num_blocks = 0, i = 0; i < NBuffers; i++)
597 : {
598 : uint32 buf_state;
599 :
600 98304 : CHECK_FOR_INTERRUPTS();
601 :
602 98304 : bufHdr = GetBufferDescriptor(i);
603 :
604 : /* Lock each buffer header before inspecting. */
605 98304 : buf_state = LockBufHdr(bufHdr);
606 :
607 : /*
608 : * Unlogged tables will be automatically truncated after a crash or
609 : * unclean shutdown. In such cases we need not prewarm them. Dump them
610 : * only if requested by caller.
611 : */
612 98304 : if (buf_state & BM_TAG_VALID &&
613 1188 : ((buf_state & BM_PERMANENT) || dump_unlogged))
614 : {
615 1188 : block_info_array[num_blocks].database = bufHdr->tag.dbOid;
616 1188 : block_info_array[num_blocks].tablespace = bufHdr->tag.spcOid;
617 2376 : block_info_array[num_blocks].filenumber =
618 1188 : BufTagGetRelNumber(&bufHdr->tag);
619 2376 : block_info_array[num_blocks].forknum =
620 1188 : BufTagGetForkNum(&bufHdr->tag);
621 1188 : block_info_array[num_blocks].blocknum = bufHdr->tag.blockNum;
622 1188 : ++num_blocks;
623 : }
624 :
625 98304 : UnlockBufHdr(bufHdr, buf_state);
626 : }
627 :
628 6 : snprintf(transient_dump_file_path, MAXPGPATH, "%s.tmp", AUTOPREWARM_FILE);
629 6 : file = AllocateFile(transient_dump_file_path, "w");
630 6 : if (!file)
631 0 : ereport(ERROR,
632 : (errcode_for_file_access(),
633 : errmsg("could not open file \"%s\": %m",
634 : transient_dump_file_path)));
635 :
636 6 : ret = fprintf(file, "<<%d>>\n", num_blocks);
637 6 : if (ret < 0)
638 : {
639 0 : int save_errno = errno;
640 :
641 0 : FreeFile(file);
642 0 : unlink(transient_dump_file_path);
643 0 : errno = save_errno;
644 0 : ereport(ERROR,
645 : (errcode_for_file_access(),
646 : errmsg("could not write to file \"%s\": %m",
647 : transient_dump_file_path)));
648 : }
649 :
650 1194 : for (i = 0; i < num_blocks; i++)
651 : {
652 1188 : CHECK_FOR_INTERRUPTS();
653 :
654 1188 : ret = fprintf(file, "%u,%u,%u,%u,%u\n",
655 1188 : block_info_array[i].database,
656 1188 : block_info_array[i].tablespace,
657 1188 : block_info_array[i].filenumber,
658 1188 : (uint32) block_info_array[i].forknum,
659 1188 : block_info_array[i].blocknum);
660 1188 : if (ret < 0)
661 : {
662 0 : int save_errno = errno;
663 :
664 0 : FreeFile(file);
665 0 : unlink(transient_dump_file_path);
666 0 : errno = save_errno;
667 0 : ereport(ERROR,
668 : (errcode_for_file_access(),
669 : errmsg("could not write to file \"%s\": %m",
670 : transient_dump_file_path)));
671 : }
672 : }
673 :
674 6 : pfree(block_info_array);
675 :
676 : /*
677 : * Rename transient_dump_file_path to AUTOPREWARM_FILE to make things
678 : * permanent.
679 : */
680 6 : ret = FreeFile(file);
681 6 : if (ret != 0)
682 : {
683 0 : int save_errno = errno;
684 :
685 0 : unlink(transient_dump_file_path);
686 0 : errno = save_errno;
687 0 : ereport(ERROR,
688 : (errcode_for_file_access(),
689 : errmsg("could not close file \"%s\": %m",
690 : transient_dump_file_path)));
691 : }
692 :
693 6 : (void) durable_rename(transient_dump_file_path, AUTOPREWARM_FILE, ERROR);
694 6 : apw_state->pid_using_dumpfile = InvalidPid;
695 :
696 6 : ereport(DEBUG1,
697 : (errmsg_internal("wrote block details for %d blocks", num_blocks)));
698 6 : return num_blocks;
699 : }
700 :
701 : /*
702 : * SQL-callable function to launch autoprewarm.
703 : */
704 : Datum
705 0 : autoprewarm_start_worker(PG_FUNCTION_ARGS)
706 : {
707 : pid_t pid;
708 :
709 0 : if (!autoprewarm)
710 0 : ereport(ERROR,
711 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
712 : errmsg("autoprewarm is disabled")));
713 :
714 0 : apw_init_shmem();
715 0 : LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
716 0 : pid = apw_state->bgworker_pid;
717 0 : LWLockRelease(&apw_state->lock);
718 :
719 0 : if (pid != InvalidPid)
720 0 : ereport(ERROR,
721 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
722 : errmsg("autoprewarm worker is already running under PID %d",
723 : (int) pid)));
724 :
725 0 : apw_start_leader_worker();
726 :
727 0 : PG_RETURN_VOID();
728 : }
729 :
730 : /*
731 : * SQL-callable function to perform an immediate block dump.
732 : *
733 : * Note: this is declared to return int8, as insurance against some
734 : * very distant day when we might make NBuffers wider than int.
735 : */
736 : Datum
737 2 : autoprewarm_dump_now(PG_FUNCTION_ARGS)
738 : {
739 : int num_blocks;
740 :
741 2 : apw_init_shmem();
742 :
743 2 : PG_ENSURE_ERROR_CLEANUP(apw_detach_shmem, 0);
744 : {
745 2 : num_blocks = apw_dump_now(false, true);
746 : }
747 2 : PG_END_ENSURE_ERROR_CLEANUP(apw_detach_shmem, 0);
748 :
749 2 : PG_RETURN_INT64((int64) num_blocks);
750 : }
751 :
752 : static void
753 4 : apw_init_state(void *ptr)
754 : {
755 4 : AutoPrewarmSharedState *state = (AutoPrewarmSharedState *) ptr;
756 :
757 4 : LWLockInitialize(&state->lock, LWLockNewTrancheId());
758 4 : state->bgworker_pid = InvalidPid;
759 4 : state->pid_using_dumpfile = InvalidPid;
760 4 : }
761 :
762 : /*
763 : * Allocate and initialize autoprewarm related shared memory, if not already
764 : * done, and set up backend-local pointer to that state. Returns true if an
765 : * existing shared memory segment was found.
766 : */
767 : static bool
768 8 : apw_init_shmem(void)
769 : {
770 : bool found;
771 :
772 8 : apw_state = GetNamedDSMSegment("autoprewarm",
773 : sizeof(AutoPrewarmSharedState),
774 : apw_init_state,
775 : &found);
776 8 : LWLockRegisterTranche(apw_state->lock.tranche, "autoprewarm");
777 :
778 8 : return found;
779 : }
780 :
781 : /*
782 : * Clear our PID from autoprewarm shared state.
783 : */
784 : static void
785 4 : apw_detach_shmem(int code, Datum arg)
786 : {
787 4 : LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
788 4 : if (apw_state->pid_using_dumpfile == MyProcPid)
789 0 : apw_state->pid_using_dumpfile = InvalidPid;
790 4 : if (apw_state->bgworker_pid == MyProcPid)
791 4 : apw_state->bgworker_pid = InvalidPid;
792 4 : LWLockRelease(&apw_state->lock);
793 4 : }
794 :
795 : /*
796 : * Start autoprewarm leader worker process.
797 : */
798 : static void
799 4 : apw_start_leader_worker(void)
800 : {
801 : BackgroundWorker worker;
802 : BackgroundWorkerHandle *handle;
803 : BgwHandleStatus status;
804 : pid_t pid;
805 :
806 4 : MemSet(&worker, 0, sizeof(BackgroundWorker));
807 4 : worker.bgw_flags = BGWORKER_SHMEM_ACCESS;
808 4 : worker.bgw_start_time = BgWorkerStart_ConsistentState;
809 4 : strcpy(worker.bgw_library_name, "pg_prewarm");
810 4 : strcpy(worker.bgw_function_name, "autoprewarm_main");
811 4 : strcpy(worker.bgw_name, "autoprewarm leader");
812 4 : strcpy(worker.bgw_type, "autoprewarm leader");
813 :
814 4 : if (process_shared_preload_libraries_in_progress)
815 : {
816 4 : RegisterBackgroundWorker(&worker);
817 4 : return;
818 : }
819 :
820 : /* must set notify PID to wait for startup */
821 0 : worker.bgw_notify_pid = MyProcPid;
822 :
823 0 : if (!RegisterDynamicBackgroundWorker(&worker, &handle))
824 0 : ereport(ERROR,
825 : (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
826 : errmsg("could not register background process"),
827 : errhint("You may need to increase \"max_worker_processes\".")));
828 :
829 0 : status = WaitForBackgroundWorkerStartup(handle, &pid);
830 0 : if (status != BGWH_STARTED)
831 0 : ereport(ERROR,
832 : (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
833 : errmsg("could not start background process"),
834 : errhint("More details may be available in the server log.")));
835 : }
836 :
837 : /*
838 : * Start autoprewarm per-database worker process.
839 : */
840 : static void
841 2 : apw_start_database_worker(void)
842 : {
843 : BackgroundWorker worker;
844 : BackgroundWorkerHandle *handle;
845 :
846 2 : MemSet(&worker, 0, sizeof(BackgroundWorker));
847 2 : worker.bgw_flags =
848 : BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION;
849 2 : worker.bgw_start_time = BgWorkerStart_ConsistentState;
850 2 : worker.bgw_restart_time = BGW_NEVER_RESTART;
851 2 : strcpy(worker.bgw_library_name, "pg_prewarm");
852 2 : strcpy(worker.bgw_function_name, "autoprewarm_database_main");
853 2 : strcpy(worker.bgw_name, "autoprewarm worker");
854 2 : strcpy(worker.bgw_type, "autoprewarm worker");
855 :
856 : /* must set notify PID to wait for shutdown */
857 2 : worker.bgw_notify_pid = MyProcPid;
858 :
859 2 : if (!RegisterDynamicBackgroundWorker(&worker, &handle))
860 0 : ereport(ERROR,
861 : (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
862 : errmsg("registering dynamic bgworker autoprewarm failed"),
863 : errhint("Consider increasing the configuration parameter \"%s\".", "max_worker_processes")));
864 :
865 : /*
866 : * Ignore return value; if it fails, postmaster has died, but we have
867 : * checks for that elsewhere.
868 : */
869 2 : WaitForBackgroundWorkerShutdown(handle);
870 2 : }
871 :
872 : /* Compare member elements to check whether they are not equal. */
873 : #define cmp_member_elem(fld) \
874 : do { \
875 : if (a->fld < b->fld) \
876 : return -1; \
877 : else if (a->fld > b->fld) \
878 : return 1; \
879 : } while(0)
880 :
881 : /*
882 : * apw_compare_blockinfo
883 : *
884 : * We depend on all records for a particular database being consecutive
885 : * in the dump file; each per-database worker will preload blocks until
886 : * it sees a block for some other database. Sorting by tablespace,
887 : * filenumber, forknum, and blocknum isn't critical for correctness, but
888 : * helps us get a sequential I/O pattern.
889 : */
890 : static int
891 3246 : apw_compare_blockinfo(const void *p, const void *q)
892 : {
893 3246 : const BlockInfoRecord *a = (const BlockInfoRecord *) p;
894 3246 : const BlockInfoRecord *b = (const BlockInfoRecord *) q;
895 :
896 3246 : cmp_member_elem(database);
897 3128 : cmp_member_elem(tablespace);
898 3128 : cmp_member_elem(filenumber);
899 1150 : cmp_member_elem(forknum);
900 990 : cmp_member_elem(blocknum);
901 :
902 0 : return 0;
903 : }
|