LCOV - code coverage report
Current view: top level - contrib/pg_prewarm - autoprewarm.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 230 300 76.7 %
Date: 2025-01-18 04:15:08 Functions: 14 15 93.3 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * autoprewarm.c
       4             :  *      Periodically dump information about the blocks present in
       5             :  *      shared_buffers, and reload them on server restart.
       6             :  *
       7             :  *      Due to locking considerations, we can't actually begin prewarming
       8             :  *      until the server reaches a consistent state.  We need the catalogs
       9             :  *      to be consistent so that we can figure out which relation to lock,
      10             :  *      and we need to lock the relations so that we don't try to prewarm
      11             :  *      pages from a relation that is in the process of being dropped.
      12             :  *
      13             :  *      While prewarming, autoprewarm will use two workers.  There's a
      14             :  *      leader worker that reads and sorts the list of blocks to be
      15             :  *      prewarmed and then launches a per-database worker for each
      16             :  *      relevant database in turn.  The former keeps running after the
      17             :  *      initial prewarm is complete to update the dump file periodically.
      18             :  *
      19             :  *  Copyright (c) 2016-2025, PostgreSQL Global Development Group
      20             :  *
      21             :  *  IDENTIFICATION
      22             :  *      contrib/pg_prewarm/autoprewarm.c
      23             :  *
      24             :  *-------------------------------------------------------------------------
      25             :  */
      26             : 
      27             : #include "postgres.h"
      28             : 
      29             : #include <unistd.h>
      30             : 
      31             : #include "access/relation.h"
      32             : #include "access/xact.h"
      33             : #include "pgstat.h"
      34             : #include "postmaster/bgworker.h"
      35             : #include "postmaster/interrupt.h"
      36             : #include "storage/buf_internals.h"
      37             : #include "storage/dsm.h"
      38             : #include "storage/dsm_registry.h"
      39             : #include "storage/fd.h"
      40             : #include "storage/ipc.h"
      41             : #include "storage/latch.h"
      42             : #include "storage/lwlock.h"
      43             : #include "storage/procsignal.h"
      44             : #include "storage/smgr.h"
      45             : #include "tcop/tcopprot.h"
      46             : #include "utils/guc.h"
      47             : #include "utils/rel.h"
      48             : #include "utils/relfilenumbermap.h"
      49             : #include "utils/timestamp.h"
      50             : 
      51             : #define AUTOPREWARM_FILE "autoprewarm.blocks" /* name of the block dump file read by apw_load_buffers() */
      52             : 
      53             : /* Metadata for each block we dump; the dump file holds one record per line, fields in this order. */
      54             : typedef struct BlockInfoRecord
      55             : {
      56             :     Oid         database;       /* database OID; InvalidOid for global objects */
      57             :     Oid         tablespace;     /* tablespace OID, passed to RelidByRelfilenumber() */
      58             :     RelFileNumber filenumber;   /* relation file number, passed to RelidByRelfilenumber() */
      59             :     ForkNumber  forknum;        /* fork of the relation the block belongs to */
      60             :     BlockNumber blocknum;       /* block number within that fork */
      61             : } BlockInfoRecord;
      62             : 
      63             : /* Shared state information for autoprewarm bgworker. */
      64             : typedef struct AutoPrewarmSharedState
      65             : {
      66             :     LWLock      lock;           /* mutual exclusion */
      67             :     pid_t       bgworker_pid;   /* PID of the leader bgworker, or InvalidPid if none is running */
      68             :     pid_t       pid_using_dumpfile; /* PID currently reading or writing the dump file, or InvalidPid */
      69             : 
      70             :     /* Following items are for communication with per-database worker */
      71             :     dsm_handle  block_info_handle;  /* DSM segment holding the sorted BlockInfoRecord array */
      72             :     Oid         database;           /* database the per-database worker connects to */
      73             :     int         prewarm_start_idx;  /* first record index for the worker (inclusive) */
      74             :     int         prewarm_stop_idx;   /* end of the worker's record range (exclusive) */
      75             :     int         prewarmed_blocks;   /* number of blocks successfully read in by workers */
      76             : } AutoPrewarmSharedState;
      77             : 
      78             : PGDLLEXPORT void autoprewarm_main(Datum main_arg);  /* leader worker entry point */
      79             : PGDLLEXPORT void autoprewarm_database_main(Datum main_arg); /* per-database worker entry point */
      80             : 
      81           4 : PG_FUNCTION_INFO_V1(autoprewarm_start_worker);
      82           6 : PG_FUNCTION_INFO_V1(autoprewarm_dump_now);
      83             : 
      84             : static void apw_load_buffers(void);
      85             : static int  apw_dump_now(bool is_bgworker, bool dump_unlogged);
      86             : static void apw_start_leader_worker(void);
      87             : static void apw_start_database_worker(void);
      88             : static bool apw_init_shmem(void);
      89             : static void apw_detach_shmem(int code, Datum arg);
      90             : static int  apw_compare_blockinfo(const void *p, const void *q);
      91             : 
      92             : /* Pointer to shared-memory state. */
      93             : static AutoPrewarmSharedState *apw_state = NULL;
      94             : 
      95             : /* GUC variables. */
      96             : static bool autoprewarm = true; /* pg_prewarm.autoprewarm: start the leader worker? */
      97             : static int  autoprewarm_interval = 300; /* pg_prewarm.autoprewarm_interval: dump interval in seconds; 0 disables timed dumps */
      98             : 
      99             : /*
     100             :  * Module load callback: defines the pg_prewarm GUCs and, if enabled, registers the leader worker.
     101             :  */
     102             : void
     103          10 : _PG_init(void)
     104             : {
     105          10 :     DefineCustomIntVariable("pg_prewarm.autoprewarm_interval",
     106             :                             "Sets the interval between dumps of shared buffers",
     107             :                             "If set to zero, time-based dumping is disabled.",
     108             :                             &autoprewarm_interval,
     109             :                             300,
     110             :                             0, INT_MAX / 1000,  /* maximum keeps autoprewarm_interval * 1000 (ms) within int range */
     111             :                             PGC_SIGHUP,
     112             :                             GUC_UNIT_S,
     113             :                             NULL,
     114             :                             NULL,
     115             :                             NULL);
     116             : 
     117          10 :     if (!process_shared_preload_libraries_in_progress)
     118           6 :         return;                 /* only the interval GUC is defined on this path; no worker is registered */
     119             : 
     120             :     /* can't define PGC_POSTMASTER variable after startup */
     121           4 :     DefineCustomBoolVariable("pg_prewarm.autoprewarm",
     122             :                              "Starts the autoprewarm worker.",
     123             :                              NULL,
     124             :                              &autoprewarm,
     125             :                              true,
     126             :                              PGC_POSTMASTER,
     127             :                              0,
     128             :                              NULL,
     129             :                              NULL,
     130             :                              NULL);
     131             : 
     132           4 :     MarkGUCPrefixReserved("pg_prewarm");
     133             : 
     134             :     /* Register autoprewarm worker, if enabled. */
     135           4 :     if (autoprewarm)
     136           4 :         apw_start_leader_worker();
     137             : }
     138             : 
     139             : /*
     140             :  * Main entry point for the leader autoprewarm process.  Per-database workers
     141             :  * have a separate entry point (autoprewarm_database_main).
     142             :  */
     143             : void
     144           4 : autoprewarm_main(Datum main_arg)
     145             : {
     146           4 :     bool        first_time = true;
     147           4 :     bool        final_dump_allowed = true;
     148           4 :     TimestampTz last_dump_time = 0;
     149             : 
     150             :     /* Establish signal handlers; once that's done, unblock signals. */
     151           4 :     pqsignal(SIGTERM, SignalHandlerForShutdownRequest);
     152           4 :     pqsignal(SIGHUP, SignalHandlerForConfigReload);
     153           4 :     pqsignal(SIGUSR1, procsignal_sigusr1_handler);
     154           4 :     BackgroundWorkerUnblockSignals();
     155             : 
     156             :     /* Create (if necessary) and attach to our shared memory area. */
     157           4 :     if (apw_init_shmem())   /* presumably returns true if the state already existed -- TODO confirm */
     158           0 :         first_time = false;
     159             : 
     160             :     /*
     161             :      * Set on-detach hook so that our PID will be cleared on exit.
     162             :      *
     163             :      * NB: Autoprewarm's state is stored in a DSM segment, and DSM segments
     164             :      * are detached before calling the on_shmem_exit callbacks, so we must put
     165             :      * apw_detach_shmem in the before_shmem_exit callback list.
     166             :      */
     167           4 :     before_shmem_exit(apw_detach_shmem, 0);
     168             : 
     169             :     /*
     170             :      * Store our PID in the shared memory area --- unless there's already
     171             :      * another worker running, in which case just exit.
     172             :      */
     173           4 :     LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
     174           4 :     if (apw_state->bgworker_pid != InvalidPid)
     175             :     {
     176           0 :         LWLockRelease(&apw_state->lock);
     177           0 :         ereport(LOG,
     178             :                 (errmsg("autoprewarm worker is already running under PID %d",
     179             :                         (int) apw_state->bgworker_pid)));
     180           0 :         return;
     181             :     }
     182           4 :     apw_state->bgworker_pid = MyProcPid;
     183           4 :     LWLockRelease(&apw_state->lock);
     184             : 
     185             :     /*
     186             :      * Preload buffers from the dump file only if we just created the shared
     187             :      * memory region.  Otherwise, it's either already been done or shouldn't
     188             :      * be done - e.g. because the old dump file has been overwritten since the
     189             :      * server was started.
     190             :      *
     191             :      * There's not much point in performing a dump immediately after we finish
     192             :      * preloading; so, if we do end up preloading, consider the last dump time
     193             :      * to be equal to the current time.
     194             :      *
     195             :      * If apw_load_buffers() is terminated early by a shutdown request,
     196             :      * prevent dumping out our state below the loop, because we'd effectively
     197             :      * just truncate the saved state to however much we'd managed to preload.
     198             :      */
     199           4 :     if (first_time)
     200             :     {
     201           4 :         apw_load_buffers();
     202           4 :         final_dump_allowed = !ShutdownRequestPending;
     203           4 :         last_dump_time = GetCurrentTimestamp();
     204             :     }
     205             : 
     206             :     /* Periodically dump buffers until terminated. */
     207          10 :     while (!ShutdownRequestPending)
     208             :     {
     209             :         /* In case of a SIGHUP, just reload the configuration. */
     210           6 :         if (ConfigReloadPending)
     211             :         {
     212           0 :             ConfigReloadPending = false;
     213           0 :             ProcessConfigFile(PGC_SIGHUP);
     214             :         }
     215             : 
     216           6 :         if (autoprewarm_interval <= 0)
     217             :         {
     218             :             /* We're only dumping at shutdown, so just wait forever. */
     219           6 :             (void) WaitLatch(MyLatch,
     220             :                              WL_LATCH_SET | WL_EXIT_ON_PM_DEATH,
     221             :                              -1L,
     222             :                              PG_WAIT_EXTENSION);
     223             :         }
     224             :         else
     225             :         {
     226             :             TimestampTz next_dump_time;
     227             :             long        delay_in_ms;
     228             : 
     229             :             /* Compute the next dump time. */
     230           0 :             next_dump_time =
     231           0 :                 TimestampTzPlusMilliseconds(last_dump_time,
     232             :                                             autoprewarm_interval * 1000);   /* s -> ms; the GUC maximum of INT_MAX / 1000 keeps this within int range */
     233             :             delay_in_ms =
     234           0 :                 TimestampDifferenceMilliseconds(GetCurrentTimestamp(),
     235             :                                                 next_dump_time);
     236             : 
     237             :             /* Perform a dump if it's time. */
     238           0 :             if (delay_in_ms <= 0)
     239             :             {
     240           0 :                 last_dump_time = GetCurrentTimestamp();
     241           0 :                 apw_dump_now(true, false);  /* is_bgworker = true, dump_unlogged = false */
     242           0 :                 continue;
     243             :             }
     244             : 
     245             :             /* Sleep until the next dump time. */
     246           0 :             (void) WaitLatch(MyLatch,
     247             :                              WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
     248             :                              delay_in_ms,
     249             :                              PG_WAIT_EXTENSION);
     250             :         }
     251             : 
     252             :         /* Reset the latch, loop. */
     253           6 :         ResetLatch(MyLatch);
     254             :     }
     255             : 
     256             :     /*
     257             :      * Dump one last time.  We assume this is probably the result of a system
     258             :      * shutdown, although it's possible that we've merely been terminated.
     259             :      */
     260           4 :     if (final_dump_allowed)
     261           4 :         apw_dump_now(true, true);   /* is_bgworker = true, dump_unlogged = true */
     262             : }
     263             : 
     264             : /*
     265             :  * Read the dump file and launch per-database workers one at a time to
     266             :  * prewarm the buffers found there.
     267             :  */
     268             : static void
     269           4 : apw_load_buffers(void)
     270             : {
     271           4 :     FILE       *file = NULL;
     272             :     int         num_elements,
     273             :                 i;
     274             :     BlockInfoRecord *blkinfo;
     275             :     dsm_segment *seg;
     276             : 
     277             :     /*
     278             :      * Skip the prewarm if the dump file is in use; otherwise, prevent any
     279             :      * other process from writing it while we're using it.
     280             :      */
     281           4 :     LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
     282           4 :     if (apw_state->pid_using_dumpfile == InvalidPid)
     283           4 :         apw_state->pid_using_dumpfile = MyProcPid;
     284             :     else
     285             :     {
     286           0 :         LWLockRelease(&apw_state->lock);
     287           0 :         ereport(LOG,
     288             :                 (errmsg("skipping prewarm because block dump file is being written by PID %d",
     289             :                         (int) apw_state->pid_using_dumpfile)));    /* NOTE(review): read after the lock was released, so the value may have changed */
     290           2 :         return;
     291             :     }
     292           4 :     LWLockRelease(&apw_state->lock);
     293             : 
     294             :     /*
     295             :      * Open the block dump file.  Exit quietly if it doesn't exist, but report
     296             :      * any other error.
     297             :      */
     298           4 :     file = AllocateFile(AUTOPREWARM_FILE, "r");
     299           4 :     if (!file)
     300             :     {
     301           2 :         if (errno == ENOENT)
     302             :         {
     303           2 :             LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
     304           2 :             apw_state->pid_using_dumpfile = InvalidPid;
     305           2 :             LWLockRelease(&apw_state->lock);
     306           2 :             return;             /* No file to load. */
     307             :         }
     308           0 :         ereport(ERROR,          /* NOTE(review): pid_using_dumpfile is not reset on this path; presumably apw_detach_shmem() does it -- confirm */
     309             :                 (errcode_for_file_access(),
     310             :                  errmsg("could not read file \"%s\": %m",
     311             :                         AUTOPREWARM_FILE)));
     312             :     }
     313             : 
     314             :     /* First line of the file is a record count. */
     315           2 :     if (fscanf(file, "<<%d>>\n", &num_elements) != 1)  /* NOTE(review): the count is not range-checked before it sizes the DSM segment below */
     316           0 :         ereport(ERROR,
     317             :                 (errcode_for_file_access(),
     318             :                  errmsg("could not read from file \"%s\": %m",
     319             :                         AUTOPREWARM_FILE)));
     320             : 
     321             :     /* Allocate a dynamic shared memory segment to store the record data. */
     322           2 :     seg = dsm_create(sizeof(BlockInfoRecord) * num_elements, 0);
     323           2 :     blkinfo = (BlockInfoRecord *) dsm_segment_address(seg);
     324             : 
     325             :     /* Read records, one per line. */
     326         398 :     for (i = 0; i < num_elements; i++)
     327             :     {
     328             :         unsigned    forknum;    /* scanned as unsigned, then stored into the ForkNumber field */
     329             : 
     330         396 :         if (fscanf(file, "%u,%u,%u,%u,%u\n", &blkinfo[i].database,
     331         396 :                    &blkinfo[i].tablespace, &blkinfo[i].filenumber,
     332         396 :                    &forknum, &blkinfo[i].blocknum) != 5)
     333           0 :             ereport(ERROR,
     334             :                     (errmsg("autoprewarm block dump file is corrupted at line %d",
     335             :                             i + 1)));
     336         396 :         blkinfo[i].forknum = forknum;
     337             :     }
     338             : 
     339           2 :     FreeFile(file);
     340             : 
     341             :     /* Sort the blocks to be loaded. */
     342           2 :     qsort(blkinfo, num_elements, sizeof(BlockInfoRecord),
     343             :           apw_compare_blockinfo);
     344             : 
     345             :     /* Populate shared memory state. */
     346           2 :     apw_state->block_info_handle = dsm_segment_handle(seg);
     347           2 :     apw_state->prewarm_start_idx = apw_state->prewarm_stop_idx = 0;
     348           2 :     apw_state->prewarmed_blocks = 0;
     349             : 
     350             :     /* Handle one database per iteration; prewarm_start_idx is the index of its first record. */
     351           4 :     while (apw_state->prewarm_start_idx < num_elements)
     352             :     {
     353           2 :         int         j = apw_state->prewarm_start_idx;
     354           2 :         Oid         current_db = blkinfo[j].database;
     355             : 
     356             :         /*
     357             :          * Advance the prewarm_stop_idx to the first BlockInfoRecord that does
     358             :          * not belong to this database.
     359             :          */
     360           2 :         j++;
     361         396 :         while (j < num_elements)
     362             :         {
     363         394 :             if (current_db != blkinfo[j].database)
     364             :             {
     365             :                 /*
     366             :                  * Combine BlockInfoRecords for global objects with those of
     367             :                  * the database.
     368             :                  */
     369           2 :                 if (current_db != InvalidOid)
     370           0 :                     break;
     371           2 :                 current_db = blkinfo[j].database;
     372             :             }
     373             : 
     374         394 :             j++;
     375             :         }
     376             : 
     377             :         /*
     378             :          * If we reach this point with current_db == InvalidOid, then only
     379             :          * BlockInfoRecords belonging to global objects exist.  We can't
     380             :          * prewarm without a database connection, so just bail out.
     381             :          */
     382           2 :         if (current_db == InvalidOid)
     383           0 :             break;
     384             : 
     385             :         /* Configure stop point and database for next per-database worker. */
     386           2 :         apw_state->prewarm_stop_idx = j;
     387           2 :         apw_state->database = current_db;
     388             :         Assert(apw_state->prewarm_start_idx < apw_state->prewarm_stop_idx);
     389             : 
     390             :         /* If we've run out of free buffers, don't launch another worker. */
     391           2 :         if (!have_free_buffer())
     392           0 :             break;
     393             : 
     394             :         /*
     395             :          * Likewise, don't launch if we've already been told to shut down.
     396             :          * (The launch would fail anyway, but we might as well skip it.)
     397             :          */
     398           2 :         if (ShutdownRequestPending)
     399           0 :             break;
     400             : 
     401             :         /*
     402             :          * Start a per-database worker to load blocks for this database; this
     403             :          * function will return once the per-database worker exits.
     404             :          */
     405           2 :         apw_start_database_worker();
     406             : 
     407             :         /* Prepare for next database. */
     408           2 :         apw_state->prewarm_start_idx = apw_state->prewarm_stop_idx;
     409             :     }
     410             : 
     411             :     /* Clean up. */
     412           2 :     dsm_detach(seg);
     413           2 :     LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
     414           2 :     apw_state->block_info_handle = DSM_HANDLE_INVALID;
     415           2 :     apw_state->pid_using_dumpfile = InvalidPid;
     416           2 :     LWLockRelease(&apw_state->lock);
     417             : 
     418             :     /* Report our success, if we were able to finish. */
     419           2 :     if (!ShutdownRequestPending)
     420           2 :         ereport(LOG,
     421             :                 (errmsg("autoprewarm successfully prewarmed %d of %d previously-loaded blocks",
     422             :                         apw_state->prewarmed_blocks, num_elements)));
     423             : }
     424             : 
     425             : /*
     426             :  * Prewarm all blocks for one database (and possibly also global objects, if
     427             :  * those got grouped with this database).  main_arg is not used.
     428             :  */
     429             : void
     430           2 : autoprewarm_database_main(Datum main_arg)
     431             : {
     432             :     int         pos;
     433             :     BlockInfoRecord *block_info;
     434           2 :     Relation    rel = NULL;
     435           2 :     BlockNumber nblocks = 0;    /* size of the current fork; refreshed once per fork below */
     436           2 :     BlockInfoRecord *old_blk = NULL;    /* record handled in the previous iteration, if any */
     437             :     dsm_segment *seg;
     438             : 
     439             :     /* Establish signal handlers; once that's done, unblock signals. */
     440           2 :     pqsignal(SIGTERM, die);
     441           2 :     BackgroundWorkerUnblockSignals();
     442             : 
     443             :     /* Connect to correct database and get block information. */
     444           2 :     apw_init_shmem();           /* return value is not used here */
     445           2 :     seg = dsm_attach(apw_state->block_info_handle);
     446           2 :     if (seg == NULL)
     447           0 :         ereport(ERROR,
     448             :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     449             :                  errmsg("could not map dynamic shared memory segment")));
     450           2 :     BackgroundWorkerInitializeConnectionByOid(apw_state->database, InvalidOid, 0);
     451           2 :     block_info = (BlockInfoRecord *) dsm_segment_address(seg);
     452           2 :     pos = apw_state->prewarm_start_idx;
     453             : 
     454             :     /*
     455             :      * Loop until we run out of blocks to prewarm or until we run out of free
     456             :      * buffers.
     457             :      */
     458         398 :     while (pos < apw_state->prewarm_stop_idx && have_free_buffer())
     459             :     {
     460         396 :         BlockInfoRecord *blk = &block_info[pos++];
     461             :         Buffer      buf;
     462             : 
     463         396 :         CHECK_FOR_INTERRUPTS();
     464             : 
     465             :         /*
     466             :          * Quit if we've reached records for another database. If previous
     467             :          * blocks are of some global objects, then continue pre-warming.
     468             :          */
     469         396 :         if (old_blk != NULL && old_blk->database != blk->database &&
     470           2 :             old_blk->database != 0)
     471           0 :             break;
     472             : 
     473             :         /*
     474             :          * As soon as we encounter a block of a new relation, close the old
     475             :          * relation. Note that rel will be NULL if try_relation_open failed
     476             :          * previously; in that case, there is nothing to close.
     477             :          */
     478         396 :         if (old_blk != NULL && old_blk->filenumber != blk->filenumber &&
     479             :             rel != NULL)
     480             :         {
     481          98 :             relation_close(rel, AccessShareLock);
     482          98 :             rel = NULL;
     483          98 :             CommitTransactionCommand();
     484             :         }
     485             : 
     486             :         /*
     487             :          * Try to open each new relation, but only once, when we first
     488             :          * encounter it. If it's been dropped, skip the associated blocks.
     489             :          */
     490         396 :         if (old_blk == NULL || old_blk->filenumber != blk->filenumber)
     491             :         {
     492             :             Oid         reloid;
     493             : 
     494             :             Assert(rel == NULL);
     495         100 :             StartTransactionCommand();
     496         100 :             reloid = RelidByRelfilenumber(blk->tablespace, blk->filenumber);
     497         100 :             if (OidIsValid(reloid))
     498         100 :                 rel = try_relation_open(reloid, AccessShareLock);
     499             : 
     500         100 :             if (!rel)
     501           0 :                 CommitTransactionCommand();
     502             :         }
     503         396 :         if (!rel)
     504             :         {
     505           0 :             old_blk = blk;
     506           0 :             continue;
     507             :         }
     508             : 
     509             :         /* Once per fork, check for fork existence and size. */
     510         396 :         if (old_blk == NULL ||
     511         394 :             old_blk->filenumber != blk->filenumber ||
     512         296 :             old_blk->forknum != blk->forknum)
     513             :         {
     514             :             /*
     515             :              * smgrexists is not safe for illegal forknum, hence check whether
     516             :              * the passed forknum is valid before using it in smgrexists.
     517             :              */
     518         128 :             if (blk->forknum > InvalidForkNumber &&
     519         256 :                 blk->forknum <= MAX_FORKNUM &&
     520         128 :                 smgrexists(RelationGetSmgr(rel), blk->forknum))
     521         128 :                 nblocks = RelationGetNumberOfBlocksInFork(rel, blk->forknum);
     522             :             else
     523           0 :                 nblocks = 0;
     524             :         }
     525             : 
     526             :         /* Check whether blocknum is valid and within fork file size. */
     527         396 :         if (blk->blocknum >= nblocks)
     528             :         {
     529             :             /* Move to next forknum. */
     530           0 :             old_blk = blk;
     531           0 :             continue;
     532             :         }
     533             : 
     534             :         /* Prewarm buffer. */
     535         396 :         buf = ReadBufferExtended(rel, blk->forknum, blk->blocknum, RBM_NORMAL,
     536             :                                  NULL);
     537         396 :         if (BufferIsValid(buf))
     538             :         {
     539         396 :             apw_state->prewarmed_blocks++;  /* no lock taken; presumably safe because workers run one at a time -- confirm */
     540         396 :             ReleaseBuffer(buf);
     541             :         }
     542             : 
     543         396 :         old_blk = blk;
     544             :     }
     545             : 
     546           2 :     dsm_detach(seg);
     547             : 
     548             :     /* Release lock on previous relation. */
     549           2 :     if (rel)
     550             :     {
     551           2 :         relation_close(rel, AccessShareLock);
     552           2 :         CommitTransactionCommand();
     553             :     }
     554           2 : }
     555             : 
     556             : /*
     557             :  * Dump information on blocks in shared buffers.  We use a text format here
     558             :  * so that it's easy to understand and even change the file contents if
     559             :  * necessary.
     560             :  * Returns the number of blocks dumped.
     561             :  */
     562             : static int
     563           6 : apw_dump_now(bool is_bgworker, bool dump_unlogged)
     564             : {
     565             :     int         num_blocks;
     566             :     int         i;
     567             :     int         ret;
     568             :     BlockInfoRecord *block_info_array;
     569             :     BufferDesc *bufHdr;
     570             :     FILE       *file;
     571             :     char        transient_dump_file_path[MAXPGPATH];
     572             :     pid_t       pid;
     573             : 
     574           6 :     LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
     575           6 :     pid = apw_state->pid_using_dumpfile;
     576           6 :     if (apw_state->pid_using_dumpfile == InvalidPid)
     577           6 :         apw_state->pid_using_dumpfile = MyProcPid;
     578           6 :     LWLockRelease(&apw_state->lock);
     579             : 
     580           6 :     if (pid != InvalidPid)
     581             :     {
     582           0 :         if (!is_bgworker)
     583           0 :             ereport(ERROR,
     584             :                     (errmsg("could not perform block dump because dump file is being used by PID %d",
     585             :                             (int) apw_state->pid_using_dumpfile)));
     586             : 
     587           0 :         ereport(LOG,
     588             :                 (errmsg("skipping block dump because it is already being performed by PID %d",
     589             :                         (int) apw_state->pid_using_dumpfile)));
     590           0 :         return 0;
     591             :     }
     592             : 
     593             :     block_info_array =
     594           6 :         (BlockInfoRecord *) palloc(sizeof(BlockInfoRecord) * NBuffers);
     595             : 
     596       98310 :     for (num_blocks = 0, i = 0; i < NBuffers; i++)
     597             :     {
     598             :         uint32      buf_state;
     599             : 
     600       98304 :         CHECK_FOR_INTERRUPTS();
     601             : 
     602       98304 :         bufHdr = GetBufferDescriptor(i);
     603             : 
     604             :         /* Lock each buffer header before inspecting. */
     605       98304 :         buf_state = LockBufHdr(bufHdr);
     606             : 
     607             :         /*
     608             :          * Unlogged tables will be automatically truncated after a crash or
     609             :          * unclean shutdown. In such cases we need not prewarm them. Dump them
     610             :          * only if requested by caller.
     611             :          */
     612       98304 :         if (buf_state & BM_TAG_VALID &&
     613        1188 :             ((buf_state & BM_PERMANENT) || dump_unlogged))
     614             :         {
     615        1188 :             block_info_array[num_blocks].database = bufHdr->tag.dbOid;
     616        1188 :             block_info_array[num_blocks].tablespace = bufHdr->tag.spcOid;
     617        2376 :             block_info_array[num_blocks].filenumber =
     618        1188 :                 BufTagGetRelNumber(&bufHdr->tag);
     619        2376 :             block_info_array[num_blocks].forknum =
     620        1188 :                 BufTagGetForkNum(&bufHdr->tag);
     621        1188 :             block_info_array[num_blocks].blocknum = bufHdr->tag.blockNum;
     622        1188 :             ++num_blocks;
     623             :         }
     624             : 
     625       98304 :         UnlockBufHdr(bufHdr, buf_state);
     626             :     }
     627             : 
     628           6 :     snprintf(transient_dump_file_path, MAXPGPATH, "%s.tmp", AUTOPREWARM_FILE);
     629           6 :     file = AllocateFile(transient_dump_file_path, "w");
     630           6 :     if (!file)
     631           0 :         ereport(ERROR,
     632             :                 (errcode_for_file_access(),
     633             :                  errmsg("could not open file \"%s\": %m",
     634             :                         transient_dump_file_path)));
     635             : 
     636           6 :     ret = fprintf(file, "<<%d>>\n", num_blocks);
     637           6 :     if (ret < 0)
     638             :     {
     639           0 :         int         save_errno = errno;
     640             : 
     641           0 :         FreeFile(file);
     642           0 :         unlink(transient_dump_file_path);
     643           0 :         errno = save_errno;
     644           0 :         ereport(ERROR,
     645             :                 (errcode_for_file_access(),
     646             :                  errmsg("could not write to file \"%s\": %m",
     647             :                         transient_dump_file_path)));
     648             :     }
     649             : 
     650        1194 :     for (i = 0; i < num_blocks; i++)
     651             :     {
     652        1188 :         CHECK_FOR_INTERRUPTS();
     653             : 
     654        1188 :         ret = fprintf(file, "%u,%u,%u,%u,%u\n",
     655        1188 :                       block_info_array[i].database,
     656        1188 :                       block_info_array[i].tablespace,
     657        1188 :                       block_info_array[i].filenumber,
     658        1188 :                       (uint32) block_info_array[i].forknum,
     659        1188 :                       block_info_array[i].blocknum);
     660        1188 :         if (ret < 0)
     661             :         {
     662           0 :             int         save_errno = errno;
     663             : 
     664           0 :             FreeFile(file);
     665           0 :             unlink(transient_dump_file_path);
     666           0 :             errno = save_errno;
     667           0 :             ereport(ERROR,
     668             :                     (errcode_for_file_access(),
     669             :                      errmsg("could not write to file \"%s\": %m",
     670             :                             transient_dump_file_path)));
     671             :         }
     672             :     }
     673             : 
     674           6 :     pfree(block_info_array);
     675             : 
     676             :     /*
     677             :      * Rename transient_dump_file_path to AUTOPREWARM_FILE to make things
     678             :      * permanent.
     679             :      */
     680           6 :     ret = FreeFile(file);
     681           6 :     if (ret != 0)
     682             :     {
     683           0 :         int         save_errno = errno;
     684             : 
     685           0 :         unlink(transient_dump_file_path);
     686           0 :         errno = save_errno;
     687           0 :         ereport(ERROR,
     688             :                 (errcode_for_file_access(),
     689             :                  errmsg("could not close file \"%s\": %m",
     690             :                         transient_dump_file_path)));
     691             :     }
     692             : 
     693           6 :     (void) durable_rename(transient_dump_file_path, AUTOPREWARM_FILE, ERROR);
     694           6 :     apw_state->pid_using_dumpfile = InvalidPid;
     695             : 
     696           6 :     ereport(DEBUG1,
     697             :             (errmsg_internal("wrote block details for %d blocks", num_blocks)));
     698           6 :     return num_blocks;
     699             : }
     700             : 
     701             : /*
     702             :  * SQL-callable function to launch autoprewarm.
     703             :  */
     704             : Datum
     705           0 : autoprewarm_start_worker(PG_FUNCTION_ARGS)
     706             : {
     707             :     pid_t       pid;
     708             : 
     709           0 :     if (!autoprewarm)
     710           0 :         ereport(ERROR,
     711             :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     712             :                  errmsg("autoprewarm is disabled")));
     713             : 
     714           0 :     apw_init_shmem();
     715           0 :     LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
     716           0 :     pid = apw_state->bgworker_pid;
     717           0 :     LWLockRelease(&apw_state->lock);
     718             : 
     719           0 :     if (pid != InvalidPid)
     720           0 :         ereport(ERROR,
     721             :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     722             :                  errmsg("autoprewarm worker is already running under PID %d",
     723             :                         (int) pid)));
     724             : 
     725           0 :     apw_start_leader_worker();
     726             : 
     727           0 :     PG_RETURN_VOID();
     728             : }
     729             : 
     730             : /*
     731             :  * SQL-callable function to perform an immediate block dump.
     732             :  *
     733             :  * Note: this is declared to return int8, as insurance against some
     734             :  * very distant day when we might make NBuffers wider than int.
     735             :  */
     736             : Datum
     737           2 : autoprewarm_dump_now(PG_FUNCTION_ARGS)
     738             : {
     739             :     int         num_blocks;
     740             : 
     741           2 :     apw_init_shmem();
     742             : 
     743           2 :     PG_ENSURE_ERROR_CLEANUP(apw_detach_shmem, 0);
     744             :     {
     745           2 :         num_blocks = apw_dump_now(false, true);
     746             :     }
     747           2 :     PG_END_ENSURE_ERROR_CLEANUP(apw_detach_shmem, 0);
     748             : 
     749           2 :     PG_RETURN_INT64((int64) num_blocks);
     750             : }
     751             : 
     752             : static void
     753           4 : apw_init_state(void *ptr)
     754             : {
     755           4 :     AutoPrewarmSharedState *state = (AutoPrewarmSharedState *) ptr;
     756             : 
     757           4 :     LWLockInitialize(&state->lock, LWLockNewTrancheId());
     758           4 :     state->bgworker_pid = InvalidPid;
     759           4 :     state->pid_using_dumpfile = InvalidPid;
     760           4 : }
     761             : 
     762             : /*
     763             :  * Allocate and initialize autoprewarm related shared memory, if not already
     764             :  * done, and set up backend-local pointer to that state.  Returns true if an
     765             :  * existing shared memory segment was found.
     766             :  */
     767             : static bool
     768           8 : apw_init_shmem(void)
     769             : {
     770             :     bool        found;
     771             : 
     772           8 :     apw_state = GetNamedDSMSegment("autoprewarm",
     773             :                                    sizeof(AutoPrewarmSharedState),
     774             :                                    apw_init_state,
     775             :                                    &found);
     776           8 :     LWLockRegisterTranche(apw_state->lock.tranche, "autoprewarm");
     777             : 
     778           8 :     return found;
     779             : }
     780             : 
     781             : /*
     782             :  * Clear our PID from autoprewarm shared state.
     783             :  */
     784             : static void
     785           4 : apw_detach_shmem(int code, Datum arg)
     786             : {
     787           4 :     LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
     788           4 :     if (apw_state->pid_using_dumpfile == MyProcPid)
     789           0 :         apw_state->pid_using_dumpfile = InvalidPid;
     790           4 :     if (apw_state->bgworker_pid == MyProcPid)
     791           4 :         apw_state->bgworker_pid = InvalidPid;
     792           4 :     LWLockRelease(&apw_state->lock);
     793           4 : }
     794             : 
     795             : /*
     796             :  * Start autoprewarm leader worker process.
     797             :  */
     798             : static void
     799           4 : apw_start_leader_worker(void)
     800             : {
     801             :     BackgroundWorker worker;
     802             :     BackgroundWorkerHandle *handle;
     803             :     BgwHandleStatus status;
     804             :     pid_t       pid;
     805             : 
     806           4 :     MemSet(&worker, 0, sizeof(BackgroundWorker));
     807           4 :     worker.bgw_flags = BGWORKER_SHMEM_ACCESS;
     808           4 :     worker.bgw_start_time = BgWorkerStart_ConsistentState;
     809           4 :     strcpy(worker.bgw_library_name, "pg_prewarm");
     810           4 :     strcpy(worker.bgw_function_name, "autoprewarm_main");
     811           4 :     strcpy(worker.bgw_name, "autoprewarm leader");
     812           4 :     strcpy(worker.bgw_type, "autoprewarm leader");
     813             : 
     814           4 :     if (process_shared_preload_libraries_in_progress)
     815             :     {
     816           4 :         RegisterBackgroundWorker(&worker);
     817           4 :         return;
     818             :     }
     819             : 
     820             :     /* must set notify PID to wait for startup */
     821           0 :     worker.bgw_notify_pid = MyProcPid;
     822             : 
     823           0 :     if (!RegisterDynamicBackgroundWorker(&worker, &handle))
     824           0 :         ereport(ERROR,
     825             :                 (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
     826             :                  errmsg("could not register background process"),
     827             :                  errhint("You may need to increase \"max_worker_processes\".")));
     828             : 
     829           0 :     status = WaitForBackgroundWorkerStartup(handle, &pid);
     830           0 :     if (status != BGWH_STARTED)
     831           0 :         ereport(ERROR,
     832             :                 (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
     833             :                  errmsg("could not start background process"),
     834             :                  errhint("More details may be available in the server log.")));
     835             : }
     836             : 
     837             : /*
     838             :  * Start autoprewarm per-database worker process.
     839             :  */
     840             : static void
     841           2 : apw_start_database_worker(void)
     842             : {
     843             :     BackgroundWorker worker;
     844             :     BackgroundWorkerHandle *handle;
     845             : 
     846           2 :     MemSet(&worker, 0, sizeof(BackgroundWorker));
     847           2 :     worker.bgw_flags =
     848             :         BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION;
     849           2 :     worker.bgw_start_time = BgWorkerStart_ConsistentState;
     850           2 :     worker.bgw_restart_time = BGW_NEVER_RESTART;
     851           2 :     strcpy(worker.bgw_library_name, "pg_prewarm");
     852           2 :     strcpy(worker.bgw_function_name, "autoprewarm_database_main");
     853           2 :     strcpy(worker.bgw_name, "autoprewarm worker");
     854           2 :     strcpy(worker.bgw_type, "autoprewarm worker");
     855             : 
     856             :     /* must set notify PID to wait for shutdown */
     857           2 :     worker.bgw_notify_pid = MyProcPid;
     858             : 
     859           2 :     if (!RegisterDynamicBackgroundWorker(&worker, &handle))
     860           0 :         ereport(ERROR,
     861             :                 (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
     862             :                  errmsg("registering dynamic bgworker autoprewarm failed"),
     863             :                  errhint("Consider increasing the configuration parameter \"%s\".", "max_worker_processes")));
     864             : 
     865             :     /*
     866             :      * Ignore return value; if it fails, postmaster has died, but we have
     867             :      * checks for that elsewhere.
     868             :      */
     869           2 :     WaitForBackgroundWorkerShutdown(handle);
     870           2 : }
     871             : 
     872             : /* Compare member elements to check whether they are not equal. */
     873             : #define cmp_member_elem(fld)    \
     874             : do { \
     875             :     if (a->fld < b->fld)       \
     876             :         return -1;              \
     877             :     else if (a->fld > b->fld)  \
     878             :         return 1;               \
     879             : } while(0)
     880             : 
     881             : /*
     882             :  * apw_compare_blockinfo
     883             :  *
     884             :  * We depend on all records for a particular database being consecutive
     885             :  * in the dump file; each per-database worker will preload blocks until
     886             :  * it sees a block for some other database.  Sorting by tablespace,
     887             :  * filenumber, forknum, and blocknum isn't critical for correctness, but
     888             :  * helps us get a sequential I/O pattern.
     889             :  */
     890             : static int
     891        3246 : apw_compare_blockinfo(const void *p, const void *q)
     892             : {
     893        3246 :     const BlockInfoRecord *a = (const BlockInfoRecord *) p;
     894        3246 :     const BlockInfoRecord *b = (const BlockInfoRecord *) q;
     895             : 
     896        3246 :     cmp_member_elem(database);
     897        3128 :     cmp_member_elem(tablespace);
     898        3128 :     cmp_member_elem(filenumber);
     899        1150 :     cmp_member_elem(forknum);
     900         990 :     cmp_member_elem(blocknum);
     901             : 
     902           0 :     return 0;
     903             : }

Generated by: LCOV version 1.14