LCOV - code coverage report
Current view: top level - contrib/pg_prewarm - autoprewarm.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 236 307 76.9 %
Date: 2023-12-10 05:11:07 Functions: 14 15 93.3 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * autoprewarm.c
       4             :  *      Periodically dump information about the blocks present in
       5             :  *      shared_buffers, and reload them on server restart.
       6             :  *
       7             :  *      Due to locking considerations, we can't actually begin prewarming
       8             :  *      until the server reaches a consistent state.  We need the catalogs
       9             :  *      to be consistent so that we can figure out which relation to lock,
      10             :  *      and we need to lock the relations so that we don't try to prewarm
      11             :  *      pages from a relation that is in the process of being dropped.
      12             :  *
      13             :  *      While prewarming, autoprewarm will use two workers.  There's a
      14             :  *      leader worker that reads and sorts the list of blocks to be
      15             :  *      prewarmed and then launches a per-database worker for each
      16             :  *      relevant database in turn.  The former keeps running after the
      17             :  *      initial prewarm is complete to update the dump file periodically.
      18             :  *
      19             :  *  Copyright (c) 2016-2023, PostgreSQL Global Development Group
      20             :  *
      21             :  *  IDENTIFICATION
      22             :  *      contrib/pg_prewarm/autoprewarm.c
      23             :  *
      24             :  *-------------------------------------------------------------------------
      25             :  */
      26             : 
      27             : #include "postgres.h"
      28             : 
      29             : #include <unistd.h>
      30             : 
      31             : #include "access/relation.h"
      32             : #include "access/xact.h"
      33             : #include "catalog/pg_class.h"
      34             : #include "catalog/pg_type.h"
      35             : #include "miscadmin.h"
      36             : #include "pgstat.h"
      37             : #include "postmaster/bgworker.h"
      38             : #include "postmaster/interrupt.h"
      39             : #include "storage/buf_internals.h"
      40             : #include "storage/dsm.h"
      41             : #include "storage/fd.h"
      42             : #include "storage/ipc.h"
      43             : #include "storage/latch.h"
      44             : #include "storage/lwlock.h"
      45             : #include "storage/proc.h"
      46             : #include "storage/procsignal.h"
      47             : #include "storage/shmem.h"
      48             : #include "storage/smgr.h"
      49             : #include "tcop/tcopprot.h"
      50             : #include "utils/acl.h"
      51             : #include "utils/datetime.h"
      52             : #include "utils/guc.h"
      53             : #include "utils/memutils.h"
      54             : #include "utils/rel.h"
      55             : #include "utils/relfilenumbermap.h"
      56             : #include "utils/resowner.h"
      57             : 
      58             : #define AUTOPREWARM_FILE "autoprewarm.blocks"
      59             : 
      60             : /* Metadata for each block we dump; apw_load_buffers reads one record per line as five comma-separated unsigned integers. */
      61             : typedef struct BlockInfoRecord
      62             : {
      63             :     Oid         database;       /* database OID; InvalidOid records are grouped with the next database as global objects */
      64             :     Oid         tablespace;     /* passed together with filenumber to RelidByRelfilenumber() */
      65             :     RelFileNumber filenumber;   /* relation file number; a change marks a new relation while prewarming */
      66             :     ForkNumber  forknum;        /* range-checked against InvalidForkNumber and MAX_FORKNUM before use */
      67             :     BlockNumber blocknum;       /* block is skipped when this is >= the current size of the fork */
      68             : } BlockInfoRecord;
      69             : 
      70             : /* Shared state information for autoprewarm bgworker. */
      71             : typedef struct AutoPrewarmSharedState
      72             : {
      73             :     LWLock      lock;           /* mutual exclusion; taken around bgworker_pid and pid_using_dumpfile updates */
      74             :     pid_t       bgworker_pid;   /* PID of the main (leader) bgworker, or InvalidPid if none has registered */
      75             :     pid_t       pid_using_dumpfile; /* PID currently loading or dumping the block file, or InvalidPid */
      76             : 
      77             :     /* Following items are for communication with per-database worker */
      78             :     dsm_handle  block_info_handle;  /* DSM segment holding the sorted BlockInfoRecord array; DSM_HANDLE_INVALID once loading is done */
      79             :     Oid         database;           /* database the next per-database worker connects to */
      80             :     int         prewarm_start_idx;  /* first array index for the worker (inclusive) */
      81             :     int         prewarm_stop_idx;   /* end of the worker range (exclusive) */
      82             :     int         prewarmed_blocks;   /* count of buffers read by workers; zeroed by the leader before the first launch */
      83             : } AutoPrewarmSharedState;
      84             : 
      85             : PGDLLEXPORT void autoprewarm_main(Datum main_arg);  /* entry point of the leader worker */
      86             : PGDLLEXPORT void autoprewarm_database_main(Datum main_arg); /* entry point of a per-database worker */
      87             : 
      88           4 : PG_FUNCTION_INFO_V1(autoprewarm_start_worker);
      89           6 : PG_FUNCTION_INFO_V1(autoprewarm_dump_now);
      90             : 
      91             : static void apw_load_buffers(void);
      92             : static int  apw_dump_now(bool is_bgworker, bool dump_unlogged); /* returns the number of blocks dumped */
      93             : static void apw_start_leader_worker(void);
      94             : static void apw_start_database_worker(void);    /* returns once the per-database worker exits */
      95             : static bool apw_init_shmem(void);
      96             : static void apw_detach_shmem(int code, Datum arg);  /* registered with on_shmem_exit() by the leader */
      97             : static int  apw_compare_blockinfo(const void *p, const void *q);    /* comparator handed to pg_qsort() */
      98             : static void autoprewarm_shmem_request(void);
      99             : static shmem_request_hook_type prev_shmem_request_hook = NULL;  /* hook installed before ours; chained if non-NULL */
     100             : 
     101             : /* Pointer to shared-memory state. */
     102             : static AutoPrewarmSharedState *apw_state = NULL;
     103             : 
     104             : /* GUC variables. */
     105             : static bool autoprewarm = true; /* start worker? (pg_prewarm.autoprewarm, PGC_POSTMASTER) */
     106             : static int  autoprewarm_interval = 300; /* dump interval in seconds (pg_prewarm.autoprewarm_interval); 0 disables timed dumps */
     107             : 
     108             : /*
     109             :  * Module load callback.  Always defines pg_prewarm.autoprewarm_interval; the remaining setup (pg_prewarm.autoprewarm, prefix reservation, shmem request hook, leader worker launch) runs only while shared_preload_libraries is being processed.
     110             :  */
     111             : void
     112          10 : _PG_init(void)
     113             : {
     114          10 :     DefineCustomIntVariable("pg_prewarm.autoprewarm_interval",
     115             :                             "Sets the interval between dumps of shared buffers",
     116             :                             "If set to zero, time-based dumping is disabled.",
     117             :                             &autoprewarm_interval,
     118             :                             300,
     119             :                             0, INT_MAX / 1000,   /* upper bound keeps autoprewarm_interval * 1000 (milliseconds) within int range */
     120             :                             PGC_SIGHUP,
     121             :                             GUC_UNIT_S,
     122             :                             NULL,
     123             :                             NULL,
     124             :                             NULL);
     125             : 
     126          10 :     if (!process_shared_preload_libraries_in_progress)
     127           6 :         return;                 /* loaded some other way: only the interval GUC gets defined */
     128             : 
     129             :     /* can't define PGC_POSTMASTER variable after startup */
     130           4 :     DefineCustomBoolVariable("pg_prewarm.autoprewarm",
     131             :                              "Starts the autoprewarm worker.",
     132             :                              NULL,
     133             :                              &autoprewarm,
     134             :                              true,
     135             :                              PGC_POSTMASTER,
     136             :                              0,
     137             :                              NULL,
     138             :                              NULL,
     139             :                              NULL);
     140             : 
     141           4 :     MarkGUCPrefixReserved("pg_prewarm");
     142             : 
     143           4 :     prev_shmem_request_hook = shmem_request_hook;   /* remember the previous hook so ours can chain to it */
     144           4 :     shmem_request_hook = autoprewarm_shmem_request;
     145             : 
     146             :     /* Register autoprewarm worker, if enabled. */
     147           4 :     if (autoprewarm)
     148           4 :         apw_start_leader_worker();
     149             : }
     150             : 
     151             : /*
     152             :  * Requests any additional shared memory required for autoprewarm.  Installed as shmem_request_hook by _PG_init; calls the previously installed hook first, if there was one.
     153             :  */
     154             : static void
     155           4 : autoprewarm_shmem_request(void)
     156             : {
     157           4 :     if (prev_shmem_request_hook)
     158           0 :         prev_shmem_request_hook();
     159             : 
     160           4 :     RequestAddinShmemSpace(MAXALIGN(sizeof(AutoPrewarmSharedState)));   /* room for one AutoPrewarmSharedState, rounded up by MAXALIGN */
     161           4 : }
     162             : 
     163             : /*
     164             :  * Main entry point for the leader autoprewarm process.  Per-database workers
     165             :  * have a separate entry point.  Flow: install signal handlers, claim bgworker_pid, optionally preload from the dump file, then dump periodically until shutdown and once more on the way out.  main_arg is not referenced in this function.
     166             :  */
     167             : void
     168           4 : autoprewarm_main(Datum main_arg)
     169             : {
     170           4 :     bool        first_time = true;
     171           4 :     bool        final_dump_allowed = true;
     172           4 :     TimestampTz last_dump_time = 0;
     173             : 
     174             :     /* Establish signal handlers; once that's done, unblock signals. */
     175           4 :     pqsignal(SIGTERM, SignalHandlerForShutdownRequest);
     176           4 :     pqsignal(SIGHUP, SignalHandlerForConfigReload);
     177           4 :     pqsignal(SIGUSR1, procsignal_sigusr1_handler);
     178           4 :     BackgroundWorkerUnblockSignals();
     179             : 
     180             :     /* Create (if necessary) and attach to our shared memory area. */
     181           4 :     if (apw_init_shmem())       /* NOTE(review): a true result presumably means the area already existed -- confirm in apw_init_shmem */
     182           0 :         first_time = false;
     183             : 
     184             :     /* Set on-detach hook so that our PID will be cleared on exit. */
     185           4 :     on_shmem_exit(apw_detach_shmem, 0);
     186             : 
     187             :     /*
     188             :      * Store our PID in the shared memory area --- unless there's already
     189             :      * another worker running, in which case just exit.
     190             :      */
     191           4 :     LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
     192           4 :     if (apw_state->bgworker_pid != InvalidPid)
     193             :     {
     194           0 :         LWLockRelease(&apw_state->lock);
     195           0 :         ereport(LOG,
     196             :                 (errmsg("autoprewarm worker is already running under PID %d",
     197             :                         (int) apw_state->bgworker_pid)));  /* NOTE(review): read after the lock was released above */
     198           0 :         return;
     199             :     }
     200           4 :     apw_state->bgworker_pid = MyProcPid;
     201           4 :     LWLockRelease(&apw_state->lock);
     202             : 
     203             :     /*
     204             :      * Preload buffers from the dump file only if we just created the shared
     205             :      * memory region.  Otherwise, it's either already been done or shouldn't
     206             :      * be done - e.g. because the old dump file has been overwritten since the
     207             :      * server was started.
     208             :      *
     209             :      * There's not much point in performing a dump immediately after we finish
     210             :      * preloading; so, if we do end up preloading, consider the last dump time
     211             :      * to be equal to the current time.
     212             :      *
     213             :      * If apw_load_buffers() is terminated early by a shutdown request,
     214             :      * prevent dumping out our state below the loop, because we'd effectively
     215             :      * just truncate the saved state to however much we'd managed to preload.
     216             :      */
     217           4 :     if (first_time)
     218             :     {
     219           4 :         apw_load_buffers();
     220           4 :         final_dump_allowed = !ShutdownRequestPending;  /* false only if shutdown was requested during the preload */
     221           4 :         last_dump_time = GetCurrentTimestamp();
     222             :     }
     223             : 
     224             :     /* Periodically dump buffers until terminated. */
     225          10 :     while (!ShutdownRequestPending)
     226             :     {
     227             :         /* In case of a SIGHUP, just reload the configuration. */
     228           6 :         if (ConfigReloadPending)
     229             :         {
     230           0 :             ConfigReloadPending = false;
     231           0 :             ProcessConfigFile(PGC_SIGHUP);
     232             :         }
     233             : 
     234           6 :         if (autoprewarm_interval <= 0)
     235             :         {
     236             :             /* We're only dumping at shutdown, so just wait forever. */
     237           6 :             (void) WaitLatch(MyLatch,
     238             :                              WL_LATCH_SET | WL_EXIT_ON_PM_DEATH,
     239             :                              -1L,                /* WL_TIMEOUT is not requested in this call */
     240             :                              WAIT_EVENT_EXTENSION);
     241             :         }
     242             :         else
     243             :         {
     244             :             TimestampTz next_dump_time;
     245             :             long        delay_in_ms;
     246             : 
     247             :             /* Compute the next dump time. */
     248           0 :             next_dump_time =
     249           0 :                 TimestampTzPlusMilliseconds(last_dump_time,
     250             :                                             autoprewarm_interval * 1000);   /* seconds to milliseconds */
     251             :             delay_in_ms =
     252           0 :                 TimestampDifferenceMilliseconds(GetCurrentTimestamp(),
     253             :                                                 next_dump_time);
     254             : 
     255             :             /* Perform a dump if it's time. */
     256           0 :             if (delay_in_ms <= 0)
     257             :             {
     258           0 :                 last_dump_time = GetCurrentTimestamp();
     259           0 :                 apw_dump_now(true, false);  /* is_bgworker = true, dump_unlogged = false */
     260           0 :                 continue;                   /* skip the wait and latch reset; go back to the loop condition */
     261             :             }
     262             : 
     263             :             /* Sleep until the next dump time. */
     264           0 :             (void) WaitLatch(MyLatch,
     265             :                              WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
     266             :                              delay_in_ms,
     267             :                              WAIT_EVENT_EXTENSION);
     268             :         }
     269             : 
     270             :         /* Reset the latch, loop. */
     271           6 :         ResetLatch(MyLatch);
     272             :     }
     273             : 
     274             :     /*
     275             :      * Dump one last time.  We assume this is probably the result of a system
     276             :      * shutdown, although it's possible that we've merely been terminated.
     277             :      */
     278           4 :     if (final_dump_allowed)
     279           4 :         apw_dump_now(true, true);   /* is_bgworker = true, dump_unlogged = true */
     280             : }
     281             : 
     282             : /*
     283             :  * Read the dump file and launch per-database workers one at a time to
     284             :  * prewarm the buffers found there.  Claims pid_using_dumpfile for the duration and resets it to InvalidPid on the missing-file return and at the final cleanup.
     285             :  */
     286             : static void
     287           4 : apw_load_buffers(void)
     288             : {
     289           4 :     FILE       *file = NULL;
     290             :     int         num_elements,
     291             :                 i;
     292             :     BlockInfoRecord *blkinfo;
     293             :     dsm_segment *seg;
     294             : 
     295             :     /*
     296             :      * Skip the prewarm if the dump file is in use; otherwise, prevent any
     297             :      * other process from writing it while we're using it.
     298             :      */
     299           4 :     LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
     300           4 :     if (apw_state->pid_using_dumpfile == InvalidPid)
     301           4 :         apw_state->pid_using_dumpfile = MyProcPid;
     302             :     else
     303             :     {
     304           0 :         LWLockRelease(&apw_state->lock);
     305           0 :         ereport(LOG,
     306             :                 (errmsg("skipping prewarm because block dump file is being written by PID %d",
     307             :                         (int) apw_state->pid_using_dumpfile)));    /* NOTE(review): read after the lock was released above */
     308           2 :         return;
     309             :     }
     310           4 :     LWLockRelease(&apw_state->lock);
     311             : 
     312             :     /*
     313             :      * Open the block dump file.  Exit quietly if it doesn't exist, but report
     314             :      * any other error.
     315             :      */
     316           4 :     file = AllocateFile(AUTOPREWARM_FILE, "r");
     317           4 :     if (!file)
     318             :     {
     319           2 :         if (errno == ENOENT)
     320             :         {
     321           2 :             LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
     322           2 :             apw_state->pid_using_dumpfile = InvalidPid;    /* give the dump file claim back before returning */
     323           2 :             LWLockRelease(&apw_state->lock);
     324           2 :             return;             /* No file to load. */
     325             :         }
     326           0 :         ereport(ERROR,
     327             :                 (errcode_for_file_access(),
     328             :                  errmsg("could not read file \"%s\": %m",
     329             :                         AUTOPREWARM_FILE)));    /* NOTE(review): pid_using_dumpfile is still claimed on this path; presumably cleaned up at process exit -- confirm */
     330             :     }
     331             : 
     332             :     /* First line of the file is a record count. */
     333           2 :     if (fscanf(file, "<<%d>>\n", &num_elements) != 1)
     334           0 :         ereport(ERROR,
     335             :                 (errcode_for_file_access(),
     336             :                  errmsg("could not read from file \"%s\": %m",
     337             :                         AUTOPREWARM_FILE)));
     338             : 
     339             :     /* Allocate a dynamic shared memory segment to store the record data. */
     340           2 :     seg = dsm_create(sizeof(BlockInfoRecord) * num_elements, 0);   /* NOTE(review): num_elements comes straight from the file and is not range-checked here */
     341           2 :     blkinfo = (BlockInfoRecord *) dsm_segment_address(seg);
     342             : 
     343             :     /* Read records, one per line. */
     344         356 :     for (i = 0; i < num_elements; i++)
     345             :     {
     346             :         unsigned    forknum;
     347             : 
     348         354 :         if (fscanf(file, "%u,%u,%u,%u,%u\n", &blkinfo[i].database,
     349         354 :                    &blkinfo[i].tablespace, &blkinfo[i].filenumber,
     350         354 :                    &forknum, &blkinfo[i].blocknum) != 5)
     351           0 :             ereport(ERROR,
     352             :                     (errmsg("autoprewarm block dump file is corrupted at line %d",
     353             :                             i + 1)));
     354         354 :         blkinfo[i].forknum = forknum;   /* scanned into an unsigned temporary, then stored in the ForkNumber field */
     355             :     }
     356             : 
     357           2 :     FreeFile(file);
     358             : 
     359             :     /* Sort the blocks to be loaded. */
     360           2 :     pg_qsort(blkinfo, num_elements, sizeof(BlockInfoRecord),
     361             :              apw_compare_blockinfo);
     362             : 
     363             :     /* Populate shared memory state. */
     364           2 :     apw_state->block_info_handle = dsm_segment_handle(seg);
     365           2 :     apw_state->prewarm_start_idx = apw_state->prewarm_stop_idx = 0;
     366           2 :     apw_state->prewarmed_blocks = 0;
     367             : 
     368             :     /* Get the info position of the first block of the next database. */
     369           4 :     while (apw_state->prewarm_start_idx < num_elements)
     370             :     {
     371           2 :         int         j = apw_state->prewarm_start_idx;
     372           2 :         Oid         current_db = blkinfo[j].database;
     373             : 
     374             :         /*
     375             :          * Advance the prewarm_stop_idx to the first BlockInfoRecord that does
     376             :          * not belong to this database.
     377             :          */
     378           2 :         j++;
     379         354 :         while (j < num_elements)
     380             :         {
     381         352 :             if (current_db != blkinfo[j].database)
     382             :             {
     383             :                 /*
     384             :                  * Combine BlockInfoRecords for global objects with those of
     385             :                  * the database.
     386             :                  */
     387           2 :                 if (current_db != InvalidOid)
     388           0 :                     break;
     389           2 :                 current_db = blkinfo[j].database;   /* leading InvalidOid records get adopted by the first real database */
     390             :             }
     391             : 
     392         352 :             j++;
     393             :         }
     394             : 
     395             :         /*
     396             :          * If we reach this point with current_db == InvalidOid, then only
     397             :          * BlockInfoRecords belonging to global objects exist.  We can't
     398             :          * prewarm without a database connection, so just bail out.
     399             :          */
     400           2 :         if (current_db == InvalidOid)
     401           0 :             break;
     402             : 
     403             :         /* Configure stop point and database for next per-database worker. */
     404           2 :         apw_state->prewarm_stop_idx = j;
     405           2 :         apw_state->database = current_db;
     406             :         Assert(apw_state->prewarm_start_idx < apw_state->prewarm_stop_idx);
     407             : 
     408             :         /* If we've run out of free buffers, don't launch another worker. */
     409           2 :         if (!have_free_buffer())
     410           0 :             break;
     411             : 
     412             :         /*
     413             :          * Likewise, don't launch if we've already been told to shut down.
     414             :          * (The launch would fail anyway, but we might as well skip it.)
     415             :          */
     416           2 :         if (ShutdownRequestPending)
     417           0 :             break;
     418             : 
     419             :         /*
     420             :          * Start a per-database worker to load blocks for this database; this
     421             :          * function will return once the per-database worker exits.
     422             :          */
     423           2 :         apw_start_database_worker();
     424             : 
     425             :         /* Prepare for next database. */
     426           2 :         apw_state->prewarm_start_idx = apw_state->prewarm_stop_idx;
     427             :     }
     428             : 
     429             :     /* Clean up. */
     430           2 :     dsm_detach(seg);
     431           2 :     LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
     432           2 :     apw_state->block_info_handle = DSM_HANDLE_INVALID;
     433           2 :     apw_state->pid_using_dumpfile = InvalidPid;
     434           2 :     LWLockRelease(&apw_state->lock);
     435             : 
     436             :     /* Report our success, if we were able to finish. */
     437           2 :     if (!ShutdownRequestPending)
     438           2 :         ereport(LOG,
     439             :                 (errmsg("autoprewarm successfully prewarmed %d of %d previously-loaded blocks",
     440             :                         apw_state->prewarmed_blocks, num_elements)));
     441             : }
     442             : 
     443             : /*
     444             :  * Prewarm all blocks for one database (and possibly also global objects, if
     445             :  * those got grouped with this database).  The database, DSM handle and index range come from apw_state; main_arg is not referenced in this function.
     446             :  */
     447             : void
     448           2 : autoprewarm_database_main(Datum main_arg)
     449             : {
     450             :     int         pos;
     451             :     BlockInfoRecord *block_info;
     452           2 :     Relation    rel = NULL;
     453           2 :     BlockNumber nblocks = 0;
     454           2 :     BlockInfoRecord *old_blk = NULL;
     455             :     dsm_segment *seg;
     456             : 
     457             :     /* Establish signal handlers; once that's done, unblock signals. */
     458           2 :     pqsignal(SIGTERM, die);
     459           2 :     BackgroundWorkerUnblockSignals();
     460             : 
     461             :     /* Connect to correct database and get block information. */
     462           2 :     apw_init_shmem();           /* return value is ignored here; the call is made so apw_state can be used below */
     463           2 :     seg = dsm_attach(apw_state->block_info_handle);
     464           2 :     if (seg == NULL)
     465           0 :         ereport(ERROR,
     466             :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     467             :                  errmsg("could not map dynamic shared memory segment")));
     468           2 :     BackgroundWorkerInitializeConnectionByOid(apw_state->database, InvalidOid, 0);
     469           2 :     block_info = (BlockInfoRecord *) dsm_segment_address(seg);
     470           2 :     pos = apw_state->prewarm_start_idx;
     471             : 
     472             :     /*
     473             :      * Loop until we run out of blocks to prewarm or until we run out of free
     474             :      * buffers.
     475             :      */
     476         356 :     while (pos < apw_state->prewarm_stop_idx && have_free_buffer())
     477             :     {
     478         354 :         BlockInfoRecord *blk = &block_info[pos++];
     479             :         Buffer      buf;
     480             : 
     481         354 :         CHECK_FOR_INTERRUPTS();
     482             : 
     483             :         /*
     484             :          * Quit if we've reached records for another database. If previous
     485             :          * blocks are of some global objects, then continue pre-warming.
     486             :          */
     487         354 :         if (old_blk != NULL && old_blk->database != blk->database &&
     488           2 :             old_blk->database != 0)
     489           0 :             break;
     490             : 
     491             :         /*
     492             :          * As soon as we encounter a block of a new relation, close the old
     493             :          * relation. Note that rel will be NULL if try_relation_open failed
     494             :          * previously; in that case, there is nothing to close.
     495             :          */
     496         354 :         if (old_blk != NULL && old_blk->filenumber != blk->filenumber &&
     497             :             rel != NULL)
     498             :         {
     499          90 :             relation_close(rel, AccessShareLock);
     500          90 :             rel = NULL;
     501          90 :             CommitTransactionCommand();    /* ends the transaction started when this relation was opened */
     502             :         }
     503             : 
     504             :         /*
     505             :          * Try to open each new relation, but only once, when we first
     506             :          * encounter it. If it's been dropped, skip the associated blocks.
     507             :          */
     508         354 :         if (old_blk == NULL || old_blk->filenumber != blk->filenumber)
     509             :         {
     510             :             Oid         reloid;
     511             : 
     512             :             Assert(rel == NULL);
     513          92 :             StartTransactionCommand();
     514          92 :             reloid = RelidByRelfilenumber(blk->tablespace, blk->filenumber);
     515          92 :             if (OidIsValid(reloid))
     516          92 :                 rel = try_relation_open(reloid, AccessShareLock);
     517             : 
     518          92 :             if (!rel)
     519           0 :                 CommitTransactionCommand();    /* nothing was opened, so close the transaction right away */
     520             :         }
     521         354 :         if (!rel)
     522             :         {
     523           0 :             old_blk = blk;      /* remember this record so later blocks of the same relation are skipped without retrying the open */
     524           0 :             continue;
     525             :         }
     526             : 
     527             :         /* Once per fork, check for fork existence and size. */
     528         354 :         if (old_blk == NULL ||
     529         352 :             old_blk->filenumber != blk->filenumber ||
     530         262 :             old_blk->forknum != blk->forknum)
     531             :         {
     532             :             /*
     533             :              * smgrexists is not safe for illegal forknum, hence check whether
     534             :              * the passed forknum is valid before using it in smgrexists.
     535             :              */
     536         120 :             if (blk->forknum > InvalidForkNumber &&
     537         240 :                 blk->forknum <= MAX_FORKNUM &&
     538         120 :                 smgrexists(RelationGetSmgr(rel), blk->forknum))
     539         120 :                 nblocks = RelationGetNumberOfBlocksInFork(rel, blk->forknum);
     540             :             else
     541           0 :                 nblocks = 0;    /* invalid or missing fork: every block of it fails the range check below */
     542             :         }
     543             : 
     544             :         /* Check whether blocknum is valid and within fork file size. */
     545         354 :         if (blk->blocknum >= nblocks)
     546             :         {
     547             :             /* Move to next forknum. */
     548           0 :             old_blk = blk;
     549           0 :             continue;
     550             :         }
     551             : 
     552             :         /* Prewarm buffer. */
     553         354 :         buf = ReadBufferExtended(rel, blk->forknum, blk->blocknum, RBM_NORMAL,
     554             :                                  NULL);
     555         354 :         if (BufferIsValid(buf))
     556             :         {
     557         354 :             apw_state->prewarmed_blocks++;  /* shared counter that the leader reports in its LOG message */
     558         354 :             ReleaseBuffer(buf);
     559             :         }
     560             : 
     561         354 :         old_blk = blk;
     562             :     }
     563             : 
     564           2 :     dsm_detach(seg);
     565             : 
     566             :     /* Release lock on previous relation. */
     567           2 :     if (rel)
     568             :     {
     569           2 :         relation_close(rel, AccessShareLock);
     570           2 :         CommitTransactionCommand();
     571             :     }
     572           2 : }
     573             : 
     574             : /*
     575             :  * Dump information on blocks in shared buffers.  We use a text format here
     576             :  * so that it's easy to understand and even change the file contents if
     577             :  * necessary.
     578             :  * Returns the number of blocks dumped.
     579             :  */
     580             : static int
     581           6 : apw_dump_now(bool is_bgworker, bool dump_unlogged)
     582             : {
     583             :     int         num_blocks;
     584             :     int         i;
     585             :     int         ret;
     586             :     BlockInfoRecord *block_info_array;
     587             :     BufferDesc *bufHdr;
     588             :     FILE       *file;
     589             :     char        transient_dump_file_path[MAXPGPATH];
     590             :     pid_t       pid;
     591             : 
     592           6 :     LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
     593           6 :     pid = apw_state->pid_using_dumpfile;
     594           6 :     if (apw_state->pid_using_dumpfile == InvalidPid)
     595           6 :         apw_state->pid_using_dumpfile = MyProcPid;
     596           6 :     LWLockRelease(&apw_state->lock);
     597             : 
     598           6 :     if (pid != InvalidPid)
     599             :     {
     600           0 :         if (!is_bgworker)
     601           0 :             ereport(ERROR,
     602             :                     (errmsg("could not perform block dump because dump file is being used by PID %d",
     603             :                             (int) apw_state->pid_using_dumpfile)));
     604             : 
     605           0 :         ereport(LOG,
     606             :                 (errmsg("skipping block dump because it is already being performed by PID %d",
     607             :                         (int) apw_state->pid_using_dumpfile)));
     608           0 :         return 0;
     609             :     }
     610             : 
     611             :     block_info_array =
     612           6 :         (BlockInfoRecord *) palloc(sizeof(BlockInfoRecord) * NBuffers);
     613             : 
     614       98310 :     for (num_blocks = 0, i = 0; i < NBuffers; i++)
     615             :     {
     616             :         uint32      buf_state;
     617             : 
     618       98304 :         CHECK_FOR_INTERRUPTS();
     619             : 
     620       98304 :         bufHdr = GetBufferDescriptor(i);
     621             : 
     622             :         /* Lock each buffer header before inspecting. */
     623       98304 :         buf_state = LockBufHdr(bufHdr);
     624             : 
     625             :         /*
     626             :          * Unlogged tables will be automatically truncated after a crash or
     627             :          * unclean shutdown. In such cases we need not prewarm them. Dump them
     628             :          * only if requested by caller.
     629             :          */
     630       98304 :         if (buf_state & BM_TAG_VALID &&
     631        1062 :             ((buf_state & BM_PERMANENT) || dump_unlogged))
     632             :         {
     633        1062 :             block_info_array[num_blocks].database = bufHdr->tag.dbOid;
     634        1062 :             block_info_array[num_blocks].tablespace = bufHdr->tag.spcOid;
     635        2124 :             block_info_array[num_blocks].filenumber =
     636        1062 :                 BufTagGetRelNumber(&bufHdr->tag);
     637        2124 :             block_info_array[num_blocks].forknum =
     638        1062 :                 BufTagGetForkNum(&bufHdr->tag);
     639        1062 :             block_info_array[num_blocks].blocknum = bufHdr->tag.blockNum;
     640        1062 :             ++num_blocks;
     641             :         }
     642             : 
     643       98304 :         UnlockBufHdr(bufHdr, buf_state);
     644             :     }
     645             : 
     646           6 :     snprintf(transient_dump_file_path, MAXPGPATH, "%s.tmp", AUTOPREWARM_FILE);
     647           6 :     file = AllocateFile(transient_dump_file_path, "w");
     648           6 :     if (!file)
     649           0 :         ereport(ERROR,
     650             :                 (errcode_for_file_access(),
     651             :                  errmsg("could not open file \"%s\": %m",
     652             :                         transient_dump_file_path)));
     653             : 
     654           6 :     ret = fprintf(file, "<<%d>>\n", num_blocks);
     655           6 :     if (ret < 0)
     656             :     {
     657           0 :         int         save_errno = errno;
     658             : 
     659           0 :         FreeFile(file);
     660           0 :         unlink(transient_dump_file_path);
     661           0 :         errno = save_errno;
     662           0 :         ereport(ERROR,
     663             :                 (errcode_for_file_access(),
     664             :                  errmsg("could not write to file \"%s\": %m",
     665             :                         transient_dump_file_path)));
     666             :     }
     667             : 
     668        1068 :     for (i = 0; i < num_blocks; i++)
     669             :     {
     670        1062 :         CHECK_FOR_INTERRUPTS();
     671             : 
     672        1062 :         ret = fprintf(file, "%u,%u,%u,%u,%u\n",
     673        1062 :                       block_info_array[i].database,
     674        1062 :                       block_info_array[i].tablespace,
     675        1062 :                       block_info_array[i].filenumber,
     676        1062 :                       (uint32) block_info_array[i].forknum,
     677        1062 :                       block_info_array[i].blocknum);
     678        1062 :         if (ret < 0)
     679             :         {
     680           0 :             int         save_errno = errno;
     681             : 
     682           0 :             FreeFile(file);
     683           0 :             unlink(transient_dump_file_path);
     684           0 :             errno = save_errno;
     685           0 :             ereport(ERROR,
     686             :                     (errcode_for_file_access(),
     687             :                      errmsg("could not write to file \"%s\": %m",
     688             :                             transient_dump_file_path)));
     689             :         }
     690             :     }
     691             : 
     692           6 :     pfree(block_info_array);
     693             : 
     694             :     /*
     695             :      * Rename transient_dump_file_path to AUTOPREWARM_FILE to make things
     696             :      * permanent.
     697             :      */
     698           6 :     ret = FreeFile(file);
     699           6 :     if (ret != 0)
     700             :     {
     701           0 :         int         save_errno = errno;
     702             : 
     703           0 :         unlink(transient_dump_file_path);
     704           0 :         errno = save_errno;
     705           0 :         ereport(ERROR,
     706             :                 (errcode_for_file_access(),
     707             :                  errmsg("could not close file \"%s\": %m",
     708             :                         transient_dump_file_path)));
     709             :     }
     710             : 
     711           6 :     (void) durable_rename(transient_dump_file_path, AUTOPREWARM_FILE, ERROR);
     712           6 :     apw_state->pid_using_dumpfile = InvalidPid;
     713             : 
     714           6 :     ereport(DEBUG1,
     715             :             (errmsg_internal("wrote block details for %d blocks", num_blocks)));
     716           6 :     return num_blocks;
     717             : }
     718             : 
     719             : /*
     720             :  * SQL-callable function to launch autoprewarm.
     721             :  */
     722             : Datum
     723           0 : autoprewarm_start_worker(PG_FUNCTION_ARGS)
     724             : {
     725             :     pid_t       pid;
     726             : 
     727           0 :     if (!autoprewarm)
     728           0 :         ereport(ERROR,
     729             :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     730             :                  errmsg("autoprewarm is disabled")));
     731             : 
     732           0 :     apw_init_shmem();
     733           0 :     LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
     734           0 :     pid = apw_state->bgworker_pid;
     735           0 :     LWLockRelease(&apw_state->lock);
     736             : 
     737           0 :     if (pid != InvalidPid)
     738           0 :         ereport(ERROR,
     739             :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     740             :                  errmsg("autoprewarm worker is already running under PID %d",
     741             :                         (int) pid)));
     742             : 
     743           0 :     apw_start_leader_worker();
     744             : 
     745           0 :     PG_RETURN_VOID();
     746             : }
     747             : 
     748             : /*
     749             :  * SQL-callable function to perform an immediate block dump.
     750             :  *
     751             :  * Note: this is declared to return int8, as insurance against some
     752             :  * very distant day when we might make NBuffers wider than int.
     753             :  */
     754             : Datum
     755           2 : autoprewarm_dump_now(PG_FUNCTION_ARGS)
     756             : {
     757             :     int         num_blocks;
     758             : 
     759           2 :     apw_init_shmem();
     760             : 
     761           2 :     PG_ENSURE_ERROR_CLEANUP(apw_detach_shmem, 0);
     762             :     {
     763           2 :         num_blocks = apw_dump_now(false, true);
     764             :     }
     765           2 :     PG_END_ENSURE_ERROR_CLEANUP(apw_detach_shmem, 0);
     766             : 
     767           2 :     PG_RETURN_INT64((int64) num_blocks);
     768             : }
     769             : 
     770             : /*
     771             :  * Allocate and initialize autoprewarm related shared memory, if not already
     772             :  * done, and set up backend-local pointer to that state.  Returns true if an
     773             :  * existing shared memory segment was found.
     774             :  */
     775             : static bool
     776           8 : apw_init_shmem(void)
     777             : {
     778             :     bool        found;
     779             : 
     780           8 :     LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
     781           8 :     apw_state = ShmemInitStruct("autoprewarm",
     782             :                                 sizeof(AutoPrewarmSharedState),
     783             :                                 &found);
     784           8 :     if (!found)
     785             :     {
     786             :         /* First time through ... */
     787           4 :         LWLockInitialize(&apw_state->lock, LWLockNewTrancheId());
     788           4 :         apw_state->bgworker_pid = InvalidPid;
     789           4 :         apw_state->pid_using_dumpfile = InvalidPid;
     790             :     }
     791           8 :     LWLockRelease(AddinShmemInitLock);
     792             : 
     793           8 :     LWLockRegisterTranche(apw_state->lock.tranche, "autoprewarm");
     794             : 
     795           8 :     return found;
     796             : }
     797             : 
     798             : /*
     799             :  * Clear our PID from autoprewarm shared state.
     800             :  */
     801             : static void
     802           4 : apw_detach_shmem(int code, Datum arg)
     803             : {
     804           4 :     LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
     805           4 :     if (apw_state->pid_using_dumpfile == MyProcPid)
     806           0 :         apw_state->pid_using_dumpfile = InvalidPid;
     807           4 :     if (apw_state->bgworker_pid == MyProcPid)
     808           4 :         apw_state->bgworker_pid = InvalidPid;
     809           4 :     LWLockRelease(&apw_state->lock);
     810           4 : }
     811             : 
     812             : /*
     813             :  * Start autoprewarm leader worker process.
     814             :  */
     815             : static void
     816           4 : apw_start_leader_worker(void)
     817             : {
     818             :     BackgroundWorker worker;
     819             :     BackgroundWorkerHandle *handle;
     820             :     BgwHandleStatus status;
     821             :     pid_t       pid;
     822             : 
     823           4 :     MemSet(&worker, 0, sizeof(BackgroundWorker));
     824           4 :     worker.bgw_flags = BGWORKER_SHMEM_ACCESS;
     825           4 :     worker.bgw_start_time = BgWorkerStart_ConsistentState;
     826           4 :     strcpy(worker.bgw_library_name, "pg_prewarm");
     827           4 :     strcpy(worker.bgw_function_name, "autoprewarm_main");
     828           4 :     strcpy(worker.bgw_name, "autoprewarm leader");
     829           4 :     strcpy(worker.bgw_type, "autoprewarm leader");
     830             : 
     831           4 :     if (process_shared_preload_libraries_in_progress)
     832             :     {
     833           4 :         RegisterBackgroundWorker(&worker);
     834           4 :         return;
     835             :     }
     836             : 
     837             :     /* must set notify PID to wait for startup */
     838           0 :     worker.bgw_notify_pid = MyProcPid;
     839             : 
     840           0 :     if (!RegisterDynamicBackgroundWorker(&worker, &handle))
     841           0 :         ereport(ERROR,
     842             :                 (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
     843             :                  errmsg("could not register background process"),
     844             :                  errhint("You may need to increase max_worker_processes.")));
     845             : 
     846           0 :     status = WaitForBackgroundWorkerStartup(handle, &pid);
     847           0 :     if (status != BGWH_STARTED)
     848           0 :         ereport(ERROR,
     849             :                 (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
     850             :                  errmsg("could not start background process"),
     851             :                  errhint("More details may be available in the server log.")));
     852             : }
     853             : 
     854             : /*
     855             :  * Start autoprewarm per-database worker process.
     856             :  */
     857             : static void
     858           2 : apw_start_database_worker(void)
     859             : {
     860             :     BackgroundWorker worker;
     861             :     BackgroundWorkerHandle *handle;
     862             : 
     863           2 :     MemSet(&worker, 0, sizeof(BackgroundWorker));
     864           2 :     worker.bgw_flags =
     865             :         BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION;
     866           2 :     worker.bgw_start_time = BgWorkerStart_ConsistentState;
     867           2 :     worker.bgw_restart_time = BGW_NEVER_RESTART;
     868           2 :     strcpy(worker.bgw_library_name, "pg_prewarm");
     869           2 :     strcpy(worker.bgw_function_name, "autoprewarm_database_main");
     870           2 :     strcpy(worker.bgw_name, "autoprewarm worker");
     871           2 :     strcpy(worker.bgw_type, "autoprewarm worker");
     872             : 
     873             :     /* must set notify PID to wait for shutdown */
     874           2 :     worker.bgw_notify_pid = MyProcPid;
     875             : 
     876           2 :     if (!RegisterDynamicBackgroundWorker(&worker, &handle))
     877           0 :         ereport(ERROR,
     878             :                 (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
     879             :                  errmsg("registering dynamic bgworker autoprewarm failed"),
     880             :                  errhint("Consider increasing configuration parameter max_worker_processes.")));
     881             : 
     882             :     /*
     883             :      * Ignore return value; if it fails, postmaster has died, but we have
     884             :      * checks for that elsewhere.
     885             :      */
     886           2 :     WaitForBackgroundWorkerShutdown(handle);
     887           2 : }
     888             : 
     889             : /* Compare member elements to check whether they are not equal. */
     890             : #define cmp_member_elem(fld)    \
     891             : do { \
     892             :     if (a->fld < b->fld)       \
     893             :         return -1;              \
     894             :     else if (a->fld > b->fld)  \
     895             :         return 1;               \
     896             : } while(0)
     897             : 
     898             : /*
     899             :  * apw_compare_blockinfo
     900             :  *
     901             :  * We depend on all records for a particular database being consecutive
     902             :  * in the dump file; each per-database worker will preload blocks until
     903             :  * it sees a block for some other database.  Sorting by tablespace,
     904             :  * filenumber, forknum, and blocknum isn't critical for correctness, but
     905             :  * helps us get a sequential I/O pattern.
     906             :  */
     907             : static int
     908        2722 : apw_compare_blockinfo(const void *p, const void *q)
     909             : {
     910        2722 :     const BlockInfoRecord *a = (const BlockInfoRecord *) p;
     911        2722 :     const BlockInfoRecord *b = (const BlockInfoRecord *) q;
     912             : 
     913        2722 :     cmp_member_elem(database);
     914        2620 :     cmp_member_elem(tablespace);
     915        2620 :     cmp_member_elem(filenumber);
     916         908 :     cmp_member_elem(forknum);
     917         730 :     cmp_member_elem(blocknum);
     918             : 
     919           0 :     return 0;
     920             : }

Generated by: LCOV version 1.14