Line data Source code
1 : /*------------------------------------------------------------------------- 2 : * 3 : * pmchild.c 4 : * Functions for keeping track of postmaster child processes. 5 : * 6 : * Postmaster keeps track of all child processes so that when a process exits, 7 : * it knows what kind of a process it was and can clean up accordingly. Every 8 : * child process is allocated a PMChild struct from a fixed pool of structs. 9 : * The size of the pool is determined by various settings that configure how 10 : * many worker processes and backend connections are allowed, i.e. 11 : * autovacuum_worker_slots, max_worker_processes, max_wal_senders, and 12 : * max_connections. 13 : * 14 : * Dead-end backends are handled slightly differently. There is no limit 15 : * on the number of dead-end backends, and they do not need unique IDs, so 16 : * their PMChild structs are allocated dynamically, not from a pool. 17 : * 18 : * The structures and functions in this file are private to the postmaster 19 : * process. But note that there is an array in shared memory, managed by 20 : * pmsignal.c, that mirrors this. 21 : * 22 : * 23 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group 24 : * Portions Copyright (c) 1994, Regents of the University of California 25 : * 26 : * IDENTIFICATION 27 : * src/backend/postmaster/pmchild.c 28 : * 29 : *------------------------------------------------------------------------- 30 : */ 31 : 32 : #include "postgres.h" 33 : 34 : #include "miscadmin.h" 35 : #include "postmaster/autovacuum.h" 36 : #include "postmaster/postmaster.h" 37 : #include "replication/walsender.h" 38 : #include "storage/pmsignal.h" 39 : #include "storage/proc.h" 40 : 41 : /* 42 : * Freelists for different kinds of child processes. We maintain separate 43 : * pools for each, so that for example launching a lot of regular backends 44 : * cannot prevent autovacuum or an aux process from launching. 45 : */ 46 : typedef struct PMChildPool 47 : { 48 : int size; /* number of PMChild slots reserved for this 49 : * kind of processes */ 50 : int first_slotno; /* first slot belonging to this pool */ 51 : dlist_head freelist; /* currently unused PMChild entries */ 52 : } PMChildPool; 53 : 54 : static PMChildPool pmchild_pools[BACKEND_NUM_TYPES]; 55 : NON_EXEC_STATIC int num_pmchild_slots = 0; 56 : 57 : /* 58 : * List of active child processes. This includes dead-end children. 59 : */ 60 : dlist_head ActiveChildList; 61 : 62 : /* 63 : * Dummy pointer to persuade Valgrind that we've not leaked the array of 64 : * PMChild structs. Make it global to ensure the compiler doesn't 65 : * optimize it away. 66 : */ 67 : #ifdef USE_VALGRIND 68 : extern PMChild *pmchild_array; 69 : PMChild *pmchild_array; 70 : #endif 71 : 72 : 73 : /* 74 : * MaxLivePostmasterChildren 75 : * 76 : * This reports the number of postmaster child processes that can be active. 77 : * It includes all children except for dead-end children. This allows the 78 : * array in shared memory (PMChildFlags) to have a fixed maximum size. 79 : */ 80 : int 81 10460 : MaxLivePostmasterChildren(void) 82 : { 83 10460 : if (num_pmchild_slots == 0) 84 0 : elog(ERROR, "PM child array not initialized yet"); 85 10460 : return num_pmchild_slots; 86 : } 87 : 88 : /* 89 : * Initialize at postmaster startup 90 : * 91 : * Note: This is not called on crash restart. We rely on PMChild entries to 92 : * remain valid through the restart process. This is important because the 93 : * syslogger survives through the crash restart process, so we must not 94 : * invalidate its PMChild slot. 95 : */ 96 : void 97 2148 : InitPostmasterChildSlots(void) 98 : { 99 : int slotno; 100 : PMChild *slots; 101 : 102 : /* 103 : * We allow more connections here than we can have backends because some 104 : * might still be authenticating; they might fail auth, or some existing 105 : * backend might exit before the auth cycle is completed. The exact 106 : * MaxConnections limit is enforced when a new backend tries to join the 107 : * PGPROC array. 108 : * 109 : * WAL senders start out as regular backends, so they share the same pool. 110 : */ 111 2148 : pmchild_pools[B_BACKEND].size = 2 * (MaxConnections + max_wal_senders); 112 : 113 2148 : pmchild_pools[B_AUTOVAC_WORKER].size = autovacuum_worker_slots; 114 2148 : pmchild_pools[B_BG_WORKER].size = max_worker_processes; 115 2148 : pmchild_pools[B_IO_WORKER].size = MAX_IO_WORKERS; 116 : 117 : /* 118 : * There can be only one of each of these running at a time. They each 119 : * get their own pool of just one entry. 120 : */ 121 2148 : pmchild_pools[B_AUTOVAC_LAUNCHER].size = 1; 122 2148 : pmchild_pools[B_SLOTSYNC_WORKER].size = 1; 123 2148 : pmchild_pools[B_ARCHIVER].size = 1; 124 2148 : pmchild_pools[B_BG_WRITER].size = 1; 125 2148 : pmchild_pools[B_CHECKPOINTER].size = 1; 126 2148 : pmchild_pools[B_STARTUP].size = 1; 127 2148 : pmchild_pools[B_WAL_RECEIVER].size = 1; 128 2148 : pmchild_pools[B_WAL_SUMMARIZER].size = 1; 129 2148 : pmchild_pools[B_WAL_WRITER].size = 1; 130 2148 : pmchild_pools[B_LOGGER].size = 1; 131 : 132 : /* The rest of the pmchild_pools are left at zero size */ 133 : 134 : /* Count the total number of slots */ 135 2148 : num_pmchild_slots = 0; 136 40812 : for (int i = 0; i < BACKEND_NUM_TYPES; i++) 137 38664 : num_pmchild_slots += pmchild_pools[i].size; 138 : 139 : /* Allocate enough slots, and make sure Valgrind doesn't complain */ 140 2148 : slots = palloc(num_pmchild_slots * sizeof(PMChild)); 141 : #ifdef USE_VALGRIND 142 : pmchild_array = slots; 143 : #endif 144 : 145 : /* Initialize them */ 146 2148 : slotno = 0; 147 40812 : for (int btype = 0; btype < BACKEND_NUM_TYPES; btype++) 148 : { 149 38664 : pmchild_pools[btype].first_slotno = slotno + 1; 150 38664 : dlist_init(&pmchild_pools[btype].freelist); 151 : 152 475650 : for (int j = 0; j < pmchild_pools[btype].size; j++) 153 : { 154 436986 : slots[slotno].pid = 0; 155 436986 : slots[slotno].child_slot = slotno + 1; 156 436986 : slots[slotno].bkend_type = B_INVALID; 157 436986 : slots[slotno].rw = NULL; 158 436986 : slots[slotno].bgworker_notify = false; 159 436986 : dlist_push_tail(&pmchild_pools[btype].freelist, &slots[slotno].elem); 160 436986 : slotno++; 161 : } 162 : } 163 : Assert(slotno == num_pmchild_slots); 164 : 165 : /* Initialize other structures */ 166 2148 : dlist_init(&ActiveChildList); 167 2148 : } 168 : 169 : /* 170 : * Allocate a PMChild entry for a postmaster child process of given type. 171 : * 172 : * The entry is taken from the right pool for the type. 173 : * 174 : * pmchild->child_slot in the returned struct is unique among all active child 175 : * processes. 176 : */ 177 : PMChild * 178 50158 : AssignPostmasterChildSlot(BackendType btype) 179 : { 180 : dlist_head *freelist; 181 : PMChild *pmchild; 182 : 183 50158 : if (pmchild_pools[btype].size == 0) 184 0 : elog(ERROR, "cannot allocate a PMChild slot for backend type %d", btype); 185 : 186 50158 : freelist = &pmchild_pools[btype].freelist; 187 50158 : if (dlist_is_empty(freelist)) 188 56 : return NULL; 189 : 190 50102 : pmchild = dlist_container(PMChild, elem, dlist_pop_head_node(freelist)); 191 50102 : pmchild->pid = 0; 192 50102 : pmchild->bkend_type = btype; 193 50102 : pmchild->rw = NULL; 194 50102 : pmchild->bgworker_notify = true; 195 : 196 : /* 197 : * pmchild->child_slot for each entry was initialized when the array of 198 : * slots was allocated. Sanity check it. 199 : */ 200 50102 : if (!(pmchild->child_slot >= pmchild_pools[btype].first_slotno && 201 50102 : pmchild->child_slot < pmchild_pools[btype].first_slotno + pmchild_pools[btype].size)) 202 : { 203 0 : elog(ERROR, "pmchild freelist for backend type %d is corrupt", 204 : pmchild->bkend_type); 205 : } 206 : 207 50102 : dlist_push_head(&ActiveChildList, &pmchild->elem); 208 : 209 : /* Update the status in the shared memory array */ 210 50102 : MarkPostmasterChildSlotAssigned(pmchild->child_slot); 211 : 212 50102 : elog(DEBUG2, "assigned pm child slot %d for %s", 213 : pmchild->child_slot, PostmasterChildName(btype)); 214 : 215 50102 : return pmchild; 216 : } 217 : 218 : /* 219 : * Allocate a PMChild struct for a dead-end backend. Dead-end children are 220 : * not assigned a child_slot number. The struct is palloc'd; returns NULL if 221 : * out of memory. 222 : */ 223 : PMChild * 224 676 : AllocDeadEndChild(void) 225 : { 226 : PMChild *pmchild; 227 : 228 676 : elog(DEBUG2, "allocating dead-end child"); 229 : 230 676 : pmchild = (PMChild *) palloc_extended(sizeof(PMChild), MCXT_ALLOC_NO_OOM); 231 676 : if (pmchild) 232 : { 233 676 : pmchild->pid = 0; 234 676 : pmchild->child_slot = 0; 235 676 : pmchild->bkend_type = B_DEAD_END_BACKEND; 236 676 : pmchild->rw = NULL; 237 676 : pmchild->bgworker_notify = false; 238 : 239 676 : dlist_push_head(&ActiveChildList, &pmchild->elem); 240 : } 241 : 242 676 : return pmchild; 243 : } 244 : 245 : /* 246 : * Release a PMChild slot, after the child process has exited. 247 : * 248 : * Returns true if the child detached cleanly from shared memory, false 249 : * otherwise (see MarkPostmasterChildSlotUnassigned). 250 : */ 251 : bool 252 50766 : ReleasePostmasterChildSlot(PMChild *pmchild) 253 : { 254 50766 : dlist_delete(&pmchild->elem); 255 50766 : if (pmchild->bkend_type == B_DEAD_END_BACKEND) 256 : { 257 676 : elog(DEBUG2, "releasing dead-end backend"); 258 676 : pfree(pmchild); 259 676 : return true; 260 : } 261 : else 262 : { 263 : PMChildPool *pool; 264 : 265 50090 : elog(DEBUG2, "releasing pm child slot %d", pmchild->child_slot); 266 : 267 : /* WAL senders start out as regular backends, and share the pool */ 268 50090 : if (pmchild->bkend_type == B_WAL_SENDER) 269 74 : pool = &pmchild_pools[B_BACKEND]; 270 : else 271 50016 : pool = &pmchild_pools[pmchild->bkend_type]; 272 : 273 : /* sanity check that we return the entry to the right pool */ 274 50090 : if (!(pmchild->child_slot >= pool->first_slotno && 275 50090 : pmchild->child_slot < pool->first_slotno + pool->size)) 276 : { 277 0 : elog(ERROR, "pmchild freelist for backend type %d is corrupt", 278 : pmchild->bkend_type); 279 : } 280 : 281 50090 : dlist_push_head(&pool->freelist, &pmchild->elem); 282 50090 : return MarkPostmasterChildSlotUnassigned(pmchild->child_slot); 283 : } 284 : } 285 : 286 : /* 287 : * Find the PMChild entry of a running child process by PID. 288 : */ 289 : PMChild * 290 36988 : FindPostmasterChildByPid(int pid) 291 : { 292 : dlist_iter iter; 293 : 294 104746 : dlist_foreach(iter, &ActiveChildList) 295 : { 296 104746 : PMChild *bp = dlist_container(PMChild, elem, iter.cur); 297 : 298 104746 : if (bp->pid == pid) 299 36988 : return bp; 300 : } 301 0 : return NULL; 302 : }