Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pmsignal.c
4 : * routines for signaling between the postmaster and its child processes
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/storage/ipc/pmsignal.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include <signal.h>
18 : #include <unistd.h>
19 :
20 : #ifdef HAVE_SYS_PRCTL_H
21 : #include <sys/prctl.h>
22 : #endif
23 :
24 : #include "miscadmin.h"
25 : #include "postmaster/postmaster.h"
26 : #include "replication/walsender.h"
27 : #include "storage/ipc.h"
28 : #include "storage/pmsignal.h"
29 : #include "storage/shmem.h"
30 : #include "utils/memutils.h"
31 :
32 :
33 : /*
34 : * The postmaster is signaled by its children by sending SIGUSR1. The
35 : * specific reason is communicated via flags in shared memory. We keep
36 : * a boolean flag for each possible "reason", so that different reasons
37 : * can be signaled by different backends at the same time. (However,
38 : * if the same reason is signaled more than once simultaneously, the
39 : * postmaster will observe it only once.)
40 : *
41 : * The flags are actually declared as "volatile sig_atomic_t" for maximum
42 : * portability. This should ensure that loads and stores of the flag
43 : * values are atomic, allowing us to dispense with any explicit locking.
44 : *
45 : * In addition to the per-reason flags, we store a set of per-child-process
46 : * flags that are currently used only for detecting whether a backend has
47 : * exited without performing proper shutdown. The per-child-process flags
48 : * have three possible states: UNUSED, ASSIGNED, ACTIVE. An UNUSED slot is
49 : * available for assignment. An ASSIGNED slot is associated with a postmaster
50 : * child process, but either the process has not touched shared memory yet, or
51 : * it has successfully cleaned up after itself. An ACTIVE slot means the
52 : * process is actively using shared memory. The slots are assigned to child
53 : * processes by postmaster, and pmchild.c is responsible for tracking which
54 : * one goes with which PID.
55 : *
56 : * Actually there is a fourth state, WALSENDER. This is just like ACTIVE,
57 : * but carries the extra information that the child is a WAL sender.
58 : * WAL senders too start in ACTIVE state, but switch to WALSENDER once they
59 : * start streaming the WAL (and they never go back to ACTIVE after that).
60 : *
61 : * We also have a shared-memory field that is used for communication in
62 : * the opposite direction, from postmaster to children: it tells why the
63 : * postmaster has broadcast SIGQUIT signals, if indeed it has done so.
64 : */
65 :
66 : #define PM_CHILD_UNUSED 0 /* these values must fit in sig_atomic_t */
67 : #define PM_CHILD_ASSIGNED 1
68 : #define PM_CHILD_ACTIVE 2
69 : #define PM_CHILD_WALSENDER 3
70 :
71 : /* "typedef struct PMSignalData PMSignalData" appears in pmsignal.h */
72 : struct PMSignalData
73 : {
74 : /* per-reason flags for signaling the postmaster */
75 : sig_atomic_t PMSignalFlags[NUM_PMSIGNALS];
76 : /* global flags for signals from postmaster to children */
77 : QuitSignalReason sigquit_reason; /* why SIGQUIT was sent */
78 : /* per-child-process flags */
79 : int num_child_flags; /* # of entries in PMChildFlags[] */
80 : sig_atomic_t PMChildFlags[FLEXIBLE_ARRAY_MEMBER];
81 : };
82 :
83 : /* PMSignalState pointer is valid in both postmaster and child processes */
84 : NON_EXEC_STATIC volatile PMSignalData *PMSignalState = NULL;
85 :
86 : /*
87 : * Local copy of PMSignalState->num_child_flags, only valid in the
88 : * postmaster. Postmaster keeps a local copy so that it doesn't need to
89 : * trust the value in shared memory.
90 : */
91 : static int num_child_flags;
92 :
93 : /*
94 : * Signal handler to be notified if postmaster dies.
95 : */
96 : #ifdef USE_POSTMASTER_DEATH_SIGNAL
97 : volatile sig_atomic_t postmaster_possibly_dead = false;
98 :
99 : static void
100 32 : postmaster_death_handler(SIGNAL_ARGS)
101 : {
102 32 : postmaster_possibly_dead = true;
103 32 : }
104 :
105 : /*
106 : * The available signals depend on the OS. SIGUSR1 and SIGUSR2 are already
107 : * used for other things, so choose another one.
108 : *
109 : * Currently, we assume that we can always find a signal to use. That
110 : * seems like a reasonable assumption for all platforms that are modern
111 : * enough to have a parent-death signaling mechanism.
112 : */
113 : #if defined(SIGINFO)
114 : #define POSTMASTER_DEATH_SIGNAL SIGINFO
115 : #elif defined(SIGPWR)
116 : #define POSTMASTER_DEATH_SIGNAL SIGPWR
117 : #else
118 : #error "cannot find a signal to use for postmaster death"
119 : #endif
120 :
121 : #endif /* USE_POSTMASTER_DEATH_SIGNAL */
122 :
123 : static void MarkPostmasterChildInactive(int code, Datum arg);
124 :
125 : /*
126 : * PMSignalShmemSize
127 : * Compute space needed for pmsignal.c's shared memory
128 : */
129 : Size
130 7402 : PMSignalShmemSize(void)
131 : {
132 : Size size;
133 :
134 7402 : size = offsetof(PMSignalData, PMChildFlags);
135 7402 : size = add_size(size, mul_size(MaxLivePostmasterChildren(),
136 : sizeof(sig_atomic_t)));
137 :
138 7402 : return size;
139 : }
140 :
141 : /*
142 : * PMSignalShmemInit - initialize during shared-memory creation
143 : */
144 : void
145 1918 : PMSignalShmemInit(void)
146 : {
147 : bool found;
148 :
149 1918 : PMSignalState = (PMSignalData *)
150 1918 : ShmemInitStruct("PMSignalState", PMSignalShmemSize(), &found);
151 :
152 1918 : if (!found)
153 : {
154 : /* initialize all flags to zeroes */
155 75694 : MemSet(unvolatize(PMSignalData *, PMSignalState), 0, PMSignalShmemSize());
156 1918 : num_child_flags = MaxLivePostmasterChildren();
157 1918 : PMSignalState->num_child_flags = num_child_flags;
158 : }
159 1918 : }
160 :
161 : /*
162 : * SendPostmasterSignal - signal the postmaster from a child process
163 : */
164 : void
165 208604 : SendPostmasterSignal(PMSignalReason reason)
166 : {
167 : /* If called in a standalone backend, do nothing */
168 208604 : if (!IsUnderPostmaster)
169 0 : return;
170 : /* Atomically set the proper flag */
171 208604 : PMSignalState->PMSignalFlags[reason] = true;
172 : /* Send signal to postmaster */
173 208604 : kill(PostmasterPid, SIGUSR1);
174 : }
175 :
176 : /*
177 : * CheckPostmasterSignal - check to see if a particular reason has been
178 : * signaled, and clear the signal flag. Should be called by postmaster
179 : * after receiving SIGUSR1.
180 : */
181 : bool
182 1267742 : CheckPostmasterSignal(PMSignalReason reason)
183 : {
184 : /* Careful here --- don't clear flag if we haven't seen it set */
185 1267742 : if (PMSignalState->PMSignalFlags[reason])
186 : {
187 181482 : PMSignalState->PMSignalFlags[reason] = false;
188 181482 : return true;
189 : }
190 1086260 : return false;
191 : }
192 :
193 : /*
194 : * SetQuitSignalReason - broadcast the reason for a system shutdown.
195 : * Should be called by postmaster before sending SIGQUIT to children.
196 : *
197 : * Note: in a crash-and-restart scenario, the "reason" field gets cleared
198 : * as a part of rebuilding shared memory; the postmaster need not do it
199 : * explicitly.
200 : */
201 : void
202 640 : SetQuitSignalReason(QuitSignalReason reason)
203 : {
204 640 : PMSignalState->sigquit_reason = reason;
205 640 : }
206 :
207 : /*
208 : * GetQuitSignalReason - obtain the reason for a system shutdown.
209 : * Called by child processes when they receive SIGQUIT.
210 : * If the postmaster hasn't actually sent SIGQUIT, will return PMQUIT_NOT_SENT.
211 : */
212 : QuitSignalReason
213 0 : GetQuitSignalReason(void)
214 : {
215 : /* This is called in signal handlers, so be extra paranoid. */
216 0 : if (!IsUnderPostmaster || PMSignalState == NULL)
217 0 : return PMQUIT_NOT_SENT;
218 0 : return PMSignalState->sigquit_reason;
219 : }
220 :
221 :
222 : /*
223 : * MarkPostmasterChildSlotAssigned - mark the given slot as ASSIGNED for a
224 : * new postmaster child process.
225 : *
226 : * Only the postmaster is allowed to execute this routine, so we need no
227 : * special locking.
228 : */
229 : void
230 40680 : MarkPostmasterChildSlotAssigned(int slot)
231 : {
232 : Assert(slot > 0 && slot <= num_child_flags);
233 40680 : slot--;
234 :
235 40680 : if (PMSignalState->PMChildFlags[slot] != PM_CHILD_UNUSED)
236 0 : elog(FATAL, "postmaster child slot is already in use");
237 :
238 40680 : PMSignalState->PMChildFlags[slot] = PM_CHILD_ASSIGNED;
239 40680 : }
240 :
241 : /*
242 : * MarkPostmasterChildSlotUnassigned - release a slot after death of a
243 : * postmaster child process. This must be called in the postmaster process.
244 : *
245 : * Returns true if the slot had been in ASSIGNED state (the expected case),
246 : * false otherwise (implying that the child failed to clean itself up).
247 : */
248 : bool
249 40642 : MarkPostmasterChildSlotUnassigned(int slot)
250 : {
251 : bool result;
252 :
253 : Assert(slot > 0 && slot <= num_child_flags);
254 40642 : slot--;
255 :
256 : /*
257 : * Note: the slot state might already be unused, because the logic in
258 : * postmaster.c is such that this might get called twice when a child
259 : * crashes. So we don't try to Assert anything about the state.
260 : */
261 40642 : result = (PMSignalState->PMChildFlags[slot] == PM_CHILD_ASSIGNED);
262 40642 : PMSignalState->PMChildFlags[slot] = PM_CHILD_UNUSED;
263 40642 : return result;
264 : }
265 :
266 : /*
267 : * IsPostmasterChildWalSender - check if given slot is in use by a
268 : * walsender process. This is called only by the postmaster.
269 : */
270 : bool
271 2818 : IsPostmasterChildWalSender(int slot)
272 : {
273 : Assert(slot > 0 && slot <= num_child_flags);
274 2818 : slot--;
275 :
276 2818 : if (PMSignalState->PMChildFlags[slot] == PM_CHILD_WALSENDER)
277 168 : return true;
278 : else
279 2650 : return false;
280 : }
281 :
282 : /*
283 : * RegisterPostmasterChildActive - mark a postmaster child as about to begin
284 : * actively using shared memory. This is called in the child process.
285 : *
286 : * This register an shmem exit hook to mark us as inactive again when the
287 : * process exits normally.
288 : */
289 : void
290 37094 : RegisterPostmasterChildActive(void)
291 : {
292 37094 : int slot = MyPMChildSlot;
293 :
294 : Assert(slot > 0 && slot <= PMSignalState->num_child_flags);
295 37094 : slot--;
296 : Assert(PMSignalState->PMChildFlags[slot] == PM_CHILD_ASSIGNED);
297 37094 : PMSignalState->PMChildFlags[slot] = PM_CHILD_ACTIVE;
298 :
299 : /* Arrange to clean up at exit. */
300 37094 : on_shmem_exit(MarkPostmasterChildInactive, 0);
301 37094 : }
302 :
303 : /*
304 : * MarkPostmasterChildWalSender - mark a postmaster child as a WAL sender
305 : * process. This is called in the child process, sometime after marking the
306 : * child as active.
307 : */
308 : void
309 2120 : MarkPostmasterChildWalSender(void)
310 : {
311 2120 : int slot = MyPMChildSlot;
312 :
313 : Assert(am_walsender);
314 :
315 : Assert(slot > 0 && slot <= PMSignalState->num_child_flags);
316 2120 : slot--;
317 : Assert(PMSignalState->PMChildFlags[slot] == PM_CHILD_ACTIVE);
318 2120 : PMSignalState->PMChildFlags[slot] = PM_CHILD_WALSENDER;
319 2120 : }
320 :
321 : /*
322 : * MarkPostmasterChildInactive - mark a postmaster child as done using
323 : * shared memory. This is called in the child process.
324 : */
325 : static void
326 37094 : MarkPostmasterChildInactive(int code, Datum arg)
327 : {
328 37094 : int slot = MyPMChildSlot;
329 :
330 : Assert(slot > 0 && slot <= PMSignalState->num_child_flags);
331 37094 : slot--;
332 : Assert(PMSignalState->PMChildFlags[slot] == PM_CHILD_ACTIVE ||
333 : PMSignalState->PMChildFlags[slot] == PM_CHILD_WALSENDER);
334 37094 : PMSignalState->PMChildFlags[slot] = PM_CHILD_ASSIGNED;
335 37094 : }
336 :
337 :
338 : /*
339 : * PostmasterIsAliveInternal - check whether postmaster process is still alive
340 : *
341 : * This is the slow path of PostmasterIsAlive(), where the caller has already
342 : * checked 'postmaster_possibly_dead'. (On platforms that don't support
343 : * a signal for parent death, PostmasterIsAlive() is just an alias for this.)
344 : */
345 : bool
346 624 : PostmasterIsAliveInternal(void)
347 : {
348 : #ifdef USE_POSTMASTER_DEATH_SIGNAL
349 : /*
350 : * Reset the flag before checking, so that we don't miss a signal if
351 : * postmaster dies right after the check. If postmaster was indeed dead,
352 : * we'll re-arm it before returning to caller.
353 : */
354 624 : postmaster_possibly_dead = false;
355 : #endif
356 :
357 : #ifndef WIN32
358 : {
359 : char c;
360 : ssize_t rc;
361 :
362 624 : rc = read(postmaster_alive_fds[POSTMASTER_FD_WATCH], &c, 1);
363 :
364 : /*
365 : * In the usual case, the postmaster is still alive, and there is no
366 : * data in the pipe.
367 : */
368 624 : if (rc < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
369 594 : return true;
370 : else
371 : {
372 : /*
373 : * Postmaster is dead, or something went wrong with the read()
374 : * call.
375 : */
376 :
377 : #ifdef USE_POSTMASTER_DEATH_SIGNAL
378 30 : postmaster_possibly_dead = true;
379 : #endif
380 :
381 30 : if (rc < 0)
382 0 : elog(FATAL, "read on postmaster death monitoring pipe failed: %m");
383 30 : else if (rc > 0)
384 0 : elog(FATAL, "unexpected data in postmaster death monitoring pipe");
385 :
386 30 : return false;
387 : }
388 : }
389 :
390 : #else /* WIN32 */
391 : if (WaitForSingleObject(PostmasterHandle, 0) == WAIT_TIMEOUT)
392 : return true;
393 : else
394 : {
395 : #ifdef USE_POSTMASTER_DEATH_SIGNAL
396 : postmaster_possibly_dead = true;
397 : #endif
398 : return false;
399 : }
400 : #endif /* WIN32 */
401 : }
402 :
/*
 * PostmasterDeathSignalInit
 *		Ask to be signaled when the postmaster dies, where that is possible.
 *
 * When USE_POSTMASTER_DEATH_SIGNAL is defined, this installs
 * postmaster_death_handler for POSTMASTER_DEATH_SIGNAL and requests that
 * signal on parent exit through prctl() or procctl(), whichever the build
 * provides.  A failure of that request is reported with elog(ERROR).
 * Without USE_POSTMASTER_DEATH_SIGNAL the function does nothing.
 */
void
PostmasterDeathSignalInit(void)
{
#ifdef USE_POSTMASTER_DEATH_SIGNAL
	int			death_signal = POSTMASTER_DEATH_SIGNAL;

	/* Install the handler before asking for the signal. */
	pqsignal(death_signal, postmaster_death_handler);

	/* Ask to receive the signal when the parent exits. */
#if defined(PR_SET_PDEATHSIG)
	if (prctl(PR_SET_PDEATHSIG, death_signal) < 0)
		elog(ERROR, "could not request parent death signal: %m");
#elif defined(PROC_PDEATHSIG_CTL)
	if (procctl(P_PID, 0, PROC_PDEATHSIG_CTL, &death_signal) < 0)
		elog(ERROR, "could not request parent death signal: %m");
#else
#error "USE_POSTMASTER_DEATH_SIGNAL set, but there is no mechanism to request the signal"
#endif

	/*
	 * The parent might already have been gone before the request took
	 * effect, so force the first liveness check to take the slow path.
	 */
	postmaster_possibly_dead = true;
#endif							/* USE_POSTMASTER_DEATH_SIGNAL */
}
|