Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pmsignal.c
4 : * routines for signaling between the postmaster and its child processes
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/storage/ipc/pmsignal.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include <signal.h>
18 : #include <unistd.h>
19 :
20 : #ifdef HAVE_SYS_PRCTL_H
21 : #include <sys/prctl.h>
22 : #endif
23 :
24 : #include "miscadmin.h"
25 : #include "postmaster/postmaster.h"
26 : #include "replication/walsender.h"
27 : #include "storage/ipc.h"
28 : #include "storage/pmsignal.h"
29 : #include "storage/shmem.h"
30 : #include "storage/subsystems.h"
31 : #include "utils/memutils.h"
32 :
33 :
34 : /*
35 : * The postmaster is signaled by its children by sending SIGUSR1. The
36 : * specific reason is communicated via flags in shared memory. We keep
37 : * a boolean flag for each possible "reason", so that different reasons
38 : * can be signaled by different backends at the same time. (However,
39 : * if the same reason is signaled more than once simultaneously, the
40 : * postmaster will observe it only once.)
41 : *
42 : * The flags are actually declared as "volatile sig_atomic_t" for maximum
43 : * portability. This should ensure that loads and stores of the flag
44 : * values are atomic, allowing us to dispense with any explicit locking.
45 : *
46 : * In addition to the per-reason flags, we store a set of per-child-process
47 : * flags that are currently used only for detecting whether a backend has
48 : * exited without performing proper shutdown. The per-child-process flags
49 : * have three possible states: UNUSED, ASSIGNED, ACTIVE. An UNUSED slot is
50 : * available for assignment. An ASSIGNED slot is associated with a postmaster
51 : * child process, but either the process has not touched shared memory yet, or
52 : * it has successfully cleaned up after itself. An ACTIVE slot means the
53 : * process is actively using shared memory. The slots are assigned to child
54 : * processes by postmaster, and pmchild.c is responsible for tracking which
55 : * one goes with which PID.
56 : *
57 : * Actually there is a fourth state, WALSENDER. This is just like ACTIVE,
58 : * but carries the extra information that the child is a WAL sender.
59 : * WAL senders too start in ACTIVE state, but switch to WALSENDER once they
60 : * start streaming the WAL (and they never go back to ACTIVE after that).
61 : *
62 : * We also have a shared-memory field that is used for communication in
63 : * the opposite direction, from postmaster to children: it tells why the
64 : * postmaster has broadcast SIGQUIT signals, if indeed it has done so.
65 : */
66 :
67 : #define PM_CHILD_UNUSED 0 /* slot is available for assignment; these values must fit in sig_atomic_t */
68 : #define PM_CHILD_ASSIGNED 1 /* slot belongs to a child that has not touched shared memory yet, or has cleaned up after itself */
69 : #define PM_CHILD_ACTIVE 2 /* child is actively using shared memory */
70 : #define PM_CHILD_WALSENDER 3 /* like ACTIVE, but the child is a WAL sender that has started streaming */
71 :
72 : /* "typedef struct PMSignalData PMSignalData" appears in pmsignal.h */
73 : struct PMSignalData
74 : {
75 : /* per-reason flags for signaling the postmaster; set in SendPostmasterSignal(), tested and cleared in CheckPostmasterSignal() */
76 : sig_atomic_t PMSignalFlags[NUM_PMSIGNALS];
77 : /* global flags for signals from postmaster to children */
78 : QuitSignalReason sigquit_reason; /* why SIGQUIT was sent; written by SetQuitSignalReason(), read by GetQuitSignalReason() */
79 : /* per-child-process flags; each entry holds one of the PM_CHILD_* values above, indexed by (slot number - 1) */
80 : int num_child_flags; /* # of entries in PMChildFlags[] */
81 : sig_atomic_t PMChildFlags[FLEXIBLE_ARRAY_MEMBER];
82 : };
83 :
84 : /* PMSignalState pointer is valid in both postmaster and child processes; its address is handed to ShmemRequestStruct() in PMSignalShmemRequest() */
85 : NON_EXEC_STATIC volatile PMSignalData *PMSignalState = NULL;
86 :
87 : static void PMSignalShmemRequest(void *); /* computes the size and requests the "PMSignalState" struct */
88 : static void PMSignalShmemInit(void *); /* stores num_child_flags into the shared struct */
89 :
90 : const ShmemCallbacks PMSignalShmemCallbacks = { /* shared-memory callbacks for this module */
91 : .request_fn = PMSignalShmemRequest,
92 : .init_fn = PMSignalShmemInit,
93 : };
94 :
95 : /*
96 : * Local copy of PMSignalState->num_child_flags, only valid in the
97 : * postmaster. Postmaster keeps a local copy so that it doesn't need to
98 : * trust the value in shared memory. It is assigned from
99 : * MaxLivePostmasterChildren() in PMSignalShmemRequest().
99 : */
100 : static int num_child_flags;
101 :
/*
 * Support for being told by the OS, via a signal, that the postmaster has
 * exited.  Everything here is compiled only when
 * USE_POSTMASTER_DEATH_SIGNAL is defined.
 */
#ifdef USE_POSTMASTER_DEATH_SIGNAL

/*
 * Set by the signal handler below (and by the initialization and slow-path
 * code in this file); cleared by PostmasterIsAliveInternal() before it
 * performs the real check.
 */
volatile sig_atomic_t postmaster_possibly_dead = false;

/*
 * Signal handler: only note that the postmaster might be gone.  The actual
 * verification is left to PostmasterIsAliveInternal().
 */
static void
postmaster_death_handler(SIGNAL_ARGS)
{
	postmaster_possibly_dead = true;
}

/*
 * Choose the signal used for the death notification.  Which signals exist
 * depends on the OS, and SIGUSR1/SIGUSR2 already serve other purposes, so
 * we take SIGINFO when it exists and otherwise SIGPWR.
 *
 * We currently assume one of the two is always available; that seems
 * reasonable for any platform modern enough to offer a parent-death
 * signaling mechanism.  If neither exists, compilation fails.
 */
#if !defined(SIGINFO) && !defined(SIGPWR)
#error "cannot find a signal to use for postmaster death"
#elif defined(SIGINFO)
#define POSTMASTER_DEATH_SIGNAL SIGINFO
#else
#define POSTMASTER_DEATH_SIGNAL SIGPWR
#endif

#endif							/* USE_POSTMASTER_DEATH_SIGNAL */
131 :
132 : static void MarkPostmasterChildInactive(int code, Datum arg); /* forward declaration; registered via on_shmem_exit() in RegisterPostmasterChildActive() */
133 :
134 : /*
135 : * PMSignalShmemRequest - Register pmsignal.c's shared memory needs
136 : */
137 : static void
138 1233 : PMSignalShmemRequest(void *arg)
139 : {
140 : size_t size;
141 :
142 1233 : num_child_flags = MaxLivePostmasterChildren();
143 :
144 1233 : size = add_size(offsetof(PMSignalData, PMChildFlags),
145 : mul_size(num_child_flags, sizeof(sig_atomic_t)));
146 1233 : ShmemRequestStruct(.name = "PMSignalState",
147 : .size = size,
148 : .ptr = (void **) &PMSignalState,
149 : );
150 1233 : }
151 :
152 : static void
153 1230 : PMSignalShmemInit(void *arg)
154 : {
155 : Assert(PMSignalState);
156 : Assert(num_child_flags > 0);
157 1230 : PMSignalState->num_child_flags = num_child_flags;
158 1230 : }
159 :
160 : /*
161 : * SendPostmasterSignal - signal the postmaster from a child process
162 : */
163 : void
164 131242 : SendPostmasterSignal(PMSignalReason reason)
165 : {
166 : /* If called in a standalone backend, do nothing */
167 131242 : if (!IsUnderPostmaster)
168 0 : return;
169 : /* Atomically set the proper flag */
170 131242 : PMSignalState->PMSignalFlags[reason] = true;
171 : /* Send signal to postmaster */
172 131242 : kill(PostmasterPid, SIGUSR1);
173 : }
174 :
175 : /*
176 : * CheckPostmasterSignal - check to see if a particular reason has been
177 : * signaled, and clear the signal flag. Should be called by postmaster
178 : * after receiving SIGUSR1.
179 : */
180 : bool
181 1157815 : CheckPostmasterSignal(PMSignalReason reason)
182 : {
183 : /* Careful here --- don't clear flag if we haven't seen it set */
184 1157815 : if (PMSignalState->PMSignalFlags[reason])
185 : {
186 129295 : PMSignalState->PMSignalFlags[reason] = false;
187 129295 : return true;
188 : }
189 1028520 : return false;
190 : }
191 :
192 : /*
193 : * SetQuitSignalReason - broadcast the reason for a system shutdown.
194 : * Should be called by postmaster before sending SIGQUIT to children.
195 : *
196 : * Note: in a crash-and-restart scenario, the "reason" field gets cleared
197 : * as a part of rebuilding shared memory; the postmaster need not do it
198 : * explicitly.
199 : */
200 : void
201 354 : SetQuitSignalReason(QuitSignalReason reason)
202 : {
203 354 : PMSignalState->sigquit_reason = reason;
204 354 : }
205 :
206 : /*
207 : * GetQuitSignalReason - obtain the reason for a system shutdown.
208 : * Called by child processes when they receive SIGQUIT.
209 : * If the postmaster hasn't actually sent SIGQUIT, will return PMQUIT_NOT_SENT.
210 : */
211 : QuitSignalReason
212 0 : GetQuitSignalReason(void)
213 : {
214 : /* This is called in signal handlers, so be extra paranoid. */
215 0 : if (!IsUnderPostmaster || PMSignalState == NULL)
216 0 : return PMQUIT_NOT_SENT;
217 0 : return PMSignalState->sigquit_reason;
218 : }
219 :
220 :
221 : /*
222 : * MarkPostmasterChildSlotAssigned - mark the given slot as ASSIGNED for a
223 : * new postmaster child process.
224 : *
225 : * Only the postmaster is allowed to execute this routine, so we need no
226 : * special locking.
227 : */
228 : void
229 27636 : MarkPostmasterChildSlotAssigned(int slot)
230 : {
231 : Assert(slot > 0 && slot <= num_child_flags);
232 27636 : slot--;
233 :
234 27636 : if (PMSignalState->PMChildFlags[slot] != PM_CHILD_UNUSED)
235 0 : elog(FATAL, "postmaster child slot is already in use");
236 :
237 27636 : PMSignalState->PMChildFlags[slot] = PM_CHILD_ASSIGNED;
238 27636 : }
239 :
240 : /*
241 : * MarkPostmasterChildSlotUnassigned - release a slot after death of a
242 : * postmaster child process. This must be called in the postmaster process.
243 : *
244 : * Returns true if the slot had been in ASSIGNED state (the expected case),
245 : * false otherwise (implying that the child failed to clean itself up).
246 : */
247 : bool
248 27615 : MarkPostmasterChildSlotUnassigned(int slot)
249 : {
250 : bool result;
251 :
252 : Assert(slot > 0 && slot <= num_child_flags);
253 27615 : slot--;
254 :
255 : /*
256 : * Note: the slot state might already be unused, because the logic in
257 : * postmaster.c is such that this might get called twice when a child
258 : * crashes. So we don't try to Assert anything about the state.
259 : */
260 27615 : result = (PMSignalState->PMChildFlags[slot] == PM_CHILD_ASSIGNED);
261 27615 : PMSignalState->PMChildFlags[slot] = PM_CHILD_UNUSED;
262 27615 : return result;
263 : }
264 :
265 : /*
266 : * IsPostmasterChildWalSender - check if given slot is in use by a
267 : * walsender process. This is called only by the postmaster.
268 : */
269 : bool
270 1333 : IsPostmasterChildWalSender(int slot)
271 : {
272 : Assert(slot > 0 && slot <= num_child_flags);
273 1333 : slot--;
274 :
275 1333 : if (PMSignalState->PMChildFlags[slot] == PM_CHILD_WALSENDER)
276 47 : return true;
277 : else
278 1286 : return false;
279 : }
280 :
281 : /*
282 : * RegisterPostmasterChildActive - mark a postmaster child as about to begin
283 : * actively using shared memory. This is called in the child process.
284 : *
285 : * This register an shmem exit hook to mark us as inactive again when the
286 : * process exits normally.
287 : */
288 : void
289 24677 : RegisterPostmasterChildActive(void)
290 : {
291 24677 : int slot = MyPMChildSlot;
292 :
293 : Assert(slot > 0 && slot <= PMSignalState->num_child_flags);
294 24677 : slot--;
295 : Assert(PMSignalState->PMChildFlags[slot] == PM_CHILD_ASSIGNED);
296 24677 : PMSignalState->PMChildFlags[slot] = PM_CHILD_ACTIVE;
297 :
298 : /* Arrange to clean up at exit. */
299 24677 : on_shmem_exit(MarkPostmasterChildInactive, 0);
300 24677 : }
301 :
302 : /*
303 : * MarkPostmasterChildWalSender - mark a postmaster child as a WAL sender
304 : * process. This is called in the child process, sometime after marking the
305 : * child as active.
306 : */
307 : void
308 1290 : MarkPostmasterChildWalSender(void)
309 : {
310 1290 : int slot = MyPMChildSlot;
311 :
312 : Assert(am_walsender);
313 :
314 : Assert(slot > 0 && slot <= PMSignalState->num_child_flags);
315 1290 : slot--;
316 : Assert(PMSignalState->PMChildFlags[slot] == PM_CHILD_ACTIVE);
317 1290 : PMSignalState->PMChildFlags[slot] = PM_CHILD_WALSENDER;
318 1290 : }
319 :
320 : /*
321 : * MarkPostmasterChildInactive - mark a postmaster child as done using
322 : * shared memory. This is called in the child process.
323 : */
324 : static void
325 24677 : MarkPostmasterChildInactive(int code, Datum arg)
326 : {
327 24677 : int slot = MyPMChildSlot;
328 :
329 : Assert(slot > 0 && slot <= PMSignalState->num_child_flags);
330 24677 : slot--;
331 : Assert(PMSignalState->PMChildFlags[slot] == PM_CHILD_ACTIVE ||
332 : PMSignalState->PMChildFlags[slot] == PM_CHILD_WALSENDER);
333 24677 : PMSignalState->PMChildFlags[slot] = PM_CHILD_ASSIGNED;
334 24677 : }
335 :
336 :
337 : /*
338 : * PostmasterIsAliveInternal - check whether postmaster process is still alive
339 : *
340 : * This is the slow path of PostmasterIsAlive(), where the caller has already
341 : * checked 'postmaster_possibly_dead'. (On platforms that don't support
342 : * a signal for parent death, PostmasterIsAlive() is just an alias for this.)
343 : */
344 : bool
345 746 : PostmasterIsAliveInternal(void)
346 : {
347 : #ifdef USE_POSTMASTER_DEATH_SIGNAL
348 : /*
349 : * Reset the flag before checking, so that we don't miss a signal if
350 : * postmaster dies right after the check. If postmaster was indeed dead,
351 : * we'll re-arm it before returning to caller.
352 : */
353 746 : postmaster_possibly_dead = false;
354 : #endif
355 :
356 : #ifndef WIN32
357 : {
358 : char c;
359 : ssize_t rc;
360 :
361 746 : rc = read(postmaster_alive_fds[POSTMASTER_FD_WATCH], &c, 1);
362 :
363 : /*
364 : * In the usual case, the postmaster is still alive, and there is no
365 : * data in the pipe.
366 : */
367 746 : if (rc < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
368 702 : return true;
369 : else
370 : {
371 : /*
372 : * Postmaster is dead, or something went wrong with the read()
373 : * call.
374 : */
375 :
376 : #ifdef USE_POSTMASTER_DEATH_SIGNAL
377 44 : postmaster_possibly_dead = true;
378 : #endif
379 :
380 44 : if (rc < 0)
381 0 : elog(FATAL, "read on postmaster death monitoring pipe failed: %m");
382 44 : else if (rc > 0)
383 0 : elog(FATAL, "unexpected data in postmaster death monitoring pipe");
384 :
385 44 : return false;
386 : }
387 : }
388 :
389 : #else /* WIN32 */
390 : if (WaitForSingleObject(PostmasterHandle, 0) == WAIT_TIMEOUT)
391 : return true;
392 : else
393 : {
394 : #ifdef USE_POSTMASTER_DEATH_SIGNAL
395 : postmaster_possibly_dead = true;
396 : #endif
397 : return false;
398 : }
399 : #endif /* WIN32 */
400 : }
401 :
/*
 * PostmasterDeathSignalInit - ask the OS to signal us when the postmaster
 * dies, if the platform can do that.
 *
 * Does nothing unless USE_POSTMASTER_DEATH_SIGNAL is defined.  Otherwise it
 * installs postmaster_death_handler for POSTMASTER_DEATH_SIGNAL and requests
 * delivery of that signal on parent exit, reporting an ERROR if the request
 * fails.
 */
void
PostmasterDeathSignalInit(void)
{
#ifdef USE_POSTMASTER_DEATH_SIGNAL
	int			death_signal = POSTMASTER_DEATH_SIGNAL;

	/* Install the handler before asking for the signal. */
	pqsignal(death_signal, postmaster_death_handler);

	/* Ask for the signal to be delivered when our parent exits. */
#if !defined(PR_SET_PDEATHSIG) && !defined(PROC_PDEATHSIG_CTL)
#error "USE_POSTMASTER_DEATH_SIGNAL set, but there is no mechanism to request the signal"
#elif defined(PR_SET_PDEATHSIG)
	if (prctl(PR_SET_PDEATHSIG, death_signal) < 0)
		elog(ERROR, "could not request parent death signal: %m");
#else
	if (procctl(P_PID, 0, PROC_PDEATHSIG_CTL, &death_signal) < 0)
		elog(ERROR, "could not request parent death signal: %m");
#endif

	/*
	 * The parent might already have been gone before the request took
	 * effect, in which case we would have missed it; force the first
	 * liveness check to take the slow path.
	 */
	postmaster_possibly_dead = true;
#endif							/* USE_POSTMASTER_DEATH_SIGNAL */
}
|