Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * postmaster.c
4 : * This program acts as a clearing house for requests to the
5 : * POSTGRES system. Frontend programs connect to the Postmaster,
6 : * and postmaster forks a new backend process to handle the
7 : * connection.
8 : *
9 : * The postmaster also manages system-wide operations such as
10 : * startup and shutdown. The postmaster itself doesn't do those
11 : * operations, mind you --- it just forks off a subprocess to do them
12 : * at the right times. It also takes care of resetting the system
13 : * if a backend crashes.
14 : *
15 : * The postmaster process creates the shared memory and semaphore
16 : * pools during startup, but as a rule does not touch them itself.
17 : * In particular, it is not a member of the PGPROC array of backends
18 : * and so it cannot participate in lock-manager operations. Keeping
19 : * the postmaster away from shared memory operations makes it simpler
20 : * and more reliable. The postmaster is almost always able to recover
21 : * from crashes of individual backends by resetting shared memory;
22 : * if it did much with shared memory then it would be prone to crashing
23 : * along with the backends.
24 : *
25 : * When a request message is received, we now fork() immediately.
26 : * The child process performs authentication of the request, and
27 : * then becomes a backend if successful. This allows the auth code
28 : * to be written in a simple single-threaded style (as opposed to the
29 : * crufty "poor man's multitasking" code that used to be needed).
30 : * More importantly, it ensures that blockages in non-multithreaded
31 : * libraries like SSL or PAM cannot cause denial of service to other
32 : * clients.
33 : *
34 : *
35 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
36 : * Portions Copyright (c) 1994, Regents of the University of California
37 : *
38 : *
39 : * IDENTIFICATION
40 : * src/backend/postmaster/postmaster.c
41 : *
42 : * NOTES
43 : *
44 : * Initialization:
45 : * The Postmaster sets up shared memory data structures
46 : * for the backends.
47 : *
48 : * Synchronization:
49 : * The Postmaster shares memory with the backends but should avoid
50 : * touching shared memory, so as not to become stuck if a crashing
51 : * backend screws up locks or shared memory. Likewise, the Postmaster
52 : * should never block on messages from frontend clients.
53 : *
54 : * Garbage Collection:
55 : * The Postmaster cleans up after backends if they have an emergency
56 : * exit and/or core dump.
57 : *
58 : * Error Reporting:
59 : * Use write_stderr() only for reporting "interactive" errors
60 : * (essentially, bogus arguments on the command line). Once the
61 : * postmaster is launched, use ereport().
62 : *
63 : *-------------------------------------------------------------------------
64 : */
65 :
66 : #include "postgres.h"
67 :
68 : #include <unistd.h>
69 : #include <signal.h>
70 : #include <time.h>
71 : #include <sys/wait.h>
72 : #include <ctype.h>
73 : #include <sys/stat.h>
74 : #include <sys/socket.h>
75 : #include <fcntl.h>
76 : #include <sys/param.h>
77 : #include <netdb.h>
78 : #include <limits.h>
79 :
80 : #ifdef USE_BONJOUR
81 : #include <dns_sd.h>
82 : #endif
83 :
84 : #ifdef USE_SYSTEMD
85 : #include <systemd/sd-daemon.h>
86 : #endif
87 :
88 : #ifdef HAVE_PTHREAD_IS_THREADED_NP
89 : #include <pthread.h>
90 : #endif
91 :
92 : #include "access/xlog.h"
93 : #include "access/xlogrecovery.h"
94 : #include "common/file_perm.h"
95 : #include "common/file_utils.h"
96 : #include "common/ip.h"
97 : #include "common/pg_prng.h"
98 : #include "lib/ilist.h"
99 : #include "libpq/libpq.h"
100 : #include "libpq/pqsignal.h"
101 : #include "pg_getopt.h"
102 : #include "pgstat.h"
103 : #include "port/pg_bswap.h"
104 : #include "postmaster/autovacuum.h"
105 : #include "postmaster/auxprocess.h"
106 : #include "postmaster/bgworker_internals.h"
107 : #include "postmaster/pgarch.h"
108 : #include "postmaster/postmaster.h"
109 : #include "postmaster/syslogger.h"
110 : #include "postmaster/walsummarizer.h"
111 : #include "replication/logicallauncher.h"
112 : #include "replication/slotsync.h"
113 : #include "replication/walsender.h"
114 : #include "storage/fd.h"
115 : #include "storage/ipc.h"
116 : #include "storage/pmsignal.h"
117 : #include "storage/proc.h"
118 : #include "tcop/backend_startup.h"
119 : #include "tcop/tcopprot.h"
120 : #include "utils/datetime.h"
121 : #include "utils/memutils.h"
122 : #include "utils/pidfile.h"
123 : #include "utils/timestamp.h"
124 : #include "utils/varlena.h"
125 :
126 : #ifdef EXEC_BACKEND
127 : #include "storage/pg_shmem.h"
128 : #endif
129 :
130 :
131 : /*
132 : * Possible types of a backend. Beyond being the possible bkend_type values in
133 : * struct bkend, these are OR-able request flag bits for SignalSomeChildren()
134 : * and CountChildren().
135 : */
136 : #define BACKEND_TYPE_NORMAL 0x0001 /* normal backend */
137 : #define BACKEND_TYPE_AUTOVAC 0x0002 /* autovacuum worker process */
138 : #define BACKEND_TYPE_WALSND 0x0004 /* walsender process */
139 : #define BACKEND_TYPE_BGWORKER 0x0008 /* bgworker process */
140 : #define BACKEND_TYPE_ALL 0x000F /* OR of all the above */
141 :
142 : /*
143 : * List of active backends (or child processes anyway; we don't actually
144 : * know whether a given child has become a backend or is still in the
145 : * authorization phase). This is used mainly to keep track of how many
146 : * children we have and send them appropriate signals when necessary.
147 : *
148 : * As shown in the above set of backend types, this list includes not only
149 : * "normal" client sessions, but also autovacuum workers, walsenders, and
150 : * background workers. (Note that at the time of launch, walsenders are
151 : * labeled BACKEND_TYPE_NORMAL; we relabel them to BACKEND_TYPE_WALSND
152 : * upon noticing they've changed their PMChildFlags entry. Hence that check
153 : * must be done before any operation that needs to distinguish walsenders
154 : * from normal backends.)
155 : *
156 : * Also, "dead_end" children are in it: these are children launched just for
157 : * the purpose of sending a friendly rejection message to a would-be client.
158 : * We must track them because they are attached to shared memory, but we know
159 : * they will never become live backends. dead_end children are not assigned a
160 : * PMChildSlot. dead_end children have bkend_type NORMAL.
161 : *
162 : * "Special" children such as the startup, bgwriter, autovacuum launcher, and
163 : * slot sync worker tasks are not in this list. They are tracked via StartupPID
164 : * and other pid_t variables below. (Thus, there can't be more than one of any
165 : * given "special" child process type. We use BackendList entries for any
166 : * child process there can be more than one of.)
167 : */
168 : typedef struct bkend
169 : {
170 : pid_t pid; /* process id of the child process */
171 : int32 cancel_key; /* cancel key for cancel requests aimed at this backend */
172 : int child_slot; /* PMChildSlot for this backend, if any; dead_end children are not assigned one */
173 : int bkend_type; /* BACKEND_TYPE_* value, see above; dead_end children are NORMAL, walsenders start as NORMAL and are relabeled later */
174 : bool dead_end; /* true if it will only send an error message and quit */
175 : bool bgworker_notify; /* gets bgworker start/stop notifications */
176 : dlist_node elem; /* list link in BackendList */
177 : } Backend;
178 :
179 : static dlist_head BackendList = DLIST_STATIC_INIT(BackendList);
180 :
181 : #ifdef EXEC_BACKEND
182 : Backend *ShmemBackendArray;
183 : #endif
184 :
185 : BackgroundWorker *MyBgworkerEntry = NULL;
186 :
187 :
188 :
189 : /* The socket number we are listening for connections on */
190 : int PostPortNumber = DEF_PGPORT;
191 :
192 : /* The directory names for Unix socket(s) */
193 : char *Unix_socket_directories;
194 :
195 : /* The TCP listen address(es) */
196 : char *ListenAddresses;
197 :
198 : /*
199 : * SuperuserReservedConnections is the number of backends reserved for
200 : * superuser use, and ReservedConnections is the number of backends reserved
201 : * for use by roles with privileges of the pg_use_reserved_connections
202 : * predefined role. These are taken out of the pool of MaxConnections backend
203 : * slots, so the number of backend slots available for roles that are neither
204 : * superuser nor have privileges of pg_use_reserved_connections is
205 : * (MaxConnections - SuperuserReservedConnections - ReservedConnections).
206 : *
207 : * If the number of remaining slots is less than or equal to
208 : * SuperuserReservedConnections, only superusers can make new connections. If
209 : * the number of remaining slots is greater than SuperuserReservedConnections
210 : * but less than or equal to
211 : * (SuperuserReservedConnections + ReservedConnections), only superusers and
212 : * roles with privileges of pg_use_reserved_connections can make new
213 : * connections. Note that pre-existing superuser and
214 : * pg_use_reserved_connections connections don't count against the limits.
215 : */
216 : int SuperuserReservedConnections;
217 : int ReservedConnections;
218 :
219 : /* The socket(s) we're listening to. */
220 : #define MAXLISTEN 64
221 : static int NumListenSockets = 0;
222 : static pgsocket *ListenSockets = NULL;
223 :
224 : /* still more option variables */
225 : bool EnableSSL = false;
226 :
227 : int PreAuthDelay = 0;
228 : int AuthenticationTimeout = 60;
229 :
230 : bool log_hostname; /* for ps display and logging */
231 : bool Log_connections = false;
232 :
233 : bool enable_bonjour = false;
234 : char *bonjour_name;
235 : bool restart_after_crash = true;
236 : bool remove_temp_files_after_crash = true;
237 : bool send_abort_for_crash = false;
238 : bool send_abort_for_kill = false;
239 :
240 : /* PIDs of special child processes; 0 when not running */
241 : static pid_t StartupPID = 0,
242 : BgWriterPID = 0,
243 : CheckpointerPID = 0,
244 : WalWriterPID = 0,
245 : WalReceiverPID = 0,
246 : WalSummarizerPID = 0,
247 : AutoVacPID = 0,
248 : PgArchPID = 0,
249 : SysLoggerPID = 0,
250 : SlotSyncWorkerPID = 0;
251 :
252 : /* Startup process's status */
253 : typedef enum
254 : {
255 : STARTUP_NOT_RUNNING, /* initial value of StartupStatus */
256 : STARTUP_RUNNING, /* presumably: startup process launched and not yet signaled -- confirm against callers */
257 : STARTUP_SIGNALED, /* we sent it a SIGQUIT or SIGKILL */
258 : STARTUP_CRASHED, /* presumably: startup process exited abnormally on its own -- confirm against callers */
259 : } StartupStatusEnum;
260 :
261 : static StartupStatusEnum StartupStatus = STARTUP_NOT_RUNNING;
262 :
263 : /* Startup/shutdown state */
264 : #define NoShutdown 0
265 : #define SmartShutdown 1
266 : #define FastShutdown 2
267 : #define ImmediateShutdown 3
268 :
269 : static int Shutdown = NoShutdown;
270 :
271 : static bool FatalError = false; /* T if recovering from backend crash */
272 :
273 : /*
274 : * We use a simple state machine to control startup, shutdown, and
275 : * crash recovery (which is rather like shutdown followed by startup).
276 : *
277 : * After doing all the postmaster initialization work, we enter PM_STARTUP
278 : * state and the startup process is launched. The startup process begins by
279 : * reading the control file and other preliminary initialization steps.
280 : * In a normal startup, or after crash recovery, the startup process exits
281 : * with exit code 0 and we switch to PM_RUN state. However, archive recovery
282 : * is handled specially since it takes much longer and we would like to support
283 : * hot standby during archive recovery.
284 : *
285 : * When the startup process is ready to start archive recovery, it signals the
286 : * postmaster, and we switch to PM_RECOVERY state. The background writer and
287 : * checkpointer are launched, while the startup process continues applying WAL.
288 : * If Hot Standby is enabled, then, after reaching a consistent point in WAL
289 : * redo, startup process signals us again, and we switch to PM_HOT_STANDBY
290 : * state and begin accepting connections to perform read-only queries. When
291 : * archive recovery is finished, the startup process exits with exit code 0
292 : * and we switch to PM_RUN state.
293 : *
294 : * Normal child backends can only be launched when we are in PM_RUN or
295 : * PM_HOT_STANDBY state. (connsAllowed can also restrict launching.)
296 : * In other states we handle connection requests by launching "dead_end"
297 : * child processes, which will simply send the client an error message and
298 : * quit. (We track these in the BackendList so that we can know when they
299 : * are all gone; this is important because they're still connected to shared
300 : * memory, and would interfere with an attempt to destroy the shmem segment,
301 : * possibly leading to SHMALL failure when we try to make a new one.)
302 : * In PM_WAIT_DEAD_END state we are waiting for all the dead_end children
303 : * to drain out of the system, and therefore stop accepting connection
304 : * requests at all until the last existing child has quit (which hopefully
305 : * will not be very long).
306 : *
307 : * Notice that this state variable does not distinguish *why* we entered
308 : * states later than PM_RUN --- Shutdown and FatalError must be consulted
309 : * to find that out. FatalError is never true in PM_RECOVERY, PM_HOT_STANDBY,
310 : * or PM_RUN states, nor in PM_SHUTDOWN states (because we don't enter those
311 : * states when trying to recover from a crash). It can be true in PM_STARTUP
312 : * state, because we don't clear it until we've successfully started WAL redo.
313 : */
314 : typedef enum
315 : {
316 : PM_INIT, /* postmaster starting; initial value of pmState */
317 : PM_STARTUP, /* startup subprocess launched, waiting for it */
318 : PM_RECOVERY, /* in archive recovery mode, normal backends not yet allowed */
319 : PM_HOT_STANDBY, /* in hot standby mode, read-only queries accepted */
320 : PM_RUN, /* normal "database is alive" state */
321 : PM_STOP_BACKENDS, /* need to stop remaining backends */
322 : PM_WAIT_BACKENDS, /* waiting for live backends to exit */
323 : PM_SHUTDOWN, /* waiting for checkpointer to do shutdown
324 : * ckpt */
325 : PM_SHUTDOWN_2, /* waiting for archiver and walsenders to
326 : * finish */
327 : PM_WAIT_DEAD_END, /* waiting for dead_end children to exit; no new connection requests are accepted */
328 : PM_NO_CHILDREN, /* all important children have exited */
329 : } PMState;
330 :
331 : static PMState pmState = PM_INIT;
332 :
333 : /*
334 : * While performing a "smart shutdown", we restrict new connections but stay
335 : * in PM_RUN or PM_HOT_STANDBY state until all the client backends are gone.
336 : * connsAllowed is a sub-state indicator showing the active restriction.
337 : * It is of no interest unless pmState is PM_RUN or PM_HOT_STANDBY.
338 : */
339 : static bool connsAllowed = true;
340 :
341 : /* Start time of SIGKILL timeout during immediate shutdown or child crash */
342 : /* Zero means timeout is not running */
343 : static time_t AbortStartTime = 0;
344 :
345 : /* Length of said timeout */
346 : #define SIGKILL_CHILDREN_AFTER_SECS 5
347 :
348 : static bool ReachedNormalRunning = false; /* T if we've reached PM_RUN */
349 :
350 : bool ClientAuthInProgress = false; /* T during new-client
351 : * authentication */
352 :
353 : bool redirection_done = false; /* stderr redirected for syslogger? */
354 :
355 : /* received START_AUTOVAC_LAUNCHER signal */
356 : static bool start_autovac_launcher = false;
357 :
358 : /* the launcher needs to be signaled to communicate some condition */
359 : static bool avlauncher_needs_signal = false;
360 :
361 : /* received START_WALRECEIVER signal */
362 : static bool WalReceiverRequested = false;
363 :
364 : /* set when there's a worker that needs to be started up */
365 : static bool StartWorkerNeeded = true;
366 : static bool HaveCrashedWorker = false;
367 :
368 : /* set when signals arrive */
369 : static volatile sig_atomic_t pending_pm_pmsignal;
370 : static volatile sig_atomic_t pending_pm_child_exit;
371 : static volatile sig_atomic_t pending_pm_reload_request;
372 : static volatile sig_atomic_t pending_pm_shutdown_request;
373 : static volatile sig_atomic_t pending_pm_fast_shutdown_request;
374 : static volatile sig_atomic_t pending_pm_immediate_shutdown_request;
375 :
376 : /* event multiplexing object */
377 : static WaitEventSet *pm_wait_set;
378 :
379 : #ifdef USE_SSL
380 : /* Set when and if SSL has been initialized properly */
381 : bool LoadedSSL = false;
382 : #endif
383 :
384 : #ifdef USE_BONJOUR
385 : static DNSServiceRef bonjour_sdref = NULL;
386 : #endif
387 :
388 : /*
389 : * postmaster.c - function prototypes
390 : */
391 : static void CloseServerPorts(int status, Datum arg);
392 : static void unlink_external_pid_file(int status, Datum arg);
393 : static void getInstallationPaths(const char *argv0);
394 : static void checkControlFile(void);
395 : static void handle_pm_pmsignal_signal(SIGNAL_ARGS);
396 : static void handle_pm_child_exit_signal(SIGNAL_ARGS);
397 : static void handle_pm_reload_request_signal(SIGNAL_ARGS);
398 : static void handle_pm_shutdown_request_signal(SIGNAL_ARGS);
399 : static void process_pm_pmsignal(void);
400 : static void process_pm_child_exit(void);
401 : static void process_pm_reload_request(void);
402 : static void process_pm_shutdown_request(void);
403 : static void dummy_handler(SIGNAL_ARGS);
404 : static void CleanupBackend(int pid, int exitstatus);
405 : static bool CleanupBackgroundWorker(int pid, int exitstatus);
406 : static void HandleChildCrash(int pid, int exitstatus, const char *procname);
407 : static void LogChildExit(int lev, const char *procname,
408 : int pid, int exitstatus);
409 : static void PostmasterStateMachine(void);
410 :
411 : static void ExitPostmaster(int status) pg_attribute_noreturn();
412 : static int ServerLoop(void);
413 : static int BackendStartup(ClientSocket *client_sock);
414 : static void report_fork_failure_to_client(ClientSocket *client_sock, int errnum);
415 : static CAC_state canAcceptConnections(int backend_type);
416 : static bool RandomCancelKey(int32 *cancel_key);
417 : static void signal_child(pid_t pid, int signal);
418 : static void sigquit_child(pid_t pid);
419 : static bool SignalSomeChildren(int signal, int target);
420 : static void TerminateChildren(int signal);
421 :
422 : #define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL)
423 :
424 : static int CountChildren(int target);
425 : static bool assign_backendlist_entry(RegisteredBgWorker *rw);
426 : static void maybe_start_bgworkers(void);
427 : static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
428 : static pid_t StartChildProcess(BackendType type);
429 : static void StartAutovacuumWorker(void);
430 : static void MaybeStartWalReceiver(void);
431 : static void MaybeStartWalSummarizer(void);
432 : static void InitPostmasterDeathWatchHandle(void);
433 : static void MaybeStartSlotSyncWorker(void);
434 :
435 : /*
436 : * Is the archiver allowed to start up in the current postmaster state?
437 : *
438 : * Yes in PM_RUN if XLogArchivingActive(), and also during recovery
439 : * (PM_RECOVERY or PM_HOT_STANDBY) if XLogArchivingAlways(); in either case
440 : * PgArchCanRestart() must additionally return true.
441 : */
442 : #define PgArchStartupAllowed() \
443 : (((XLogArchivingActive() && pmState == PM_RUN) || \
444 : (XLogArchivingAlways() && \
445 : (pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY))) && \
446 : PgArchCanRestart())
446 :
447 : #ifdef EXEC_BACKEND
448 :
449 : #ifdef WIN32
450 : #define WNOHANG 0 /* ignored, so any integer value will do */
451 :
452 : static pid_t waitpid(pid_t pid, int *exitstatus, int options);
453 : static void WINAPI pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired);
454 :
455 : static HANDLE win32ChildQueue;
456 :
457 : typedef struct
458 : {
459 : HANDLE waitHandle;
460 : HANDLE procHandle;
461 : DWORD procId;
462 : } win32_deadchild_waitinfo;
463 : #endif /* WIN32 */
464 :
465 : static void ShmemBackendArrayAdd(Backend *bn);
466 : static void ShmemBackendArrayRemove(Backend *bn);
467 : #endif /* EXEC_BACKEND */
468 :
469 : /* Macros to check exit status of a child process: EXIT_STATUS_0 compares the raw status to 0, the others require WIFEXITED and the given exit code */
470 : #define EXIT_STATUS_0(st) ((st) == 0)
471 : #define EXIT_STATUS_1(st) (WIFEXITED(st) && WEXITSTATUS(st) == 1)
472 : #define EXIT_STATUS_3(st) (WIFEXITED(st) && WEXITSTATUS(st) == 3)
473 :
474 : #ifndef WIN32
475 : /*
476 : * File descriptors for pipe used to monitor if postmaster is alive.
477 : * First is POSTMASTER_FD_WATCH, second is POSTMASTER_FD_OWN.
478 : */
479 : int postmaster_alive_fds[2] = {-1, -1};
480 : #else
481 : /* Process handle of postmaster used for the same purpose on Windows */
482 : HANDLE PostmasterHandle;
483 : #endif
484 :
485 : /*
486 : * Postmaster main entry point
487 : */
488 : void
489 1438 : PostmasterMain(int argc, char *argv[])
490 : {
491 : int opt;
492 : int status;
493 1438 : char *userDoption = NULL;
494 1438 : bool listen_addr_saved = false;
495 1438 : char *output_config_variable = NULL;
496 :
497 1438 : InitProcessGlobals();
498 :
499 1438 : PostmasterPid = MyProcPid;
500 :
501 1438 : IsPostmasterEnvironment = true;
502 :
503 : /*
504 : * Start our win32 signal implementation
505 : */
506 : #ifdef WIN32
507 : pgwin32_signal_initialize();
508 : #endif
509 :
510 : /*
511 : * We should not be creating any files or directories before we check the
512 : * data directory (see checkDataDir()), but just in case set the umask to
513 : * the most restrictive (owner-only) permissions.
514 : *
515 : * checkDataDir() will reset the umask based on the data directory
516 : * permissions.
517 : */
518 1438 : umask(PG_MODE_MASK_OWNER);
519 :
520 : /*
521 : * By default, palloc() requests in the postmaster will be allocated in
522 : * the PostmasterContext, which is space that can be recycled by backends.
523 : * Allocated data that needs to be available to backends should be
524 : * allocated in TopMemoryContext.
525 : */
526 1438 : PostmasterContext = AllocSetContextCreate(TopMemoryContext,
527 : "Postmaster",
528 : ALLOCSET_DEFAULT_SIZES);
529 1438 : MemoryContextSwitchTo(PostmasterContext);
530 :
531 : /* Initialize paths to installation files */
532 1438 : getInstallationPaths(argv[0]);
533 :
534 : /*
535 : * Set up signal handlers for the postmaster process.
536 : *
537 : * CAUTION: when changing this list, check for side-effects on the signal
538 : * handling setup of child processes. See tcop/postgres.c,
539 : * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c,
540 : * postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/syslogger.c,
541 : * postmaster/bgworker.c and postmaster/checkpointer.c.
542 : */
543 1438 : pqinitmask();
544 1438 : sigprocmask(SIG_SETMASK, &BlockSig, NULL);
545 :
546 1438 : pqsignal(SIGHUP, handle_pm_reload_request_signal);
547 1438 : pqsignal(SIGINT, handle_pm_shutdown_request_signal);
548 1438 : pqsignal(SIGQUIT, handle_pm_shutdown_request_signal);
549 1438 : pqsignal(SIGTERM, handle_pm_shutdown_request_signal);
550 1438 : pqsignal(SIGALRM, SIG_IGN); /* ignored */
551 1438 : pqsignal(SIGPIPE, SIG_IGN); /* ignored */
552 1438 : pqsignal(SIGUSR1, handle_pm_pmsignal_signal);
553 1438 : pqsignal(SIGUSR2, dummy_handler); /* unused, reserve for children */
554 1438 : pqsignal(SIGCHLD, handle_pm_child_exit_signal);
555 :
556 : /* This may configure SIGURG, depending on platform. */
557 1438 : InitializeLatchSupport();
558 1438 : InitProcessLocalLatch();
559 :
560 : /*
561 : * No other place in Postgres should touch SIGTTIN/SIGTTOU handling. We
562 : * ignore those signals in a postmaster environment, so that there is no
563 : * risk of a child process freezing up due to writing to stderr. But for
564 : * a standalone backend, their default handling is reasonable. Hence, all
565 : * child processes should just allow the inherited settings to stand.
566 : */
567 : #ifdef SIGTTIN
568 1438 : pqsignal(SIGTTIN, SIG_IGN); /* ignored */
569 : #endif
570 : #ifdef SIGTTOU
571 1438 : pqsignal(SIGTTOU, SIG_IGN); /* ignored */
572 : #endif
573 :
574 : /* ignore SIGXFSZ, so that ulimit violations work like disk full */
575 : #ifdef SIGXFSZ
576 1438 : pqsignal(SIGXFSZ, SIG_IGN); /* ignored */
577 : #endif
578 :
579 : /* Begin accepting signals. */
580 1438 : sigprocmask(SIG_SETMASK, &UnBlockSig, NULL);
581 :
582 : /*
583 : * Options setup
584 : */
585 1438 : InitializeGUCOptions();
586 :
587 1438 : opterr = 1;
588 :
589 : /*
590 : * Parse command-line options. CAUTION: keep this in sync with
591 : * tcop/postgres.c (the option sets should not conflict) and with the
592 : * common help() function in main/main.c.
593 : */
594 4986 : while ((opt = getopt(argc, argv, "B:bC:c:D:d:EeFf:h:ijk:lN:OPp:r:S:sTt:W:-:")) != -1)
595 : {
596 3548 : switch (opt)
597 : {
598 0 : case 'B':
599 0 : SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
600 0 : break;
601 :
602 44 : case 'b':
603 : /* Undocumented flag used for binary upgrades */
604 44 : IsBinaryUpgrade = true;
605 44 : break;
606 :
607 4 : case 'C':
608 4 : output_config_variable = strdup(optarg);
609 4 : break;
610 :
611 1666 : case 'c':
612 : case '-':
613 : {
614 : char *name,
615 : *value;
616 :
617 1666 : ParseLongOption(optarg, &name, &value);
618 1666 : if (!value)
619 : {
620 0 : if (opt == '-')
621 0 : ereport(ERROR,
622 : (errcode(ERRCODE_SYNTAX_ERROR),
623 : errmsg("--%s requires a value",
624 : optarg)));
625 : else
626 0 : ereport(ERROR,
627 : (errcode(ERRCODE_SYNTAX_ERROR),
628 : errmsg("-c %s requires a value",
629 : optarg)));
630 : }
631 :
632 1666 : SetConfigOption(name, value, PGC_POSTMASTER, PGC_S_ARGV);
633 1666 : pfree(name);
634 1666 : pfree(value);
635 1666 : break;
636 : }
637 :
638 1438 : case 'D':
639 1438 : userDoption = strdup(optarg);
640 1438 : break;
641 :
642 0 : case 'd':
643 0 : set_debug_options(atoi(optarg), PGC_POSTMASTER, PGC_S_ARGV);
644 0 : break;
645 :
646 0 : case 'E':
647 0 : SetConfigOption("log_statement", "all", PGC_POSTMASTER, PGC_S_ARGV);
648 0 : break;
649 :
650 0 : case 'e':
651 0 : SetConfigOption("datestyle", "euro", PGC_POSTMASTER, PGC_S_ARGV);
652 0 : break;
653 :
654 166 : case 'F':
655 166 : SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
656 166 : break;
657 :
658 0 : case 'f':
659 0 : if (!set_plan_disabling_options(optarg, PGC_POSTMASTER, PGC_S_ARGV))
660 : {
661 0 : write_stderr("%s: invalid argument for option -f: \"%s\"\n",
662 : progname, optarg);
663 0 : ExitPostmaster(1);
664 : }
665 0 : break;
666 :
667 0 : case 'h':
668 0 : SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
669 0 : break;
670 :
671 0 : case 'i':
672 0 : SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
673 0 : break;
674 :
675 0 : case 'j':
676 : /* only used by interactive backend */
677 0 : break;
678 :
679 166 : case 'k':
680 166 : SetConfigOption("unix_socket_directories", optarg, PGC_POSTMASTER, PGC_S_ARGV);
681 166 : break;
682 :
683 0 : case 'l':
684 0 : SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
685 0 : break;
686 :
687 0 : case 'N':
688 0 : SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
689 0 : break;
690 :
691 0 : case 'O':
692 0 : SetConfigOption("allow_system_table_mods", "true", PGC_POSTMASTER, PGC_S_ARGV);
693 0 : break;
694 :
695 0 : case 'P':
696 0 : SetConfigOption("ignore_system_indexes", "true", PGC_POSTMASTER, PGC_S_ARGV);
697 0 : break;
698 :
699 64 : case 'p':
700 64 : SetConfigOption("port", optarg, PGC_POSTMASTER, PGC_S_ARGV);
701 64 : break;
702 :
703 0 : case 'r':
704 : /* only used by single-user backend */
705 0 : break;
706 :
707 0 : case 'S':
708 0 : SetConfigOption("work_mem", optarg, PGC_POSTMASTER, PGC_S_ARGV);
709 0 : break;
710 :
711 0 : case 's':
712 0 : SetConfigOption("log_statement_stats", "true", PGC_POSTMASTER, PGC_S_ARGV);
713 0 : break;
714 :
715 0 : case 'T':
716 :
717 : /*
718 : * This option used to be defined as sending SIGSTOP after a
719 : * backend crash, but sending SIGABRT seems more useful.
720 : */
721 0 : SetConfigOption("send_abort_for_crash", "true", PGC_POSTMASTER, PGC_S_ARGV);
722 0 : break;
723 :
724 0 : case 't':
725 : {
726 0 : const char *tmp = get_stats_option_name(optarg);
727 :
728 0 : if (tmp)
729 : {
730 0 : SetConfigOption(tmp, "true", PGC_POSTMASTER, PGC_S_ARGV);
731 : }
732 : else
733 : {
734 0 : write_stderr("%s: invalid argument for option -t: \"%s\"\n",
735 : progname, optarg);
736 0 : ExitPostmaster(1);
737 : }
738 0 : break;
739 : }
740 :
741 0 : case 'W':
742 0 : SetConfigOption("post_auth_delay", optarg, PGC_POSTMASTER, PGC_S_ARGV);
743 0 : break;
744 :
745 0 : default:
746 0 : write_stderr("Try \"%s --help\" for more information.\n",
747 : progname);
748 0 : ExitPostmaster(1);
749 : }
750 : }
751 :
752 : /*
753 : * Postmaster accepts no non-option switch arguments.
754 : */
755 1438 : if (optind < argc)
756 : {
757 0 : write_stderr("%s: invalid argument: \"%s\"\n",
758 0 : progname, argv[optind]);
759 0 : write_stderr("Try \"%s --help\" for more information.\n",
760 : progname);
761 0 : ExitPostmaster(1);
762 : }
763 :
764 : /*
765 : * Locate the proper configuration files and data directory, and read
766 : * postgresql.conf for the first time.
767 : */
768 1438 : if (!SelectConfigFiles(userDoption, progname))
769 0 : ExitPostmaster(2);
770 :
771 1436 : if (output_config_variable != NULL)
772 : {
773 : /*
774 : * If this is a runtime-computed GUC, it hasn't yet been initialized,
775 : * and the present value is not useful. However, this is a convenient
776 : * place to print the value for most GUCs because it is safe to run
777 : * postmaster startup to this point even if the server is already
778 : * running. For the handful of runtime-computed GUCs that we cannot
779 : * provide meaningful values for yet, we wait until later in
780 : * postmaster startup to print the value. We won't be able to use -C
781 : * on running servers for those GUCs, but using this option now would
782 : * lead to incorrect results for them.
783 : */
784 4 : int flags = GetConfigOptionFlags(output_config_variable, true);
785 :
786 4 : if ((flags & GUC_RUNTIME_COMPUTED) == 0)
787 : {
788 : /*
789 : * "-C guc" was specified, so print GUC's value and exit. No
790 : * extra permission check is needed because the user is reading
791 : * inside the data dir.
792 : */
793 2 : const char *config_val = GetConfigOption(output_config_variable,
794 : false, false);
795 :
796 2 : puts(config_val ? config_val : "");
797 2 : ExitPostmaster(0);
798 : }
799 :
800 : /*
801 : * A runtime-computed GUC will be printed later on. As we initialize
802 : * a server startup sequence, silence any log messages that may show
803 : * up in the output generated. FATAL and more severe messages are
804 : * useful to show, even if one would only expect at least PANIC. LOG
805 : * entries are hidden.
806 : */
807 2 : SetConfigOption("log_min_messages", "FATAL", PGC_SUSET,
808 : PGC_S_OVERRIDE);
809 : }
810 :
811 : /* Verify that DataDir looks reasonable */
812 1434 : checkDataDir();
813 :
814 : /* Check that pg_control exists */
815 1434 : checkControlFile();
816 :
817 : /* And switch working directory into it */
818 1434 : ChangeToDataDir();
819 :
820 : /*
821 : * Check for invalid combinations of GUC settings.
822 : */
823 1434 : if (SuperuserReservedConnections + ReservedConnections >= MaxConnections)
824 : {
825 0 : write_stderr("%s: superuser_reserved_connections (%d) plus reserved_connections (%d) must be less than max_connections (%d)\n",
826 : progname,
827 : SuperuserReservedConnections, ReservedConnections,
828 : MaxConnections);
829 0 : ExitPostmaster(1);
830 : }
831 1434 : if (XLogArchiveMode > ARCHIVE_MODE_OFF && wal_level == WAL_LEVEL_MINIMAL)
832 0 : ereport(ERROR,
833 : (errmsg("WAL archival cannot be enabled when wal_level is \"minimal\"")));
834 1434 : if (max_wal_senders > 0 && wal_level == WAL_LEVEL_MINIMAL)
835 0 : ereport(ERROR,
836 : (errmsg("WAL streaming (max_wal_senders > 0) requires wal_level \"replica\" or \"logical\"")));
837 1434 : if (summarize_wal && wal_level == WAL_LEVEL_MINIMAL)
838 0 : ereport(ERROR,
839 : (errmsg("WAL cannot be summarized when wal_level is \"minimal\"")));
840 :
841 : /*
842 : * Other one-time internal sanity checks can go here, if they are fast.
843 : * (Put any slow processing further down, after postmaster.pid creation.)
844 : */
845 1434 : if (!CheckDateTokenTables())
846 : {
847 0 : write_stderr("%s: invalid datetoken tables, please fix\n", progname);
848 0 : ExitPostmaster(1);
849 : }
850 :
851 : /*
852 : * Now that we are done processing the postmaster arguments, reset
853 : * getopt(3) library so that it will work correctly in subprocesses.
854 : */
855 1434 : optind = 1;
856 : #ifdef HAVE_INT_OPTRESET
857 : optreset = 1; /* some systems need this too */
858 : #endif
859 :
860 : /* For debugging: display postmaster environment */
861 : {
862 : extern char **environ;
863 : char **p;
864 :
865 1434 : ereport(DEBUG3,
866 : (errmsg_internal("%s: PostmasterMain: initial environment dump:",
867 : progname)));
868 1434 : ereport(DEBUG3,
869 : (errmsg_internal("-----------------------------------------")));
870 58478 : for (p = environ; *p; ++p)
871 57044 : ereport(DEBUG3,
872 : (errmsg_internal("\t%s", *p)));
873 1434 : ereport(DEBUG3,
874 : (errmsg_internal("-----------------------------------------")));
875 : }
876 :
877 : /*
878 : * Create lockfile for data directory.
879 : *
880 : * We want to do this before we try to grab the input sockets, because the
881 : * data directory interlock is more reliable than the socket-file
882 : * interlock (thanks to whoever decided to put socket files in /tmp :-().
883 : * For the same reason, it's best to grab the TCP socket(s) before the
884 : * Unix socket(s).
885 : *
886 : * Also note that this internally sets up the on_proc_exit function that
887 : * is responsible for removing both data directory and socket lockfiles;
888 : * so it must happen before opening sockets so that at exit, the socket
889 : * lockfiles go away after CloseServerPorts runs.
890 : */
891 1434 : CreateDataDirLockFile(true);
892 :
893 : /*
894 : * Read the control file (for error checking and config info).
895 : *
896 : * Since we verify the control file's CRC, this has a useful side effect
897 : * on machines where we need a run-time test for CRC support instructions.
898 : * The postmaster will do the test once at startup, and then its child
899 : * processes will inherit the correct function pointer and not need to
900 : * repeat the test.
901 : */
902 1432 : LocalProcessControlFile(false);
903 :
904 : /*
905 : * Register the apply launcher. It's probably a good idea to call this
906 : * before any modules had a chance to take the background worker slots.
907 : */
908 1432 : ApplyLauncherRegister();
909 :
910 : /*
911 : * process any libraries that should be preloaded at postmaster start
912 : */
913 1432 : process_shared_preload_libraries();
914 :
915 : /*
916 : * Initialize SSL library, if specified.
917 : */
918 : #ifdef USE_SSL
919 1432 : if (EnableSSL)
920 : {
921 52 : (void) secure_initialize(true);
922 46 : LoadedSSL = true;
923 : }
924 : #endif
925 :
926 : /*
927 : * Now that loadable modules have had their chance to alter any GUCs,
928 : * calculate MaxBackends.
929 : */
930 1426 : InitializeMaxBackends();
931 :
932 : /*
933 : * Give preloaded libraries a chance to request additional shared memory.
934 : */
935 1426 : process_shmem_requests();
936 :
937 : /*
938 : * Now that loadable modules have had their chance to request additional
939 : * shared memory, determine the value of any runtime-computed GUCs that
940 : * depend on the amount of shared memory required.
941 : */
942 1426 : InitializeShmemGUCs();
943 :
944 : /*
945 : * Now that modules have been loaded, we can process any custom resource
946 : * managers specified in the wal_consistency_checking GUC.
947 : */
948 1426 : InitializeWalConsistencyChecking();
949 :
950 : /*
951 : * If -C was specified with a runtime-computed GUC, we held off printing
952 : * the value earlier, as the GUC was not yet initialized. We handle -C
953 : * for most GUCs before we lock the data directory so that the option may
954 : * be used on a running server. However, a handful of GUCs are runtime-
955 : * computed and do not have meaningful values until after locking the data
956 : * directory, and we cannot safely calculate their values earlier on a
957 : * running server. At this point, such GUCs should be properly
958 : * initialized, and we haven't yet set up shared memory, so this is a good
959 : * time to handle the -C option for these special GUCs.
960 : */
961 1426 : if (output_config_variable != NULL)
962 : {
963 2 : const char *config_val = GetConfigOption(output_config_variable,
964 : false, false);
965 :
966 2 : puts(config_val ? config_val : "");
967 2 : ExitPostmaster(0);
968 : }
969 :
970 : /*
971 : * Set up shared memory and semaphores.
972 : *
973 : * Note: if using SysV shmem and/or semas, each postmaster startup will
974 : * normally choose the same IPC keys. This helps ensure that we will
975 : * clean up dead IPC objects if the postmaster crashes and is restarted.
976 : */
977 1424 : CreateSharedMemoryAndSemaphores();
978 :
979 : /*
980 : * Estimate number of openable files. This must happen after setting up
981 : * semaphores, because on some platforms semaphores count as open files.
982 : */
983 1422 : set_max_safe_fds();
984 :
985 : /*
986 : * Set reference point for stack-depth checking.
987 : */
988 1422 : (void) set_stack_base();
989 :
990 : /*
991 : * Initialize pipe (or process handle on Windows) that allows children to
992 : * wake up from sleep on postmaster death.
993 : */
994 1422 : InitPostmasterDeathWatchHandle();
995 :
996 : #ifdef WIN32
997 :
998 : /*
999 : * Initialize I/O completion port used to deliver list of dead children.
1000 : */
1001 : win32ChildQueue = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 1);
1002 : if (win32ChildQueue == NULL)
1003 : ereport(FATAL,
1004 : (errmsg("could not create I/O completion port for child queue")));
1005 : #endif
1006 :
1007 : #ifdef EXEC_BACKEND
1008 : /* Write out nondefault GUC settings for child processes to use */
1009 : write_nondefault_variables(PGC_POSTMASTER);
1010 :
1011 : /*
1012 : * Clean out the temp directory used to transmit parameters to child
1013 : * processes (see internal_forkexec). We must do this before launching
1014 : * any child processes, else we have a race condition: we could remove a
1015 : * parameter file before the child can read it. It should be safe to do
1016 : * so now, because we verified earlier that there are no conflicting
1017 : * Postgres processes in this data directory.
1018 : */
1019 : RemovePgTempFilesInDir(PG_TEMP_FILES_DIR, true, false);
1020 : #endif
1021 :
1022 : /*
1023 : * Forcibly remove the files signaling a standby promotion request.
1024 : * Otherwise, the existence of those files triggers a promotion too early,
1025 : * whether a user wants that or not.
1026 : *
1027 : * This removal of files is usually unnecessary because they can exist
1028 : * only during a few moments during a standby promotion. However there is
1029 : * a race condition: if pg_ctl promote is executed and creates the files
1030 : * during a promotion, the files can stay around even after the server is
1031 : * brought up to be the primary. Then, if a new standby starts by using
1032 : * the backup taken from the new primary, the files can exist at server
1033 : * startup and must be removed in order to avoid an unexpected promotion.
1034 : *
1035 : * Note that promotion signal files need to be removed before the startup
1036 : * process is invoked. Because, after that, they can be used by
1037 : * postmaster's SIGUSR1 signal handler.
1038 : */
1039 1422 : RemovePromoteSignalFiles();
1040 :
1041 : /* Do the same for logrotate signal file */
1042 1422 : RemoveLogrotateSignalFiles();
1043 :
1044 : /* Remove any outdated file holding the current log filenames. */
1045 1422 : if (unlink(LOG_METAINFO_DATAFILE) < 0 && errno != ENOENT)
1046 0 : ereport(LOG,
1047 : (errcode_for_file_access(),
1048 : errmsg("could not remove file \"%s\": %m",
1049 : LOG_METAINFO_DATAFILE)));
1050 :
1051 : /*
1052 : * If enabled, start up syslogger collection subprocess
1053 : */
1054 1422 : SysLoggerPID = SysLogger_Start();
1055 :
1056 : /*
1057 : * Reset whereToSendOutput from DestDebug (its starting state) to
1058 : * DestNone. This stops ereport from sending log messages to stderr unless
1059 : * Log_destination permits. We don't do this until the postmaster is
1060 : * fully launched, since startup failures may as well be reported to
1061 : * stderr.
1062 : *
1063 : * If we are in fact disabling logging to stderr, first emit a log message
1064 : * saying so, to provide a breadcrumb trail for users who may not remember
1065 : * that their logging is configured to go somewhere else.
1066 : */
1067 1422 : if (!(Log_destination & LOG_DESTINATION_STDERR))
1068 0 : ereport(LOG,
1069 : (errmsg("ending log output to stderr"),
1070 : errhint("Future log output will go to log destination \"%s\".",
1071 : Log_destination_string)));
1072 :
1073 1422 : whereToSendOutput = DestNone;
1074 :
1075 : /*
1076 : * Report server startup in log. While we could emit this much earlier,
1077 : * it seems best to do so after starting the log collector, if we intend
1078 : * to use one.
1079 : */
1080 1422 : ereport(LOG,
1081 : (errmsg("starting %s", PG_VERSION_STR)));
1082 :
1083 : /*
1084 : * Establish input sockets.
1085 : *
1086 : * First set up an on_proc_exit function that's charged with closing the
1087 : * sockets again at postmaster shutdown.
1088 : */
1089 1422 : ListenSockets = palloc(MAXLISTEN * sizeof(pgsocket));
1090 1422 : on_proc_exit(CloseServerPorts, 0);
1091 :
1092 1422 : if (ListenAddresses)
1093 : {
1094 : char *rawstring;
1095 : List *elemlist;
1096 : ListCell *l;
1097 1422 : int success = 0;
1098 :
1099 : /* Need a modifiable copy of ListenAddresses */
1100 1422 : rawstring = pstrdup(ListenAddresses);
1101 :
1102 : /* Parse string into list of hostnames */
1103 1422 : if (!SplitGUCList(rawstring, ',', &elemlist))
1104 : {
1105 : /* syntax error in list */
1106 0 : ereport(FATAL,
1107 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1108 : errmsg("invalid list syntax in parameter \"%s\"",
1109 : "listen_addresses")));
1110 : }
1111 :
1112 1472 : foreach(l, elemlist)
1113 : {
1114 50 : char *curhost = (char *) lfirst(l);
1115 :
1116 50 : if (strcmp(curhost, "*") == 0)
1117 0 : status = ListenServerPort(AF_UNSPEC, NULL,
1118 0 : (unsigned short) PostPortNumber,
1119 : NULL,
1120 : ListenSockets,
1121 : &NumListenSockets,
1122 : MAXLISTEN);
1123 : else
1124 50 : status = ListenServerPort(AF_UNSPEC, curhost,
1125 50 : (unsigned short) PostPortNumber,
1126 : NULL,
1127 : ListenSockets,
1128 : &NumListenSockets,
1129 : MAXLISTEN);
1130 :
1131 50 : if (status == STATUS_OK)
1132 : {
1133 50 : success++;
1134 : /* record the first successful host addr in lockfile */
1135 50 : if (!listen_addr_saved)
1136 : {
1137 50 : AddToDataDirLockFile(LOCK_FILE_LINE_LISTEN_ADDR, curhost);
1138 50 : listen_addr_saved = true;
1139 : }
1140 : }
1141 : else
1142 0 : ereport(WARNING,
1143 : (errmsg("could not create listen socket for \"%s\"",
1144 : curhost)));
1145 : }
1146 :
1147 1422 : if (!success && elemlist != NIL)
1148 0 : ereport(FATAL,
1149 : (errmsg("could not create any TCP/IP sockets")));
1150 :
1151 1422 : list_free(elemlist);
1152 1422 : pfree(rawstring);
1153 : }
1154 :
1155 : #ifdef USE_BONJOUR
1156 : /* Register for Bonjour only if we opened TCP socket(s) */
1157 : if (enable_bonjour && NumListenSockets > 0)
1158 : {
1159 : DNSServiceErrorType err;
1160 :
1161 : /*
1162 : * We pass 0 for interface_index, which will result in registering on
1163 : * all "applicable" interfaces. It's not entirely clear from the
1164 : * DNS-SD docs whether this would be appropriate if we have bound to
1165 : * just a subset of the available network interfaces.
1166 : */
1167 : err = DNSServiceRegister(&bonjour_sdref,
1168 : 0,
1169 : 0,
1170 : bonjour_name,
1171 : "_postgresql._tcp.",
1172 : NULL,
1173 : NULL,
1174 : pg_hton16(PostPortNumber),
1175 : 0,
1176 : NULL,
1177 : NULL,
1178 : NULL);
1179 : if (err != kDNSServiceErr_NoError)
1180 : ereport(LOG,
1181 : (errmsg("DNSServiceRegister() failed: error code %ld",
1182 : (long) err)));
1183 :
1184 : /*
1185 : * We don't bother to read the mDNS daemon's reply, and we expect that
1186 : * it will automatically terminate our registration when the socket is
1187 : * closed at postmaster termination. So there's nothing more to be
1188 : * done here. However, the bonjour_sdref is kept around so that
1189 : * forked children can close their copies of the socket.
1190 : */
1191 : }
1192 : #endif
1193 :
1194 1422 : if (Unix_socket_directories)
1195 : {
1196 : char *rawstring;
1197 : List *elemlist;
1198 : ListCell *l;
1199 1422 : int success = 0;
1200 :
1201 : /* Need a modifiable copy of Unix_socket_directories */
1202 1422 : rawstring = pstrdup(Unix_socket_directories);
1203 :
1204 : /* Parse string into list of directories */
1205 1422 : if (!SplitDirectoriesString(rawstring, ',', &elemlist))
1206 : {
1207 : /* syntax error in list */
1208 0 : ereport(FATAL,
1209 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1210 : errmsg("invalid list syntax in parameter \"%s\"",
1211 : "unix_socket_directories")));
1212 : }
1213 :
1214 2842 : foreach(l, elemlist)
1215 : {
1216 1420 : char *socketdir = (char *) lfirst(l);
1217 :
1218 1420 : status = ListenServerPort(AF_UNIX, NULL,
1219 1420 : (unsigned short) PostPortNumber,
1220 : socketdir,
1221 : ListenSockets,
1222 : &NumListenSockets,
1223 : MAXLISTEN);
1224 :
1225 1420 : if (status == STATUS_OK)
1226 : {
1227 1420 : success++;
1228 : /* record the first successful Unix socket in lockfile */
1229 1420 : if (success == 1)
1230 1420 : AddToDataDirLockFile(LOCK_FILE_LINE_SOCKET_DIR, socketdir);
1231 : }
1232 : else
1233 0 : ereport(WARNING,
1234 : (errmsg("could not create Unix-domain socket in directory \"%s\"",
1235 : socketdir)));
1236 : }
1237 :
1238 1422 : if (!success && elemlist != NIL)
1239 0 : ereport(FATAL,
1240 : (errmsg("could not create any Unix-domain sockets")));
1241 :
1242 1422 : list_free_deep(elemlist);
1243 1422 : pfree(rawstring);
1244 : }
1245 :
1246 : /*
1247 : * check that we have some socket to listen on
1248 : */
1249 1422 : if (NumListenSockets == 0)
1250 0 : ereport(FATAL,
1251 : (errmsg("no socket created for listening")));
1252 :
1253 : /*
1254 : * If no valid TCP ports, write an empty line for listen address,
1255 : * indicating the Unix socket must be used. Note that this line is not
1256 : * added to the lock file until there is a socket backing it.
1257 : */
1258 1422 : if (!listen_addr_saved)
1259 1372 : AddToDataDirLockFile(LOCK_FILE_LINE_LISTEN_ADDR, "");
1260 :
1261 : /*
1262 : * Record postmaster options. We delay this till now to avoid recording
1263 : * bogus options (eg, unusable port number).
1264 : */
1265 1422 : if (!CreateOptsFile(argc, argv, my_exec_path))
1266 0 : ExitPostmaster(1);
1267 :
1268 : /*
1269 : * Write the external PID file if requested
1270 : */
1271 1422 : if (external_pid_file)
1272 : {
1273 0 : FILE *fpidfile = fopen(external_pid_file, "w");
1274 :
1275 0 : if (fpidfile)
1276 : {
1277 0 : fprintf(fpidfile, "%d\n", MyProcPid);
1278 0 : fclose(fpidfile);
1279 :
1280 : /* Make PID file world readable */
1281 0 : if (chmod(external_pid_file, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) != 0)
1282 0 : write_stderr("%s: could not change permissions of external PID file \"%s\": %m\n",
1283 : progname, external_pid_file);
1284 : }
1285 : else
1286 0 : write_stderr("%s: could not write external PID file \"%s\": %m\n",
1287 : progname, external_pid_file);
1288 :
1289 0 : on_proc_exit(unlink_external_pid_file, 0);
1290 : }
1291 :
1292 : /*
1293 : * Remove old temporary files. At this point there can be no other
1294 : * Postgres processes running in this directory, so this should be safe.
1295 : */
1296 1422 : RemovePgTempFiles();
1297 :
1298 : /*
1299 : * Initialize the autovacuum subsystem (again, no process start yet)
1300 : */
1301 1422 : autovac_init();
1302 :
1303 : /*
1304 : * Load configuration files for client authentication.
1305 : */
1306 1422 : if (!load_hba())
1307 : {
1308 : /*
1309 : * It makes no sense to continue if we fail to load the HBA file,
1310 : * since there is no way to connect to the database in this case.
1311 : */
1312 0 : ereport(FATAL,
1313 : /* translator: %s is a configuration file */
1314 : (errmsg("could not load %s", HbaFileName)));
1315 : }
1316 1422 : if (!load_ident())
1317 : {
1318 : /*
1319 : * We can start up without the IDENT file, although it means that you
1320 : * cannot log in using any of the authentication methods that need a
1321 : * user name mapping. load_ident() already logged the details of error
1322 : * to the log.
1323 : */
1324 : }
1325 :
1326 : #ifdef HAVE_PTHREAD_IS_THREADED_NP
1327 :
1328 : /*
1329 : * On macOS, libintl replaces setlocale() with a version that calls
1330 : * CFLocaleCopyCurrent() when its second argument is "" and every relevant
1331 : * environment variable is unset or empty. CFLocaleCopyCurrent() makes
1332 : * the process multithreaded. The postmaster calls sigprocmask() and
1333 : * calls fork() without an immediate exec(), both of which have undefined
1334 : * behavior in a multithreaded program. A multithreaded postmaster is the
1335 : * normal case on Windows, which offers neither fork() nor sigprocmask().
1336 : */
1337 : if (pthread_is_threaded_np() != 0)
1338 : ereport(FATAL,
1339 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1340 : errmsg("postmaster became multithreaded during startup"),
1341 : errhint("Set the LC_ALL environment variable to a valid locale.")));
1342 : #endif
1343 :
1344 : /*
1345 : * Remember postmaster startup time
1346 : */
1347 1422 : PgStartTime = GetCurrentTimestamp();
1348 :
1349 : /*
1350 : * Report postmaster status in the postmaster.pid file, to allow pg_ctl to
1351 : * see what's happening.
1352 : */
1353 1422 : AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STARTING);
1354 :
1355 : /* Start bgwriter and checkpointer so they can help with recovery */
1356 1422 : if (CheckpointerPID == 0)
1357 1422 : CheckpointerPID = StartChildProcess(B_CHECKPOINTER);
1358 1422 : if (BgWriterPID == 0)
1359 1422 : BgWriterPID = StartChildProcess(B_BG_WRITER);
1360 :
1361 : /*
1362 : * We're ready to rock and roll...
1363 : */
1364 1422 : StartupPID = StartChildProcess(B_STARTUP);
1365 : Assert(StartupPID != 0);
1366 1422 : StartupStatus = STARTUP_RUNNING;
1367 1422 : pmState = PM_STARTUP;
1368 :
1369 : /* Some workers may be scheduled to start now */
1370 1422 : maybe_start_bgworkers();
1371 :
1372 1422 : status = ServerLoop();
1373 :
1374 : /*
1375 : * ServerLoop probably shouldn't ever return, but if it does, close down.
1376 : */
1377 0 : ExitPostmaster(status != STATUS_OK);
1378 :
1379 : abort(); /* not reached */
1380 : }
1381 :
1382 :
1383 : /*
1384 : * on_proc_exit callback to close server's listen sockets
1385 : */
1386 : static void
1387 1416 : CloseServerPorts(int status, Datum arg)
1388 : {
1389 : int i;
1390 :
1391 : /*
1392 : * First, explicitly close all the socket FDs. We used to just let this
1393 : * happen implicitly at postmaster exit, but it's better to close them
1394 : * before we remove the postmaster.pid lockfile; otherwise there's a race
1395 : * condition if a new postmaster wants to re-use the TCP port number.
1396 : */
1397 2882 : for (i = 0; i < NumListenSockets; i++)
1398 : {
1399 1466 : if (closesocket(ListenSockets[i]) != 0)
1400 0 : elog(LOG, "could not close listen socket: %m");
1401 : }
1402 1416 : NumListenSockets = 0;
1403 :
1404 : /*
1405 : * Next, remove any filesystem entries for Unix sockets. To avoid race
1406 : * conditions against incoming postmasters, this must happen after closing
1407 : * the sockets and before removing lock files.
1408 : */
1409 1416 : RemoveSocketFiles();
1410 :
1411 : /*
1412 : * We don't do anything about socket lock files here; those will be
1413 : * removed in a later on_proc_exit callback.
1414 : */
1415 1416 : }
1416 :
1417 : /*
1418 : * on_proc_exit callback to delete external_pid_file
1419 : */
1420 : static void
1421 0 : unlink_external_pid_file(int status, Datum arg)
1422 : {
1423 0 : if (external_pid_file)
1424 0 : unlink(external_pid_file);
1425 0 : }
1426 :
1427 :
1428 : /*
1429 : * Compute and check the directory paths to files that are part of the
1430 : * installation (as deduced from the postgres executable's own location)
1431 : */
1432 : static void
1433 1438 : getInstallationPaths(const char *argv0)
1434 : {
1435 : DIR *pdir;
1436 :
1437 : /* Locate the postgres executable itself */
1438 1438 : if (find_my_exec(argv0, my_exec_path) < 0)
1439 0 : ereport(FATAL,
1440 : (errmsg("%s: could not locate my own executable path", argv0)));
1441 :
1442 : #ifdef EXEC_BACKEND
1443 : /* Locate executable backend before we change working directory */
1444 : if (find_other_exec(argv0, "postgres", PG_BACKEND_VERSIONSTR,
1445 : postgres_exec_path) < 0)
1446 : ereport(FATAL,
1447 : (errmsg("%s: could not locate matching postgres executable",
1448 : argv0)));
1449 : #endif
1450 :
1451 : /*
1452 : * Locate the pkglib directory --- this has to be set early in case we try
1453 : * to load any modules from it in response to postgresql.conf entries.
1454 : */
1455 1438 : get_pkglib_path(my_exec_path, pkglib_path);
1456 :
1457 : /*
1458 : * Verify that there's a readable directory there; otherwise the Postgres
1459 : * installation is incomplete or corrupt. (A typical cause of this
1460 : * failure is that the postgres executable has been moved or hardlinked to
1461 : * some directory that's not a sibling of the installation lib/
1462 : * directory.)
1463 : */
1464 1438 : pdir = AllocateDir(pkglib_path);
1465 1438 : if (pdir == NULL)
1466 0 : ereport(ERROR,
1467 : (errcode_for_file_access(),
1468 : errmsg("could not open directory \"%s\": %m",
1469 : pkglib_path),
1470 : errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
1471 : my_exec_path)));
1472 1438 : FreeDir(pdir);
1473 :
1474 : /*
1475 : * It's not worth checking the share/ directory. If the lib/ directory is
1476 : * there, then share/ probably is too.
1477 : */
1478 1438 : }
1479 :
1480 : /*
1481 : * Check that pg_control exists in the correct location in the data directory.
1482 : *
1483 : * No attempt is made to validate the contents of pg_control here. This is
1484 : * just a sanity check to see if we are looking at a real data directory.
1485 : */
1486 : static void
1487 1434 : checkControlFile(void)
1488 : {
1489 : char path[MAXPGPATH];
1490 : FILE *fp;
1491 :
1492 1434 : snprintf(path, sizeof(path), "%s/global/pg_control", DataDir);
1493 :
1494 1434 : fp = AllocateFile(path, PG_BINARY_R);
1495 1434 : if (fp == NULL)
1496 : {
1497 0 : write_stderr("%s: could not find the database system\n"
1498 : "Expected to find it in the directory \"%s\",\n"
1499 : "but could not open file \"%s\": %m\n",
1500 : progname, DataDir, path);
1501 0 : ExitPostmaster(2);
1502 : }
1503 1434 : FreeFile(fp);
1504 1434 : }
1505 :
1506 : /*
1507 : * Determine how long should we let ServerLoop sleep, in milliseconds.
1508 : *
1509 : * In normal conditions we wait at most one minute, to ensure that the other
1510 : * background tasks handled by ServerLoop get done even when no requests are
1511 : * arriving. However, if there are background workers waiting to be started,
1512 : * we don't actually sleep so that they are quickly serviced. Other exception
1513 : * cases are as shown in the code.
1514 : */
1515 : static int
1516 58924 : DetermineSleepTime(void)
1517 : {
1518 58924 : TimestampTz next_wakeup = 0;
1519 :
1520 : /*
1521 : * Normal case: either there are no background workers at all, or we're in
1522 : * a shutdown sequence (during which we ignore bgworkers altogether).
1523 : */
1524 58924 : if (Shutdown > NoShutdown ||
1525 52454 : (!StartWorkerNeeded && !HaveCrashedWorker))
1526 : {
1527 58924 : if (AbortStartTime != 0)
1528 : {
1529 : int seconds;
1530 :
1531 : /* time left to abort; clamp to 0 in case it already expired */
1532 2126 : seconds = SIGKILL_CHILDREN_AFTER_SECS -
1533 2126 : (time(NULL) - AbortStartTime);
1534 :
1535 2126 : return Max(seconds * 1000, 0);
1536 : }
1537 : else
1538 56798 : return 60 * 1000;
1539 : }
1540 :
1541 0 : if (StartWorkerNeeded)
1542 0 : return 0;
1543 :
1544 0 : if (HaveCrashedWorker)
1545 : {
1546 : slist_mutable_iter siter;
1547 :
1548 : /*
1549 : * When there are crashed bgworkers, we sleep just long enough that
1550 : * they are restarted when they request to be. Scan the list to
1551 : * determine the minimum of all wakeup times according to most recent
1552 : * crash time and requested restart interval.
1553 : */
1554 0 : slist_foreach_modify(siter, &BackgroundWorkerList)
1555 : {
1556 : RegisteredBgWorker *rw;
1557 : TimestampTz this_wakeup;
1558 :
1559 0 : rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
1560 :
1561 0 : if (rw->rw_crashed_at == 0)
1562 0 : continue;
1563 :
1564 0 : if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART
1565 0 : || rw->rw_terminate)
1566 : {
1567 0 : ForgetBackgroundWorker(&siter);
1568 0 : continue;
1569 : }
1570 :
1571 0 : this_wakeup = TimestampTzPlusMilliseconds(rw->rw_crashed_at,
1572 : 1000L * rw->rw_worker.bgw_restart_time);
1573 0 : if (next_wakeup == 0 || this_wakeup < next_wakeup)
1574 0 : next_wakeup = this_wakeup;
1575 : }
1576 : }
1577 :
1578 0 : if (next_wakeup != 0)
1579 : {
1580 : int ms;
1581 :
1582 : /* result of TimestampDifferenceMilliseconds is in [0, INT_MAX] */
1583 0 : ms = (int) TimestampDifferenceMilliseconds(GetCurrentTimestamp(),
1584 : next_wakeup);
1585 0 : return Min(60 * 1000, ms);
1586 : }
1587 :
1588 0 : return 60 * 1000;
1589 : }
1590 :
1591 : /*
1592 : * Activate or deactivate notifications of server socket events. Since we
1593 : * don't currently have a way to remove events from an existing WaitEventSet,
1594 : * we'll just destroy and recreate the whole thing. This is called during
1595 : * shutdown so we can wait for backends to exit without accepting new
1596 : * connections, and during crash reinitialization when we need to start
1597 : * listening for new connections again. The WaitEventSet will be freed in fork
1598 : * children by ClosePostmasterPorts().
1599 : */
1600 : static void
1601 2886 : ConfigurePostmasterWaitSet(bool accept_connections)
1602 : {
1603 2886 : if (pm_wait_set)
1604 1464 : FreeWaitEventSet(pm_wait_set);
1605 2886 : pm_wait_set = NULL;
1606 :
1607 4316 : pm_wait_set = CreateWaitEventSet(NULL,
1608 1430 : accept_connections ? (1 + NumListenSockets) : 1);
1609 2886 : AddWaitEventToSet(pm_wait_set, WL_LATCH_SET, PGINVALID_SOCKET, MyLatch,
1610 : NULL);
1611 :
1612 2886 : if (accept_connections)
1613 : {
1614 2910 : for (int i = 0; i < NumListenSockets; i++)
1615 1480 : AddWaitEventToSet(pm_wait_set, WL_SOCKET_ACCEPT, ListenSockets[i],
1616 : NULL, NULL);
1617 : }
1618 2886 : }
1619 :
1620 : /*
1621 : * Main idle loop of postmaster
1622 : */
1623 : static int
1624 1422 : ServerLoop(void)
1625 : {
1626 : time_t last_lockfile_recheck_time,
1627 : last_touch_time;
1628 : WaitEvent events[MAXLISTEN];
1629 : int nevents;
1630 :
1631 1422 : ConfigurePostmasterWaitSet(true);
1632 1422 : last_lockfile_recheck_time = last_touch_time = time(NULL);
1633 :
1634 : for (;;)
1635 57502 : {
1636 : time_t now;
1637 :
1638 58924 : nevents = WaitEventSetWait(pm_wait_set,
1639 58924 : DetermineSleepTime(),
1640 : events,
1641 : lengthof(events),
1642 : 0 /* postmaster posts no wait_events */ );
1643 :
1644 : /*
1645 : * Latch set by signal handler, or new connection pending on any of
1646 : * our sockets? If the latter, fork a child process to deal with it.
1647 : */
1648 116426 : for (int i = 0; i < nevents; i++)
1649 : {
1650 58924 : if (events[i].events & WL_LATCH_SET)
1651 37552 : ResetLatch(MyLatch);
1652 :
1653 : /*
1654 : * The following requests are handled unconditionally, even if we
1655 : * didn't see WL_LATCH_SET. This gives high priority to shutdown
1656 : * and reload requests where the latch happens to appear later in
1657 : * events[] or will be reported by a later call to
1658 : * WaitEventSetWait().
1659 : */
1660 58924 : if (pending_pm_shutdown_request)
1661 1410 : process_pm_shutdown_request();
1662 58924 : if (pending_pm_reload_request)
1663 246 : process_pm_reload_request();
1664 58924 : if (pending_pm_child_exit)
1665 30856 : process_pm_child_exit();
1666 57504 : if (pending_pm_pmsignal)
1667 5216 : process_pm_pmsignal();
1668 :
1669 57504 : if (events[i].events & WL_SOCKET_ACCEPT)
1670 : {
1671 : ClientSocket s;
1672 :
1673 21372 : if (AcceptConnection(events[i].fd, &s) == STATUS_OK)
1674 21372 : BackendStartup(&s);
1675 :
1676 : /* We no longer need the open socket in this process */
1677 21370 : if (s.sock != PGINVALID_SOCKET)
1678 : {
1679 21370 : if (closesocket(s.sock) != 0)
1680 0 : elog(LOG, "could not close client socket: %m");
1681 : }
1682 : }
1683 : }
1684 :
1685 : /* If we have lost the log collector, try to start a new one */
1686 57502 : if (SysLoggerPID == 0 && Logging_collector)
1687 0 : SysLoggerPID = SysLogger_Start();
1688 :
1689 : /*
1690 : * If no background writer process is running, and we are not in a
1691 : * state that prevents it, start one. It doesn't matter if this
1692 : * fails, we'll just try again later. Likewise for the checkpointer.
1693 : */
1694 57502 : if (pmState == PM_RUN || pmState == PM_RECOVERY ||
1695 9840 : pmState == PM_HOT_STANDBY || pmState == PM_STARTUP)
1696 : {
1697 51020 : if (CheckpointerPID == 0)
1698 8 : CheckpointerPID = StartChildProcess(B_CHECKPOINTER);
1699 51020 : if (BgWriterPID == 0)
1700 8 : BgWriterPID = StartChildProcess(B_BG_WRITER);
1701 : }
1702 :
1703 : /*
1704 : * Likewise, if we have lost the walwriter process, try to start a new
1705 : * one. But this is needed only in normal operation (else we cannot
1706 : * be writing any new WAL).
1707 : */
1708 57502 : if (WalWriterPID == 0 && pmState == PM_RUN)
1709 0 : WalWriterPID = StartChildProcess(B_WAL_WRITER);
1710 :
1711 : /*
1712 : * If we have lost the autovacuum launcher, try to start a new one. We
1713 : * don't want autovacuum to run in binary upgrade mode because
1714 : * autovacuum might update relfrozenxid for empty tables before the
1715 : * physical files are put in place.
1716 : */
1717 68248 : if (!IsBinaryUpgrade && AutoVacPID == 0 &&
1718 16124 : (AutoVacuumingActive() || start_autovac_launcher) &&
1719 5368 : pmState == PM_RUN)
1720 : {
1721 0 : AutoVacPID = StartChildProcess(B_AUTOVAC_LAUNCHER);
1722 0 : if (AutoVacPID != 0)
1723 0 : start_autovac_launcher = false; /* signal processed */
1724 : }
1725 :
1726 : /* If we have lost the archiver, try to start a new one. */
1727 57502 : if (PgArchPID == 0 && PgArchStartupAllowed())
1728 0 : PgArchPID = StartChildProcess(B_ARCHIVER);
1729 :
1730 : /* If we need to start a slot sync worker, try to do that now */
1731 57502 : MaybeStartSlotSyncWorker();
1732 :
1733 : /* If we need to signal the autovacuum launcher, do so now */
1734 57502 : if (avlauncher_needs_signal)
1735 : {
1736 0 : avlauncher_needs_signal = false;
1737 0 : if (AutoVacPID != 0)
1738 0 : kill(AutoVacPID, SIGUSR2);
1739 : }
1740 :
1741 : /* If we need to start a WAL receiver, try to do that now */
1742 57502 : if (WalReceiverRequested)
1743 312 : MaybeStartWalReceiver();
1744 :
1745 : /* If we need to start a WAL summarizer, try to do that now */
1746 57502 : MaybeStartWalSummarizer();
1747 :
1748 : /* Get other worker processes running, if needed */
1749 57502 : if (StartWorkerNeeded || HaveCrashedWorker)
1750 5948 : maybe_start_bgworkers();
1751 :
1752 : #ifdef HAVE_PTHREAD_IS_THREADED_NP
1753 :
1754 : /*
1755 : * With assertions enabled, check regularly for appearance of
1756 : * additional threads. All builds check at start and exit.
1757 : */
1758 : Assert(pthread_is_threaded_np() == 0);
1759 : #endif
1760 :
1761 : /*
1762 : * Lastly, check to see if it's time to do some things that we don't
1763 : * want to do every single time through the loop, because they're a
1764 : * bit expensive. Note that there's up to a minute of slop in when
1765 : * these tasks will be performed, since DetermineSleepTime() will let
1766 : * us sleep at most that long; except for SIGKILL timeout which has
1767 : * special-case logic there.
1768 : */
1769 57502 : now = time(NULL);
1770 :
1771 : /*
1772 : * If we already sent SIGQUIT to children and they are slow to shut
1773 : * down, it's time to send them SIGKILL (or SIGABRT if requested).
1774 : * This doesn't happen normally, but under certain conditions backends
1775 : * can get stuck while shutting down. This is a last measure to get
1776 : * them unwedged.
1777 : *
1778 : * Note we also do this during recovery from a process crash.
1779 : */
1780 57502 : if ((Shutdown >= ImmediateShutdown || FatalError) &&
1781 2134 : AbortStartTime != 0 &&
1782 2126 : (now - AbortStartTime) >= SIGKILL_CHILDREN_AFTER_SECS)
1783 : {
1784 : /* We were gentle with them before. Not anymore */
1785 0 : ereport(LOG,
1786 : /* translator: %s is SIGKILL or SIGABRT */
1787 : (errmsg("issuing %s to recalcitrant children",
1788 : send_abort_for_kill ? "SIGABRT" : "SIGKILL")));
1789 0 : TerminateChildren(send_abort_for_kill ? SIGABRT : SIGKILL);
1790 : /* reset flag so we don't SIGKILL again */
1791 0 : AbortStartTime = 0;
1792 : }
1793 :
1794 : /*
1795 : * Once a minute, verify that postmaster.pid hasn't been removed or
1796 : * overwritten. If it has, we force a shutdown. This avoids having
1797 : * postmasters and child processes hanging around after their database
1798 : * is gone, and maybe causing problems if a new database cluster is
1799 : * created in the same place. It also provides some protection
1800 : * against a DBA foolishly removing postmaster.pid and manually
1801 : * starting a new postmaster. Data corruption is likely to ensue from
1802 : * that anyway, but we can minimize the damage by aborting ASAP.
1803 : */
1804 57502 : if (now - last_lockfile_recheck_time >= 1 * SECS_PER_MINUTE)
1805 : {
1806 8 : if (!RecheckDataDirLockFile())
1807 : {
1808 0 : ereport(LOG,
1809 : (errmsg("performing immediate shutdown because data directory lock file is invalid")));
1810 0 : kill(MyProcPid, SIGQUIT);
1811 : }
1812 8 : last_lockfile_recheck_time = now;
1813 : }
1814 :
1815 : /*
1816 : * Touch Unix socket and lock files every 58 minutes, to ensure that
1817 : * they are not removed by overzealous /tmp-cleaning tasks. We assume
1818 : * no one runs cleaners with cutoff times of less than an hour ...
1819 : */
1820 57502 : if (now - last_touch_time >= 58 * SECS_PER_MINUTE)
1821 : {
1822 0 : TouchSocketFiles();
1823 0 : TouchSocketLockFiles();
1824 0 : last_touch_time = now;
1825 : }
1826 : }
1827 : }
1828 :
1829 : /*
1830 : * The client has sent a cancel request packet, not a normal
1831 : * start-a-new-connection packet. Perform the necessary processing.
1832 : * Nothing is sent back to the client.
1833 : */
1834 : void
1835 20 : processCancelRequest(int backendPID, int32 cancelAuthCode)
1836 : {
1837 : Backend *bp;
1838 :
1839 : #ifndef EXEC_BACKEND
1840 : dlist_iter iter;
1841 : #else
1842 : int i;
1843 : #endif
1844 :
1845 : /*
1846 : * See if we have a matching backend. In the EXEC_BACKEND case, we can no
1847 : * longer access the postmaster's own backend list, and must rely on the
1848 : * duplicate array in shared memory.
1849 : */
1850 : #ifndef EXEC_BACKEND
1851 42 : dlist_foreach(iter, &BackendList)
1852 : {
1853 42 : bp = dlist_container(Backend, elem, iter.cur);
1854 : #else
1855 : for (i = MaxLivePostmasterChildren() - 1; i >= 0; i--)
1856 : {
1857 : bp = (Backend *) &ShmemBackendArray[i];
1858 : #endif
1859 42 : if (bp->pid == backendPID)
1860 : {
1861 20 : if (bp->cancel_key == cancelAuthCode)
1862 : {
1863 : /* Found a match; signal that backend to cancel current op */
1864 20 : ereport(DEBUG2,
1865 : (errmsg_internal("processing cancel request: sending SIGINT to process %d",
1866 : backendPID)));
1867 20 : signal_child(bp->pid, SIGINT);
1868 : }
1869 : else
1870 : /* Right PID, wrong key: no way, Jose */
1871 0 : ereport(LOG,
1872 : (errmsg("wrong key in cancel request for process %d",
1873 : backendPID)));
1874 20 : return;
1875 : }
1876 : #ifndef EXEC_BACKEND /* make GNU Emacs 26.1 see brace balance */
1877 : }
1878 : #else
1879 : }
1880 : #endif
1881 :
1882 : /* No matching backend */
1883 0 : ereport(LOG,
1884 : (errmsg("PID %d in cancel request did not match any process",
1885 : backendPID)));
1886 : }
1887 :
1888 : /*
1889 : * canAcceptConnections --- check to see if database state allows connections
1890 : * of the specified type. backend_type can be BACKEND_TYPE_NORMAL,
1891 : * BACKEND_TYPE_AUTOVAC, or BACKEND_TYPE_BGWORKER. (Note that we don't yet
1892 : * know whether a NORMAL connection might turn into a walsender.)
1893 : */
1894 : static CAC_state
1895 26032 : canAcceptConnections(int backend_type)
1896 : {
1897 26032 : CAC_state result = CAC_OK;
1898 :
1899 : /*
1900 : * Can't start backends when in startup/shutdown/inconsistent recovery
1901 : * state. We treat autovac workers the same as user backends for this
1902 : * purpose. However, bgworkers are excluded from this test; we expect
1903 : * bgworker_should_start_now() decided whether the DB state allows them.
1904 : */
1905 26032 : if (pmState != PM_RUN && pmState != PM_HOT_STANDBY &&
1906 : backend_type != BACKEND_TYPE_BGWORKER)
1907 : {
1908 198 : if (Shutdown > NoShutdown)
1909 4 : return CAC_SHUTDOWN; /* shutdown is pending */
1910 194 : else if (!FatalError && pmState == PM_STARTUP)
1911 186 : return CAC_STARTUP; /* normal startup */
1912 8 : else if (!FatalError && pmState == PM_RECOVERY)
1913 8 : return CAC_NOTCONSISTENT; /* not yet at consistent recovery
1914 : * state */
1915 : else
1916 0 : return CAC_RECOVERY; /* else must be crash recovery */
1917 : }
1918 :
1919 : /*
1920 : * "Smart shutdown" restrictions are applied only to normal connections,
1921 : * not to autovac workers or bgworkers.
1922 : */
1923 25834 : if (!connsAllowed && backend_type == BACKEND_TYPE_NORMAL)
1924 0 : return CAC_SHUTDOWN; /* shutdown is pending */
1925 :
1926 : /*
1927 : * Don't start too many children.
1928 : *
1929 : * We allow more connections here than we can have backends because some
1930 : * might still be authenticating; they might fail auth, or some existing
1931 : * backend might exit before the auth cycle is completed. The exact
1932 : * MaxBackends limit is enforced when a new backend tries to join the
1933 : * shared-inval backend array.
1934 : *
1935 : * The limit here must match the sizes of the per-child-process arrays;
1936 : * see comments for MaxLivePostmasterChildren().
1937 : */
1938 25834 : if (CountChildren(BACKEND_TYPE_ALL) >= MaxLivePostmasterChildren())
1939 0 : result = CAC_TOOMANY;
1940 :
1941 25834 : return result;
1942 : }
1943 :
1944 : /*
1945 : * ClosePostmasterPorts -- close all the postmaster's open sockets
1946 : *
1947 : * This is called during child process startup to release file descriptors
1948 : * that are not needed by that child process. The postmaster still has
1949 : * them open, of course.
1950 : *
1951 : * Note: we pass am_syslogger as a boolean because we don't want to set
1952 : * the global variable yet when this is called.
1953 : */
1954 : void
1955 29942 : ClosePostmasterPorts(bool am_syslogger)
1956 : {
1957 : /* Release resources held by the postmaster's WaitEventSet. */
1958 29942 : if (pm_wait_set)
1959 : {
1960 26958 : FreeWaitEventSetAfterFork(pm_wait_set);
1961 26958 : pm_wait_set = NULL;
1962 : }
1963 :
1964 : #ifndef WIN32
1965 :
1966 : /*
1967 : * Close the write end of postmaster death watch pipe. It's important to
1968 : * do this as early as possible, so that if postmaster dies, others won't
1969 : * think that it's still running because we're holding the pipe open.
1970 : */
1971 29942 : if (close(postmaster_alive_fds[POSTMASTER_FD_OWN]) != 0)
1972 0 : ereport(FATAL,
1973 : (errcode_for_file_access(),
1974 : errmsg_internal("could not close postmaster death monitoring pipe in child process: %m")));
1975 29942 : postmaster_alive_fds[POSTMASTER_FD_OWN] = -1;
1976 : /* Notify fd.c that we released one pipe FD. */
1977 29942 : ReleaseExternalFD();
1978 : #endif
1979 :
1980 : /*
1981 : * Close the postmaster's listen sockets. These aren't tracked by fd.c,
1982 : * so we don't call ReleaseExternalFD() here.
1983 : *
1984 : * The listen sockets are marked as FD_CLOEXEC, so this isn't needed in
1985 : * EXEC_BACKEND mode.
1986 : */
1987 : #ifndef EXEC_BACKEND
1988 29942 : if (ListenSockets)
1989 : {
1990 60666 : for (int i = 0; i < NumListenSockets; i++)
1991 : {
1992 30726 : if (closesocket(ListenSockets[i]) != 0)
1993 0 : elog(LOG, "could not close listen socket: %m");
1994 : }
1995 29940 : pfree(ListenSockets);
1996 : }
1997 29942 : NumListenSockets = 0;
1998 29942 : ListenSockets = NULL;
1999 : #endif
2000 :
2001 : /*
2002 : * If using syslogger, close the read side of the pipe. We don't bother
2003 : * tracking this in fd.c, either.
2004 : */
2005 29942 : if (!am_syslogger)
2006 : {
2007 : #ifndef WIN32
2008 29940 : if (syslogPipe[0] >= 0)
2009 28 : close(syslogPipe[0]);
2010 29940 : syslogPipe[0] = -1;
2011 : #else
2012 : if (syslogPipe[0])
2013 : CloseHandle(syslogPipe[0]);
2014 : syslogPipe[0] = 0;
2015 : #endif
2016 : }
2017 :
2018 : #ifdef USE_BONJOUR
2019 : /* If using Bonjour, close the connection to the mDNS daemon */
2020 : if (bonjour_sdref)
2021 : close(DNSServiceRefSockFD(bonjour_sdref));
2022 : #endif
2023 29942 : }
2024 :
2025 :
2026 : /*
2027 : * InitProcessGlobals -- set MyProcPid, MyStartTime[stamp], random seeds
2028 : *
2029 : * Called early in the postmaster and every backend.
2030 : */
2031 : void
2032 31772 : InitProcessGlobals(void)
2033 : {
2034 31772 : MyProcPid = getpid();
2035 31772 : MyStartTimestamp = GetCurrentTimestamp();
2036 31772 : MyStartTime = timestamptz_to_time_t(MyStartTimestamp);
2037 :
2038 : /*
2039 : * Set a different global seed in every process. We want something
2040 : * unpredictable, so if possible, use high-quality random bits for the
2041 : * seed. Otherwise, fall back to a seed based on timestamp and PID.
2042 : */
2043 31772 : if (unlikely(!pg_prng_strong_seed(&pg_global_prng_state)))
2044 : {
2045 : uint64 rseed;
2046 :
2047 : /*
2048 : * Since PIDs and timestamps tend to change more frequently in their
2049 : * least significant bits, shift the timestamp left to allow a larger
2050 : * total number of seeds in a given time period. Since that would
2051 : * leave only 20 bits of the timestamp that cycle every ~1 second,
2052 : * also mix in some higher bits.
2053 : */
2054 0 : rseed = ((uint64) MyProcPid) ^
2055 0 : ((uint64) MyStartTimestamp << 12) ^
2056 0 : ((uint64) MyStartTimestamp >> 20);
2057 :
2058 0 : pg_prng_seed(&pg_global_prng_state, rseed);
2059 : }
2060 :
2061 : /*
2062 : * Also make sure that we've set a good seed for random(3). Use of that
2063 : * is deprecated in core Postgres, but extensions might use it.
2064 : */
2065 : #ifndef WIN32
2066 31772 : srandom(pg_prng_uint32(&pg_global_prng_state));
2067 : #endif
2068 31772 : }
2069 :
2070 : /*
2071 : * Child processes use SIGUSR1 to notify us of 'pmsignals'. pg_ctl uses
2072 : * SIGUSR1 to ask postmaster to check for logrotate and promote files.
2073 : */
2074 : static void
2075 5420 : handle_pm_pmsignal_signal(SIGNAL_ARGS)
2076 : {
2077 5420 : pending_pm_pmsignal = true;
2078 5420 : SetLatch(MyLatch);
2079 5420 : }
2080 :
2081 : /*
2082 : * pg_ctl uses SIGHUP to request a reload of the configuration files.
2083 : */
2084 : static void
2085 246 : handle_pm_reload_request_signal(SIGNAL_ARGS)
2086 : {
2087 246 : pending_pm_reload_request = true;
2088 246 : SetLatch(MyLatch);
2089 246 : }
2090 :
2091 : /*
2092 : * Re-read config files, and tell children to do same.
2093 : */
2094 : static void
2095 246 : process_pm_reload_request(void)
2096 : {
2097 246 : pending_pm_reload_request = false;
2098 :
2099 246 : ereport(DEBUG2,
2100 : (errmsg_internal("postmaster received reload request signal")));
2101 :
2102 246 : if (Shutdown <= SmartShutdown)
2103 : {
2104 246 : ereport(LOG,
2105 : (errmsg("received SIGHUP, reloading configuration files")));
2106 246 : ProcessConfigFile(PGC_SIGHUP);
2107 246 : SignalChildren(SIGHUP);
2108 246 : if (StartupPID != 0)
2109 54 : signal_child(StartupPID, SIGHUP);
2110 246 : if (BgWriterPID != 0)
2111 246 : signal_child(BgWriterPID, SIGHUP);
2112 246 : if (CheckpointerPID != 0)
2113 246 : signal_child(CheckpointerPID, SIGHUP);
2114 246 : if (WalWriterPID != 0)
2115 192 : signal_child(WalWriterPID, SIGHUP);
2116 246 : if (WalReceiverPID != 0)
2117 48 : signal_child(WalReceiverPID, SIGHUP);
2118 246 : if (WalSummarizerPID != 0)
2119 0 : signal_child(WalSummarizerPID, SIGHUP);
2120 246 : if (AutoVacPID != 0)
2121 174 : signal_child(AutoVacPID, SIGHUP);
2122 246 : if (PgArchPID != 0)
2123 8 : signal_child(PgArchPID, SIGHUP);
2124 246 : if (SysLoggerPID != 0)
2125 0 : signal_child(SysLoggerPID, SIGHUP);
2126 246 : if (SlotSyncWorkerPID != 0)
2127 2 : signal_child(SlotSyncWorkerPID, SIGHUP);
2128 :
2129 : /* Reload authentication config files too */
2130 246 : if (!load_hba())
2131 0 : ereport(LOG,
2132 : /* translator: %s is a configuration file */
2133 : (errmsg("%s was not reloaded", HbaFileName)));
2134 :
2135 246 : if (!load_ident())
2136 0 : ereport(LOG,
2137 : (errmsg("%s was not reloaded", IdentFileName)));
2138 :
2139 : #ifdef USE_SSL
2140 : /* Reload SSL configuration as well */
2141 246 : if (EnableSSL)
2142 : {
2143 0 : if (secure_initialize(false) == 0)
2144 0 : LoadedSSL = true;
2145 : else
2146 0 : ereport(LOG,
2147 : (errmsg("SSL configuration was not reloaded")));
2148 : }
2149 : else
2150 : {
2151 246 : secure_destroy();
2152 246 : LoadedSSL = false;
2153 : }
2154 : #endif
2155 :
2156 : #ifdef EXEC_BACKEND
2157 : /* Update the starting-point file for future children */
2158 : write_nondefault_variables(PGC_SIGHUP);
2159 : #endif
2160 : }
2161 246 : }
2162 :
2163 : /*
2164 : * pg_ctl uses SIGTERM, SIGINT and SIGQUIT to request different types of
2165 : * shutdown.
2166 : */
2167 : static void
2168 1410 : handle_pm_shutdown_request_signal(SIGNAL_ARGS)
2169 : {
2170 1410 : switch (postgres_signal_arg)
2171 : {
2172 34 : case SIGTERM:
2173 : /* smart is implied if the other two flags aren't set */
2174 34 : pending_pm_shutdown_request = true;
2175 34 : break;
2176 788 : case SIGINT:
2177 788 : pending_pm_fast_shutdown_request = true;
2178 788 : pending_pm_shutdown_request = true;
2179 788 : break;
2180 588 : case SIGQUIT:
2181 588 : pending_pm_immediate_shutdown_request = true;
2182 588 : pending_pm_shutdown_request = true;
2183 588 : break;
2184 : }
2185 1410 : SetLatch(MyLatch);
2186 1410 : }
2187 :
2188 : /*
2189 : * Process shutdown request.
2190 : */
2191 : static void
2192 1410 : process_pm_shutdown_request(void)
2193 : {
2194 : int mode;
2195 :
2196 1410 : ereport(DEBUG2,
2197 : (errmsg_internal("postmaster received shutdown request signal")));
2198 :
2199 1410 : pending_pm_shutdown_request = false;
2200 :
2201 : /*
2202 : * If more than one shutdown request signal arrived since the last server
2203 : * loop, take the one that is the most immediate. That matches the
2204 : * priority that would apply if we processed them one by one in any order.
2205 : */
2206 1410 : if (pending_pm_immediate_shutdown_request)
2207 : {
2208 588 : pending_pm_immediate_shutdown_request = false;
2209 588 : pending_pm_fast_shutdown_request = false;
2210 588 : mode = ImmediateShutdown;
2211 : }
2212 822 : else if (pending_pm_fast_shutdown_request)
2213 : {
2214 788 : pending_pm_fast_shutdown_request = false;
2215 788 : mode = FastShutdown;
2216 : }
2217 : else
2218 34 : mode = SmartShutdown;
2219 :
2220 1410 : switch (mode)
2221 : {
2222 34 : case SmartShutdown:
2223 :
2224 : /*
2225 : * Smart Shutdown:
2226 : *
2227 : * Wait for children to end their work, then shut down.
2228 : */
2229 34 : if (Shutdown >= SmartShutdown)
2230 0 : break;
2231 34 : Shutdown = SmartShutdown;
2232 34 : ereport(LOG,
2233 : (errmsg("received smart shutdown request")));
2234 :
2235 : /* Report status */
2236 34 : AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STOPPING);
2237 : #ifdef USE_SYSTEMD
2238 : sd_notify(0, "STOPPING=1");
2239 : #endif
2240 :
2241 : /*
2242 : * If we reached normal running, we go straight to waiting for
2243 : * client backends to exit. If already in PM_STOP_BACKENDS or a
2244 : * later state, do not change it.
2245 : */
2246 34 : if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
2247 34 : connsAllowed = false;
2248 0 : else if (pmState == PM_STARTUP || pmState == PM_RECOVERY)
2249 : {
2250 : /* There should be no clients, so proceed to stop children */
2251 0 : pmState = PM_STOP_BACKENDS;
2252 : }
2253 :
2254 : /*
2255 : * Now wait for online backup mode to end and backends to exit. If
2256 : * that is already the case, PostmasterStateMachine will take the
2257 : * next step.
2258 : */
2259 34 : PostmasterStateMachine();
2260 34 : break;
2261 :
2262 788 : case FastShutdown:
2263 :
2264 : /*
2265 : * Fast Shutdown:
2266 : *
2267 : * Abort all children with SIGTERM (rollback active transactions
2268 : * and exit) and shut down when they are gone.
2269 : */
2270 788 : if (Shutdown >= FastShutdown)
2271 0 : break;
2272 788 : Shutdown = FastShutdown;
2273 788 : ereport(LOG,
2274 : (errmsg("received fast shutdown request")));
2275 :
2276 : /* Report status */
2277 788 : AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STOPPING);
2278 : #ifdef USE_SYSTEMD
2279 : sd_notify(0, "STOPPING=1");
2280 : #endif
2281 :
2282 788 : if (pmState == PM_STARTUP || pmState == PM_RECOVERY)
2283 : {
2284 : /* Just shut down background processes silently */
2285 0 : pmState = PM_STOP_BACKENDS;
2286 : }
2287 788 : else if (pmState == PM_RUN ||
2288 94 : pmState == PM_HOT_STANDBY)
2289 : {
2290 : /* Report that we're about to zap live client sessions */
2291 788 : ereport(LOG,
2292 : (errmsg("aborting any active transactions")));
2293 788 : pmState = PM_STOP_BACKENDS;
2294 : }
2295 :
2296 : /*
2297 : * PostmasterStateMachine will issue any necessary signals, or
2298 : * take the next step if no child processes need to be killed.
2299 : */
2300 788 : PostmasterStateMachine();
2301 788 : break;
2302 :
2303 588 : case ImmediateShutdown:
2304 :
2305 : /*
2306 : * Immediate Shutdown:
2307 : *
2308 : * abort all children with SIGQUIT, wait for them to exit,
2309 : * terminate remaining ones with SIGKILL, then exit without
2310 : * attempt to properly shut down the data base system.
2311 : */
2312 588 : if (Shutdown >= ImmediateShutdown)
2313 0 : break;
2314 588 : Shutdown = ImmediateShutdown;
2315 588 : ereport(LOG,
2316 : (errmsg("received immediate shutdown request")));
2317 :
2318 : /* Report status */
2319 588 : AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STOPPING);
2320 : #ifdef USE_SYSTEMD
2321 : sd_notify(0, "STOPPING=1");
2322 : #endif
2323 :
2324 : /* tell children to shut down ASAP */
2325 : /* (note we don't apply send_abort_for_crash here) */
2326 588 : SetQuitSignalReason(PMQUIT_FOR_STOP);
2327 588 : TerminateChildren(SIGQUIT);
2328 588 : pmState = PM_WAIT_BACKENDS;
2329 :
2330 : /* set stopwatch for them to die */
2331 588 : AbortStartTime = time(NULL);
2332 :
2333 : /*
2334 : * Now wait for backends to exit. If there are none,
2335 : * PostmasterStateMachine will take the next step.
2336 : */
2337 588 : PostmasterStateMachine();
2338 588 : break;
2339 : }
2340 1410 : }
2341 :
2342 : static void
2343 31114 : handle_pm_child_exit_signal(SIGNAL_ARGS)
2344 : {
2345 31114 : pending_pm_child_exit = true;
2346 31114 : SetLatch(MyLatch);
2347 31114 : }
2348 :
2349 : /*
2350 : * Cleanup after a child process dies.
2351 : */
2352 : static void
2353 30856 : process_pm_child_exit(void)
2354 : {
2355 : int pid; /* process id of dead child process */
2356 : int exitstatus; /* its exit status */
2357 :
2358 30856 : pending_pm_child_exit = false;
2359 :
2360 30856 : ereport(DEBUG4,
2361 : (errmsg_internal("reaping dead processes")));
2362 :
2363 64004 : while ((pid = waitpid(-1, &exitstatus, WNOHANG)) > 0)
2364 : {
2365 : /*
2366 : * Check if this child was a startup process.
2367 : */
2368 33152 : if (pid == StartupPID)
2369 : {
2370 1430 : StartupPID = 0;
2371 :
2372 : /*
2373 : * Startup process exited in response to a shutdown request (or it
2374 : * completed normally regardless of the shutdown request).
2375 : */
2376 1430 : if (Shutdown > NoShutdown &&
2377 182 : (EXIT_STATUS_0(exitstatus) || EXIT_STATUS_1(exitstatus)))
2378 : {
2379 94 : StartupStatus = STARTUP_NOT_RUNNING;
2380 94 : pmState = PM_WAIT_BACKENDS;
2381 : /* PostmasterStateMachine logic does the rest */
2382 94 : continue;
2383 : }
2384 :
2385 1336 : if (EXIT_STATUS_3(exitstatus))
2386 : {
2387 0 : ereport(LOG,
2388 : (errmsg("shutdown at recovery target")));
2389 0 : StartupStatus = STARTUP_NOT_RUNNING;
2390 0 : Shutdown = Max(Shutdown, SmartShutdown);
2391 0 : TerminateChildren(SIGTERM);
2392 0 : pmState = PM_WAIT_BACKENDS;
2393 : /* PostmasterStateMachine logic does the rest */
2394 0 : continue;
2395 : }
2396 :
2397 : /*
2398 : * Unexpected exit of startup process (including FATAL exit)
2399 : * during PM_STARTUP is treated as catastrophic. There are no
2400 : * other processes running yet, so we can just exit.
2401 : */
2402 1336 : if (pmState == PM_STARTUP &&
2403 968 : StartupStatus != STARTUP_SIGNALED &&
2404 968 : !EXIT_STATUS_0(exitstatus))
2405 : {
2406 0 : LogChildExit(LOG, _("startup process"),
2407 : pid, exitstatus);
2408 0 : ereport(LOG,
2409 : (errmsg("aborting startup due to startup process failure")));
2410 0 : ExitPostmaster(1);
2411 : }
2412 :
2413 : /*
2414 : * After PM_STARTUP, any unexpected exit (including FATAL exit) of
2415 : * the startup process is catastrophic, so kill other children,
2416 : * and set StartupStatus so we don't try to reinitialize after
2417 : * they're gone. Exception: if StartupStatus is STARTUP_SIGNALED,
2418 : * then we previously sent the startup process a SIGQUIT; so
2419 : * that's probably the reason it died, and we do want to try to
2420 : * restart in that case.
2421 : *
2422 : * This stanza also handles the case where we sent a SIGQUIT
2423 : * during PM_STARTUP due to some dead_end child crashing: in that
2424 : * situation, if the startup process dies on the SIGQUIT, we need
2425 : * to transition to PM_WAIT_BACKENDS state which will allow
2426 : * PostmasterStateMachine to restart the startup process. (On the
2427 : * other hand, the startup process might complete normally, if we
2428 : * were too late with the SIGQUIT. In that case we'll fall
2429 : * through and commence normal operations.)
2430 : */
2431 1336 : if (!EXIT_STATUS_0(exitstatus))
2432 : {
2433 94 : if (StartupStatus == STARTUP_SIGNALED)
2434 : {
2435 88 : StartupStatus = STARTUP_NOT_RUNNING;
2436 88 : if (pmState == PM_STARTUP)
2437 0 : pmState = PM_WAIT_BACKENDS;
2438 : }
2439 : else
2440 6 : StartupStatus = STARTUP_CRASHED;
2441 94 : HandleChildCrash(pid, exitstatus,
2442 94 : _("startup process"));
2443 94 : continue;
2444 : }
2445 :
2446 : /*
2447 : * Startup succeeded, commence normal operations
2448 : */
2449 1242 : StartupStatus = STARTUP_NOT_RUNNING;
2450 1242 : FatalError = false;
2451 1242 : AbortStartTime = 0;
2452 1242 : ReachedNormalRunning = true;
2453 1242 : pmState = PM_RUN;
2454 1242 : connsAllowed = true;
2455 :
2456 : /*
2457 : * Crank up the background tasks, if we didn't do that already
2458 : * when we entered consistent recovery state. It doesn't matter
2459 : * if this fails, we'll just try again later.
2460 : */
2461 1242 : if (CheckpointerPID == 0)
2462 0 : CheckpointerPID = StartChildProcess(B_CHECKPOINTER);
2463 1242 : if (BgWriterPID == 0)
2464 0 : BgWriterPID = StartChildProcess(B_BG_WRITER);
2465 1242 : if (WalWriterPID == 0)
2466 1242 : WalWriterPID = StartChildProcess(B_WAL_WRITER);
2467 1242 : MaybeStartWalSummarizer();
2468 :
2469 : /*
2470 : * Likewise, start other special children as needed. In a restart
2471 : * situation, some of them may be alive already.
2472 : */
2473 1242 : if (!IsBinaryUpgrade && AutoVacuumingActive() && AutoVacPID == 0)
2474 1086 : AutoVacPID = StartChildProcess(B_AUTOVAC_LAUNCHER);
2475 1242 : if (PgArchStartupAllowed() && PgArchPID == 0)
2476 84 : PgArchPID = StartChildProcess(B_ARCHIVER);
2477 1242 : MaybeStartSlotSyncWorker();
2478 :
2479 : /* workers may be scheduled to start now */
2480 1242 : maybe_start_bgworkers();
2481 :
2482 : /* at this point we are really open for business */
2483 1238 : ereport(LOG,
2484 : (errmsg("database system is ready to accept connections")));
2485 :
2486 : /* Report status */
2487 1238 : AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_READY);
2488 : #ifdef USE_SYSTEMD
2489 : sd_notify(0, "READY=1");
2490 : #endif
2491 :
2492 1238 : continue;
2493 : }
2494 :
2495 : /*
2496 : * Was it the bgwriter? Normal exit can be ignored; we'll start a new
2497 : * one at the next iteration of the postmaster's main loop, if
2498 : * necessary. Any other exit condition is treated as a crash.
2499 : */
2500 31722 : if (pid == BgWriterPID)
2501 : {
2502 1424 : BgWriterPID = 0;
2503 1424 : if (!EXIT_STATUS_0(exitstatus))
2504 602 : HandleChildCrash(pid, exitstatus,
2505 602 : _("background writer process"));
2506 1424 : continue;
2507 : }
2508 :
2509 : /*
2510 : * Was it the checkpointer?
2511 : */
2512 30298 : if (pid == CheckpointerPID)
2513 : {
2514 1424 : CheckpointerPID = 0;
2515 1424 : if (EXIT_STATUS_0(exitstatus) && pmState == PM_SHUTDOWN)
2516 : {
2517 : /*
2518 : * OK, we saw normal exit of the checkpointer after it's been
2519 : * told to shut down. We expect that it wrote a shutdown
2520 : * checkpoint. (If for some reason it didn't, recovery will
2521 : * occur on next postmaster start.)
2522 : *
2523 : * At this point we should have no normal backend children
2524 : * left (else we'd not be in PM_SHUTDOWN state) but we might
2525 : * have dead_end children to wait for.
2526 : *
2527 : * If we have an archiver subprocess, tell it to do a last
2528 : * archive cycle and quit. Likewise, if we have walsender
2529 : * processes, tell them to send any remaining WAL and quit.
2530 : */
2531 : Assert(Shutdown > NoShutdown);
2532 :
2533 : /* Waken archiver for the last time */
2534 822 : if (PgArchPID != 0)
2535 24 : signal_child(PgArchPID, SIGUSR2);
2536 :
2537 : /*
2538 : * Waken walsenders for the last time. No regular backends
2539 : * should be around anymore.
2540 : */
2541 822 : SignalChildren(SIGUSR2);
2542 :
2543 822 : pmState = PM_SHUTDOWN_2;
2544 : }
2545 : else
2546 : {
2547 : /*
2548 : * Any unexpected exit of the checkpointer (including FATAL
2549 : * exit) is treated as a crash.
2550 : */
2551 602 : HandleChildCrash(pid, exitstatus,
2552 602 : _("checkpointer process"));
2553 : }
2554 :
2555 1424 : continue;
2556 : }
2557 :
2558 : /*
2559 : * Was it the wal writer? Normal exit can be ignored; we'll start a
2560 : * new one at the next iteration of the postmaster's main loop, if
2561 : * necessary. Any other exit condition is treated as a crash.
2562 : */
2563 28874 : if (pid == WalWriterPID)
2564 : {
2565 1236 : WalWriterPID = 0;
2566 1236 : if (!EXIT_STATUS_0(exitstatus))
2567 508 : HandleChildCrash(pid, exitstatus,
2568 508 : _("WAL writer process"));
2569 1236 : continue;
2570 : }
2571 :
2572 : /*
2573 : * Was it the wal receiver? If exit status is zero (normal) or one
2574 : * (FATAL exit), we assume everything is all right just like normal
2575 : * backends. (If we need a new wal receiver, we'll start one at the
2576 : * next iteration of the postmaster's main loop.)
2577 : */
2578 27638 : if (pid == WalReceiverPID)
2579 : {
2580 416 : WalReceiverPID = 0;
2581 416 : if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
2582 36 : HandleChildCrash(pid, exitstatus,
2583 36 : _("WAL receiver process"));
2584 416 : continue;
2585 : }
2586 :
2587 : /*
2588 : * Was it the wal summarizer? Normal exit can be ignored; we'll start
2589 : * a new one at the next iteration of the postmaster's main loop, if
2590 : * necessary. Any other exit condition is treated as a crash.
2591 : */
2592 27222 : if (pid == WalSummarizerPID)
2593 : {
2594 24 : WalSummarizerPID = 0;
2595 24 : if (!EXIT_STATUS_0(exitstatus))
2596 24 : HandleChildCrash(pid, exitstatus,
2597 24 : _("WAL summarizer process"));
2598 24 : continue;
2599 : }
2600 :
2601 : /*
2602 : * Was it the autovacuum launcher? Normal exit can be ignored; we'll
2603 : * start a new one at the next iteration of the postmaster's main
2604 : * loop, if necessary. Any other exit condition is treated as a
2605 : * crash.
2606 : */
2607 27198 : if (pid == AutoVacPID)
2608 : {
2609 1080 : AutoVacPID = 0;
2610 1080 : if (!EXIT_STATUS_0(exitstatus))
2611 434 : HandleChildCrash(pid, exitstatus,
2612 434 : _("autovacuum launcher process"));
2613 1080 : continue;
2614 : }
2615 :
2616 : /*
2617 : * Was it the archiver? If exit status is zero (normal) or one (FATAL
2618 : * exit), we assume everything is all right just like normal backends
2619 : * and just try to restart a new one so that we immediately retry
2620 : * archiving remaining files. (If fail, we'll try again in future
2621 : * cycles of the postmaster's main loop.) Unless we were waiting for
2622 : * it to shut down; don't restart it in that case, and
2623 : * PostmasterStateMachine() will advance to the next shutdown step.
2624 : */
2625 26118 : if (pid == PgArchPID)
2626 : {
2627 90 : PgArchPID = 0;
2628 90 : if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
2629 66 : HandleChildCrash(pid, exitstatus,
2630 66 : _("archiver process"));
2631 90 : if (PgArchStartupAllowed())
2632 0 : PgArchPID = StartChildProcess(B_ARCHIVER);
2633 90 : continue;
2634 : }
2635 :
2636 : /* Was it the system logger? If so, try to start a new one */
2637 26028 : if (pid == SysLoggerPID)
2638 : {
2639 0 : SysLoggerPID = 0;
2640 : /* for safety's sake, launch new logger *first* */
2641 0 : SysLoggerPID = SysLogger_Start();
2642 0 : if (!EXIT_STATUS_0(exitstatus))
2643 0 : LogChildExit(LOG, _("system logger process"),
2644 : pid, exitstatus);
2645 0 : continue;
2646 : }
2647 :
2648 : /*
2649 : * Was it the slot sync worker? Normal exit or FATAL exit can be
2650 : * ignored (FATAL can be caused by libpqwalreceiver on receiving
2651 : * shutdown request by the startup process during promotion); we'll
2652 : * start a new one at the next iteration of the postmaster's main
2653 : * loop, if necessary. Any other exit condition is treated as a crash.
2654 : */
2655 26028 : if (pid == SlotSyncWorkerPID)
2656 : {
2657 8 : SlotSyncWorkerPID = 0;
2658 8 : if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
2659 0 : HandleChildCrash(pid, exitstatus,
2660 0 : _("slot sync worker process"));
2661 8 : continue;
2662 : }
2663 :
2664 : /* Was it one of our background workers? */
2665 26020 : if (CleanupBackgroundWorker(pid, exitstatus))
2666 : {
2667 : /* have it be restarted */
2668 4632 : HaveCrashedWorker = true;
2669 4632 : continue;
2670 : }
2671 :
2672 : /*
2673 : * Else do standard backend child cleanup.
2674 : */
2675 21388 : CleanupBackend(pid, exitstatus);
2676 : } /* loop over pending child-death reports */
2677 :
2678 : /*
2679 : * After cleaning out the SIGCHLD queue, see if we have any state changes
2680 : * or actions to make.
2681 : */
2682 30852 : PostmasterStateMachine();
2683 29436 : }
2684 :
2685 : /*
2686 : * Scan the bgworkers list and see if the given PID (which has just stopped
2687 : * or crashed) is in it. Handle its shutdown if so, and return true. If not a
2688 : * bgworker, return false.
2689 : *
2690 : * This is heavily based on CleanupBackend. One important difference is that
2691 : * we don't know yet that the dying process is a bgworker, so we must be silent
2692 : * until we're sure it is.
2693 : */
2694 : static bool
2695 26020 : CleanupBackgroundWorker(int pid,
2696 : int exitstatus) /* child's exit status */
2697 : {
2698 : char namebuf[MAXPGPATH];
2699 : slist_mutable_iter iter;
2700 :
2701 51058 : slist_foreach_modify(iter, &BackgroundWorkerList)
2702 : {
2703 : RegisteredBgWorker *rw;
2704 :
2705 29670 : rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
2706 :
2707 29670 : if (rw->rw_pid != pid)
2708 25038 : continue;
2709 :
2710 : #ifdef WIN32
2711 : /* see CleanupBackend */
2712 : if (exitstatus == ERROR_WAIT_NO_CHILDREN)
2713 : exitstatus = 0;
2714 : #endif
2715 :
2716 4632 : snprintf(namebuf, MAXPGPATH, _("background worker \"%s\""),
2717 4632 : rw->rw_worker.bgw_type);
2718 :
2719 :
2720 4632 : if (!EXIT_STATUS_0(exitstatus))
2721 : {
2722 : /* Record timestamp, so we know when to restart the worker. */
2723 1576 : rw->rw_crashed_at = GetCurrentTimestamp();
2724 : }
2725 : else
2726 : {
2727 : /* Zero exit status means terminate */
2728 3056 : rw->rw_crashed_at = 0;
2729 3056 : rw->rw_terminate = true;
2730 : }
2731 :
2732 : /*
2733 : * Additionally, just like a backend, any exit status other than 0 or
2734 : * 1 is considered a crash and causes a system-wide restart.
2735 : */
2736 4632 : if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
2737 : {
2738 550 : HandleChildCrash(pid, exitstatus, namebuf);
2739 550 : return true;
2740 : }
2741 :
2742 : /*
2743 : * We must release the postmaster child slot. If the worker failed to
2744 : * do so, it did not clean up after itself, requiring a crash-restart
2745 : * cycle.
2746 : */
2747 4082 : if (!ReleasePostmasterChildSlot(rw->rw_child_slot))
2748 : {
2749 0 : HandleChildCrash(pid, exitstatus, namebuf);
2750 0 : return true;
2751 : }
2752 :
2753 : /* Get it out of the BackendList and clear out remaining data */
2754 4082 : dlist_delete(&rw->rw_backend->elem);
2755 : #ifdef EXEC_BACKEND
2756 : ShmemBackendArrayRemove(rw->rw_backend);
2757 : #endif
2758 :
2759 : /*
2760 : * It's possible that this background worker started some OTHER
2761 : * background worker and asked to be notified when that worker started
2762 : * or stopped. If so, cancel any notifications destined for the
2763 : * now-dead backend.
2764 : */
2765 4082 : if (rw->rw_backend->bgworker_notify)
2766 292 : BackgroundWorkerStopNotifications(rw->rw_pid);
2767 4082 : pfree(rw->rw_backend);
2768 4082 : rw->rw_backend = NULL;
2769 4082 : rw->rw_pid = 0;
2770 4082 : rw->rw_child_slot = 0;
2771 4082 : ReportBackgroundWorkerExit(&iter); /* report child death */
2772 :
2773 4082 : LogChildExit(EXIT_STATUS_0(exitstatus) ? DEBUG1 : LOG,
2774 : namebuf, pid, exitstatus);
2775 :
2776 4082 : return true;
2777 : }
2778 :
2779 21388 : return false;
2780 : }
2781 :
2782 : /*
2783 : * CleanupBackend -- cleanup after terminated backend.
2784 : *
2785 : * Remove all local state associated with backend.
2786 : *
2787 : * If you change this, see also CleanupBackgroundWorker.
2788 : */
2789 : static void
2790 21388 : CleanupBackend(int pid,
2791 : int exitstatus) /* child's exit status. */
2792 : {
2793 : dlist_mutable_iter iter;
2794 :
2795 21388 : LogChildExit(DEBUG2, _("server process"), pid, exitstatus);
2796 :
2797 : /*
2798 : * If a backend dies in an ugly way then we must signal all other backends
2799 : * to quickdie. If exit status is zero (normal) or one (FATAL exit), we
2800 : * assume everything is all right and proceed to remove the backend from
2801 : * the active backend list.
2802 : */
2803 :
2804 : #ifdef WIN32
2805 :
2806 : /*
2807 : * On win32, also treat ERROR_WAIT_NO_CHILDREN (128) as nonfatal case,
2808 : * since that sometimes happens under load when the process fails to start
2809 : * properly (long before it starts using shared memory). Microsoft reports
2810 : * it is related to mutex failure:
2811 : * http://archives.postgresql.org/pgsql-hackers/2010-09/msg00790.php
2812 : */
2813 : if (exitstatus == ERROR_WAIT_NO_CHILDREN)
2814 : {
2815 : LogChildExit(LOG, _("server process"), pid, exitstatus);
2816 : exitstatus = 0;
2817 : }
2818 : #endif
2819 :
2820 21388 : if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
2821 : {
2822 398 : HandleChildCrash(pid, exitstatus, _("server process"));
2823 398 : return;
2824 : }
2825 :
2826 41502 : dlist_foreach_modify(iter, &BackendList)
2827 : {
2828 41502 : Backend *bp = dlist_container(Backend, elem, iter.cur);
2829 :
2830 41502 : if (bp->pid == pid)
2831 : {
2832 20990 : if (!bp->dead_end)
2833 : {
2834 20792 : if (!ReleasePostmasterChildSlot(bp->child_slot))
2835 : {
2836 : /*
2837 : * Uh-oh, the child failed to clean itself up. Treat as a
2838 : * crash after all.
2839 : */
2840 0 : HandleChildCrash(pid, exitstatus, _("server process"));
2841 0 : return;
2842 : }
2843 : #ifdef EXEC_BACKEND
2844 : ShmemBackendArrayRemove(bp);
2845 : #endif
2846 : }
2847 20990 : if (bp->bgworker_notify)
2848 : {
2849 : /*
2850 : * This backend may have been slated to receive SIGUSR1 when
2851 : * some background worker started or stopped. Cancel those
2852 : * notifications, as we don't want to signal PIDs that are not
2853 : * PostgreSQL backends. This gets skipped in the (probably
2854 : * very common) case where the backend has never requested any
2855 : * such notifications.
2856 : */
2857 132 : BackgroundWorkerStopNotifications(bp->pid);
2858 : }
2859 20990 : dlist_delete(iter.cur);
2860 20990 : pfree(bp);
2861 20990 : break;
2862 : }
2863 : }
2864 : }
2865 :
2866 : /*
2867 : * HandleChildCrash -- cleanup after failed backend, bgwriter, checkpointer,
2868 : * walwriter, autovacuum, archiver, slot sync worker, or background worker.
2869 : *
2870 : * The objectives here are to clean up our local state about the child
2871 : * process, and to signal all other remaining children to quickdie.
2872 : */
2873 : static void
2874 3314 : HandleChildCrash(int pid, int exitstatus, const char *procname)
2875 : {
2876 : dlist_mutable_iter iter;
2877 : slist_iter siter;
2878 : Backend *bp;
2879 : bool take_action;
2880 :
2881 : /*
2882 : * We only log messages and send signals if this is the first process
2883 : * crash and we're not doing an immediate shutdown; otherwise, we're only
2884 : * here to update postmaster's idea of live processes. If we have already
2885 : * signaled children, nonzero exit status is to be expected, so don't
2886 : * clutter log.
2887 : */
2888 3314 : take_action = !FatalError && Shutdown != ImmediateShutdown;
2889 :
2890 3314 : if (take_action)
2891 : {
2892 14 : LogChildExit(LOG, procname, pid, exitstatus);
2893 14 : ereport(LOG,
2894 : (errmsg("terminating any other active server processes")));
2895 14 : SetQuitSignalReason(PMQUIT_FOR_CRASH);
2896 : }
2897 :
2898 : /* Process background workers. */
2899 6920 : slist_foreach(siter, &BackgroundWorkerList)
2900 : {
2901 : RegisteredBgWorker *rw;
2902 :
2903 3606 : rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
2904 3606 : if (rw->rw_pid == 0)
2905 1416 : continue; /* not running */
2906 2190 : if (rw->rw_pid == pid)
2907 : {
2908 : /*
2909 : * Found entry for freshly-dead worker, so remove it.
2910 : */
2911 550 : (void) ReleasePostmasterChildSlot(rw->rw_child_slot);
2912 550 : dlist_delete(&rw->rw_backend->elem);
2913 : #ifdef EXEC_BACKEND
2914 : ShmemBackendArrayRemove(rw->rw_backend);
2915 : #endif
2916 550 : pfree(rw->rw_backend);
2917 550 : rw->rw_backend = NULL;
2918 550 : rw->rw_pid = 0;
2919 550 : rw->rw_child_slot = 0;
2920 : /* don't reset crashed_at */
2921 : /* don't report child stop, either */
2922 : /* Keep looping so we can signal remaining workers */
2923 : }
2924 : else
2925 : {
2926 : /*
2927 : * This worker is still alive. Unless we did so already, tell it
2928 : * to commit hara-kiri.
2929 : */
2930 1640 : if (take_action)
2931 8 : sigquit_child(rw->rw_pid);
2932 : }
2933 : }
2934 :
2935 : /* Process regular backends */
2936 7072 : dlist_foreach_modify(iter, &BackendList)
2937 : {
2938 3758 : bp = dlist_container(Backend, elem, iter.cur);
2939 :
2940 3758 : if (bp->pid == pid)
2941 : {
2942 : /*
2943 : * Found entry for freshly-dead backend, so remove it.
2944 : */
2945 398 : if (!bp->dead_end)
2946 : {
2947 398 : (void) ReleasePostmasterChildSlot(bp->child_slot);
2948 : #ifdef EXEC_BACKEND
2949 : ShmemBackendArrayRemove(bp);
2950 : #endif
2951 : }
2952 398 : dlist_delete(iter.cur);
2953 398 : pfree(bp);
2954 : /* Keep looping so we can signal remaining backends */
2955 : }
2956 : else
2957 : {
2958 : /*
2959 : * This backend is still alive. Unless we did so already, tell it
2960 : * to commit hara-kiri.
2961 : *
2962 : * We could exclude dead_end children here, but at least when
2963 : * sending SIGABRT it seems better to include them.
2964 : *
2965 : * Background workers were already processed above; ignore them
2966 : * here.
2967 : */
2968 3360 : if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
2969 1640 : continue;
2970 :
2971 1720 : if (take_action)
2972 8 : sigquit_child(bp->pid);
2973 : }
2974 : }
2975 :
2976 : /* Take care of the startup process too */
2977 3314 : if (pid == StartupPID)
2978 : {
2979 0 : StartupPID = 0;
2980 : /* Caller adjusts StartupStatus, so don't touch it here */
2981 : }
2982 3314 : else if (StartupPID != 0 && take_action)
2983 : {
2984 0 : sigquit_child(StartupPID);
2985 0 : StartupStatus = STARTUP_SIGNALED;
2986 : }
2987 :
2988 : /* Take care of the bgwriter too */
2989 3314 : if (pid == BgWriterPID)
2990 0 : BgWriterPID = 0;
2991 3314 : else if (BgWriterPID != 0 && take_action)
2992 14 : sigquit_child(BgWriterPID);
2993 :
2994 : /* Take care of the checkpointer too */
2995 3314 : if (pid == CheckpointerPID)
2996 0 : CheckpointerPID = 0;
2997 3314 : else if (CheckpointerPID != 0 && take_action)
2998 14 : sigquit_child(CheckpointerPID);
2999 :
3000 : /* Take care of the walwriter too */
3001 3314 : if (pid == WalWriterPID)
3002 0 : WalWriterPID = 0;
3003 3314 : else if (WalWriterPID != 0 && take_action)
3004 8 : sigquit_child(WalWriterPID);
3005 :
3006 : /* Take care of the walreceiver too */
3007 3314 : if (pid == WalReceiverPID)
3008 0 : WalReceiverPID = 0;
3009 3314 : else if (WalReceiverPID != 0 && take_action)
3010 0 : sigquit_child(WalReceiverPID);
3011 :
3012 : /* Take care of the walsummarizer too */
3013 3314 : if (pid == WalSummarizerPID)
3014 0 : WalSummarizerPID = 0;
3015 3314 : else if (WalSummarizerPID != 0 && take_action)
3016 0 : sigquit_child(WalSummarizerPID);
3017 :
3018 : /* Take care of the autovacuum launcher too */
3019 3314 : if (pid == AutoVacPID)
3020 0 : AutoVacPID = 0;
3021 3314 : else if (AutoVacPID != 0 && take_action)
3022 8 : sigquit_child(AutoVacPID);
3023 :
3024 : /* Take care of the archiver too */
3025 3314 : if (pid == PgArchPID)
3026 0 : PgArchPID = 0;
3027 3314 : else if (PgArchPID != 0 && take_action)
3028 0 : sigquit_child(PgArchPID);
3029 :
3030 : /* Take care of the slot sync worker too */
3031 3314 : if (pid == SlotSyncWorkerPID)
3032 0 : SlotSyncWorkerPID = 0;
3033 3314 : else if (SlotSyncWorkerPID != 0 && take_action)
3034 0 : sigquit_child(SlotSyncWorkerPID);
3035 :
3036 : /* We do NOT restart the syslogger */
3037 :
3038 3314 : if (Shutdown != ImmediateShutdown)
3039 74 : FatalError = true;
3040 :
3041 : /* We now transit into a state of waiting for children to die */
3042 3314 : if (pmState == PM_RECOVERY ||
3043 3310 : pmState == PM_HOT_STANDBY ||
3044 3308 : pmState == PM_RUN ||
3045 3300 : pmState == PM_STOP_BACKENDS ||
3046 3300 : pmState == PM_SHUTDOWN)
3047 14 : pmState = PM_WAIT_BACKENDS;
3048 :
3049 : /*
3050 : * .. and if this doesn't happen quickly enough, now the clock is ticking
3051 : * for us to kill them without mercy.
3052 : */
3053 3314 : if (AbortStartTime == 0)
3054 14 : AbortStartTime = time(NULL);
3055 3314 : }
3056 :
/*
 * LogChildExit -- write a log entry describing how a child process died.
 *
 * "lev" is the elevel to report at, "procname" a noun phrase naming the
 * child, "pid" its process ID and "exitstatus" its wait status.  When the
 * exit status is nonzero, the activity that the failed process was running
 * is looked up and, if available, attached as an errdetail.
 */
static void
LogChildExit(int lev, const char *procname, int pid, int exitstatus)
{
	/*
	 * The buffer size is arbitrary, but is set equal to the default
	 * track_activity_query_size.
	 */
	char		activity_buffer[1024];
	const char *activity = NULL;

	if (!EXIT_STATUS_0(exitstatus))
		activity = pgstat_get_crashed_backend_activity(pid,
													   activity_buffer,
													   sizeof(activity_buffer));

	/* Ordinary exit: report the exit code. */
	if (WIFEXITED(exitstatus))
	{
		ereport(lev,

		/*------
		  translator: %s is a noun phrase describing a child process, such as
		  "server process" */
				(errmsg("%s (PID %d) exited with exit code %d",
						procname, pid, WEXITSTATUS(exitstatus)),
				 activity ? errdetail("Failed process was running: %s", activity) : 0));
		return;
	}

	/* Killed by a signal (or, on Windows, an exception). */
	if (WIFSIGNALED(exitstatus))
	{
#if defined(WIN32)
		ereport(lev,

		/*------
		  translator: %s is a noun phrase describing a child process, such as
		  "server process" */
				(errmsg("%s (PID %d) was terminated by exception 0x%X",
						procname, pid, WTERMSIG(exitstatus)),
				 errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
				 activity ? errdetail("Failed process was running: %s", activity) : 0));
#else
		ereport(lev,

		/*------
		  translator: %s is a noun phrase describing a child process, such as
		  "server process" */
				(errmsg("%s (PID %d) was terminated by signal %d: %s",
						procname, pid, WTERMSIG(exitstatus),
						pg_strsignal(WTERMSIG(exitstatus))),
				 activity ? errdetail("Failed process was running: %s", activity) : 0));
#endif
		return;
	}

	/* Neither of the above: report the raw status value. */
	ereport(lev,

	/*------
	  translator: %s is a noun phrase describing a child process, such as
	  "server process" */
			(errmsg("%s (PID %d) exited with unrecognized status %d",
					procname, pid, exitstatus),
			 activity ? errdetail("Failed process was running: %s", activity) : 0));
}
3118 :
3119 : /*
3120 : * Advance the postmaster's state machine and take actions as appropriate
3121 : *
3122 : * This is common code for process_pm_shutdown_request(),
3123 : * process_pm_child_exit() and process_pm_pmsignal(), which process the signals
3124 : * that might mean we need to change state.
3125 : */
3126 : static void
3127 34302 : PostmasterStateMachine(void)
3128 : {
3129 : /* If we're doing a smart shutdown, try to advance that state. */
3130 34302 : if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
3131 : {
3132 26410 : if (!connsAllowed)
3133 : {
3134 : /*
3135 : * This state ends when we have no normal client backends running.
3136 : * Then we're ready to stop other children.
3137 : */
3138 68 : if (CountChildren(BACKEND_TYPE_NORMAL) == 0)
3139 34 : pmState = PM_STOP_BACKENDS;
3140 : }
3141 : }
3142 :
3143 : /*
3144 : * If we're ready to do so, signal child processes to shut down. (This
3145 : * isn't a persistent state, but treating it as a distinct pmState allows
3146 : * us to share this code across multiple shutdown code paths.)
3147 : */
3148 34302 : if (pmState == PM_STOP_BACKENDS)
3149 : {
3150 : /*
3151 : * Forget any pending requests for background workers, since we're no
3152 : * longer willing to launch any new workers. (If additional requests
3153 : * arrive, BackgroundWorkerStateChange will reject them.)
3154 : */
3155 822 : ForgetUnstartedBackgroundWorkers();
3156 :
3157 : /* Signal all backend children except walsenders */
3158 822 : SignalSomeChildren(SIGTERM,
3159 : BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND);
3160 : /* and the autovac launcher too */
3161 822 : if (AutoVacPID != 0)
3162 646 : signal_child(AutoVacPID, SIGTERM);
3163 : /* and the bgwriter too */
3164 822 : if (BgWriterPID != 0)
3165 822 : signal_child(BgWriterPID, SIGTERM);
3166 : /* and the walwriter too */
3167 822 : if (WalWriterPID != 0)
3168 728 : signal_child(WalWriterPID, SIGTERM);
3169 : /* If we're in recovery, also stop startup and walreceiver procs */
3170 822 : if (StartupPID != 0)
3171 94 : signal_child(StartupPID, SIGTERM);
3172 822 : if (WalReceiverPID != 0)
3173 80 : signal_child(WalReceiverPID, SIGTERM);
3174 822 : if (WalSummarizerPID != 0)
3175 0 : signal_child(WalSummarizerPID, SIGTERM);
3176 822 : if (SlotSyncWorkerPID != 0)
3177 4 : signal_child(SlotSyncWorkerPID, SIGTERM);
3178 : /* checkpointer, archiver, stats, and syslogger may continue for now */
3179 :
3180 : /* Now transition to PM_WAIT_BACKENDS state to wait for them to die */
3181 822 : pmState = PM_WAIT_BACKENDS;
3182 : }
3183 :
3184 : /*
3185 : * If we are in a state-machine state that implies waiting for backends to
3186 : * exit, see if they're all gone, and change state if so.
3187 : */
3188 34302 : if (pmState == PM_WAIT_BACKENDS)
3189 : {
3190 : /*
3191 : * PM_WAIT_BACKENDS state ends when we have no regular backends
3192 : * (including autovac workers), no bgworkers (including unconnected
3193 : * ones), and no walwriter, autovac launcher, bgwriter or slot sync
3194 : * worker. If we are doing crash recovery or an immediate shutdown
3195 : * then we expect the checkpointer to exit as well, otherwise not. The
3196 : * stats and syslogger processes are disregarded since they are not
3197 : * connected to shared memory; we also disregard dead_end children
3198 : * here. Walsenders and archiver are also disregarded, they will be
3199 : * terminated later after writing the checkpoint record.
3200 : */
3201 6942 : if (CountChildren(BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND) == 0 &&
3202 3238 : StartupPID == 0 &&
3203 2970 : WalReceiverPID == 0 &&
3204 2870 : WalSummarizerPID == 0 &&
3205 2840 : BgWriterPID == 0 &&
3206 2126 : (CheckpointerPID == 0 ||
3207 1340 : (!FatalError && Shutdown < ImmediateShutdown)) &&
3208 2034 : WalWriterPID == 0 &&
3209 1654 : AutoVacPID == 0 &&
3210 1426 : SlotSyncWorkerPID == 0)
3211 : {
3212 1424 : if (Shutdown >= ImmediateShutdown || FatalError)
3213 : {
3214 : /*
3215 : * Start waiting for dead_end children to die. This state
3216 : * change causes ServerLoop to stop creating new ones.
3217 : */
3218 602 : pmState = PM_WAIT_DEAD_END;
3219 :
3220 : /*
3221 : * We already SIGQUIT'd the archiver and stats processes, if
3222 : * any, when we started immediate shutdown or entered
3223 : * FatalError state.
3224 : */
3225 : }
3226 : else
3227 : {
3228 : /*
3229 : * If we get here, we are proceeding with normal shutdown. All
3230 : * the regular children are gone, and it's time to tell the
3231 : * checkpointer to do a shutdown checkpoint.
3232 : */
3233 : Assert(Shutdown > NoShutdown);
3234 : /* Start the checkpointer if not running */
3235 822 : if (CheckpointerPID == 0)
3236 0 : CheckpointerPID = StartChildProcess(B_CHECKPOINTER);
3237 : /* And tell it to shut down */
3238 822 : if (CheckpointerPID != 0)
3239 : {
3240 822 : signal_child(CheckpointerPID, SIGUSR2);
3241 822 : pmState = PM_SHUTDOWN;
3242 : }
3243 : else
3244 : {
3245 : /*
3246 : * If we failed to fork a checkpointer, just shut down.
3247 : * Any required cleanup will happen at next restart. We
3248 : * set FatalError so that an "abnormal shutdown" message
3249 : * gets logged when we exit.
3250 : *
3251 : * We don't consult send_abort_for_crash here, as it's
3252 : * unlikely that dumping cores would illuminate the reason
3253 : * for checkpointer fork failure.
3254 : */
3255 0 : FatalError = true;
3256 0 : pmState = PM_WAIT_DEAD_END;
3257 :
3258 : /* Kill the walsenders and archiver too */
3259 0 : SignalChildren(SIGQUIT);
3260 0 : if (PgArchPID != 0)
3261 0 : signal_child(PgArchPID, SIGQUIT);
3262 : }
3263 : }
3264 : }
3265 : }
3266 :
3267 34302 : if (pmState == PM_SHUTDOWN_2)
3268 : {
3269 : /*
3270 : * PM_SHUTDOWN_2 state ends when there's no other children than
3271 : * dead_end children left. There shouldn't be any regular backends
3272 : * left by now anyway; what we're really waiting for is walsenders and
3273 : * archiver.
3274 : */
3275 890 : if (PgArchPID == 0 && CountChildren(BACKEND_TYPE_ALL) == 0)
3276 : {
3277 822 : pmState = PM_WAIT_DEAD_END;
3278 : }
3279 : }
3280 :
3281 34302 : if (pmState == PM_WAIT_DEAD_END)
3282 : {
3283 : /* Don't allow any new socket connection events. */
3284 1456 : ConfigurePostmasterWaitSet(false);
3285 :
3286 : /*
3287 : * PM_WAIT_DEAD_END state ends when the BackendList is entirely empty
3288 : * (ie, no dead_end children remain), and the archiver is gone too.
3289 : *
3290 : * The reason we wait for those two is to protect them against a new
3291 : * postmaster starting conflicting subprocesses; this isn't an
3292 : * ironclad protection, but it at least helps in the
3293 : * shutdown-and-immediately-restart scenario. Note that they have
3294 : * already been sent appropriate shutdown signals, either during a
3295 : * normal state transition leading up to PM_WAIT_DEAD_END, or during
3296 : * FatalError processing.
3297 : */
3298 1456 : if (dlist_is_empty(&BackendList) && PgArchPID == 0)
3299 : {
3300 : /* These other guys should be dead already */
3301 : Assert(StartupPID == 0);
3302 : Assert(WalReceiverPID == 0);
3303 : Assert(WalSummarizerPID == 0);
3304 : Assert(BgWriterPID == 0);
3305 : Assert(CheckpointerPID == 0);
3306 : Assert(WalWriterPID == 0);
3307 : Assert(AutoVacPID == 0);
3308 : Assert(SlotSyncWorkerPID == 0);
3309 : /* syslogger is not considered here */
3310 1424 : pmState = PM_NO_CHILDREN;
3311 : }
3312 : }
3313 :
3314 : /*
3315 : * If we've been told to shut down, we exit as soon as there are no
3316 : * remaining children. If there was a crash, cleanup will occur at the
3317 : * next startup. (Before PostgreSQL 8.3, we tried to recover from the
3318 : * crash before exiting, but that seems unwise if we are quitting because
3319 : * we got SIGTERM from init --- there may well not be time for recovery
3320 : * before init decides to SIGKILL us.)
3321 : *
3322 : * Note that the syslogger continues to run. It will exit when it sees
3323 : * EOF on its input pipe, which happens when there are no more upstream
3324 : * processes.
3325 : */
3326 34302 : if (Shutdown > NoShutdown && pmState == PM_NO_CHILDREN)
3327 : {
3328 1410 : if (FatalError)
3329 : {
3330 0 : ereport(LOG, (errmsg("abnormal database system shutdown")));
3331 0 : ExitPostmaster(1);
3332 : }
3333 : else
3334 : {
3335 : /*
3336 : * Normal exit from the postmaster is here. We don't need to log
3337 : * anything here, since the UnlinkLockFiles proc_exit callback
3338 : * will do so, and that should be the last user-visible action.
3339 : */
3340 1410 : ExitPostmaster(0);
3341 : }
3342 : }
3343 :
3344 : /*
3345 : * If the startup process failed, or the user does not want an automatic
3346 : * restart after backend crashes, wait for all non-syslogger children to
3347 : * exit, and then exit postmaster. We don't try to reinitialize when the
3348 : * startup process fails, because more than likely it will just fail again
3349 : * and we will keep trying forever.
3350 : */
3351 32892 : if (pmState == PM_NO_CHILDREN)
3352 : {
3353 14 : if (StartupStatus == STARTUP_CRASHED)
3354 : {
3355 6 : ereport(LOG,
3356 : (errmsg("shutting down due to startup process failure")));
3357 6 : ExitPostmaster(1);
3358 : }
3359 8 : if (!restart_after_crash)
3360 : {
3361 0 : ereport(LOG,
3362 : (errmsg("shutting down because restart_after_crash is off")));
3363 0 : ExitPostmaster(1);
3364 : }
3365 : }
3366 :
3367 : /*
3368 : * If we need to recover from a crash, wait for all non-syslogger children
3369 : * to exit, then reset shmem and start the startup process.
3370 : */
3371 32886 : if (FatalError && pmState == PM_NO_CHILDREN)
3372 : {
3373 8 : ereport(LOG,
3374 : (errmsg("all server processes terminated; reinitializing")));
3375 :
3376 : /* remove leftover temporary files after a crash */
3377 8 : if (remove_temp_files_after_crash)
3378 6 : RemovePgTempFiles();
3379 :
3380 : /* allow background workers to immediately restart */
3381 8 : ResetBackgroundWorkerCrashTimes();
3382 :
3383 8 : shmem_exit(1);
3384 :
3385 : /* re-read control file into local memory */
3386 8 : LocalProcessControlFile(true);
3387 :
3388 : /* re-create shared memory and semaphores */
3389 8 : CreateSharedMemoryAndSemaphores();
3390 :
3391 8 : StartupPID = StartChildProcess(B_STARTUP);
3392 : Assert(StartupPID != 0);
3393 8 : StartupStatus = STARTUP_RUNNING;
3394 8 : pmState = PM_STARTUP;
3395 : /* crash recovery started, reset SIGKILL flag */
3396 8 : AbortStartTime = 0;
3397 :
3398 : /* start accepting server socket connection events again */
3399 8 : ConfigurePostmasterWaitSet(true);
3400 : }
3401 32886 : }
3402 :
3403 :
3404 : /*
3405 : * Send a signal to a postmaster child process
3406 : *
3407 : * On systems that have setsid(), each child process sets itself up as a
3408 : * process group leader. For signals that are generally interpreted in the
3409 : * appropriate fashion, we signal the entire process group not just the
3410 : * direct child process. This allows us to, for example, SIGQUIT a blocked
3411 : * archive_recovery script, or SIGINT a script being run by a backend via
3412 : * system().
3413 : *
3414 : * There is a race condition for recently-forked children: they might not
3415 : * have executed setsid() yet. So we signal the child directly as well as
3416 : * the group. We assume such a child will handle the signal before trying
3417 : * to spawn any grandchild processes. We also assume that signaling the
3418 : * child twice will not cause any problems.
3419 : */
3420 : static void
3421 9412 : signal_child(pid_t pid, int signal)
3422 : {
3423 9412 : if (kill(pid, signal) < 0)
3424 0 : elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) pid, signal);
3425 : #ifdef HAVE_SETSID
3426 9412 : switch (signal)
3427 : {
3428 6932 : case SIGINT:
3429 : case SIGTERM:
3430 : case SIGQUIT:
3431 : case SIGKILL:
3432 : case SIGABRT:
3433 6932 : if (kill(-pid, signal) < 0)
3434 26 : elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) (-pid), signal);
3435 6932 : break;
3436 2480 : default:
3437 2480 : break;
3438 : }
3439 : #endif
3440 9412 : }
3441 :
3442 : /*
3443 : * Convenience function for killing a child process after a crash of some
3444 : * other child process. We log the action at a higher level than we would
3445 : * otherwise do, and we apply send_abort_for_crash to decide which signal
3446 : * to send. Normally it's SIGQUIT -- and most other comments in this file
3447 : * are written on the assumption that it is -- but developers might prefer
3448 : * to use SIGABRT to collect per-child core dumps.
3449 : */
3450 : static void
3451 60 : sigquit_child(pid_t pid)
3452 : {
3453 60 : ereport(DEBUG2,
3454 : (errmsg_internal("sending %s to process %d",
3455 : (send_abort_for_crash ? "SIGABRT" : "SIGQUIT"),
3456 : (int) pid)));
3457 60 : signal_child(pid, (send_abort_for_crash ? SIGABRT : SIGQUIT));
3458 60 : }
3459 :
3460 : /*
3461 : * Send a signal to the targeted children (but NOT special children;
3462 : * dead_end children are never signaled, either).
3463 : */
3464 : static bool
3465 2478 : SignalSomeChildren(int signal, int target)
3466 : {
3467 : dlist_iter iter;
3468 2478 : bool signaled = false;
3469 :
3470 5286 : dlist_foreach(iter, &BackendList)
3471 : {
3472 2808 : Backend *bp = dlist_container(Backend, elem, iter.cur);
3473 :
3474 2808 : if (bp->dead_end)
3475 2 : continue;
3476 :
3477 : /*
3478 : * Since target == BACKEND_TYPE_ALL is the most common case, we test
3479 : * it first and avoid touching shared memory for every child.
3480 : */
3481 2806 : if (target != BACKEND_TYPE_ALL)
3482 : {
3483 : /*
3484 : * Assign bkend_type for any recently announced WAL Sender
3485 : * processes.
3486 : */
3487 1824 : if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
3488 534 : IsPostmasterChildWalSender(bp->child_slot))
3489 58 : bp->bkend_type = BACKEND_TYPE_WALSND;
3490 :
3491 1290 : if (!(target & bp->bkend_type))
3492 58 : continue;
3493 : }
3494 :
3495 2748 : ereport(DEBUG4,
3496 : (errmsg_internal("sending signal %d to process %d",
3497 : signal, (int) bp->pid)));
3498 2748 : signal_child(bp->pid, signal);
3499 2748 : signaled = true;
3500 : }
3501 2478 : return signaled;
3502 : }
3503 :
3504 : /*
3505 : * Send a termination signal to children. This considers all of our children
3506 : * processes, except syslogger and dead_end backends.
3507 : */
3508 : static void
3509 588 : TerminateChildren(int signal)
3510 : {
3511 588 : SignalChildren(signal);
3512 588 : if (StartupPID != 0)
3513 : {
3514 88 : signal_child(StartupPID, signal);
3515 88 : if (signal == SIGQUIT || signal == SIGKILL || signal == SIGABRT)
3516 88 : StartupStatus = STARTUP_SIGNALED;
3517 : }
3518 588 : if (BgWriterPID != 0)
3519 588 : signal_child(BgWriterPID, signal);
3520 588 : if (CheckpointerPID != 0)
3521 588 : signal_child(CheckpointerPID, signal);
3522 588 : if (WalWriterPID != 0)
3523 500 : signal_child(WalWriterPID, signal);
3524 588 : if (WalReceiverPID != 0)
3525 36 : signal_child(WalReceiverPID, signal);
3526 588 : if (WalSummarizerPID != 0)
3527 24 : signal_child(WalSummarizerPID, signal);
3528 588 : if (AutoVacPID != 0)
3529 426 : signal_child(AutoVacPID, signal);
3530 588 : if (PgArchPID != 0)
3531 66 : signal_child(PgArchPID, signal);
3532 588 : if (SlotSyncWorkerPID != 0)
3533 0 : signal_child(SlotSyncWorkerPID, signal);
3534 588 : }
3535 :
3536 : /*
3537 : * BackendStartup -- start backend process
3538 : *
3539 : * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
3540 : *
3541 : * Note: if you change this code, also consider StartAutovacuumWorker.
3542 : */
3543 : static int
3544 21372 : BackendStartup(ClientSocket *client_sock)
3545 : {
3546 : Backend *bn; /* for backend cleanup */
3547 : pid_t pid;
3548 : BackendStartupData startup_data;
3549 :
3550 : /*
3551 : * Create backend data structure. Better before the fork() so we can
3552 : * handle failure cleanly.
3553 : */
3554 21372 : bn = (Backend *) palloc_extended(sizeof(Backend), MCXT_ALLOC_NO_OOM);
3555 21372 : if (!bn)
3556 : {
3557 0 : ereport(LOG,
3558 : (errcode(ERRCODE_OUT_OF_MEMORY),
3559 : errmsg("out of memory")));
3560 0 : return STATUS_ERROR;
3561 : }
3562 :
3563 : /*
3564 : * Compute the cancel key that will be assigned to this backend. The
3565 : * backend will have its own copy in the forked-off process' value of
3566 : * MyCancelKey, so that it can transmit the key to the frontend.
3567 : */
3568 21372 : if (!RandomCancelKey(&MyCancelKey))
3569 : {
3570 0 : pfree(bn);
3571 0 : ereport(LOG,
3572 : (errcode(ERRCODE_INTERNAL_ERROR),
3573 : errmsg("could not generate random cancel key")));
3574 0 : return STATUS_ERROR;
3575 : }
3576 :
3577 : /* Pass down canAcceptConnections state */
3578 21372 : startup_data.canAcceptConnections = canAcceptConnections(BACKEND_TYPE_NORMAL);
3579 21372 : bn->dead_end = (startup_data.canAcceptConnections != CAC_OK);
3580 21372 : bn->cancel_key = MyCancelKey;
3581 :
3582 : /*
3583 : * Unless it's a dead_end child, assign it a child slot number
3584 : */
3585 21372 : if (!bn->dead_end)
3586 21174 : bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
3587 : else
3588 198 : bn->child_slot = 0;
3589 :
3590 : /* Hasn't asked to be notified about any bgworkers yet */
3591 21372 : bn->bgworker_notify = false;
3592 :
3593 21372 : pid = postmaster_child_launch(B_BACKEND,
3594 : (char *) &startup_data, sizeof(startup_data),
3595 : client_sock);
3596 21370 : if (pid < 0)
3597 : {
3598 : /* in parent, fork failed */
3599 0 : int save_errno = errno;
3600 :
3601 0 : if (!bn->dead_end)
3602 0 : (void) ReleasePostmasterChildSlot(bn->child_slot);
3603 0 : pfree(bn);
3604 0 : errno = save_errno;
3605 0 : ereport(LOG,
3606 : (errmsg("could not fork new process for connection: %m")));
3607 0 : report_fork_failure_to_client(client_sock, save_errno);
3608 0 : return STATUS_ERROR;
3609 : }
3610 :
3611 : /* in parent, successful fork */
3612 21370 : ereport(DEBUG2,
3613 : (errmsg_internal("forked new backend, pid=%d socket=%d",
3614 : (int) pid, (int) client_sock->sock)));
3615 :
3616 : /*
3617 : * Everything's been successful, it's safe to add this backend to our list
3618 : * of backends.
3619 : */
3620 21370 : bn->pid = pid;
3621 21370 : bn->bkend_type = BACKEND_TYPE_NORMAL; /* Can change later to WALSND */
3622 21370 : dlist_push_head(&BackendList, &bn->elem);
3623 :
3624 : #ifdef EXEC_BACKEND
3625 : if (!bn->dead_end)
3626 : ShmemBackendArrayAdd(bn);
3627 : #endif
3628 :
3629 21370 : return STATUS_OK;
3630 : }
3631 :
3632 : /*
3633 : * Try to report backend fork() failure to client before we close the
3634 : * connection. Since we do not care to risk blocking the postmaster on
3635 : * this connection, we set the connection to non-blocking and try only once.
3636 : *
3637 : * This is grungy special-purpose code; we cannot use backend libpq since
3638 : * it's not up and running.
3639 : */
3640 : static void
3641 0 : report_fork_failure_to_client(ClientSocket *client_sock, int errnum)
3642 : {
3643 : char buffer[1000];
3644 : int rc;
3645 :
3646 : /* Format the error message packet (always V2 protocol) */
3647 0 : snprintf(buffer, sizeof(buffer), "E%s%s\n",
3648 : _("could not fork new process for connection: "),
3649 : strerror(errnum));
3650 :
3651 : /* Set port to non-blocking. Don't do send() if this fails */
3652 0 : if (!pg_set_noblock(client_sock->sock))
3653 0 : return;
3654 :
3655 : /* We'll retry after EINTR, but ignore all other failures */
3656 : do
3657 : {
3658 0 : rc = send(client_sock->sock, buffer, strlen(buffer) + 1, 0);
3659 0 : } while (rc < 0 && errno == EINTR);
3660 : }
3661 :
/*
 * ExitPostmaster -- common exit path for the postmaster
 *
 * status: exit status handed to proc_exit().
 *
 * Never call exit() directly in the postmaster; always come through here.
 */
static void
ExitPostmaster(int status)
{
#ifdef HAVE_PTHREAD_IS_THREADED_NP

	/*
	 * No cause is known for a postmaster turning multithreaded after
	 * startup, but recheck anyway in case of unknown causes.  LOG level is
	 * used because an unclean shutdown at this point would usually look
	 * much the same as a clean one.
	 */
	if (pthread_is_threaded_np())
	{
		ereport(LOG,
				(errcode(ERRCODE_INTERNAL_ERROR),
				 errmsg_internal("postmaster became multithreaded"),
				 errdetail("Please report this to <%s>.", PACKAGE_BUGREPORT)));
	}
#endif

	/*
	 * Historical note carried over from earlier revisions: it was an open
	 * question whether shared memory should be cleaned up and all backends
	 * killed when the postmaster dies ("MUST -- vadim 05-10-1999").  Nothing
	 * of the sort is done here; we simply hand off to proc_exit().
	 */
	proc_exit(status);
}
3696 :
3697 : /*
3698 : * Handle pmsignal conditions representing requests from backends,
3699 : * and check for promote and logrotate requests from pg_ctl.
3700 : */
3701 : static void
3702 5216 : process_pm_pmsignal(void)
3703 : {
3704 5216 : pending_pm_pmsignal = false;
3705 :
3706 5216 : ereport(DEBUG2,
3707 : (errmsg_internal("postmaster received pmsignal signal")));
3708 :
3709 : /*
3710 : * RECOVERY_STARTED and BEGIN_HOT_STANDBY signals are ignored in
3711 : * unexpected states. If the startup process quickly starts up, completes
3712 : * recovery, exits, we might process the death of the startup process
3713 : * first. We don't want to go back to recovery in that case.
3714 : */
3715 5216 : if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_STARTED) &&
3716 462 : pmState == PM_STARTUP && Shutdown == NoShutdown)
3717 : {
3718 : /* WAL redo has started. We're out of reinitialization. */
3719 462 : FatalError = false;
3720 462 : AbortStartTime = 0;
3721 :
3722 : /*
3723 : * Start the archiver if we're responsible for (re-)archiving received
3724 : * files.
3725 : */
3726 : Assert(PgArchPID == 0);
3727 462 : if (XLogArchivingAlways())
3728 6 : PgArchPID = StartChildProcess(B_ARCHIVER);
3729 :
3730 : /*
3731 : * If we aren't planning to enter hot standby mode later, treat
3732 : * RECOVERY_STARTED as meaning we're out of startup, and report status
3733 : * accordingly.
3734 : */
3735 462 : if (!EnableHotStandby)
3736 : {
3737 4 : AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STANDBY);
3738 : #ifdef USE_SYSTEMD
3739 : sd_notify(0, "READY=1");
3740 : #endif
3741 : }
3742 :
3743 462 : pmState = PM_RECOVERY;
3744 : }
3745 :
3746 5216 : if (CheckPostmasterSignal(PMSIGNAL_BEGIN_HOT_STANDBY) &&
3747 274 : pmState == PM_RECOVERY && Shutdown == NoShutdown)
3748 : {
3749 274 : ereport(LOG,
3750 : (errmsg("database system is ready to accept read-only connections")));
3751 :
3752 : /* Report status */
3753 274 : AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_READY);
3754 : #ifdef USE_SYSTEMD
3755 : sd_notify(0, "READY=1");
3756 : #endif
3757 :
3758 274 : pmState = PM_HOT_STANDBY;
3759 274 : connsAllowed = true;
3760 :
3761 : /* Some workers may be scheduled to start now */
3762 274 : StartWorkerNeeded = true;
3763 : }
3764 :
3765 : /* Process background worker state changes. */
3766 5216 : if (CheckPostmasterSignal(PMSIGNAL_BACKGROUND_WORKER_CHANGE))
3767 : {
3768 : /* Accept new worker requests only if not stopping. */
3769 1998 : BackgroundWorkerStateChange(pmState < PM_STOP_BACKENDS);
3770 1998 : StartWorkerNeeded = true;
3771 : }
3772 :
3773 5216 : if (StartWorkerNeeded || HaveCrashedWorker)
3774 2274 : maybe_start_bgworkers();
3775 :
3776 : /* Tell syslogger to rotate logfile if requested */
3777 5216 : if (SysLoggerPID != 0)
3778 : {
3779 2 : if (CheckLogrotateSignal())
3780 : {
3781 2 : signal_child(SysLoggerPID, SIGUSR1);
3782 2 : RemoveLogrotateSignalFiles();
3783 : }
3784 0 : else if (CheckPostmasterSignal(PMSIGNAL_ROTATE_LOGFILE))
3785 : {
3786 0 : signal_child(SysLoggerPID, SIGUSR1);
3787 : }
3788 : }
3789 :
3790 5216 : if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER) &&
3791 0 : Shutdown <= SmartShutdown && pmState < PM_STOP_BACKENDS)
3792 : {
3793 : /*
3794 : * Start one iteration of the autovacuum daemon, even if autovacuuming
3795 : * is nominally not enabled. This is so we can have an active defense
3796 : * against transaction ID wraparound. We set a flag for the main loop
3797 : * to do it rather than trying to do it here --- this is because the
3798 : * autovac process itself may send the signal, and we want to handle
3799 : * that by launching another iteration as soon as the current one
3800 : * completes.
3801 : */
3802 0 : start_autovac_launcher = true;
3803 : }
3804 :
3805 5216 : if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER) &&
3806 22 : Shutdown <= SmartShutdown && pmState < PM_STOP_BACKENDS)
3807 : {
3808 : /* The autovacuum launcher wants us to start a worker process. */
3809 22 : StartAutovacuumWorker();
3810 : }
3811 :
3812 5216 : if (CheckPostmasterSignal(PMSIGNAL_START_WALRECEIVER))
3813 : {
3814 : /* Startup Process wants us to start the walreceiver process. */
3815 : /* Start immediately if possible, else remember request for later. */
3816 418 : WalReceiverRequested = true;
3817 418 : MaybeStartWalReceiver();
3818 : }
3819 :
3820 : /*
3821 : * Try to advance postmaster's state machine, if a child requests it.
3822 : *
3823 : * Be careful about the order of this action relative to this function's
3824 : * other actions. Generally, this should be after other actions, in case
3825 : * they have effects PostmasterStateMachine would need to know about.
3826 : * However, we should do it before the CheckPromoteSignal step, which
3827 : * cannot have any (immediate) effect on the state machine, but does
3828 : * depend on what state we're in now.
3829 : */
3830 5216 : if (CheckPostmasterSignal(PMSIGNAL_ADVANCE_STATE_MACHINE))
3831 : {
3832 2040 : PostmasterStateMachine();
3833 : }
3834 :
3835 5216 : if (StartupPID != 0 &&
3836 1282 : (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
3837 2148 : pmState == PM_HOT_STANDBY) &&
3838 1282 : CheckPromoteSignal())
3839 : {
3840 : /*
3841 : * Tell startup process to finish recovery.
3842 : *
3843 : * Leave the promote signal file in place and let the Startup process
3844 : * do the unlink.
3845 : */
3846 76 : signal_child(StartupPID, SIGUSR2);
3847 : }
3848 5216 : }
3849 :
/*
 * dummy_handler -- signal handler that deliberately does nothing
 *
 * Installed for signals the postmaster itself has no use for but that
 * backends do use.  Setting such signals to SIG_IGN in the postmaster could
 * make a freshly started backend lose a signal that arrives before it has
 * reconfigured its own signal handling.  (See the notes in tcop/postgres.c.)
 */
static void
dummy_handler(SIGNAL_ARGS)
{
	/* intentionally empty */
}
3863 :
3864 : /*
3865 : * Generate a random cancel key.
3866 : */
3867 : static bool
3868 26032 : RandomCancelKey(int32 *cancel_key)
3869 : {
3870 26032 : return pg_strong_random(cancel_key, sizeof(int32));
3871 : }
3872 :
3873 : /*
3874 : * Count up number of child processes of specified types (dead_end children
3875 : * are always excluded).
3876 : */
3877 : static int
3878 33706 : CountChildren(int target)
3879 : {
3880 : dlist_iter iter;
3881 33706 : int cnt = 0;
3882 :
3883 109460 : dlist_foreach(iter, &BackendList)
3884 : {
3885 75754 : Backend *bp = dlist_container(Backend, elem, iter.cur);
3886 :
3887 75754 : if (bp->dead_end)
3888 182 : continue;
3889 :
3890 : /*
3891 : * Since target == BACKEND_TYPE_ALL is the most common case, we test
3892 : * it first and avoid touching shared memory for every child.
3893 : */
3894 75572 : if (target != BACKEND_TYPE_ALL)
3895 : {
3896 : /*
3897 : * Assign bkend_type for any recently announced WAL Sender
3898 : * processes.
3899 : */
3900 7650 : if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
3901 1940 : IsPostmasterChildWalSender(bp->child_slot))
3902 98 : bp->bkend_type = BACKEND_TYPE_WALSND;
3903 :
3904 5710 : if (!(target & bp->bkend_type))
3905 570 : continue;
3906 : }
3907 :
3908 75002 : cnt++;
3909 : }
3910 33706 : return cnt;
3911 : }
3912 :
3913 :
3914 : /*
3915 : * StartChildProcess -- start an auxiliary process for the postmaster
3916 : *
3917 : * "type" determines what kind of child will be started. All child types
3918 : * initially go to AuxiliaryProcessMain, which will handle common setup.
3919 : *
3920 : * Return value of StartChildProcess is subprocess' PID, or 0 if failed
3921 : * to start subprocess.
3922 : */
3923 : static pid_t
3924 7178 : StartChildProcess(BackendType type)
3925 : {
3926 : pid_t pid;
3927 :
3928 7178 : pid = postmaster_child_launch(type, NULL, 0, NULL);
3929 7178 : if (pid < 0)
3930 : {
3931 : /* in parent, fork failed */
3932 0 : ereport(LOG,
3933 : (errmsg("could not fork \"%s\" process: %m", PostmasterChildName(type))));
3934 :
3935 : /*
3936 : * fork failure is fatal during startup, but there's no need to choke
3937 : * immediately if starting other child types fails.
3938 : */
3939 0 : if (type == B_STARTUP)
3940 0 : ExitPostmaster(1);
3941 0 : return 0;
3942 : }
3943 :
3944 : /*
3945 : * in parent, successful fork
3946 : */
3947 7178 : return pid;
3948 : }
3949 :
3950 : /*
3951 : * StartAutovacuumWorker
3952 : * Start an autovac worker process.
3953 : *
3954 : * This function is here because it enters the resulting PID into the
3955 : * postmaster's private backends list.
3956 : *
3957 : * NB -- this code very roughly matches BackendStartup.
3958 : */
3959 : static void
3960 22 : StartAutovacuumWorker(void)
3961 : {
3962 : Backend *bn;
3963 :
3964 : /*
3965 : * If not in condition to run a process, don't try, but handle it like a
3966 : * fork failure. This does not normally happen, since the signal is only
3967 : * supposed to be sent by autovacuum launcher when it's OK to do it, but
3968 : * we have to check to avoid race-condition problems during DB state
3969 : * changes.
3970 : */
3971 22 : if (canAcceptConnections(BACKEND_TYPE_AUTOVAC) == CAC_OK)
3972 : {
3973 : /*
3974 : * Compute the cancel key that will be assigned to this session. We
3975 : * probably don't need cancel keys for autovac workers, but we'd
3976 : * better have something random in the field to prevent unfriendly
3977 : * people from sending cancels to them.
3978 : */
3979 22 : if (!RandomCancelKey(&MyCancelKey))
3980 : {
3981 0 : ereport(LOG,
3982 : (errcode(ERRCODE_INTERNAL_ERROR),
3983 : errmsg("could not generate random cancel key")));
3984 0 : return;
3985 : }
3986 :
3987 22 : bn = (Backend *) palloc_extended(sizeof(Backend), MCXT_ALLOC_NO_OOM);
3988 22 : if (bn)
3989 : {
3990 22 : bn->cancel_key = MyCancelKey;
3991 :
3992 : /* Autovac workers are not dead_end and need a child slot */
3993 22 : bn->dead_end = false;
3994 22 : bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
3995 22 : bn->bgworker_notify = false;
3996 :
3997 22 : bn->pid = StartChildProcess(B_AUTOVAC_WORKER);
3998 22 : if (bn->pid > 0)
3999 : {
4000 22 : bn->bkend_type = BACKEND_TYPE_AUTOVAC;
4001 22 : dlist_push_head(&BackendList, &bn->elem);
4002 : #ifdef EXEC_BACKEND
4003 : ShmemBackendArrayAdd(bn);
4004 : #endif
4005 : /* all OK */
4006 22 : return;
4007 : }
4008 :
4009 : /*
4010 : * fork failed, fall through to report -- actual error message was
4011 : * logged by StartChildProcess
4012 : */
4013 0 : (void) ReleasePostmasterChildSlot(bn->child_slot);
4014 0 : pfree(bn);
4015 : }
4016 : else
4017 0 : ereport(LOG,
4018 : (errcode(ERRCODE_OUT_OF_MEMORY),
4019 : errmsg("out of memory")));
4020 : }
4021 :
4022 : /*
4023 : * Report the failure to the launcher, if it's running. (If it's not, we
4024 : * might not even be connected to shared memory, so don't try to call
4025 : * AutoVacWorkerFailed.) Note that we also need to signal it so that it
4026 : * responds to the condition, but we don't do that here, instead waiting
4027 : * for ServerLoop to do it. This way we avoid a ping-pong signaling in
4028 : * quick succession between the autovac launcher and postmaster in case
4029 : * things get ugly.
4030 : */
4031 0 : if (AutoVacPID != 0)
4032 : {
4033 0 : AutoVacWorkerFailed();
4034 0 : avlauncher_needs_signal = true;
4035 : }
4036 : }
4037 :
4038 : /*
4039 : * MaybeStartWalReceiver
4040 : * Start the WAL receiver process, if not running and our state allows.
4041 : *
4042 : * Note: if WalReceiverPID is already nonzero, it might seem that we should
4043 : * clear WalReceiverRequested. However, there's a race condition if the
4044 : * walreceiver terminates and the startup process immediately requests a new
4045 : * one: it's quite possible to get the signal for the request before reaping
4046 : * the dead walreceiver process. Better to risk launching an extra
4047 : * walreceiver than to miss launching one we need. (The walreceiver code
4048 : * has logic to recognize that it should go away if not needed.)
4049 : */
4050 : static void
4051 730 : MaybeStartWalReceiver(void)
4052 : {
4053 730 : if (WalReceiverPID == 0 &&
4054 416 : (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
4055 414 : pmState == PM_HOT_STANDBY) &&
4056 416 : Shutdown <= SmartShutdown)
4057 : {
4058 416 : WalReceiverPID = StartChildProcess(B_WAL_RECEIVER);
4059 416 : if (WalReceiverPID != 0)
4060 416 : WalReceiverRequested = false;
4061 : /* else leave the flag set, so we'll try again later */
4062 : }
4063 730 : }
4064 :
4065 : /*
4066 : * MaybeStartWalSummarizer
4067 : * Start the WAL summarizer process, if not running and our state allows.
4068 : */
4069 : static void
4070 58744 : MaybeStartWalSummarizer(void)
4071 : {
4072 58744 : if (summarize_wal && WalSummarizerPID == 0 &&
4073 50 : (pmState == PM_RUN || pmState == PM_HOT_STANDBY) &&
4074 24 : Shutdown <= SmartShutdown)
4075 24 : WalSummarizerPID = StartChildProcess(B_WAL_SUMMARIZER);
4076 58744 : }
4077 :
4078 :
4079 : /*
4080 : * MaybeStartSlotSyncWorker
4081 : * Start the slot sync worker, if not running and our state allows.
4082 : *
4083 : * We allow to start the slot sync worker when we are on a hot standby,
4084 : * fast or immediate shutdown is not in progress, slot sync parameters
4085 : * are configured correctly, and it is the first time of worker's launch,
4086 : * or enough time has passed since the worker was launched last.
4087 : */
4088 : static void
4089 58744 : MaybeStartSlotSyncWorker(void)
4090 : {
4091 58744 : if (SlotSyncWorkerPID == 0 && pmState == PM_HOT_STANDBY &&
4092 3140 : Shutdown <= SmartShutdown && sync_replication_slots &&
4093 22 : ValidateSlotSyncParams(LOG) && SlotSyncWorkerCanRestart())
4094 8 : SlotSyncWorkerPID = StartChildProcess(B_SLOTSYNC_WORKER);
4095 58744 : }
4096 :
4097 : /*
4098 : * Create the opts file
4099 : */
4100 : static bool
4101 1422 : CreateOptsFile(int argc, char *argv[], char *fullprogname)
4102 : {
4103 : FILE *fp;
4104 : int i;
4105 :
4106 : #define OPTS_FILE "postmaster.opts"
4107 :
4108 1422 : if ((fp = fopen(OPTS_FILE, "w")) == NULL)
4109 : {
4110 0 : ereport(LOG,
4111 : (errcode_for_file_access(),
4112 : errmsg("could not create file \"%s\": %m", OPTS_FILE)));
4113 0 : return false;
4114 : }
4115 :
4116 1422 : fprintf(fp, "%s", fullprogname);
4117 7072 : for (i = 1; i < argc; i++)
4118 5650 : fprintf(fp, " \"%s\"", argv[i]);
4119 1422 : fputs("\n", fp);
4120 :
4121 1422 : if (fclose(fp))
4122 : {
4123 0 : ereport(LOG,
4124 : (errcode_for_file_access(),
4125 : errmsg("could not write file \"%s\": %m", OPTS_FILE)));
4126 0 : return false;
4127 : }
4128 :
4129 1422 : return true;
4130 : }
4131 :
4132 :
4133 : /*
4134 : * MaxLivePostmasterChildren
4135 : *
4136 : * This reports the number of entries needed in per-child-process arrays
4137 : * (the PMChildFlags array, and if EXEC_BACKEND the ShmemBackendArray).
4138 : * These arrays include regular backends, autovac workers, walsenders
4139 : * and background workers, but not special children nor dead_end children.
4140 : * This allows the arrays to have a fixed maximum size, to wit the same
4141 : * too-many-children limit enforced by canAcceptConnections(). The exact value
4142 : * isn't too critical as long as it's more than MaxBackends.
4143 : */
4144 : int
4145 34436 : MaxLivePostmasterChildren(void)
4146 : {
4147 68872 : return 2 * (MaxConnections + autovacuum_max_workers + 1 +
4148 34436 : max_wal_senders + max_worker_processes);
4149 : }
4150 :
4151 : /*
4152 : * Connect background worker to a database.
4153 : */
4154 : void
4155 686 : BackgroundWorkerInitializeConnection(const char *dbname, const char *username, uint32 flags)
4156 : {
4157 686 : BackgroundWorker *worker = MyBgworkerEntry;
4158 686 : bits32 init_flags = 0; /* never honor session_preload_libraries */
4159 :
4160 : /* ignore datallowconn? */
4161 686 : if (flags & BGWORKER_BYPASS_ALLOWCONN)
4162 0 : init_flags |= INIT_PG_OVERRIDE_ALLOW_CONNS;
4163 : /* ignore rolcanlogin? */
4164 686 : if (flags & BGWORKER_BYPASS_ROLELOGINCHECK)
4165 0 : init_flags |= INIT_PG_OVERRIDE_ROLE_LOGIN;
4166 :
4167 : /* XXX is this the right errcode? */
4168 686 : if (!(worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION))
4169 0 : ereport(FATAL,
4170 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
4171 : errmsg("database connection requirement not indicated during registration")));
4172 :
4173 686 : InitPostgres(dbname, InvalidOid, /* database to connect to */
4174 : username, InvalidOid, /* role to connect as */
4175 : init_flags,
4176 : NULL); /* no out_dbname */
4177 :
4178 : /* it had better not gotten out of "init" mode yet */
4179 686 : if (!IsInitProcessingMode())
4180 0 : ereport(ERROR,
4181 : (errmsg("invalid processing mode in background worker")));
4182 686 : SetProcessingMode(NormalProcessing);
4183 686 : }
4184 :
4185 : /*
4186 : * Connect background worker to a database using OIDs.
4187 : */
4188 : void
4189 3388 : BackgroundWorkerInitializeConnectionByOid(Oid dboid, Oid useroid, uint32 flags)
4190 : {
4191 3388 : BackgroundWorker *worker = MyBgworkerEntry;
4192 3388 : bits32 init_flags = 0; /* never honor session_preload_libraries */
4193 :
4194 : /* ignore datallowconn? */
4195 3388 : if (flags & BGWORKER_BYPASS_ALLOWCONN)
4196 0 : init_flags |= INIT_PG_OVERRIDE_ALLOW_CONNS;
4197 : /* ignore rolcanlogin? */
4198 3388 : if (flags & BGWORKER_BYPASS_ROLELOGINCHECK)
4199 0 : init_flags |= INIT_PG_OVERRIDE_ROLE_LOGIN;
4200 :
4201 : /* XXX is this the right errcode? */
4202 3388 : if (!(worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION))
4203 0 : ereport(FATAL,
4204 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
4205 : errmsg("database connection requirement not indicated during registration")));
4206 :
4207 3388 : InitPostgres(NULL, dboid, /* database to connect to */
4208 : NULL, useroid, /* role to connect as */
4209 : init_flags,
4210 : NULL); /* no out_dbname */
4211 :
4212 : /* it had better not gotten out of "init" mode yet */
4213 3376 : if (!IsInitProcessingMode())
4214 0 : ereport(ERROR,
4215 : (errmsg("invalid processing mode in background worker")));
4216 3376 : SetProcessingMode(NormalProcessing);
4217 3376 : }
4218 :
4219 : /*
4220 : * Block/unblock signals in a background worker
4221 : */
4222 : void
4223 0 : BackgroundWorkerBlockSignals(void)
4224 : {
4225 0 : sigprocmask(SIG_SETMASK, &BlockSig, NULL);
4226 0 : }
4227 :
4228 : void
4229 4256 : BackgroundWorkerUnblockSignals(void)
4230 : {
4231 4256 : sigprocmask(SIG_SETMASK, &UnBlockSig, NULL);
4232 4256 : }
4233 :
4234 : /*
4235 : * Start a new bgworker.
4236 : * Starting time conditions must have been checked already.
4237 : *
4238 : * Returns true on success, false on failure.
4239 : * In either case, update the RegisteredBgWorker's state appropriately.
4240 : *
4241 : * This code is heavily based on autovacuum.c, q.v.
4242 : */
4243 : static bool
4244 4638 : do_start_bgworker(RegisteredBgWorker *rw)
4245 : {
4246 : pid_t worker_pid;
4247 :
4248 : Assert(rw->rw_pid == 0);
4249 :
4250 : /*
4251 : * Allocate and assign the Backend element. Note we must do this before
4252 : * forking, so that we can handle failures (out of memory or child-process
4253 : * slots) cleanly.
4254 : *
4255 : * Treat failure as though the worker had crashed. That way, the
4256 : * postmaster will wait a bit before attempting to start it again; if we
4257 : * tried again right away, most likely we'd find ourselves hitting the
4258 : * same resource-exhaustion condition.
4259 : */
4260 4638 : if (!assign_backendlist_entry(rw))
4261 : {
4262 0 : rw->rw_crashed_at = GetCurrentTimestamp();
4263 0 : return false;
4264 : }
4265 :
4266 4638 : ereport(DEBUG1,
4267 : (errmsg_internal("starting background worker process \"%s\"",
4268 : rw->rw_worker.bgw_name)));
4269 :
4270 4638 : worker_pid = postmaster_child_launch(B_BG_WORKER, (char *) &rw->rw_worker, sizeof(BackgroundWorker), NULL);
4271 4634 : if (worker_pid == -1)
4272 : {
4273 : /* in postmaster, fork failed ... */
4274 0 : ereport(LOG,
4275 : (errmsg("could not fork background worker process: %m")));
4276 : /* undo what assign_backendlist_entry did */
4277 0 : ReleasePostmasterChildSlot(rw->rw_child_slot);
4278 0 : rw->rw_child_slot = 0;
4279 0 : pfree(rw->rw_backend);
4280 0 : rw->rw_backend = NULL;
4281 : /* mark entry as crashed, so we'll try again later */
4282 0 : rw->rw_crashed_at = GetCurrentTimestamp();
4283 0 : return false;
4284 : }
4285 :
4286 : /* in postmaster, fork successful ... */
4287 4634 : rw->rw_pid = worker_pid;
4288 4634 : rw->rw_backend->pid = rw->rw_pid;
4289 4634 : ReportBackgroundWorkerPID(rw);
4290 : /* add new worker to lists of backends */
4291 4634 : dlist_push_head(&BackendList, &rw->rw_backend->elem);
4292 : #ifdef EXEC_BACKEND
4293 : ShmemBackendArrayAdd(rw->rw_backend);
4294 : #endif
4295 4634 : return true;
4296 : }
4297 :
4298 : /*
4299 : * Does the current postmaster state require starting a worker with the
4300 : * specified start_time?
4301 : */
4302 : static bool
4303 6296 : bgworker_should_start_now(BgWorkerStartTime start_time)
4304 : {
4305 6296 : switch (pmState)
4306 : {
4307 0 : case PM_NO_CHILDREN:
4308 : case PM_WAIT_DEAD_END:
4309 : case PM_SHUTDOWN_2:
4310 : case PM_SHUTDOWN:
4311 : case PM_WAIT_BACKENDS:
4312 : case PM_STOP_BACKENDS:
4313 0 : break;
4314 :
4315 4638 : case PM_RUN:
4316 4638 : if (start_time == BgWorkerStart_RecoveryFinished)
4317 1974 : return true;
4318 : /* fall through */
4319 :
4320 : case PM_HOT_STANDBY:
4321 2938 : if (start_time == BgWorkerStart_ConsistentState)
4322 2664 : return true;
4323 : /* fall through */
4324 :
4325 : case PM_RECOVERY:
4326 : case PM_STARTUP:
4327 : case PM_INIT:
4328 1658 : if (start_time == BgWorkerStart_PostmasterStart)
4329 0 : return true;
4330 : /* fall through */
4331 : }
4332 :
4333 1658 : return false;
4334 : }
4335 :
4336 : /*
4337 : * Allocate the Backend struct for a connected background worker, but don't
4338 : * add it to the list of backends just yet.
4339 : *
4340 : * On failure, return false without changing any worker state.
4341 : *
4342 : * Some info from the Backend is copied into the passed rw.
4343 : */
4344 : static bool
4345 4638 : assign_backendlist_entry(RegisteredBgWorker *rw)
4346 : {
4347 : Backend *bn;
4348 :
4349 : /*
4350 : * Check that database state allows another connection. Currently the
4351 : * only possible failure is CAC_TOOMANY, so we just log an error message
4352 : * based on that rather than checking the error code precisely.
4353 : */
4354 4638 : if (canAcceptConnections(BACKEND_TYPE_BGWORKER) != CAC_OK)
4355 : {
4356 0 : ereport(LOG,
4357 : (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
4358 : errmsg("no slot available for new background worker process")));
4359 0 : return false;
4360 : }
4361 :
4362 : /*
4363 : * Compute the cancel key that will be assigned to this session. We
4364 : * probably don't need cancel keys for background workers, but we'd better
4365 : * have something random in the field to prevent unfriendly people from
4366 : * sending cancels to them.
4367 : */
4368 4638 : if (!RandomCancelKey(&MyCancelKey))
4369 : {
4370 0 : ereport(LOG,
4371 : (errcode(ERRCODE_INTERNAL_ERROR),
4372 : errmsg("could not generate random cancel key")));
4373 0 : return false;
4374 : }
4375 :
4376 4638 : bn = palloc_extended(sizeof(Backend), MCXT_ALLOC_NO_OOM);
4377 4638 : if (bn == NULL)
4378 : {
4379 0 : ereport(LOG,
4380 : (errcode(ERRCODE_OUT_OF_MEMORY),
4381 : errmsg("out of memory")));
4382 0 : return false;
4383 : }
4384 :
4385 4638 : bn->cancel_key = MyCancelKey;
4386 4638 : bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
4387 4638 : bn->bkend_type = BACKEND_TYPE_BGWORKER;
4388 4638 : bn->dead_end = false;
4389 4638 : bn->bgworker_notify = false;
4390 :
4391 4638 : rw->rw_backend = bn;
4392 4638 : rw->rw_child_slot = bn->child_slot;
4393 :
4394 4638 : return true;
4395 : }
4396 :
4397 : /*
4398 : * If the time is right, start background worker(s).
4399 : *
4400 : * As a side effect, the bgworker control variables are set or reset
4401 : * depending on whether more workers may need to be started.
4402 : *
4403 : * We limit the number of workers started per call, to avoid consuming the
4404 : * postmaster's attention for too long when many such requests are pending.
4405 : * As long as StartWorkerNeeded is true, ServerLoop will not block and will
4406 : * call this function again after dealing with any other issues.
4407 : */
4408 : static void
4409 10886 : maybe_start_bgworkers(void)
4410 : {
4411 : #define MAX_BGWORKERS_TO_LAUNCH 100
4412 10886 : int num_launched = 0;
4413 10886 : TimestampTz now = 0;
4414 : slist_mutable_iter iter;
4415 :
4416 : /*
4417 : * During crash recovery, we have no need to be called until the state
4418 : * transition out of recovery.
4419 : */
4420 10886 : if (FatalError)
4421 : {
4422 8 : StartWorkerNeeded = false;
4423 8 : HaveCrashedWorker = false;
4424 8 : return;
4425 : }
4426 :
4427 : /* Don't need to be called again unless we find a reason for it below */
4428 10878 : StartWorkerNeeded = false;
4429 10878 : HaveCrashedWorker = false;
4430 :
4431 29866 : slist_foreach_modify(iter, &BackgroundWorkerList)
4432 : {
4433 : RegisteredBgWorker *rw;
4434 :
4435 18992 : rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
4436 :
4437 : /* ignore if already running */
4438 18992 : if (rw->rw_pid != 0)
4439 9856 : continue;
4440 :
4441 : /* if marked for death, clean up and remove from list */
4442 9136 : if (rw->rw_terminate)
4443 : {
4444 0 : ForgetBackgroundWorker(&iter);
4445 0 : continue;
4446 : }
4447 :
4448 : /*
4449 : * If this worker has crashed previously, maybe it needs to be
4450 : * restarted (unless on registration it specified it doesn't want to
4451 : * be restarted at all). Check how long ago did a crash last happen.
4452 : * If the last crash is too recent, don't start it right away; let it
4453 : * be restarted once enough time has passed.
4454 : */
4455 9136 : if (rw->rw_crashed_at != 0)
4456 : {
4457 2840 : if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART)
4458 : {
4459 : int notify_pid;
4460 :
4461 28 : notify_pid = rw->rw_worker.bgw_notify_pid;
4462 :
4463 28 : ForgetBackgroundWorker(&iter);
4464 :
4465 : /* Report worker is gone now. */
4466 28 : if (notify_pid != 0)
4467 14 : kill(notify_pid, SIGUSR1);
4468 :
4469 28 : continue;
4470 : }
4471 :
4472 : /* read system time only when needed */
4473 2812 : if (now == 0)
4474 2812 : now = GetCurrentTimestamp();
4475 :
4476 2812 : if (!TimestampDifferenceExceeds(rw->rw_crashed_at, now,
4477 2812 : rw->rw_worker.bgw_restart_time * 1000))
4478 : {
4479 : /* Set flag to remember that we have workers to start later */
4480 2812 : HaveCrashedWorker = true;
4481 2812 : continue;
4482 : }
4483 : }
4484 :
4485 6296 : if (bgworker_should_start_now(rw->rw_worker.bgw_start_time))
4486 : {
4487 : /* reset crash time before trying to start worker */
4488 4638 : rw->rw_crashed_at = 0;
4489 :
4490 : /*
4491 : * Try to start the worker.
4492 : *
4493 : * On failure, give up processing workers for now, but set
4494 : * StartWorkerNeeded so we'll come back here on the next iteration
4495 : * of ServerLoop to try again. (We don't want to wait, because
4496 : * there might be additional ready-to-run workers.) We could set
4497 : * HaveCrashedWorker as well, since this worker is now marked
4498 : * crashed, but there's no need because the next run of this
4499 : * function will do that.
4500 : */
4501 4638 : if (!do_start_bgworker(rw))
4502 : {
4503 0 : StartWorkerNeeded = true;
4504 0 : return;
4505 : }
4506 :
4507 : /*
4508 : * If we've launched as many workers as allowed, quit, but have
4509 : * ServerLoop call us again to look for additional ready-to-run
4510 : * workers. There might not be any, but we'll find out the next
4511 : * time we run.
4512 : */
4513 4634 : if (++num_launched >= MAX_BGWORKERS_TO_LAUNCH)
4514 : {
4515 0 : StartWorkerNeeded = true;
4516 0 : return;
4517 : }
4518 : }
4519 : }
4520 : }
4521 :
4522 : /*
4523 : * When a backend asks to be notified about worker state changes, we
4524 : * set a flag in its backend entry. The background worker machinery needs
4525 : * to know when such backends exit.
4526 : */
4527 : bool
4528 3434 : PostmasterMarkPIDForWorkerNotify(int pid)
4529 : {
4530 : dlist_iter iter;
4531 : Backend *bp;
4532 :
4533 7090 : dlist_foreach(iter, &BackendList)
4534 : {
4535 7090 : bp = dlist_container(Backend, elem, iter.cur);
4536 7090 : if (bp->pid == pid)
4537 : {
4538 3434 : bp->bgworker_notify = true;
4539 3434 : return true;
4540 : }
4541 : }
4542 0 : return false;
4543 : }
4544 :
4545 : #ifdef EXEC_BACKEND
4546 :
4547 : Size
4548 : ShmemBackendArraySize(void)
4549 : {
4550 : return mul_size(MaxLivePostmasterChildren(), sizeof(Backend));
4551 : }
4552 :
4553 : void
4554 : ShmemBackendArrayAllocation(void)
4555 : {
4556 : Size size = ShmemBackendArraySize();
4557 :
4558 : ShmemBackendArray = (Backend *) ShmemAlloc(size);
4559 : /* Mark all slots as empty */
4560 : memset(ShmemBackendArray, 0, size);
4561 : }
4562 :
4563 : static void
4564 : ShmemBackendArrayAdd(Backend *bn)
4565 : {
4566 : /* The array slot corresponding to my PMChildSlot should be free */
4567 : int i = bn->child_slot - 1;
4568 :
4569 : Assert(ShmemBackendArray[i].pid == 0);
4570 : ShmemBackendArray[i] = *bn;
4571 : }
4572 :
4573 : static void
4574 : ShmemBackendArrayRemove(Backend *bn)
4575 : {
4576 : int i = bn->child_slot - 1;
4577 :
4578 : Assert(ShmemBackendArray[i].pid == bn->pid);
4579 : /* Mark the slot as empty */
4580 : ShmemBackendArray[i].pid = 0;
4581 : }
4582 : #endif /* EXEC_BACKEND */
4583 :
4584 :
4585 : #ifdef WIN32
4586 :
4587 : /*
4588 : * Subset implementation of waitpid() for Windows. We assume pid is -1
4589 : * (that is, check all child processes) and options is WNOHANG (don't wait).
4590 : */
4591 : static pid_t
4592 : waitpid(pid_t pid, int *exitstatus, int options)
4593 : {
4594 : win32_deadchild_waitinfo *childinfo;
4595 : DWORD exitcode;
4596 : DWORD dwd;
4597 : ULONG_PTR key;
4598 : OVERLAPPED *ovl;
4599 :
4600 : /* Try to consume one win32_deadchild_waitinfo from the queue. */
4601 : if (!GetQueuedCompletionStatus(win32ChildQueue, &dwd, &key, &ovl, 0))
4602 : {
4603 : errno = EAGAIN;
4604 : return -1;
4605 : }
4606 :
4607 : childinfo = (win32_deadchild_waitinfo *) key;
4608 : pid = childinfo->procId;
4609 :
4610 : /*
4611 : * Remove handle from wait - required even though it's set to wait only
4612 : * once
4613 : */
4614 : UnregisterWaitEx(childinfo->waitHandle, NULL);
4615 :
4616 : if (!GetExitCodeProcess(childinfo->procHandle, &exitcode))
4617 : {
4618 : /*
4619 : * Should never happen. Inform user and set a fixed exitcode.
4620 : */
4621 : write_stderr("could not read exit code for process\n");
4622 : exitcode = 255;
4623 : }
4624 : *exitstatus = exitcode;
4625 :
4626 : /*
4627 : * Close the process handle. Only after this point can the PID can be
4628 : * recycled by the kernel.
4629 : */
4630 : CloseHandle(childinfo->procHandle);
4631 :
4632 : /*
4633 : * Free struct that was allocated before the call to
4634 : * RegisterWaitForSingleObject()
4635 : */
4636 : pfree(childinfo);
4637 :
4638 : return pid;
4639 : }
4640 :
4641 : /*
4642 : * Note! Code below executes on a thread pool! All operations must
4643 : * be thread safe! Note that elog() and friends must *not* be used.
4644 : */
4645 : static void WINAPI
4646 : pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired)
4647 : {
4648 : /* Should never happen, since we use INFINITE as timeout value. */
4649 : if (TimerOrWaitFired)
4650 : return;
4651 :
4652 : /*
4653 : * Post the win32_deadchild_waitinfo object for waitpid() to deal with. If
4654 : * that fails, we leak the object, but we also leak a whole process and
4655 : * get into an unrecoverable state, so there's not much point in worrying
4656 : * about that. We'd like to panic, but we can't use that infrastructure
4657 : * from this thread.
4658 : */
4659 : if (!PostQueuedCompletionStatus(win32ChildQueue,
4660 : 0,
4661 : (ULONG_PTR) lpParameter,
4662 : NULL))
4663 : write_stderr("could not post child completion status\n");
4664 :
4665 : /* Queue SIGCHLD signal. */
4666 : pg_queue_signal(SIGCHLD);
4667 : }
4668 :
4669 : /*
4670 : * Queue a waiter to signal when this child dies. The wait will be handled
4671 : * automatically by an operating system thread pool. The memory and the
4672 : * process handle will be freed by a later call to waitpid().
4673 : */
4674 : void
4675 : pgwin32_register_deadchild_callback(HANDLE procHandle, DWORD procId)
4676 : {
4677 : win32_deadchild_waitinfo *childinfo;
4678 :
4679 : childinfo = palloc(sizeof(win32_deadchild_waitinfo));
4680 : childinfo->procHandle = procHandle;
4681 : childinfo->procId = procId;
4682 :
4683 : if (!RegisterWaitForSingleObject(&childinfo->waitHandle,
4684 : procHandle,
4685 : pgwin32_deadchild_callback,
4686 : childinfo,
4687 : INFINITE,
4688 : WT_EXECUTEONLYONCE | WT_EXECUTEINWAITTHREAD))
4689 : ereport(FATAL,
4690 : (errmsg_internal("could not register process for wait: error code %lu",
4691 : GetLastError())));
4692 : }
4693 :
4694 : #endif /* WIN32 */
4695 :
4696 : /*
4697 : * Initialize one and only handle for monitoring postmaster death.
4698 : *
4699 : * Called once in the postmaster, so that child processes can subsequently
4700 : * monitor if their parent is dead.
4701 : */
4702 : static void
4703 1422 : InitPostmasterDeathWatchHandle(void)
4704 : {
4705 : #ifndef WIN32
4706 :
4707 : /*
4708 : * Create a pipe. Postmaster holds the write end of the pipe open
4709 : * (POSTMASTER_FD_OWN), and children hold the read end. Children can pass
4710 : * the read file descriptor to select() to wake up in case postmaster
4711 : * dies, or check for postmaster death with a (read() == 0). Children must
4712 : * close the write end as soon as possible after forking, because EOF
4713 : * won't be signaled in the read end until all processes have closed the
4714 : * write fd. That is taken care of in ClosePostmasterPorts().
4715 : */
4716 : Assert(MyProcPid == PostmasterPid);
4717 1422 : if (pipe(postmaster_alive_fds) < 0)
4718 0 : ereport(FATAL,
4719 : (errcode_for_file_access(),
4720 : errmsg_internal("could not create pipe to monitor postmaster death: %m")));
4721 :
4722 : /* Notify fd.c that we've eaten two FDs for the pipe. */
4723 1422 : ReserveExternalFD();
4724 1422 : ReserveExternalFD();
4725 :
4726 : /*
4727 : * Set O_NONBLOCK to allow testing for the fd's presence with a read()
4728 : * call.
4729 : */
4730 1422 : if (fcntl(postmaster_alive_fds[POSTMASTER_FD_WATCH], F_SETFL, O_NONBLOCK) == -1)
4731 0 : ereport(FATAL,
4732 : (errcode_for_socket_access(),
4733 : errmsg_internal("could not set postmaster death monitoring pipe to nonblocking mode: %m")));
4734 : #else
4735 :
4736 : /*
4737 : * On Windows, we use a process handle for the same purpose.
4738 : */
4739 : if (DuplicateHandle(GetCurrentProcess(),
4740 : GetCurrentProcess(),
4741 : GetCurrentProcess(),
4742 : &PostmasterHandle,
4743 : 0,
4744 : TRUE,
4745 : DUPLICATE_SAME_ACCESS) == 0)
4746 : ereport(FATAL,
4747 : (errmsg_internal("could not duplicate postmaster handle: error code %lu",
4748 : GetLastError())));
4749 : #endif /* WIN32 */
4750 1422 : }
|