Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * postmaster.c
4 : * This program acts as a clearing house for requests to the
5 : * POSTGRES system. Frontend programs send a startup message
6 : * to the Postmaster and the postmaster uses the info in the
7 : * message to set up a backend process.
8 : *
9 : * The postmaster also manages system-wide operations such as
10 : * startup and shutdown. The postmaster itself doesn't do those
11 : * operations, mind you --- it just forks off a subprocess to do them
12 : * at the right times. It also takes care of resetting the system
13 : * if a backend crashes.
14 : *
15 : * The postmaster process creates the shared memory and semaphore
16 : * pools during startup, but as a rule does not touch them itself.
17 : * In particular, it is not a member of the PGPROC array of backends
18 : * and so it cannot participate in lock-manager operations. Keeping
19 : * the postmaster away from shared memory operations makes it simpler
20 : * and more reliable. The postmaster is almost always able to recover
21 : * from crashes of individual backends by resetting shared memory;
22 : * if it did much with shared memory then it would be prone to crashing
23 : * along with the backends.
24 : *
25 : * When a request message is received, we now fork() immediately.
26 : * The child process performs authentication of the request, and
27 : * then becomes a backend if successful. This allows the auth code
28 : * to be written in a simple single-threaded style (as opposed to the
29 : * crufty "poor man's multitasking" code that used to be needed).
30 : * More importantly, it ensures that blockages in non-multithreaded
31 : * libraries like SSL or PAM cannot cause denial of service to other
32 : * clients.
33 : *
34 : *
35 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
36 : * Portions Copyright (c) 1994, Regents of the University of California
37 : *
38 : *
39 : * IDENTIFICATION
40 : * src/backend/postmaster/postmaster.c
41 : *
42 : * NOTES
43 : *
44 : * Initialization:
45 : * The Postmaster sets up shared memory data structures
46 : * for the backends.
47 : *
48 : * Synchronization:
49 : * The Postmaster shares memory with the backends but should avoid
50 : * touching shared memory, so as not to become stuck if a crashing
51 : * backend screws up locks or shared memory. Likewise, the Postmaster
52 : * should never block on messages from frontend clients.
53 : *
54 : * Garbage Collection:
55 : * The Postmaster cleans up after backends if they have an emergency
56 : * exit and/or core dump.
57 : *
58 : * Error Reporting:
59 : * Use write_stderr() only for reporting "interactive" errors
60 : * (essentially, bogus arguments on the command line). Once the
61 : * postmaster is launched, use ereport().
62 : *
63 : *-------------------------------------------------------------------------
64 : */
65 :
66 : #include "postgres.h"
67 :
68 : #include <unistd.h>
69 : #include <signal.h>
70 : #include <time.h>
71 : #include <sys/wait.h>
72 : #include <ctype.h>
73 : #include <sys/stat.h>
74 : #include <sys/socket.h>
75 : #include <fcntl.h>
76 : #include <sys/param.h>
77 : #include <netdb.h>
78 : #include <limits.h>
79 :
80 : #ifdef USE_BONJOUR
81 : #include <dns_sd.h>
82 : #endif
83 :
84 : #ifdef USE_SYSTEMD
85 : #include <systemd/sd-daemon.h>
86 : #endif
87 :
88 : #ifdef HAVE_PTHREAD_IS_THREADED_NP
89 : #include <pthread.h>
90 : #endif
91 :
92 : #include "access/xlog.h"
93 : #include "access/xlogrecovery.h"
94 : #include "catalog/pg_control.h"
95 : #include "common/file_perm.h"
96 : #include "common/file_utils.h"
97 : #include "common/ip.h"
98 : #include "common/pg_prng.h"
99 : #include "common/string.h"
100 : #include "lib/ilist.h"
101 : #include "libpq/auth.h"
102 : #include "libpq/libpq.h"
103 : #include "libpq/pqformat.h"
104 : #include "libpq/pqsignal.h"
105 : #include "nodes/queryjumble.h"
106 : #include "pg_getopt.h"
107 : #include "pgstat.h"
108 : #include "port/pg_bswap.h"
109 : #include "postmaster/autovacuum.h"
110 : #include "postmaster/auxprocess.h"
111 : #include "postmaster/bgworker_internals.h"
112 : #include "postmaster/fork_process.h"
113 : #include "postmaster/pgarch.h"
114 : #include "postmaster/postmaster.h"
115 : #include "postmaster/syslogger.h"
116 : #include "replication/logicallauncher.h"
117 : #include "replication/walsender.h"
118 : #include "storage/fd.h"
119 : #include "storage/ipc.h"
120 : #include "storage/pg_shmem.h"
121 : #include "storage/pmsignal.h"
122 : #include "storage/proc.h"
123 : #include "tcop/tcopprot.h"
124 : #include "utils/builtins.h"
125 : #include "utils/datetime.h"
126 : #include "utils/memutils.h"
127 : #include "utils/pidfile.h"
128 : #include "utils/ps_status.h"
129 : #include "utils/timeout.h"
130 : #include "utils/timestamp.h"
131 : #include "utils/varlena.h"
132 :
133 : #ifdef EXEC_BACKEND
134 : #include "storage/spin.h"
135 : #endif
136 :
137 :
138 : /*
139 : * Possible types of a backend. Each type is a distinct bit, so beyond being
140 : * the possible bkend_type values in struct bkend, these can be OR'ed together
141 : * to form the "target" request mask for SignalSomeChildren() and CountChildren().
142 : */
143 : #define BACKEND_TYPE_NORMAL 0x0001 /* normal backend (dead_end children and newly launched walsenders carry this too) */
144 : #define BACKEND_TYPE_AUTOVAC 0x0002 /* autovacuum worker process */
145 : #define BACKEND_TYPE_WALSND 0x0004 /* walsender process */
146 : #define BACKEND_TYPE_BGWORKER 0x0008 /* bgworker process */
147 : #define BACKEND_TYPE_ALL 0x000F /* OR of all the above; the mask SignalChildren() passes */
148 :
149 : /*
150 : * List of active backends (or child processes anyway; we don't actually
151 : * know whether a given child has become a backend or is still in the
152 : * authorization phase). This is used mainly to keep track of how many
153 : * children we have and send them appropriate signals when necessary.
154 : *
155 : * As shown in the above set of backend types, this list includes not only
156 : * "normal" client sessions, but also autovacuum workers, walsenders, and
157 : * background workers. (Note that at the time of launch, walsenders are
158 : * labeled BACKEND_TYPE_NORMAL; we relabel them to BACKEND_TYPE_WALSND
159 : * upon noticing they've changed their PMChildFlags entry. Hence that check
160 : * must be done before any operation that needs to distinguish walsenders
161 : * from normal backends.)
162 : *
163 : * Also, "dead_end" children are in it: these are children launched just for
164 : * the purpose of sending a friendly rejection message to a would-be client.
165 : * We must track them because they are attached to shared memory, but we know
166 : * they will never become live backends. dead_end children are not assigned a
167 : * PMChildSlot. dead_end children have bkend_type NORMAL.
168 : *
169 : * "Special" children such as the startup, bgwriter and autovacuum launcher
170 : * tasks are not in this list. They are tracked via StartupPID and other
171 : * pid_t variables below. (Thus, there can't be more than one of any given
172 : * "special" child process type. We use BackendList entries for any child
173 : * process there can be more than one of.)
174 : */
175 : typedef struct bkend
176 : {
177 : pid_t pid; /* process id of backend */
178 : int32 cancel_key; /* cancel key for cancel requests targeting this backend */
179 : int child_slot; /* PMChildSlot for this backend, if any (dead_end children are not assigned one) */
180 : int bkend_type; /* child process flavor: one of the BACKEND_TYPE_* bits (NORMAL for dead_end children) */
181 : bool dead_end; /* is it going to send an error and quit? */
182 : bool bgworker_notify; /* gets bgworker start/stop notifications */
183 : dlist_node elem; /* list link in BackendList */
184 : } Backend;
185 :
186 : static dlist_head BackendList = DLIST_STATIC_INIT(BackendList); /* list of Backend entries, linked through Backend.elem; starts out empty */
187 :
188 : #ifdef EXEC_BACKEND
189 : static Backend *ShmemBackendArray; /* EXEC_BACKEND only; also carried in BackendParameters.ShmemBackendArray and maintained via ShmemBackendArrayAdd()/ShmemBackendArrayRemove() */
190 : #endif
191 :
192 : BackgroundWorker *MyBgworkerEntry = NULL; /* NOTE(review): presumably the bgworker entry of the current process, NULL otherwise -- confirm where it is assigned */
193 :
194 :
195 :
196 : /* The socket number we are listening for connections on */
197 : int PostPortNumber = DEF_PGPORT;
198 :
199 : /* The directory names for Unix socket(s) */
200 : char *Unix_socket_directories;
201 :
202 : /* The TCP listen address(es) */
203 : char *ListenAddresses;
204 :
205 : /*
206 : * SuperuserReservedConnections is the number of backends reserved for
207 : * superuser use, and ReservedConnections is the number of backends reserved
208 : * for use by roles with privileges of the pg_use_reserved_connections
209 : * predefined role. These are taken out of the pool of MaxConnections backend
210 : * slots, so the number of backend slots available for roles that are neither
211 : * superuser nor have privileges of pg_use_reserved_connections is
212 : * (MaxConnections - SuperuserReservedConnections - ReservedConnections).
213 : *
214 : * If the number of remaining slots is less than or equal to
215 : * SuperuserReservedConnections, only superusers can make new connections. If
216 : * the number of remaining slots is greater than SuperuserReservedConnections
217 : * but less than or equal to
218 : * (SuperuserReservedConnections + ReservedConnections), only superusers and
219 : * roles with privileges of pg_use_reserved_connections can make new
220 : * connections. Note that pre-existing superuser and
221 : * pg_use_reserved_connections connections don't count against the limits.
222 : */
223 : int SuperuserReservedConnections;
224 : int ReservedConnections;
225 :
226 : /* The socket(s) we're listening to. */
227 : #define MAXLISTEN 64
228 : static int NumListenSockets = 0;
229 : static pgsocket *ListenSockets = NULL;
230 :
231 : /* still more option variables */
232 : bool EnableSSL = false;
233 :
234 : int PreAuthDelay = 0;
235 : int AuthenticationTimeout = 60;
236 :
237 : bool log_hostname; /* for ps display and logging */
238 : bool Log_connections = false;
239 :
240 : bool enable_bonjour = false;
241 : char *bonjour_name;
242 : bool restart_after_crash = true;
243 : bool remove_temp_files_after_crash = true;
244 : bool send_abort_for_crash = false;
245 : bool send_abort_for_kill = false;
246 :
247 : /* PIDs of special child processes; 0 when not running */
248 : static pid_t StartupPID = 0,
249 : BgWriterPID = 0,
250 : CheckpointerPID = 0,
251 : WalWriterPID = 0,
252 : WalReceiverPID = 0,
253 : AutoVacPID = 0,
254 : PgArchPID = 0,
255 : SysLoggerPID = 0;
256 :
257 : /* Startup process's status (the value is kept in StartupStatus below) */
258 : typedef enum
259 : {
260 : STARTUP_NOT_RUNNING, /* initial value of StartupStatus */
261 : STARTUP_RUNNING,
262 : STARTUP_SIGNALED, /* we sent it a SIGQUIT or SIGKILL */
263 : STARTUP_CRASHED, /* NOTE(review): presumably recorded when the startup process dies unexpectedly -- confirm against the child-exit handling */
264 : } StartupStatusEnum;
265 :
266 : static StartupStatusEnum StartupStatus = STARTUP_NOT_RUNNING; /* current status; starts out "not running" */
267 :
268 : /* Startup/shutdown state: the possible values of the Shutdown variable below */
269 : #define NoShutdown 0 /* initial value of Shutdown */
270 : #define SmartShutdown 1 /* see connsAllowed: new connections are restricted while existing client backends finish */
271 : #define FastShutdown 2
272 : #define ImmediateShutdown 3
273 :
274 : static int Shutdown = NoShutdown; /* current shutdown mode; one of the four values above */
275 :
276 : static bool FatalError = false; /* T if recovering from backend crash */
277 :
278 : /*
279 : * We use a simple state machine to control startup, shutdown, and
280 : * crash recovery (which is rather like shutdown followed by startup).
281 : *
282 : * After doing all the postmaster initialization work, we enter PM_STARTUP
283 : * state and the startup process is launched. The startup process begins by
284 : * reading the control file and other preliminary initialization steps.
285 : * In a normal startup, or after crash recovery, the startup process exits
286 : * with exit code 0 and we switch to PM_RUN state. However, archive recovery
287 : * is handled specially since it takes much longer and we would like to support
288 : * hot standby during archive recovery.
289 : *
290 : * When the startup process is ready to start archive recovery, it signals the
291 : * postmaster, and we switch to PM_RECOVERY state. The background writer and
292 : * checkpointer are launched, while the startup process continues applying WAL.
293 : * If Hot Standby is enabled, then, after reaching a consistent point in WAL
294 : * redo, startup process signals us again, and we switch to PM_HOT_STANDBY
295 : * state and begin accepting connections to perform read-only queries. When
296 : * archive recovery is finished, the startup process exits with exit code 0
297 : * and we switch to PM_RUN state.
298 : *
299 : * Normal child backends can only be launched when we are in PM_RUN or
300 : * PM_HOT_STANDBY state. (connsAllowed can also restrict launching.)
301 : * In other states we handle connection requests by launching "dead_end"
302 : * child processes, which will simply send the client an error message and
303 : * quit. (We track these in the BackendList so that we can know when they
304 : * are all gone; this is important because they're still connected to shared
305 : * memory, and would interfere with an attempt to destroy the shmem segment,
306 : * possibly leading to SHMALL failure when we try to make a new one.)
307 : * In PM_WAIT_DEAD_END state we are waiting for all the dead_end children
308 : * to drain out of the system, and therefore stop accepting connection
309 : * requests at all until the last existing child has quit (which hopefully
310 : * will not be very long).
311 : *
312 : * Notice that this state variable does not distinguish *why* we entered
313 : * states later than PM_RUN --- Shutdown and FatalError must be consulted
314 : * to find that out. FatalError is never true in PM_RECOVERY, PM_HOT_STANDBY,
315 : * or PM_RUN states, nor in PM_SHUTDOWN states (because we don't enter those
316 : * states when trying to recover from a crash). It can be true in PM_STARTUP
317 : * state, because we don't clear it until we've successfully started WAL redo.
318 : */
319 : typedef enum
320 : {
321 : PM_INIT, /* postmaster starting */
322 : PM_STARTUP, /* waiting for startup subprocess */
323 : PM_RECOVERY, /* in archive recovery mode */
324 : PM_HOT_STANDBY, /* in hot standby mode */
325 : PM_RUN, /* normal "database is alive" state */
326 : PM_STOP_BACKENDS, /* need to stop remaining backends */
327 : PM_WAIT_BACKENDS, /* waiting for live backends to exit */
328 : PM_SHUTDOWN, /* waiting for checkpointer to do shutdown
329 : * ckpt */
330 : PM_SHUTDOWN_2, /* waiting for archiver and walsenders to
331 : * finish */
332 : PM_WAIT_DEAD_END, /* waiting for dead_end children to exit */
333 : PM_NO_CHILDREN, /* all important children have exited */
334 : } PMState;
335 :
336 : static PMState pmState = PM_INIT;
337 :
338 : /*
339 : * While performing a "smart shutdown", we restrict new connections but stay
340 : * in PM_RUN or PM_HOT_STANDBY state until all the client backends are gone.
341 : * connsAllowed is a sub-state indicator showing the active restriction.
342 : * It is of no interest unless pmState is PM_RUN or PM_HOT_STANDBY.
343 : */
344 : static bool connsAllowed = true;
345 :
346 : /* Start time of SIGKILL timeout during immediate shutdown or child crash */
347 : /* Zero means timeout is not running */
348 : static time_t AbortStartTime = 0;
349 :
350 : /* Length of said timeout */
351 : #define SIGKILL_CHILDREN_AFTER_SECS 5
352 :
353 : static bool ReachedNormalRunning = false; /* T if we've reached PM_RUN */
354 :
355 : bool ClientAuthInProgress = false; /* T during new-client
356 : * authentication */
357 :
358 : bool redirection_done = false; /* stderr redirected for syslogger? */
359 :
360 : /* received START_AUTOVAC_LAUNCHER signal */
361 : static bool start_autovac_launcher = false;
362 :
363 : /* the launcher needs to be signaled to communicate some condition */
364 : static bool avlauncher_needs_signal = false;
365 :
366 : /* received START_WALRECEIVER signal */
367 : static bool WalReceiverRequested = false;
368 :
369 : /* set when there's a worker that needs to be started up */
370 : static bool StartWorkerNeeded = true; /* note: initialized to true */
371 : static bool HaveCrashedWorker = false; /* NOTE(review): presumably set when a background worker has crashed and may need restarting -- confirm where it is set */
372 :
373 : /* set when signals arrive */
374 : static volatile sig_atomic_t pending_pm_pmsignal;
375 : static volatile sig_atomic_t pending_pm_child_exit;
376 : static volatile sig_atomic_t pending_pm_reload_request;
377 : static volatile sig_atomic_t pending_pm_shutdown_request;
378 : static volatile sig_atomic_t pending_pm_fast_shutdown_request;
379 : static volatile sig_atomic_t pending_pm_immediate_shutdown_request;
380 :
381 : /* event multiplexing object */
382 : static WaitEventSet *pm_wait_set;
383 :
384 : #ifdef USE_SSL
385 : /* Set when and if SSL has been initialized properly */
386 : static bool LoadedSSL = false;
387 : #endif
388 :
389 : #ifdef USE_BONJOUR
390 : static DNSServiceRef bonjour_sdref = NULL;
391 : #endif
392 :
393 : /*
394 : * postmaster.c - function prototypes
395 : */
396 : static void CloseServerPorts(int status, Datum arg);
397 : static void unlink_external_pid_file(int status, Datum arg);
398 : static void getInstallationPaths(const char *argv0);
399 : static void checkControlFile(void);
400 : static Port *ConnCreate(int serverFd);
401 : static void ConnFree(Port *port);
402 : static void handle_pm_pmsignal_signal(SIGNAL_ARGS);
403 : static void handle_pm_child_exit_signal(SIGNAL_ARGS);
404 : static void handle_pm_reload_request_signal(SIGNAL_ARGS);
405 : static void handle_pm_shutdown_request_signal(SIGNAL_ARGS);
406 : static void process_pm_pmsignal(void);
407 : static void process_pm_child_exit(void);
408 : static void process_pm_reload_request(void);
409 : static void process_pm_shutdown_request(void);
410 : static void process_startup_packet_die(SIGNAL_ARGS);
411 : static void dummy_handler(SIGNAL_ARGS);
412 : static void StartupPacketTimeoutHandler(void);
413 : static void CleanupBackend(int pid, int exitstatus);
414 : static bool CleanupBackgroundWorker(int pid, int exitstatus);
415 : static void HandleChildCrash(int pid, int exitstatus, const char *procname);
416 : static void LogChildExit(int lev, const char *procname,
417 : int pid, int exitstatus);
418 : static void PostmasterStateMachine(void);
419 : static void BackendInitialize(Port *port);
420 : static void BackendRun(Port *port) pg_attribute_noreturn();
421 : static void ExitPostmaster(int status) pg_attribute_noreturn();
422 : static int ServerLoop(void);
423 : static int BackendStartup(Port *port);
424 : static int ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done);
425 : static void SendNegotiateProtocolVersion(List *unrecognized_protocol_options);
426 : static void processCancelRequest(Port *port, void *pkt);
427 : static void report_fork_failure_to_client(Port *port, int errnum);
428 : static CAC_state canAcceptConnections(int backend_type);
429 : static bool RandomCancelKey(int32 *cancel_key);
430 : static void signal_child(pid_t pid, int signal);
431 : static void sigquit_child(pid_t pid);
432 : static bool SignalSomeChildren(int signal, int target);
433 : static void TerminateChildren(int signal);
434 :
435 : #define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL)
436 :
437 : static int CountChildren(int target);
438 : static bool assign_backendlist_entry(RegisteredBgWorker *rw);
439 : static void maybe_start_bgworkers(void);
440 : static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
441 : static pid_t StartChildProcess(AuxProcType type);
442 : static void StartAutovacuumWorker(void);
443 : static void MaybeStartWalReceiver(void);
444 : static void InitPostmasterDeathWatchHandle(void);
445 :
446 : /*
447 : * Is the archiver allowed to start up in the current postmaster state?
448 : *
449 : * Yes in PM_RUN if XLogArchivingActive(), and also during recovery (PM_RECOVERY or
450 : * PM_HOT_STANDBY) if XLogArchivingAlways(); in either case PgArchCanRestart() must hold too.
451 : */
452 : #define PgArchStartupAllowed() \
453 : (((XLogArchivingActive() && pmState == PM_RUN) || \
454 : (XLogArchivingAlways() && \
455 : (pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY))) && \
456 : PgArchCanRestart())
457 :
458 : #ifdef EXEC_BACKEND
459 :
460 : #ifdef WIN32
461 : #define WNOHANG 0 /* ignored, so any integer value will do */
462 :
463 : static pid_t waitpid(pid_t pid, int *exitstatus, int options);
464 : static void WINAPI pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired);
465 :
466 : static HANDLE win32ChildQueue;
467 :
468 : typedef struct
469 : {
470 : HANDLE waitHandle;
471 : HANDLE procHandle;
472 : DWORD procId;
473 : } win32_deadchild_waitinfo;
474 : #endif /* WIN32 */
475 :
476 : static pid_t backend_forkexec(Port *port);
477 : static pid_t internal_forkexec(int argc, char *argv[], Port *port, BackgroundWorker *worker);
478 :
479 : /* Type for a socket that can be inherited by a client process */
480 : #ifdef WIN32
481 : typedef struct
482 : {
483 : SOCKET origsocket; /* Original socket value, or PGINVALID_SOCKET
484 : * if not a socket */
485 : WSAPROTOCOL_INFO wsainfo;
486 : } InheritableSocket;
487 : #else
488 : typedef int InheritableSocket;
489 : #endif
490 :
491 : /*
492 : * Structure containing all variables passed to exec'ed backends (EXEC_BACKEND builds only; handled by save_backend_variables() and restore_backend_variables()).
493 : */
494 : typedef struct
495 : {
496 : bool has_port; /* NOTE(review): presumably true when "port" below is meaningful -- confirm in save_backend_variables() */
497 : Port port;
498 : InheritableSocket portsocket; /* inheritable form of a socket, see InheritableSocket above */
499 :
500 : bool has_bgworker; /* NOTE(review): presumably true when "bgworker" below is meaningful -- confirm */
501 : BackgroundWorker bgworker;
502 :
503 : char DataDir[MAXPGPATH];
504 : int32 MyCancelKey;
505 : int MyPMChildSlot;
506 : #ifndef WIN32
507 : unsigned long UsedShmemSegID;
508 : #else
509 : void *ShmemProtectiveRegion;
510 : HANDLE UsedShmemSegID;
511 : #endif
512 : void *UsedShmemSegAddr;
513 : slock_t *ShmemLock;
514 : Backend *ShmemBackendArray; /* same name as the file-level ShmemBackendArray pointer */
515 : #ifndef HAVE_SPINLOCKS
516 : PGSemaphore *SpinlockSemaArray;
517 : #endif
518 : int NamedLWLockTrancheRequests;
519 : NamedLWLockTranche *NamedLWLockTrancheArray;
520 : LWLockPadded *MainLWLockArray;
521 : slock_t *ProcStructLock;
522 : PROC_HDR *ProcGlobal;
523 : PGPROC *AuxiliaryProcs;
524 : PGPROC *PreparedXactProcs;
525 : PMSignalData *PMSignalState;
526 : pid_t PostmasterPid;
527 : TimestampTz PgStartTime;
528 : TimestampTz PgReloadTime;
529 : pg_time_t first_syslogger_file_time;
530 : bool redirection_done; /* same name as the global "stderr redirected for syslogger?" flag */
531 : bool IsBinaryUpgrade;
532 : bool query_id_enabled;
533 : int max_safe_fds;
534 : int MaxBackends;
535 : #ifdef WIN32
536 : HANDLE PostmasterHandle; /* same name as the global postmaster process handle (Windows) */
537 : HANDLE initial_signal_pipe;
538 : HANDLE syslogPipe[2];
539 : #else
540 : int postmaster_alive_fds[2]; /* same name as the global postmaster-alive pipe descriptors (non-Windows) */
541 : int syslogPipe[2];
542 : #endif
543 : char my_exec_path[MAXPGPATH];
544 : char pkglib_path[MAXPGPATH];
545 : } BackendParameters;
546 :
547 : static void read_backend_variables(char *id, Port **port, BackgroundWorker **worker);
548 : static void restore_backend_variables(BackendParameters *param, Port **port, BackgroundWorker **worker);
549 :
550 : #ifndef WIN32
551 : static bool save_backend_variables(BackendParameters *param, Port *port, BackgroundWorker *worker);
552 : #else
553 : static bool save_backend_variables(BackendParameters *param, Port *port, BackgroundWorker *worker,
554 : HANDLE childProcess, pid_t childPid);
555 : #endif
556 :
557 : static void ShmemBackendArrayAdd(Backend *bn);
558 : static void ShmemBackendArrayRemove(Backend *bn);
559 : #endif /* EXEC_BACKEND */
560 :
561 : #define StartupDataBase() StartChildProcess(StartupProcess) /* each macro in this group expands to StartChildProcess() with the matching AuxProcType, so it yields that call's pid_t result */
562 : #define StartArchiver() StartChildProcess(ArchiverProcess)
563 : #define StartBackgroundWriter() StartChildProcess(BgWriterProcess)
564 : #define StartCheckpointer() StartChildProcess(CheckpointerProcess)
565 : #define StartWalWriter() StartChildProcess(WalWriterProcess)
566 : #define StartWalReceiver() StartChildProcess(WalReceiverProcess)
567 :
568 : /* Macros to check the exit status of a child process; "st" is a wait status of the kind examined by WIFEXITED()/WEXITSTATUS() */
569 : #define EXIT_STATUS_0(st) ((st) == 0) /* the raw status value is exactly 0 */
570 : #define EXIT_STATUS_1(st) (WIFEXITED(st) && WEXITSTATUS(st) == 1) /* exited normally with exit code 1 */
571 : #define EXIT_STATUS_3(st) (WIFEXITED(st) && WEXITSTATUS(st) == 3) /* exited normally with exit code 3 */
572 :
573 : #ifndef WIN32
574 : /*
575 : * File descriptors for pipe used to monitor if postmaster is alive.
576 : * First is POSTMASTER_FD_WATCH, second is POSTMASTER_FD_OWN.
577 : */
578 : int postmaster_alive_fds[2] = {-1, -1};
579 : #else
580 : /* Process handle of postmaster used for the same purpose on Windows */
581 : HANDLE PostmasterHandle;
582 : #endif
583 :
584 : /*
585 : * Postmaster main entry point
586 : */
587 : void
588 1296 : PostmasterMain(int argc, char *argv[])
589 : {
590 : int opt;
591 : int status;
592 1296 : char *userDoption = NULL;
593 1296 : bool listen_addr_saved = false;
594 1296 : char *output_config_variable = NULL;
595 :
596 1296 : InitProcessGlobals();
597 :
598 1296 : PostmasterPid = MyProcPid;
599 :
600 1296 : IsPostmasterEnvironment = true;
601 :
602 : /*
603 : * Start our win32 signal implementation
604 : */
605 : #ifdef WIN32
606 : pgwin32_signal_initialize();
607 : #endif
608 :
609 : /*
610 : * We should not be creating any files or directories before we check the
611 : * data directory (see checkDataDir()), but just in case set the umask to
612 : * the most restrictive (owner-only) permissions.
613 : *
614 : * checkDataDir() will reset the umask based on the data directory
615 : * permissions.
616 : */
617 1296 : umask(PG_MODE_MASK_OWNER);
618 :
619 : /*
620 : * By default, palloc() requests in the postmaster will be allocated in
621 : * the PostmasterContext, which is space that can be recycled by backends.
622 : * Allocated data that needs to be available to backends should be
623 : * allocated in TopMemoryContext.
624 : */
625 1296 : PostmasterContext = AllocSetContextCreate(TopMemoryContext,
626 : "Postmaster",
627 : ALLOCSET_DEFAULT_SIZES);
628 1296 : MemoryContextSwitchTo(PostmasterContext);
629 :
630 : /* Initialize paths to installation files */
631 1296 : getInstallationPaths(argv[0]);
632 :
633 : /*
634 : * Set up signal handlers for the postmaster process.
635 : *
636 : * CAUTION: when changing this list, check for side-effects on the signal
637 : * handling setup of child processes. See tcop/postgres.c,
638 : * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c,
639 : * postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/syslogger.c,
640 : * postmaster/bgworker.c and postmaster/checkpointer.c.
641 : */
642 1296 : pqinitmask();
643 1296 : sigprocmask(SIG_SETMASK, &BlockSig, NULL);
644 :
645 1296 : pqsignal(SIGHUP, handle_pm_reload_request_signal);
646 1296 : pqsignal(SIGINT, handle_pm_shutdown_request_signal);
647 1296 : pqsignal(SIGQUIT, handle_pm_shutdown_request_signal);
648 1296 : pqsignal(SIGTERM, handle_pm_shutdown_request_signal);
649 1296 : pqsignal(SIGALRM, SIG_IGN); /* ignored */
650 1296 : pqsignal(SIGPIPE, SIG_IGN); /* ignored */
651 1296 : pqsignal(SIGUSR1, handle_pm_pmsignal_signal);
652 1296 : pqsignal(SIGUSR2, dummy_handler); /* unused, reserve for children */
653 1296 : pqsignal(SIGCHLD, handle_pm_child_exit_signal);
654 :
655 : /* This may configure SIGURG, depending on platform. */
656 1296 : InitializeLatchSupport();
657 1296 : InitProcessLocalLatch();
658 :
659 : /*
660 : * No other place in Postgres should touch SIGTTIN/SIGTTOU handling. We
661 : * ignore those signals in a postmaster environment, so that there is no
662 : * risk of a child process freezing up due to writing to stderr. But for
663 : * a standalone backend, their default handling is reasonable. Hence, all
664 : * child processes should just allow the inherited settings to stand.
665 : */
666 : #ifdef SIGTTIN
667 1296 : pqsignal(SIGTTIN, SIG_IGN); /* ignored */
668 : #endif
669 : #ifdef SIGTTOU
670 1296 : pqsignal(SIGTTOU, SIG_IGN); /* ignored */
671 : #endif
672 :
673 : /* ignore SIGXFSZ, so that ulimit violations work like disk full */
674 : #ifdef SIGXFSZ
675 1296 : pqsignal(SIGXFSZ, SIG_IGN); /* ignored */
676 : #endif
677 :
678 : /* Begin accepting signals. */
679 1296 : sigprocmask(SIG_SETMASK, &UnBlockSig, NULL);
680 :
681 : /*
682 : * Options setup
683 : */
684 1296 : InitializeGUCOptions();
685 :
686 1296 : opterr = 1;
687 :
688 : /*
689 : * Parse command-line options. CAUTION: keep this in sync with
690 : * tcop/postgres.c (the option sets should not conflict) and with the
691 : * common help() function in main/main.c.
692 : */
693 4428 : while ((opt = getopt(argc, argv, "B:bC:c:D:d:EeFf:h:ijk:lN:OPp:r:S:sTt:W:-:")) != -1)
694 : {
695 3132 : switch (opt)
696 : {
697 0 : case 'B':
698 0 : SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
699 0 : break;
700 :
701 30 : case 'b':
702 : /* Undocumented flag used for binary upgrades */
703 30 : IsBinaryUpgrade = true;
704 30 : break;
705 :
706 4 : case 'C':
707 4 : output_config_variable = strdup(optarg);
708 4 : break;
709 :
710 1448 : case 'c':
711 : case '-':
712 : {
713 : char *name,
714 : *value;
715 :
716 1448 : ParseLongOption(optarg, &name, &value);
717 1448 : if (!value)
718 : {
719 0 : if (opt == '-')
720 0 : ereport(ERROR,
721 : (errcode(ERRCODE_SYNTAX_ERROR),
722 : errmsg("--%s requires a value",
723 : optarg)));
724 : else
725 0 : ereport(ERROR,
726 : (errcode(ERRCODE_SYNTAX_ERROR),
727 : errmsg("-c %s requires a value",
728 : optarg)));
729 : }
730 :
731 1448 : SetConfigOption(name, value, PGC_POSTMASTER, PGC_S_ARGV);
732 1448 : pfree(name);
733 1448 : pfree(value);
734 1448 : break;
735 : }
736 :
737 1296 : case 'D':
738 1296 : userDoption = strdup(optarg);
739 1296 : break;
740 :
741 0 : case 'd':
742 0 : set_debug_options(atoi(optarg), PGC_POSTMASTER, PGC_S_ARGV);
743 0 : break;
744 :
745 0 : case 'E':
746 0 : SetConfigOption("log_statement", "all", PGC_POSTMASTER, PGC_S_ARGV);
747 0 : break;
748 :
749 0 : case 'e':
750 0 : SetConfigOption("datestyle", "euro", PGC_POSTMASTER, PGC_S_ARGV);
751 0 : break;
752 :
753 162 : case 'F':
754 162 : SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
755 162 : break;
756 :
757 0 : case 'f':
758 0 : if (!set_plan_disabling_options(optarg, PGC_POSTMASTER, PGC_S_ARGV))
759 : {
760 0 : write_stderr("%s: invalid argument for option -f: \"%s\"\n",
761 : progname, optarg);
762 0 : ExitPostmaster(1);
763 : }
764 0 : break;
765 :
766 0 : case 'h':
767 0 : SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
768 0 : break;
769 :
770 0 : case 'i':
771 0 : SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
772 0 : break;
773 :
774 0 : case 'j':
775 : /* only used by interactive backend */
776 0 : break;
777 :
778 162 : case 'k':
779 162 : SetConfigOption("unix_socket_directories", optarg, PGC_POSTMASTER, PGC_S_ARGV);
780 162 : break;
781 :
782 0 : case 'l':
783 0 : SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
784 0 : break;
785 :
786 0 : case 'N':
787 0 : SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
788 0 : break;
789 :
790 0 : case 'O':
791 0 : SetConfigOption("allow_system_table_mods", "true", PGC_POSTMASTER, PGC_S_ARGV);
792 0 : break;
793 :
794 0 : case 'P':
795 0 : SetConfigOption("ignore_system_indexes", "true", PGC_POSTMASTER, PGC_S_ARGV);
796 0 : break;
797 :
798 30 : case 'p':
799 30 : SetConfigOption("port", optarg, PGC_POSTMASTER, PGC_S_ARGV);
800 30 : break;
801 :
802 0 : case 'r':
803 : /* only used by single-user backend */
804 0 : break;
805 :
806 0 : case 'S':
807 0 : SetConfigOption("work_mem", optarg, PGC_POSTMASTER, PGC_S_ARGV);
808 0 : break;
809 :
810 0 : case 's':
811 0 : SetConfigOption("log_statement_stats", "true", PGC_POSTMASTER, PGC_S_ARGV);
812 0 : break;
813 :
814 0 : case 'T':
815 :
816 : /*
817 : * This option used to be defined as sending SIGSTOP after a
818 : * backend crash, but sending SIGABRT seems more useful.
819 : */
820 0 : SetConfigOption("send_abort_for_crash", "true", PGC_POSTMASTER, PGC_S_ARGV);
821 0 : break;
822 :
823 0 : case 't':
824 : {
825 0 : const char *tmp = get_stats_option_name(optarg);
826 :
827 0 : if (tmp)
828 : {
829 0 : SetConfigOption(tmp, "true", PGC_POSTMASTER, PGC_S_ARGV);
830 : }
831 : else
832 : {
833 0 : write_stderr("%s: invalid argument for option -t: \"%s\"\n",
834 : progname, optarg);
835 0 : ExitPostmaster(1);
836 : }
837 0 : break;
838 : }
839 :
840 0 : case 'W':
841 0 : SetConfigOption("post_auth_delay", optarg, PGC_POSTMASTER, PGC_S_ARGV);
842 0 : break;
843 :
844 0 : default:
845 0 : write_stderr("Try \"%s --help\" for more information.\n",
846 : progname);
847 0 : ExitPostmaster(1);
848 : }
849 : }
850 :
851 : /*
852 : * Postmaster accepts no non-option switch arguments.
853 : */
854 1296 : if (optind < argc)
855 : {
856 0 : write_stderr("%s: invalid argument: \"%s\"\n",
857 0 : progname, argv[optind]);
858 0 : write_stderr("Try \"%s --help\" for more information.\n",
859 : progname);
860 0 : ExitPostmaster(1);
861 : }
862 :
863 : /*
864 : * Locate the proper configuration files and data directory, and read
865 : * postgresql.conf for the first time.
866 : */
867 1296 : if (!SelectConfigFiles(userDoption, progname))
868 0 : ExitPostmaster(2);
869 :
870 1294 : if (output_config_variable != NULL)
871 : {
872 : /*
873 : * If this is a runtime-computed GUC, it hasn't yet been initialized,
874 : * and the present value is not useful. However, this is a convenient
875 : * place to print the value for most GUCs because it is safe to run
876 : * postmaster startup to this point even if the server is already
877 : * running. For the handful of runtime-computed GUCs that we cannot
878 : * provide meaningful values for yet, we wait until later in
879 : * postmaster startup to print the value. We won't be able to use -C
880 : * on running servers for those GUCs, but using this option now would
881 : * lead to incorrect results for them.
882 : */
883 4 : int flags = GetConfigOptionFlags(output_config_variable, true);
884 :
885 4 : if ((flags & GUC_RUNTIME_COMPUTED) == 0)
886 : {
887 : /*
888 : * "-C guc" was specified, so print GUC's value and exit. No
889 : * extra permission check is needed because the user is reading
890 : * inside the data dir.
891 : */
892 2 : const char *config_val = GetConfigOption(output_config_variable,
893 : false, false);
894 :
895 2 : puts(config_val ? config_val : "");
896 2 : ExitPostmaster(0);
897 : }
898 :
899 : /*
900 : * A runtime-computed GUC will be printed later on. As we initialize
901 : * a server startup sequence, silence any log messages that may show
902 : * up in the output generated. FATAL and more severe messages are
903 : * useful to show, even if one would only expect at least PANIC. LOG
904 : * entries are hidden.
905 : */
906 2 : SetConfigOption("log_min_messages", "FATAL", PGC_SUSET,
907 : PGC_S_OVERRIDE);
908 : }
909 :
910 : /* Verify that DataDir looks reasonable */
911 1292 : checkDataDir();
912 :
913 : /* Check that pg_control exists */
914 1292 : checkControlFile();
915 :
916 : /* And switch working directory into it */
917 1292 : ChangeToDataDir();
918 :
919 : /*
920 : * Check for invalid combinations of GUC settings.
921 : */
922 1292 : if (SuperuserReservedConnections + ReservedConnections >= MaxConnections)
923 : {
924 0 : write_stderr("%s: superuser_reserved_connections (%d) plus reserved_connections (%d) must be less than max_connections (%d)\n",
925 : progname,
926 : SuperuserReservedConnections, ReservedConnections,
927 : MaxConnections);
928 0 : ExitPostmaster(1);
929 : }
930 1292 : if (XLogArchiveMode > ARCHIVE_MODE_OFF && wal_level == WAL_LEVEL_MINIMAL)
931 0 : ereport(ERROR,
932 : (errmsg("WAL archival cannot be enabled when wal_level is \"minimal\"")));
933 1292 : if (max_wal_senders > 0 && wal_level == WAL_LEVEL_MINIMAL)
934 0 : ereport(ERROR,
935 : (errmsg("WAL streaming (max_wal_senders > 0) requires wal_level \"replica\" or \"logical\"")));
936 :
937 : /*
938 : * Other one-time internal sanity checks can go here, if they are fast.
939 : * (Put any slow processing further down, after postmaster.pid creation.)
940 : */
941 1292 : if (!CheckDateTokenTables())
942 : {
943 0 : write_stderr("%s: invalid datetoken tables, please fix\n", progname);
944 0 : ExitPostmaster(1);
945 : }
946 :
947 : /*
948 : * Now that we are done processing the postmaster arguments, reset
949 : * getopt(3) library so that it will work correctly in subprocesses.
950 : */
951 1292 : optind = 1;
952 : #ifdef HAVE_INT_OPTRESET
953 : optreset = 1; /* some systems need this too */
954 : #endif
955 :
956 : /* For debugging: display postmaster environment */
957 : {
958 : extern char **environ;
959 : char **p;
960 :
961 1292 : ereport(DEBUG3,
962 : (errmsg_internal("%s: PostmasterMain: initial environment dump:",
963 : progname)));
964 1292 : ereport(DEBUG3,
965 : (errmsg_internal("-----------------------------------------")));
966 52090 : for (p = environ; *p; ++p)
967 50798 : ereport(DEBUG3,
968 : (errmsg_internal("\t%s", *p)));
969 1292 : ereport(DEBUG3,
970 : (errmsg_internal("-----------------------------------------")));
971 : }
972 :
973 : /*
974 : * Create lockfile for data directory.
975 : *
976 : * We want to do this before we try to grab the input sockets, because the
977 : * data directory interlock is more reliable than the socket-file
978 : * interlock (thanks to whoever decided to put socket files in /tmp :-().
979 : * For the same reason, it's best to grab the TCP socket(s) before the
980 : * Unix socket(s).
981 : *
982 : * Also note that this internally sets up the on_proc_exit function that
983 : * is responsible for removing both data directory and socket lockfiles;
984 : * so it must happen before opening sockets so that at exit, the socket
985 : * lockfiles go away after CloseServerPorts runs.
986 : */
987 1292 : CreateDataDirLockFile(true);
988 :
989 : /*
990 : * Read the control file (for error checking and config info).
991 : *
992 : * Since we verify the control file's CRC, this has a useful side effect
993 : * on machines where we need a run-time test for CRC support instructions.
994 : * The postmaster will do the test once at startup, and then its child
995 : * processes will inherit the correct function pointer and not need to
996 : * repeat the test.
997 : */
998 1290 : LocalProcessControlFile(false);
999 :
1000 : /*
1001 : * Register the apply launcher. It's probably a good idea to call this
1002 : * before any modules had a chance to take the background worker slots.
1003 : */
1004 1290 : ApplyLauncherRegister();
1005 :
1006 : /*
1007 : * process any libraries that should be preloaded at postmaster start
1008 : */
1009 1290 : process_shared_preload_libraries();
1010 :
1011 : /*
1012 : * Initialize SSL library, if specified.
1013 : */
1014 : #ifdef USE_SSL
1015 1290 : if (EnableSSL)
1016 : {
1017 52 : (void) secure_initialize(true);
1018 46 : LoadedSSL = true;
1019 : }
1020 : #endif
1021 :
1022 : /*
1023 : * Now that loadable modules have had their chance to alter any GUCs,
1024 : * calculate MaxBackends.
1025 : */
1026 1284 : InitializeMaxBackends();
1027 :
1028 : /*
1029 : * Give preloaded libraries a chance to request additional shared memory.
1030 : */
1031 1284 : process_shmem_requests();
1032 :
1033 : /*
1034 : * Now that loadable modules have had their chance to request additional
1035 : * shared memory, determine the value of any runtime-computed GUCs that
1036 : * depend on the amount of shared memory required.
1037 : */
1038 1284 : InitializeShmemGUCs();
1039 :
1040 : /*
1041 : * Now that modules have been loaded, we can process any custom resource
1042 : * managers specified in the wal_consistency_checking GUC.
1043 : */
1044 1284 : InitializeWalConsistencyChecking();
1045 :
1046 : /*
1047 : * If -C was specified with a runtime-computed GUC, we held off printing
1048 : * the value earlier, as the GUC was not yet initialized. We handle -C
1049 : * for most GUCs before we lock the data directory so that the option may
1050 : * be used on a running server. However, a handful of GUCs are runtime-
1051 : * computed and do not have meaningful values until after locking the data
1052 : * directory, and we cannot safely calculate their values earlier on a
1053 : * running server. At this point, such GUCs should be properly
1054 : * initialized, and we haven't yet set up shared memory, so this is a good
1055 : * time to handle the -C option for these special GUCs.
1056 : */
1057 1284 : if (output_config_variable != NULL)
1058 : {
1059 2 : const char *config_val = GetConfigOption(output_config_variable,
1060 : false, false);
1061 :
1062 2 : puts(config_val ? config_val : "");
1063 2 : ExitPostmaster(0);
1064 : }
1065 :
1066 : /*
1067 : * Set up shared memory and semaphores.
1068 : *
1069 : * Note: if using SysV shmem and/or semas, each postmaster startup will
1070 : * normally choose the same IPC keys. This helps ensure that we will
1071 : * clean up dead IPC objects if the postmaster crashes and is restarted.
1072 : */
1073 1282 : CreateSharedMemoryAndSemaphores();
1074 :
1075 : /*
1076 : * Estimate number of openable files. This must happen after setting up
1077 : * semaphores, because on some platforms semaphores count as open files.
1078 : */
1079 1280 : set_max_safe_fds();
1080 :
1081 : /*
1082 : * Set reference point for stack-depth checking.
1083 : */
1084 1280 : (void) set_stack_base();
1085 :
1086 : /*
1087 : * Initialize pipe (or process handle on Windows) that allows children to
1088 : * wake up from sleep on postmaster death.
1089 : */
1090 1280 : InitPostmasterDeathWatchHandle();
1091 :
1092 : #ifdef WIN32
1093 :
1094 : /*
1095 : * Initialize I/O completion port used to deliver list of dead children.
1096 : */
1097 : win32ChildQueue = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 1);
1098 : if (win32ChildQueue == NULL)
1099 : ereport(FATAL,
1100 : (errmsg("could not create I/O completion port for child queue")));
1101 : #endif
1102 :
1103 : #ifdef EXEC_BACKEND
1104 : /* Write out nondefault GUC settings for child processes to use */
1105 : write_nondefault_variables(PGC_POSTMASTER);
1106 :
1107 : /*
1108 : * Clean out the temp directory used to transmit parameters to child
1109 : * processes (see internal_forkexec, below). We must do this before
1110 : * launching any child processes, else we have a race condition: we could
1111 : * remove a parameter file before the child can read it. It should be
1112 : * safe to do so now, because we verified earlier that there are no
1113 : * conflicting Postgres processes in this data directory.
1114 : */
1115 : RemovePgTempFilesInDir(PG_TEMP_FILES_DIR, true, false);
1116 : #endif
1117 :
1118 : /*
1119 : * Forcibly remove the files signaling a standby promotion request.
1120 : * Otherwise, the existence of those files triggers a promotion too early,
1121 : * whether a user wants that or not.
1122 : *
1123 : * This removal of files is usually unnecessary because they can exist
1124 : * only during a few moments during a standby promotion. However there is
1125 : * a race condition: if pg_ctl promote is executed and creates the files
1126 : * during a promotion, the files can stay around even after the server is
1127 : * brought up to be the primary. Then, if a new standby starts by using
1128 : * the backup taken from the new primary, the files can exist at server
1129 : * startup and must be removed in order to avoid an unexpected promotion.
1130 : *
1131 : * Note that promotion signal files need to be removed before the startup
1132 : * process is invoked. Because, after that, they can be used by
1133 : * postmaster's SIGUSR1 signal handler.
1134 : */
1135 1280 : RemovePromoteSignalFiles();
1136 :
1137 : /* Do the same for logrotate signal file */
1138 1280 : RemoveLogrotateSignalFiles();
1139 :
1140 : /* Remove any outdated file holding the current log filenames. */
1141 1280 : if (unlink(LOG_METAINFO_DATAFILE) < 0 && errno != ENOENT)
1142 0 : ereport(LOG,
1143 : (errcode_for_file_access(),
1144 : errmsg("could not remove file \"%s\": %m",
1145 : LOG_METAINFO_DATAFILE)));
1146 :
1147 : /*
1148 : * If enabled, start up syslogger collection subprocess
1149 : */
1150 1280 : SysLoggerPID = SysLogger_Start();
1151 :
1152 : /*
1153 : * Reset whereToSendOutput from DestDebug (its starting state) to
1154 : * DestNone. This stops ereport from sending log messages to stderr unless
1155 : * Log_destination permits. We don't do this until the postmaster is
1156 : * fully launched, since startup failures may as well be reported to
1157 : * stderr.
1158 : *
1159 : * If we are in fact disabling logging to stderr, first emit a log message
1160 : * saying so, to provide a breadcrumb trail for users who may not remember
1161 : * that their logging is configured to go somewhere else.
1162 : */
1163 1280 : if (!(Log_destination & LOG_DESTINATION_STDERR))
1164 0 : ereport(LOG,
1165 : (errmsg("ending log output to stderr"),
1166 : errhint("Future log output will go to log destination \"%s\".",
1167 : Log_destination_string)));
1168 :
1169 1280 : whereToSendOutput = DestNone;
1170 :
1171 : /*
1172 : * Report server startup in log. While we could emit this much earlier,
1173 : * it seems best to do so after starting the log collector, if we intend
1174 : * to use one.
1175 : */
1176 1280 : ereport(LOG,
1177 : (errmsg("starting %s", PG_VERSION_STR)));
1178 :
1179 : /*
1180 : * Establish input sockets.
1181 : *
1182 : * First set up an on_proc_exit function that's charged with closing the
1183 : * sockets again at postmaster shutdown.
1184 : */
1185 1280 : ListenSockets = palloc(MAXLISTEN * sizeof(pgsocket));
1186 1280 : on_proc_exit(CloseServerPorts, 0);
1187 :
1188 1280 : if (ListenAddresses)
1189 : {
1190 : char *rawstring;
1191 : List *elemlist;
1192 : ListCell *l;
1193 1280 : int success = 0;
1194 :
1195 : /* Need a modifiable copy of ListenAddresses */
1196 1280 : rawstring = pstrdup(ListenAddresses);
1197 :
1198 : /* Parse string into list of hostnames */
1199 1280 : if (!SplitGUCList(rawstring, ',', &elemlist))
1200 : {
1201 : /* syntax error in list */
1202 0 : ereport(FATAL,
1203 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1204 : errmsg("invalid list syntax in parameter \"%s\"",
1205 : "listen_addresses")));
1206 : }
1207 :
1208 1330 : foreach(l, elemlist)
1209 : {
1210 50 : char *curhost = (char *) lfirst(l);
1211 :
1212 50 : if (strcmp(curhost, "*") == 0)
1213 0 : status = StreamServerPort(AF_UNSPEC, NULL,
1214 0 : (unsigned short) PostPortNumber,
1215 : NULL,
1216 : ListenSockets,
1217 : &NumListenSockets,
1218 : MAXLISTEN);
1219 : else
1220 50 : status = StreamServerPort(AF_UNSPEC, curhost,
1221 50 : (unsigned short) PostPortNumber,
1222 : NULL,
1223 : ListenSockets,
1224 : &NumListenSockets,
1225 : MAXLISTEN);
1226 :
1227 50 : if (status == STATUS_OK)
1228 : {
1229 50 : success++;
1230 : /* record the first successful host addr in lockfile */
1231 50 : if (!listen_addr_saved)
1232 : {
1233 50 : AddToDataDirLockFile(LOCK_FILE_LINE_LISTEN_ADDR, curhost);
1234 50 : listen_addr_saved = true;
1235 : }
1236 : }
1237 : else
1238 0 : ereport(WARNING,
1239 : (errmsg("could not create listen socket for \"%s\"",
1240 : curhost)));
1241 : }
1242 :
1243 1280 : if (!success && elemlist != NIL)
1244 0 : ereport(FATAL,
1245 : (errmsg("could not create any TCP/IP sockets")));
1246 :
1247 1280 : list_free(elemlist);
1248 1280 : pfree(rawstring);
1249 : }
1250 :
1251 : #ifdef USE_BONJOUR
1252 : /* Register for Bonjour only if we opened TCP socket(s) */
1253 : if (enable_bonjour && NumListenSockets > 0)
1254 : {
1255 : DNSServiceErrorType err;
1256 :
1257 : /*
1258 : * We pass 0 for interface_index, which will result in registering on
1259 : * all "applicable" interfaces. It's not entirely clear from the
1260 : * DNS-SD docs whether this would be appropriate if we have bound to
1261 : * just a subset of the available network interfaces.
1262 : */
1263 : err = DNSServiceRegister(&bonjour_sdref,
1264 : 0,
1265 : 0,
1266 : bonjour_name,
1267 : "_postgresql._tcp.",
1268 : NULL,
1269 : NULL,
1270 : pg_hton16(PostPortNumber),
1271 : 0,
1272 : NULL,
1273 : NULL,
1274 : NULL);
1275 : if (err != kDNSServiceErr_NoError)
1276 : ereport(LOG,
1277 : (errmsg("DNSServiceRegister() failed: error code %ld",
1278 : (long) err)));
1279 :
1280 : /*
1281 : * We don't bother to read the mDNS daemon's reply, and we expect that
1282 : * it will automatically terminate our registration when the socket is
1283 : * closed at postmaster termination. So there's nothing more to be
1284 : * done here. However, the bonjour_sdref is kept around so that
1285 : * forked children can close their copies of the socket.
1286 : */
1287 : }
1288 : #endif
1289 :
1290 1280 : if (Unix_socket_directories)
1291 : {
1292 : char *rawstring;
1293 : List *elemlist;
1294 : ListCell *l;
1295 1280 : int success = 0;
1296 :
1297 : /* Need a modifiable copy of Unix_socket_directories */
1298 1280 : rawstring = pstrdup(Unix_socket_directories);
1299 :
1300 : /* Parse string into list of directories */
1301 1280 : if (!SplitDirectoriesString(rawstring, ',', &elemlist))
1302 : {
1303 : /* syntax error in list */
1304 0 : ereport(FATAL,
1305 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1306 : errmsg("invalid list syntax in parameter \"%s\"",
1307 : "unix_socket_directories")));
1308 : }
1309 :
1310 2558 : foreach(l, elemlist)
1311 : {
1312 1278 : char *socketdir = (char *) lfirst(l);
1313 :
1314 1278 : status = StreamServerPort(AF_UNIX, NULL,
1315 1278 : (unsigned short) PostPortNumber,
1316 : socketdir,
1317 : ListenSockets,
1318 : &NumListenSockets,
1319 : MAXLISTEN);
1320 :
1321 1278 : if (status == STATUS_OK)
1322 : {
1323 1278 : success++;
1324 : /* record the first successful Unix socket in lockfile */
1325 1278 : if (success == 1)
1326 1278 : AddToDataDirLockFile(LOCK_FILE_LINE_SOCKET_DIR, socketdir);
1327 : }
1328 : else
1329 0 : ereport(WARNING,
1330 : (errmsg("could not create Unix-domain socket in directory \"%s\"",
1331 : socketdir)));
1332 : }
1333 :
1334 1280 : if (!success && elemlist != NIL)
1335 0 : ereport(FATAL,
1336 : (errmsg("could not create any Unix-domain sockets")));
1337 :
1338 1280 : list_free_deep(elemlist);
1339 1280 : pfree(rawstring);
1340 : }
1341 :
1342 : /*
1343 : * check that we have some socket to listen on
1344 : */
1345 1280 : if (NumListenSockets == 0)
1346 0 : ereport(FATAL,
1347 : (errmsg("no socket created for listening")));
1348 :
1349 : /*
1350 : * If no valid TCP ports, write an empty line for listen address,
1351 : * indicating the Unix socket must be used. Note that this line is not
1352 : * added to the lock file until there is a socket backing it.
1353 : */
1354 1280 : if (!listen_addr_saved)
1355 1230 : AddToDataDirLockFile(LOCK_FILE_LINE_LISTEN_ADDR, "");
1356 :
1357 : /*
1358 : * Record postmaster options. We delay this till now to avoid recording
1359 : * bogus options (eg, unusable port number).
1360 : */
1361 1280 : if (!CreateOptsFile(argc, argv, my_exec_path))
1362 0 : ExitPostmaster(1);
1363 :
1364 : /*
1365 : * Write the external PID file if requested
1366 : */
1367 1280 : if (external_pid_file)
1368 : {
1369 0 : FILE *fpidfile = fopen(external_pid_file, "w");
1370 :
1371 0 : if (fpidfile)
1372 : {
1373 0 : fprintf(fpidfile, "%d\n", MyProcPid);
1374 0 : fclose(fpidfile);
1375 :
1376 : /* Make PID file world readable */
1377 0 : if (chmod(external_pid_file, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) != 0)
1378 0 : write_stderr("%s: could not change permissions of external PID file \"%s\": %s\n",
1379 0 : progname, external_pid_file, strerror(errno));
1380 : }
1381 : else
1382 0 : write_stderr("%s: could not write external PID file \"%s\": %s\n",
1383 0 : progname, external_pid_file, strerror(errno));
1384 :
1385 0 : on_proc_exit(unlink_external_pid_file, 0);
1386 : }
1387 :
1388 : /*
1389 : * Remove old temporary files. At this point there can be no other
1390 : * Postgres processes running in this directory, so this should be safe.
1391 : */
1392 1280 : RemovePgTempFiles();
1393 :
1394 : /*
1395 : * Initialize the autovacuum subsystem (again, no process start yet)
1396 : */
1397 1280 : autovac_init();
1398 :
1399 : /*
1400 : * Load configuration files for client authentication.
1401 : */
1402 1280 : if (!load_hba())
1403 : {
1404 : /*
1405 : * It makes no sense to continue if we fail to load the HBA file,
1406 : * since there is no way to connect to the database in this case.
1407 : */
1408 0 : ereport(FATAL,
1409 : /* translator: %s is a configuration file */
1410 : (errmsg("could not load %s", HbaFileName)));
1411 : }
1412 1280 : if (!load_ident())
1413 : {
1414 : /*
1415 : * We can start up without the IDENT file, although it means that you
1416 : * cannot log in using any of the authentication methods that need a
1417 : * user name mapping. load_ident() already logged the details of error
1418 : * to the log.
1419 : */
1420 : }
1421 :
1422 : #ifdef HAVE_PTHREAD_IS_THREADED_NP
1423 :
1424 : /*
1425 : * On macOS, libintl replaces setlocale() with a version that calls
1426 : * CFLocaleCopyCurrent() when its second argument is "" and every relevant
1427 : * environment variable is unset or empty. CFLocaleCopyCurrent() makes
1428 : * the process multithreaded. The postmaster calls sigprocmask() and
1429 : * calls fork() without an immediate exec(), both of which have undefined
1430 : * behavior in a multithreaded program. A multithreaded postmaster is the
1431 : * normal case on Windows, which offers neither fork() nor sigprocmask().
1432 : */
1433 : if (pthread_is_threaded_np() != 0)
1434 : ereport(FATAL,
1435 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1436 : errmsg("postmaster became multithreaded during startup"),
1437 : errhint("Set the LC_ALL environment variable to a valid locale.")));
1438 : #endif
1439 :
1440 : /*
1441 : * Remember postmaster startup time
1442 : */
1443 1280 : PgStartTime = GetCurrentTimestamp();
1444 :
1445 : /*
1446 : * Report postmaster status in the postmaster.pid file, to allow pg_ctl to
1447 : * see what's happening.
1448 : */
1449 1280 : AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STARTING);
1450 :
1451 : /* Start bgwriter and checkpointer so they can help with recovery */
1452 1280 : if (CheckpointerPID == 0)
1453 1280 : CheckpointerPID = StartCheckpointer();
1454 1280 : if (BgWriterPID == 0)
1455 1280 : BgWriterPID = StartBackgroundWriter();
1456 :
1457 : /*
1458 : * We're ready to rock and roll...
1459 : */
1460 1280 : StartupPID = StartupDataBase();
1461 : Assert(StartupPID != 0);
1462 1280 : StartupStatus = STARTUP_RUNNING;
1463 1280 : pmState = PM_STARTUP;
1464 :
1465 : /* Some workers may be scheduled to start now */
1466 1280 : maybe_start_bgworkers();
1467 :
1468 1280 : status = ServerLoop();
1469 :
1470 : /*
1471 : * ServerLoop probably shouldn't ever return, but if it does, close down.
1472 : */
1473 0 : ExitPostmaster(status != STATUS_OK);
1474 :
1475 : abort(); /* not reached */
1476 : }
1477 :
1478 :
1479 : /*
1480 : * on_proc_exit callback to close server's listen sockets
1481 : */
1482 : static void
1483 1274 : CloseServerPorts(int status, Datum arg)
1484 : {
1485 : int i;
1486 :
1487 : /*
1488 : * First, explicitly close all the socket FDs. We used to just let this
1489 : * happen implicitly at postmaster exit, but it's better to close them
1490 : * before we remove the postmaster.pid lockfile; otherwise there's a race
1491 : * condition if a new postmaster wants to re-use the TCP port number.
1492 : */
1493 2598 : for (i = 0; i < NumListenSockets; i++)
1494 1324 : StreamClose(ListenSockets[i]);
1495 1274 : NumListenSockets = 0;
1496 :
1497 : /*
1498 : * Next, remove any filesystem entries for Unix sockets. To avoid race
1499 : * conditions against incoming postmasters, this must happen after closing
1500 : * the sockets and before removing lock files.
1501 : */
1502 1274 : RemoveSocketFiles();
1503 :
1504 : /*
1505 : * We don't do anything about socket lock files here; those will be
1506 : * removed in a later on_proc_exit callback.
1507 : */
1508 1274 : }
1509 :
1510 : /*
1511 : * on_proc_exit callback to delete external_pid_file
1512 : */
1513 : static void
1514 0 : unlink_external_pid_file(int status, Datum arg)
1515 : {
1516 0 : if (external_pid_file)
1517 0 : unlink(external_pid_file);
1518 0 : }
1519 :
1520 :
1521 : /*
1522 : * Compute and check the directory paths to files that are part of the
1523 : * installation (as deduced from the postgres executable's own location)
1524 : */
1525 : static void
1526 1296 : getInstallationPaths(const char *argv0)
1527 : {
1528 : DIR *pdir;
1529 :
1530 : /* Locate the postgres executable itself */
1531 1296 : if (find_my_exec(argv0, my_exec_path) < 0)
1532 0 : ereport(FATAL,
1533 : (errmsg("%s: could not locate my own executable path", argv0)));
1534 :
1535 : #ifdef EXEC_BACKEND
1536 : /* Locate executable backend before we change working directory */
1537 : if (find_other_exec(argv0, "postgres", PG_BACKEND_VERSIONSTR,
1538 : postgres_exec_path) < 0)
1539 : ereport(FATAL,
1540 : (errmsg("%s: could not locate matching postgres executable",
1541 : argv0)));
1542 : #endif
1543 :
1544 : /*
1545 : * Locate the pkglib directory --- this has to be set early in case we try
1546 : * to load any modules from it in response to postgresql.conf entries.
1547 : */
1548 1296 : get_pkglib_path(my_exec_path, pkglib_path);
1549 :
1550 : /*
1551 : * Verify that there's a readable directory there; otherwise the Postgres
1552 : * installation is incomplete or corrupt. (A typical cause of this
1553 : * failure is that the postgres executable has been moved or hardlinked to
1554 : * some directory that's not a sibling of the installation lib/
1555 : * directory.)
1556 : */
1557 1296 : pdir = AllocateDir(pkglib_path);
1558 1296 : if (pdir == NULL)
1559 0 : ereport(ERROR,
1560 : (errcode_for_file_access(),
1561 : errmsg("could not open directory \"%s\": %m",
1562 : pkglib_path),
1563 : errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
1564 : my_exec_path)));
1565 1296 : FreeDir(pdir);
1566 :
1567 : /*
1568 : * It's not worth checking the share/ directory. If the lib/ directory is
1569 : * there, then share/ probably is too.
1570 : */
1571 1296 : }
1572 :
1573 : /*
1574 : * Check that pg_control exists in the correct location in the data directory.
1575 : *
1576 : * No attempt is made to validate the contents of pg_control here. This is
1577 : * just a sanity check to see if we are looking at a real data directory.
1578 : */
1579 : static void
1580 1292 : checkControlFile(void)
1581 : {
1582 : char path[MAXPGPATH];
1583 : FILE *fp;
1584 :
1585 1292 : snprintf(path, sizeof(path), "%s/global/pg_control", DataDir);
1586 :
1587 1292 : fp = AllocateFile(path, PG_BINARY_R);
1588 1292 : if (fp == NULL)
1589 : {
1590 0 : write_stderr("%s: could not find the database system\n"
1591 : "Expected to find it in the directory \"%s\",\n"
1592 : "but could not open file \"%s\": %s\n",
1593 0 : progname, DataDir, path, strerror(errno));
1594 0 : ExitPostmaster(2);
1595 : }
1596 1292 : FreeFile(fp);
1597 1292 : }
1598 :
1599 : /*
1600 : * Determine how long should we let ServerLoop sleep, in milliseconds.
1601 : *
1602 : * In normal conditions we wait at most one minute, to ensure that the other
1603 : * background tasks handled by ServerLoop get done even when no requests are
1604 : * arriving. However, if there are background workers waiting to be started,
1605 : * we don't actually sleep so that they are quickly serviced. Other exception
1606 : * cases are as shown in the code.
1607 : */
1608 : static int
1609 54866 : DetermineSleepTime(void)
1610 : {
1611 54866 : TimestampTz next_wakeup = 0;
1612 :
1613 : /*
1614 : * Normal case: either there are no background workers at all, or we're in
1615 : * a shutdown sequence (during which we ignore bgworkers altogether).
1616 : */
1617 54866 : if (Shutdown > NoShutdown ||
1618 48944 : (!StartWorkerNeeded && !HaveCrashedWorker))
1619 : {
1620 54866 : if (AbortStartTime != 0)
1621 : {
1622 : int seconds;
1623 :
1624 : /* time left to abort; clamp to 0 in case it already expired */
1625 2018 : seconds = SIGKILL_CHILDREN_AFTER_SECS -
1626 2018 : (time(NULL) - AbortStartTime);
1627 :
1628 2018 : return Max(seconds * 1000, 0);
1629 : }
1630 : else
1631 52848 : return 60 * 1000;
1632 : }
1633 :
1634 0 : if (StartWorkerNeeded)
1635 0 : return 0;
1636 :
1637 0 : if (HaveCrashedWorker)
1638 : {
1639 : slist_mutable_iter siter;
1640 :
1641 : /*
1642 : * When there are crashed bgworkers, we sleep just long enough that
1643 : * they are restarted when they request to be. Scan the list to
1644 : * determine the minimum of all wakeup times according to most recent
1645 : * crash time and requested restart interval.
1646 : */
1647 0 : slist_foreach_modify(siter, &BackgroundWorkerList)
1648 : {
1649 : RegisteredBgWorker *rw;
1650 : TimestampTz this_wakeup;
1651 :
1652 0 : rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
1653 :
1654 0 : if (rw->rw_crashed_at == 0)
1655 0 : continue;
1656 :
1657 0 : if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART
1658 0 : || rw->rw_terminate)
1659 : {
1660 0 : ForgetBackgroundWorker(&siter);
1661 0 : continue;
1662 : }
1663 :
1664 0 : this_wakeup = TimestampTzPlusMilliseconds(rw->rw_crashed_at,
1665 : 1000L * rw->rw_worker.bgw_restart_time);
1666 0 : if (next_wakeup == 0 || this_wakeup < next_wakeup)
1667 0 : next_wakeup = this_wakeup;
1668 : }
1669 : }
1670 :
1671 0 : if (next_wakeup != 0)
1672 : {
1673 : int ms;
1674 :
1675 : /* result of TimestampDifferenceMilliseconds is in [0, INT_MAX] */
1676 0 : ms = (int) TimestampDifferenceMilliseconds(GetCurrentTimestamp(),
1677 : next_wakeup);
1678 0 : return Min(60 * 1000, ms);
1679 : }
1680 :
1681 0 : return 60 * 1000;
1682 : }
1683 :
1684 : /*
1685 : * Activate or deactivate notifications of server socket events. Since we
1686 : * don't currently have a way to remove events from an existing WaitEventSet,
1687 : * we'll just destroy and recreate the whole thing. This is called during
1688 : * shutdown so we can wait for backends to exit without accepting new
1689 : * connections, and during crash reinitialization when we need to start
1690 : * listening for new connections again. The WaitEventSet will be freed in fork
1691 : * children by ClosePostmasterPorts().
1692 : */
1693 : static void
1694 2588 : ConfigurePostmasterWaitSet(bool accept_connections)
1695 : {
1696 2588 : if (pm_wait_set)
1697 1308 : FreeWaitEventSet(pm_wait_set);
1698 2588 : pm_wait_set = NULL;
1699 :
1700 3876 : pm_wait_set = CreateWaitEventSet(NULL,
1701 1288 : accept_connections ? (1 + NumListenSockets) : 1);
1702 2588 : AddWaitEventToSet(pm_wait_set, WL_LATCH_SET, PGINVALID_SOCKET, MyLatch,
1703 : NULL);
1704 :
1705 2588 : if (accept_connections)
1706 : {
1707 2626 : for (int i = 0; i < NumListenSockets; i++)
1708 1338 : AddWaitEventToSet(pm_wait_set, WL_SOCKET_ACCEPT, ListenSockets[i],
1709 : NULL, NULL);
1710 : }
1711 2588 : }
1712 :
1713 : /*
1714 : * Main idle loop of postmaster
1715 : */
1716 : static int
1717 1280 : ServerLoop(void)
1718 : {
1719 : time_t last_lockfile_recheck_time,
1720 : last_touch_time;
1721 : WaitEvent events[MAXLISTEN];
1722 : int nevents;
1723 :
1724 1280 : ConfigurePostmasterWaitSet(true);
1725 1280 : last_lockfile_recheck_time = last_touch_time = time(NULL);
1726 :
1727 : for (;;)
1728 53586 : {
1729 : time_t now;
1730 :
1731 54866 : nevents = WaitEventSetWait(pm_wait_set,
1732 54866 : DetermineSleepTime(),
1733 : events,
1734 : lengthof(events),
1735 : 0 /* postmaster posts no wait_events */ );
1736 :
1737 : /*
1738 : * Latch set by signal handler, or new connection pending on any of
1739 : * our sockets? If the latter, fork a child process to deal with it.
1740 : */
1741 108452 : for (int i = 0; i < nevents; i++)
1742 : {
1743 54866 : if (events[i].events & WL_LATCH_SET)
1744 34940 : ResetLatch(MyLatch);
1745 :
1746 : /*
1747 : * The following requests are handled unconditionally, even if we
1748 : * didn't see WL_LATCH_SET. This gives high priority to shutdown
1749 : * and reload requests where the latch happens to appear later in
1750 : * events[] or will be reported by a later call to
1751 : * WaitEventSetWait().
1752 : */
1753 54866 : if (pending_pm_shutdown_request)
1754 1268 : process_pm_shutdown_request();
1755 54866 : if (pending_pm_reload_request)
1756 208 : process_pm_reload_request();
1757 54866 : if (pending_pm_child_exit)
1758 28856 : process_pm_child_exit();
1759 53588 : if (pending_pm_pmsignal)
1760 4756 : process_pm_pmsignal();
1761 :
1762 53588 : if (events[i].events & WL_SOCKET_ACCEPT)
1763 : {
1764 : Port *port;
1765 :
1766 19926 : port = ConnCreate(events[i].fd);
1767 19926 : if (port)
1768 : {
1769 19926 : BackendStartup(port);
1770 :
1771 : /*
1772 : * We no longer need the open socket or port structure in
1773 : * this process
1774 : */
1775 19924 : StreamClose(port->sock);
1776 19924 : ConnFree(port);
1777 : }
1778 : }
1779 : }
1780 :
1781 : /* If we have lost the log collector, try to start a new one */
1782 53586 : if (SysLoggerPID == 0 && Logging_collector)
1783 0 : SysLoggerPID = SysLogger_Start();
1784 :
1785 : /*
1786 : * If no background writer process is running, and we are not in a
1787 : * state that prevents it, start one. It doesn't matter if this
1788 : * fails, we'll just try again later. Likewise for the checkpointer.
1789 : */
1790 53586 : if (pmState == PM_RUN || pmState == PM_RECOVERY ||
1791 8942 : pmState == PM_HOT_STANDBY || pmState == PM_STARTUP)
1792 : {
1793 47634 : if (CheckpointerPID == 0)
1794 8 : CheckpointerPID = StartCheckpointer();
1795 47634 : if (BgWriterPID == 0)
1796 8 : BgWriterPID = StartBackgroundWriter();
1797 : }
1798 :
1799 : /*
1800 : * Likewise, if we have lost the walwriter process, try to start a new
1801 : * one. But this is needed only in normal operation (else we cannot
1802 : * be writing any new WAL).
1803 : */
1804 53586 : if (WalWriterPID == 0 && pmState == PM_RUN)
1805 0 : WalWriterPID = StartWalWriter();
1806 :
1807 : /*
1808 : * If we have lost the autovacuum launcher, try to start a new one. We
1809 : * don't want autovacuum to run in binary upgrade mode because
1810 : * autovacuum might update relfrozenxid for empty tables before the
1811 : * physical files are put in place.
1812 : */
1813 62738 : if (!IsBinaryUpgrade && AutoVacPID == 0 &&
1814 12386 : (AutoVacuumingActive() || start_autovac_launcher) &&
1815 5918 : pmState == PM_RUN)
1816 : {
1817 0 : AutoVacPID = StartAutoVacLauncher();
1818 0 : if (AutoVacPID != 0)
1819 0 : start_autovac_launcher = false; /* signal processed */
1820 : }
1821 :
1822 : /* If we have lost the archiver, try to start a new one. */
1823 53586 : if (PgArchPID == 0 && PgArchStartupAllowed())
1824 0 : PgArchPID = StartArchiver();
1825 :
1826 : /* If we need to signal the autovacuum launcher, do so now */
1827 53586 : if (avlauncher_needs_signal)
1828 : {
1829 0 : avlauncher_needs_signal = false;
1830 0 : if (AutoVacPID != 0)
1831 0 : kill(AutoVacPID, SIGUSR2);
1832 : }
1833 :
1834 : /* If we need to start a WAL receiver, try to do that now */
1835 53586 : if (WalReceiverRequested)
1836 324 : MaybeStartWalReceiver();
1837 :
1838 : /* Get other worker processes running, if needed */
1839 53586 : if (StartWorkerNeeded || HaveCrashedWorker)
1840 5704 : maybe_start_bgworkers();
1841 :
1842 : #ifdef HAVE_PTHREAD_IS_THREADED_NP
1843 :
1844 : /*
1845 : * With assertions enabled, check regularly for appearance of
1846 : * additional threads. All builds check at start and exit.
1847 : */
1848 : Assert(pthread_is_threaded_np() == 0);
1849 : #endif
1850 :
1851 : /*
1852 : * Lastly, check to see if it's time to do some things that we don't
1853 : * want to do every single time through the loop, because they're a
1854 : * bit expensive. Note that there's up to a minute of slop in when
1855 : * these tasks will be performed, since DetermineSleepTime() will let
1856 : * us sleep at most that long; except for SIGKILL timeout which has
1857 : * special-case logic there.
1858 : */
1859 53586 : now = time(NULL);
1860 :
1861 : /*
1862 : * If we already sent SIGQUIT to children and they are slow to shut
1863 : * down, it's time to send them SIGKILL (or SIGABRT if requested).
1864 : * This doesn't happen normally, but under certain conditions backends
1865 : * can get stuck while shutting down. This is a last measure to get
1866 : * them unwedged.
1867 : *
1868 : * Note we also do this during recovery from a process crash.
1869 : */
1870 53586 : if ((Shutdown >= ImmediateShutdown || FatalError) &&
1871 2026 : AbortStartTime != 0 &&
1872 2018 : (now - AbortStartTime) >= SIGKILL_CHILDREN_AFTER_SECS)
1873 : {
1874 : /* We were gentle with them before. Not anymore */
1875 0 : ereport(LOG,
1876 : /* translator: %s is SIGKILL or SIGABRT */
1877 : (errmsg("issuing %s to recalcitrant children",
1878 : send_abort_for_kill ? "SIGABRT" : "SIGKILL")));
1879 0 : TerminateChildren(send_abort_for_kill ? SIGABRT : SIGKILL);
1880 : /* reset flag so we don't SIGKILL again */
1881 0 : AbortStartTime = 0;
1882 : }
1883 :
1884 : /*
1885 : * Once a minute, verify that postmaster.pid hasn't been removed or
1886 : * overwritten. If it has, we force a shutdown. This avoids having
1887 : * postmasters and child processes hanging around after their database
1888 : * is gone, and maybe causing problems if a new database cluster is
1889 : * created in the same place. It also provides some protection
1890 : * against a DBA foolishly removing postmaster.pid and manually
1891 : * starting a new postmaster. Data corruption is likely to ensue from
1892 : * that anyway, but we can minimize the damage by aborting ASAP.
1893 : */
1894 53586 : if (now - last_lockfile_recheck_time >= 1 * SECS_PER_MINUTE)
1895 : {
1896 10 : if (!RecheckDataDirLockFile())
1897 : {
1898 0 : ereport(LOG,
1899 : (errmsg("performing immediate shutdown because data directory lock file is invalid")));
1900 0 : kill(MyProcPid, SIGQUIT);
1901 : }
1902 10 : last_lockfile_recheck_time = now;
1903 : }
1904 :
1905 : /*
1906 : * Touch Unix socket and lock files every 58 minutes, to ensure that
1907 : * they are not removed by overzealous /tmp-cleaning tasks. We assume
1908 : * no one runs cleaners with cutoff times of less than an hour ...
1909 : */
1910 53586 : if (now - last_touch_time >= 58 * SECS_PER_MINUTE)
1911 : {
1912 0 : TouchSocketFiles();
1913 0 : TouchSocketLockFiles();
1914 0 : last_touch_time = now;
1915 : }
1916 : }
1917 : }
1918 :
1919 : /*
1920 : * Read a client's startup packet and do something according to it.
1921 : *
1922 : * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
1923 : * not return at all.
1924 : *
1925 : * (Note that ereport(FATAL) stuff is sent to the client, so only use it
1926 : * if that's what you want. Return STATUS_ERROR if you don't want to
1927 : * send anything to the client, which would typically be appropriate
1928 : * if we detect a communications failure.)
1929 : *
1930 : * Set ssl_done and/or gss_done when negotiation of an encrypted layer
1931 : * (currently, TLS or GSSAPI) is completed. A successful negotiation of either
1932 : * encryption layer sets both flags, but a rejected negotiation sets only the
1933 : * flag for that layer, since the client may wish to try the other one. We
1934 : * should make no assumption here about the order in which the client may make
1935 : * requests.
1936 : */
1937 : static int
1938 20086 : ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done)
1939 : {
1940 : int32 len;
1941 : char *buf;
1942 : ProtocolVersion proto;
1943 : MemoryContext oldcontext;
1944 :
1945 20086 : pq_startmsgread();
1946 :
1947 : /*
1948 : * Grab the first byte of the length word separately, so that we can tell
1949 : * whether we have no data at all or an incomplete packet. (This might
1950 : * sound inefficient, but it's not really, because of buffering in
1951 : * pqcomm.c.)
1952 : */
1953 20086 : if (pq_getbytes((char *) &len, 1) == EOF)
1954 : {
1955 : /*
1956 : * If we get no data at all, don't clutter the log with a complaint;
1957 : * such cases often occur for legitimate reasons. An example is that
1958 : * we might be here after responding to NEGOTIATE_SSL_CODE, and if the
1959 : * client didn't like our response, it'll probably just drop the
1960 : * connection. Service-monitoring software also often just opens and
1961 : * closes a connection without sending anything. (So do port
1962 : * scanners, which may be less benign, but it's not really our job to
1963 : * notice those.)
1964 : */
1965 26 : return STATUS_ERROR;
1966 : }
1967 :
1968 20060 : if (pq_getbytes(((char *) &len) + 1, 3) == EOF)
1969 : {
1970 : /* Got a partial length word, so bleat about that */
1971 0 : if (!ssl_done && !gss_done)
1972 0 : ereport(COMMERROR,
1973 : (errcode(ERRCODE_PROTOCOL_VIOLATION),
1974 : errmsg("incomplete startup packet")));
1975 0 : return STATUS_ERROR;
1976 : }
1977 :
1978 20060 : len = pg_ntoh32(len);
1979 20060 : len -= 4;
1980 :
1981 20060 : if (len < (int32) sizeof(ProtocolVersion) ||
1982 20060 : len > MAX_STARTUP_PACKET_LENGTH)
1983 : {
1984 0 : ereport(COMMERROR,
1985 : (errcode(ERRCODE_PROTOCOL_VIOLATION),
1986 : errmsg("invalid length of startup packet")));
1987 0 : return STATUS_ERROR;
1988 : }
1989 :
1990 : /*
1991 : * Allocate space to hold the startup packet, plus one extra byte that's
1992 : * initialized to be zero. This ensures we will have null termination of
1993 : * all strings inside the packet.
1994 : */
1995 20060 : buf = palloc(len + 1);
1996 20060 : buf[len] = '\0';
1997 :
1998 20060 : if (pq_getbytes(buf, len) == EOF)
1999 : {
2000 0 : ereport(COMMERROR,
2001 : (errcode(ERRCODE_PROTOCOL_VIOLATION),
2002 : errmsg("incomplete startup packet")));
2003 0 : return STATUS_ERROR;
2004 : }
2005 20060 : pq_endmsgread();
2006 :
2007 : /*
2008 : * The first field is either a protocol version number or a special
2009 : * request code.
2010 : */
2011 20060 : port->proto = proto = pg_ntoh32(*((ProtocolVersion *) buf));
2012 :
2013 20060 : if (proto == CANCEL_REQUEST_CODE)
2014 : {
2015 6 : if (len != sizeof(CancelRequestPacket))
2016 : {
2017 0 : ereport(COMMERROR,
2018 : (errcode(ERRCODE_PROTOCOL_VIOLATION),
2019 : errmsg("invalid length of startup packet")));
2020 0 : return STATUS_ERROR;
2021 : }
2022 6 : processCancelRequest(port, buf);
2023 : /* Not really an error, but we don't want to proceed further */
2024 6 : return STATUS_ERROR;
2025 : }
2026 :
2027 20054 : if (proto == NEGOTIATE_SSL_CODE && !ssl_done)
2028 : {
2029 : char SSLok;
2030 :
2031 : #ifdef USE_SSL
2032 : /* No SSL when disabled or on Unix sockets */
2033 514 : if (!LoadedSSL || port->laddr.addr.ss_family == AF_UNIX)
2034 286 : SSLok = 'N';
2035 : else
2036 228 : SSLok = 'S'; /* Support for SSL */
2037 : #else
2038 : SSLok = 'N'; /* No support for SSL */
2039 : #endif
2040 :
2041 514 : retry1:
2042 514 : if (send(port->sock, &SSLok, 1, 0) != 1)
2043 : {
2044 0 : if (errno == EINTR)
2045 0 : goto retry1; /* if interrupted, just retry */
2046 0 : ereport(COMMERROR,
2047 : (errcode_for_socket_access(),
2048 : errmsg("failed to send SSL negotiation response: %m")));
2049 0 : return STATUS_ERROR; /* close the connection */
2050 : }
2051 :
2052 : #ifdef USE_SSL
2053 514 : if (SSLok == 'S' && secure_open_server(port) == -1)
2054 36 : return STATUS_ERROR;
2055 : #endif
2056 :
2057 : /*
2058 : * At this point we should have no data already buffered. If we do,
2059 : * it was received before we performed the SSL handshake, so it wasn't
2060 : * encrypted and indeed may have been injected by a man-in-the-middle.
2061 : * We report this case to the client.
2062 : */
2063 478 : if (pq_buffer_has_data())
2064 0 : ereport(FATAL,
2065 : (errcode(ERRCODE_PROTOCOL_VIOLATION),
2066 : errmsg("received unencrypted data after SSL request"),
2067 : errdetail("This could be either a client-software bug or evidence of an attempted man-in-the-middle attack.")));
2068 :
2069 : /*
2070 : * regular startup packet, cancel, etc packet should follow, but not
2071 : * another SSL negotiation request, and a GSS request should only
2072 : * follow if SSL was rejected (client may negotiate in either order)
2073 : */
2074 478 : return ProcessStartupPacket(port, true, SSLok == 'S');
2075 : }
2076 19540 : else if (proto == NEGOTIATE_GSS_CODE && !gss_done)
2077 : {
2078 0 : char GSSok = 'N';
2079 :
2080 : #ifdef ENABLE_GSS
2081 : /* No GSSAPI encryption when on Unix socket */
2082 : if (port->laddr.addr.ss_family != AF_UNIX)
2083 : GSSok = 'G';
2084 : #endif
2085 :
2086 0 : while (send(port->sock, &GSSok, 1, 0) != 1)
2087 : {
2088 0 : if (errno == EINTR)
2089 0 : continue;
2090 0 : ereport(COMMERROR,
2091 : (errcode_for_socket_access(),
2092 : errmsg("failed to send GSSAPI negotiation response: %m")));
2093 0 : return STATUS_ERROR; /* close the connection */
2094 : }
2095 :
2096 : #ifdef ENABLE_GSS
2097 : if (GSSok == 'G' && secure_open_gssapi(port) == -1)
2098 : return STATUS_ERROR;
2099 : #endif
2100 :
2101 : /*
2102 : * At this point we should have no data already buffered. If we do,
2103 : * it was received before we performed the GSS handshake, so it wasn't
2104 : * encrypted and indeed may have been injected by a man-in-the-middle.
2105 : * We report this case to the client.
2106 : */
2107 0 : if (pq_buffer_has_data())
2108 0 : ereport(FATAL,
2109 : (errcode(ERRCODE_PROTOCOL_VIOLATION),
2110 : errmsg("received unencrypted data after GSSAPI encryption request"),
2111 : errdetail("This could be either a client-software bug or evidence of an attempted man-in-the-middle attack.")));
2112 :
2113 : /*
2114 : * regular startup packet, cancel, etc packet should follow, but not
2115 : * another GSS negotiation request, and an SSL request should only
2116 : * follow if GSS was rejected (client may negotiate in either order)
2117 : */
2118 0 : return ProcessStartupPacket(port, GSSok == 'G', true);
2119 : }
2120 :
2121 : /* Could add additional special packet types here */
2122 :
2123 : /*
2124 : * Set FrontendProtocol now so that ereport() knows what format to send if
2125 : * we fail during startup.
2126 : */
2127 19540 : FrontendProtocol = proto;
2128 :
2129 : /* Check that the major protocol version is in range. */
2130 19540 : if (PG_PROTOCOL_MAJOR(proto) < PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST) ||
2131 19540 : PG_PROTOCOL_MAJOR(proto) > PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST))
2132 0 : ereport(FATAL,
2133 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2134 : errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u",
2135 : PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto),
2136 : PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST),
2137 : PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST),
2138 : PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST))));
2139 :
2140 : /*
2141 : * Now fetch parameters out of startup packet and save them into the Port
2142 : * structure. All data structures attached to the Port struct must be
2143 : * allocated in TopMemoryContext so that they will remain available in a
2144 : * running backend (even after PostmasterContext is destroyed). We need
2145 : * not worry about leaking this storage on failure, since we aren't in the
2146 : * postmaster process anymore.
2147 : */
2148 19540 : oldcontext = MemoryContextSwitchTo(TopMemoryContext);
2149 :
2150 : /* Handle protocol version 3 startup packet */
2151 : {
2152 19540 : int32 offset = sizeof(ProtocolVersion);
2153 19540 : List *unrecognized_protocol_options = NIL;
2154 :
2155 : /*
2156 : * Scan packet body for name/option pairs. We can assume any string
2157 : * beginning within the packet body is null-terminated, thanks to
2158 : * zeroing extra byte above.
2159 : */
2160 19540 : port->guc_options = NIL;
2161 :
2162 95358 : while (offset < len)
2163 : {
2164 95358 : char *nameptr = buf + offset;
2165 : int32 valoffset;
2166 : char *valptr;
2167 :
2168 95358 : if (*nameptr == '\0')
2169 19540 : break; /* found packet terminator */
2170 75818 : valoffset = offset + strlen(nameptr) + 1;
2171 75818 : if (valoffset >= len)
2172 0 : break; /* missing value, will complain below */
2173 75818 : valptr = buf + valoffset;
2174 :
2175 75818 : if (strcmp(nameptr, "database") == 0)
2176 19540 : port->database_name = pstrdup(valptr);
2177 56278 : else if (strcmp(nameptr, "user") == 0)
2178 19540 : port->user_name = pstrdup(valptr);
2179 36738 : else if (strcmp(nameptr, "options") == 0)
2180 5798 : port->cmdline_options = pstrdup(valptr);
2181 30940 : else if (strcmp(nameptr, "replication") == 0)
2182 : {
2183 : /*
2184 : * Due to backward compatibility concerns the replication
2185 : * parameter is a hybrid beast which allows the value to be
2186 : * either boolean or the string 'database'. The latter
2187 : * connects to a specific database which is e.g. required for
2188 : * logical decoding while.
2189 : */
2190 1732 : if (strcmp(valptr, "database") == 0)
2191 : {
2192 1030 : am_walsender = true;
2193 1030 : am_db_walsender = true;
2194 : }
2195 702 : else if (!parse_bool(valptr, &am_walsender))
2196 0 : ereport(FATAL,
2197 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2198 : errmsg("invalid value for parameter \"%s\": \"%s\"",
2199 : "replication",
2200 : valptr),
2201 : errhint("Valid values are: \"false\", 0, \"true\", 1, \"database\".")));
2202 : }
2203 29208 : else if (strncmp(nameptr, "_pq_.", 5) == 0)
2204 : {
2205 : /*
2206 : * Any option beginning with _pq_. is reserved for use as a
2207 : * protocol-level option, but at present no such options are
2208 : * defined.
2209 : */
2210 : unrecognized_protocol_options =
2211 0 : lappend(unrecognized_protocol_options, pstrdup(nameptr));
2212 : }
2213 : else
2214 : {
2215 : /* Assume it's a generic GUC option */
2216 29208 : port->guc_options = lappend(port->guc_options,
2217 29208 : pstrdup(nameptr));
2218 29208 : port->guc_options = lappend(port->guc_options,
2219 29208 : pstrdup(valptr));
2220 :
2221 : /*
2222 : * Copy application_name to port if we come across it. This
2223 : * is done so we can log the application_name in the
2224 : * connection authorization message. Note that the GUC would
2225 : * be used but we haven't gone through GUC setup yet.
2226 : */
2227 29208 : if (strcmp(nameptr, "application_name") == 0)
2228 : {
2229 19538 : port->application_name = pg_clean_ascii(valptr, 0);
2230 : }
2231 : }
2232 75818 : offset = valoffset + strlen(valptr) + 1;
2233 : }
2234 :
2235 : /*
2236 : * If we didn't find a packet terminator exactly at the end of the
2237 : * given packet length, complain.
2238 : */
2239 19540 : if (offset != len - 1)
2240 0 : ereport(FATAL,
2241 : (errcode(ERRCODE_PROTOCOL_VIOLATION),
2242 : errmsg("invalid startup packet layout: expected terminator as last byte")));
2243 :
2244 : /*
2245 : * If the client requested a newer protocol version or if the client
2246 : * requested any protocol options we didn't recognize, let them know
2247 : * the newest minor protocol version we do support and the names of
2248 : * any unrecognized options.
2249 : */
2250 19540 : if (PG_PROTOCOL_MINOR(proto) > PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST) ||
2251 : unrecognized_protocol_options != NIL)
2252 0 : SendNegotiateProtocolVersion(unrecognized_protocol_options);
2253 : }
2254 :
2255 : /* Check a user name was given. */
2256 19540 : if (port->user_name == NULL || port->user_name[0] == '\0')
2257 0 : ereport(FATAL,
2258 : (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
2259 : errmsg("no PostgreSQL user name specified in startup packet")));
2260 :
2261 : /* The database defaults to the user name. */
2262 19540 : if (port->database_name == NULL || port->database_name[0] == '\0')
2263 0 : port->database_name = pstrdup(port->user_name);
2264 :
2265 19540 : if (am_walsender)
2266 1732 : MyBackendType = B_WAL_SENDER;
2267 : else
2268 17808 : MyBackendType = B_BACKEND;
2269 :
2270 : /*
2271 : * Normal walsender backends, e.g. for streaming replication, are not
2272 : * connected to a particular database. But walsenders used for logical
2273 : * replication need to connect to a specific database. We allow streaming
2274 : * replication commands to be issued even if connected to a database as it
2275 : * can make sense to first make a basebackup and then stream changes
2276 : * starting from that.
2277 : */
2278 19540 : if (am_walsender && !am_db_walsender)
2279 702 : port->database_name[0] = '\0';
2280 :
2281 : /*
2282 : * Done putting stuff in TopMemoryContext.
2283 : */
2284 19540 : MemoryContextSwitchTo(oldcontext);
2285 :
2286 19540 : return STATUS_OK;
2287 : }
2288 :
2289 : /*
2290 : * Send a NegotiateProtocolVersion to the client. This lets the client know
2291 : * that they have requested a newer minor protocol version than we are able
2292 : * to speak. We'll speak the highest version we know about; the client can,
2293 : * of course, abandon the connection if that's a problem.
2294 : *
2295 : * We also include in the response a list of protocol options we didn't
2296 : * understand. This allows clients to include optional parameters that might
2297 : * be present either in newer protocol versions or third-party protocol
2298 : * extensions without fear of having to reconnect if those options are not
2299 : * understood, while at the same time making certain that the client is aware
2300 : * of which options were actually accepted.
2301 : */
2302 : static void
2303 0 : SendNegotiateProtocolVersion(List *unrecognized_protocol_options)
2304 : {
2305 : StringInfoData buf;
2306 : ListCell *lc;
2307 :
2308 0 : pq_beginmessage(&buf, PqMsg_NegotiateProtocolVersion);
2309 0 : pq_sendint32(&buf, PG_PROTOCOL_LATEST);
2310 0 : pq_sendint32(&buf, list_length(unrecognized_protocol_options));
2311 0 : foreach(lc, unrecognized_protocol_options)
2312 0 : pq_sendstring(&buf, lfirst(lc));
2313 0 : pq_endmessage(&buf);
2314 :
2315 : /* no need to flush, some other message will follow */
2316 0 : }
2317 :
2318 : /*
2319 : * The client has sent a cancel request packet, not a normal
2320 : * start-a-new-connection packet. Perform the necessary processing.
2321 : * Nothing is sent back to the client.
2322 : */
2323 : static void
2324 6 : processCancelRequest(Port *port, void *pkt)
2325 : {
2326 6 : CancelRequestPacket *canc = (CancelRequestPacket *) pkt;
2327 : int backendPID;
2328 : int32 cancelAuthCode;
2329 : Backend *bp;
2330 :
2331 : #ifndef EXEC_BACKEND
2332 : dlist_iter iter;
2333 : #else
2334 : int i;
2335 : #endif
2336 :
2337 6 : backendPID = (int) pg_ntoh32(canc->backendPID);
2338 6 : cancelAuthCode = (int32) pg_ntoh32(canc->cancelAuthCode);
2339 :
2340 : /*
2341 : * See if we have a matching backend. In the EXEC_BACKEND case, we can no
2342 : * longer access the postmaster's own backend list, and must rely on the
2343 : * duplicate array in shared memory.
2344 : */
2345 : #ifndef EXEC_BACKEND
2346 8 : dlist_foreach(iter, &BackendList)
2347 : {
2348 8 : bp = dlist_container(Backend, elem, iter.cur);
2349 : #else
2350 : for (i = MaxLivePostmasterChildren() - 1; i >= 0; i--)
2351 : {
2352 : bp = (Backend *) &ShmemBackendArray[i];
2353 : #endif
2354 8 : if (bp->pid == backendPID)
2355 : {
2356 6 : if (bp->cancel_key == cancelAuthCode)
2357 : {
2358 : /* Found a match; signal that backend to cancel current op */
2359 6 : ereport(DEBUG2,
2360 : (errmsg_internal("processing cancel request: sending SIGINT to process %d",
2361 : backendPID)));
2362 6 : signal_child(bp->pid, SIGINT);
2363 : }
2364 : else
2365 : /* Right PID, wrong key: no way, Jose */
2366 0 : ereport(LOG,
2367 : (errmsg("wrong key in cancel request for process %d",
2368 : backendPID)));
2369 6 : return;
2370 : }
2371 : #ifndef EXEC_BACKEND /* make GNU Emacs 26.1 see brace balance */
2372 : }
2373 : #else
2374 : }
2375 : #endif
2376 :
2377 : /* No matching backend */
2378 0 : ereport(LOG,
2379 : (errmsg("PID %d in cancel request did not match any process",
2380 : backendPID)));
2381 : }
2382 :
2383 : /*
2384 : * canAcceptConnections --- check to see if database state allows connections
2385 : * of the specified type. backend_type can be BACKEND_TYPE_NORMAL,
2386 : * BACKEND_TYPE_AUTOVAC, or BACKEND_TYPE_BGWORKER. (Note that we don't yet
2387 : * know whether a NORMAL connection might turn into a walsender.)
2388 : */
2389 : static CAC_state
2390 24384 : canAcceptConnections(int backend_type)
2391 : {
2392 24384 : CAC_state result = CAC_OK;
2393 :
2394 : /*
2395 : * Can't start backends when in startup/shutdown/inconsistent recovery
2396 : * state. We treat autovac workers the same as user backends for this
2397 : * purpose. However, bgworkers are excluded from this test; we expect
2398 : * bgworker_should_start_now() decided whether the DB state allows them.
2399 : */
2400 24384 : if (pmState != PM_RUN && pmState != PM_HOT_STANDBY &&
2401 : backend_type != BACKEND_TYPE_BGWORKER)
2402 : {
2403 194 : if (Shutdown > NoShutdown)
2404 2 : return CAC_SHUTDOWN; /* shutdown is pending */
2405 192 : else if (!FatalError && pmState == PM_STARTUP)
2406 184 : return CAC_STARTUP; /* normal startup */
2407 8 : else if (!FatalError && pmState == PM_RECOVERY)
2408 8 : return CAC_NOTCONSISTENT; /* not yet at consistent recovery
2409 : * state */
2410 : else
2411 0 : return CAC_RECOVERY; /* else must be crash recovery */
2412 : }
2413 :
2414 : /*
2415 : * "Smart shutdown" restrictions are applied only to normal connections,
2416 : * not to autovac workers or bgworkers.
2417 : */
2418 24190 : if (!connsAllowed && backend_type == BACKEND_TYPE_NORMAL)
2419 0 : return CAC_SHUTDOWN; /* shutdown is pending */
2420 :
2421 : /*
2422 : * Don't start too many children.
2423 : *
2424 : * We allow more connections here than we can have backends because some
2425 : * might still be authenticating; they might fail auth, or some existing
2426 : * backend might exit before the auth cycle is completed. The exact
2427 : * MaxBackends limit is enforced when a new backend tries to join the
2428 : * shared-inval backend array.
2429 : *
2430 : * The limit here must match the sizes of the per-child-process arrays;
2431 : * see comments for MaxLivePostmasterChildren().
2432 : */
2433 24190 : if (CountChildren(BACKEND_TYPE_ALL) >= MaxLivePostmasterChildren())
2434 0 : result = CAC_TOOMANY;
2435 :
2436 24190 : return result;
2437 : }
2438 :
2439 :
2440 : /*
2441 : * ConnCreate -- create a local connection data structure
2442 : *
2443 : * Returns NULL on failure, other than out-of-memory which is fatal.
2444 : */
2445 : static Port *
2446 19926 : ConnCreate(int serverFd)
2447 : {
2448 : Port *port;
2449 :
2450 19926 : if (!(port = (Port *) calloc(1, sizeof(Port))))
2451 : {
2452 0 : ereport(LOG,
2453 : (errcode(ERRCODE_OUT_OF_MEMORY),
2454 : errmsg("out of memory")));
2455 0 : ExitPostmaster(1);
2456 : }
2457 :
2458 19926 : if (StreamConnection(serverFd, port) != STATUS_OK)
2459 : {
2460 0 : if (port->sock != PGINVALID_SOCKET)
2461 0 : StreamClose(port->sock);
2462 0 : ConnFree(port);
2463 0 : return NULL;
2464 : }
2465 :
2466 19926 : return port;
2467 : }
2468 :
2469 :
2470 : /*
2471 : * ConnFree -- free a local connection data structure
2472 : *
2473 : * Caller has already closed the socket if any, so there's not much
2474 : * to do here.
2475 : */
2476 : static void
2477 19924 : ConnFree(Port *port)
2478 : {
2479 19924 : free(port);
2480 19924 : }
2481 :
2482 :
2483 : /*
2484 : * ClosePostmasterPorts -- close all the postmaster's open sockets
2485 : *
2486 : * This is called during child process startup to release file descriptors
2487 : * that are not needed by that child process. The postmaster still has
2488 : * them open, of course.
2489 : *
2490 : * Note: we pass am_syslogger as a boolean because we don't want to set
2491 : * the global variable yet when this is called.
2492 : */
2493 : void
2494 27910 : ClosePostmasterPorts(bool am_syslogger)
2495 : {
2496 : /* Release resources held by the postmaster's WaitEventSet. */
2497 27910 : if (pm_wait_set)
2498 : {
2499 25242 : FreeWaitEventSetAfterFork(pm_wait_set);
2500 25242 : pm_wait_set = NULL;
2501 : }
2502 :
2503 : #ifndef WIN32
2504 :
2505 : /*
2506 : * Close the write end of postmaster death watch pipe. It's important to
2507 : * do this as early as possible, so that if postmaster dies, others won't
2508 : * think that it's still running because we're holding the pipe open.
2509 : */
2510 27910 : if (close(postmaster_alive_fds[POSTMASTER_FD_OWN]) != 0)
2511 0 : ereport(FATAL,
2512 : (errcode_for_file_access(),
2513 : errmsg_internal("could not close postmaster death monitoring pipe in child process: %m")));
2514 27910 : postmaster_alive_fds[POSTMASTER_FD_OWN] = -1;
2515 : /* Notify fd.c that we released one pipe FD. */
2516 27910 : ReleaseExternalFD();
2517 : #endif
2518 :
2519 : /*
2520 : * Close the postmaster's listen sockets. These aren't tracked by fd.c,
2521 : * so we don't call ReleaseExternalFD() here.
2522 : *
2523 : * The listen sockets are marked as FD_CLOEXEC, so this isn't needed in
2524 : * EXEC_BACKEND mode.
2525 : */
2526 : #ifndef EXEC_BACKEND
2527 27910 : if (ListenSockets)
2528 : {
2529 56602 : for (int i = 0; i < NumListenSockets; i++)
2530 28694 : StreamClose(ListenSockets[i]);
2531 27908 : pfree(ListenSockets);
2532 : }
2533 27910 : NumListenSockets = 0;
2534 27910 : ListenSockets = NULL;
2535 : #endif
2536 :
2537 : /*
2538 : * If using syslogger, close the read side of the pipe. We don't bother
2539 : * tracking this in fd.c, either.
2540 : */
2541 27910 : if (!am_syslogger)
2542 : {
2543 : #ifndef WIN32
2544 27908 : if (syslogPipe[0] >= 0)
2545 28 : close(syslogPipe[0]);
2546 27908 : syslogPipe[0] = -1;
2547 : #else
2548 : if (syslogPipe[0])
2549 : CloseHandle(syslogPipe[0]);
2550 : syslogPipe[0] = 0;
2551 : #endif
2552 : }
2553 :
2554 : #ifdef USE_BONJOUR
2555 : /* If using Bonjour, close the connection to the mDNS daemon */
2556 : if (bonjour_sdref)
2557 : close(DNSServiceRefSockFD(bonjour_sdref));
2558 : #endif
2559 27910 : }
2560 :
2561 :
2562 : /*
2563 : * InitProcessGlobals -- set MyProcPid, MyStartTime[stamp], random seeds
2564 : *
2565 : * Called early in the postmaster and every backend.
2566 : */
2567 : void
2568 29534 : InitProcessGlobals(void)
2569 : {
2570 29534 : MyProcPid = getpid();
2571 29534 : MyStartTimestamp = GetCurrentTimestamp();
2572 29534 : MyStartTime = timestamptz_to_time_t(MyStartTimestamp);
2573 :
2574 : /*
2575 : * Set a different global seed in every process. We want something
2576 : * unpredictable, so if possible, use high-quality random bits for the
2577 : * seed. Otherwise, fall back to a seed based on timestamp and PID.
2578 : */
2579 29534 : if (unlikely(!pg_prng_strong_seed(&pg_global_prng_state)))
2580 : {
2581 : uint64 rseed;
2582 :
2583 : /*
2584 : * Since PIDs and timestamps tend to change more frequently in their
2585 : * least significant bits, shift the timestamp left to allow a larger
2586 : * total number of seeds in a given time period. Since that would
2587 : * leave only 20 bits of the timestamp that cycle every ~1 second,
2588 : * also mix in some higher bits.
2589 : */
2590 0 : rseed = ((uint64) MyProcPid) ^
2591 0 : ((uint64) MyStartTimestamp << 12) ^
2592 0 : ((uint64) MyStartTimestamp >> 20);
2593 :
2594 0 : pg_prng_seed(&pg_global_prng_state, rseed);
2595 : }
2596 :
2597 : /*
2598 : * Also make sure that we've set a good seed for random(3). Use of that
2599 : * is deprecated in core Postgres, but extensions might use it.
2600 : */
2601 : #ifndef WIN32
2602 29534 : srandom(pg_prng_uint32(&pg_global_prng_state));
2603 : #endif
2604 29534 : }
2605 :
2606 : /*
2607 : * Child processes use SIGUSR1 to notify us of 'pmsignals'. pg_ctl uses
2608 : * SIGUSR1 to ask postmaster to check for logrotate and promote files.
2609 : */
2610 : static void
2611 4930 : handle_pm_pmsignal_signal(SIGNAL_ARGS)
2612 : {
2613 4930 : int save_errno = errno;
2614 :
2615 4930 : pending_pm_pmsignal = true;
2616 4930 : SetLatch(MyLatch);
2617 :
2618 4930 : errno = save_errno;
2619 4930 : }
2620 :
2621 : /*
2622 : * pg_ctl uses SIGHUP to request a reload of the configuration files.
2623 : */
2624 : static void
2625 208 : handle_pm_reload_request_signal(SIGNAL_ARGS)
2626 : {
2627 208 : int save_errno = errno;
2628 :
2629 208 : pending_pm_reload_request = true;
2630 208 : SetLatch(MyLatch);
2631 :
2632 208 : errno = save_errno;
2633 208 : }
2634 :
2635 : /*
2636 : * Re-read config files, and tell children to do same.
2637 : */
2638 : static void
2639 208 : process_pm_reload_request(void)
2640 : {
2641 208 : pending_pm_reload_request = false;
2642 :
2643 208 : ereport(DEBUG2,
2644 : (errmsg_internal("postmaster received reload request signal")));
2645 :
2646 208 : if (Shutdown <= SmartShutdown)
2647 : {
2648 208 : ereport(LOG,
2649 : (errmsg("received SIGHUP, reloading configuration files")));
2650 208 : ProcessConfigFile(PGC_SIGHUP);
2651 208 : SignalChildren(SIGHUP);
2652 208 : if (StartupPID != 0)
2653 36 : signal_child(StartupPID, SIGHUP);
2654 208 : if (BgWriterPID != 0)
2655 208 : signal_child(BgWriterPID, SIGHUP);
2656 208 : if (CheckpointerPID != 0)
2657 208 : signal_child(CheckpointerPID, SIGHUP);
2658 208 : if (WalWriterPID != 0)
2659 172 : signal_child(WalWriterPID, SIGHUP);
2660 208 : if (WalReceiverPID != 0)
2661 32 : signal_child(WalReceiverPID, SIGHUP);
2662 208 : if (AutoVacPID != 0)
2663 168 : signal_child(AutoVacPID, SIGHUP);
2664 208 : if (PgArchPID != 0)
2665 8 : signal_child(PgArchPID, SIGHUP);
2666 208 : if (SysLoggerPID != 0)
2667 0 : signal_child(SysLoggerPID, SIGHUP);
2668 :
2669 : /* Reload authentication config files too */
2670 208 : if (!load_hba())
2671 0 : ereport(LOG,
2672 : /* translator: %s is a configuration file */
2673 : (errmsg("%s was not reloaded", HbaFileName)));
2674 :
2675 208 : if (!load_ident())
2676 0 : ereport(LOG,
2677 : (errmsg("%s was not reloaded", IdentFileName)));
2678 :
2679 : #ifdef USE_SSL
2680 : /* Reload SSL configuration as well */
2681 208 : if (EnableSSL)
2682 : {
2683 0 : if (secure_initialize(false) == 0)
2684 0 : LoadedSSL = true;
2685 : else
2686 0 : ereport(LOG,
2687 : (errmsg("SSL configuration was not reloaded")));
2688 : }
2689 : else
2690 : {
2691 208 : secure_destroy();
2692 208 : LoadedSSL = false;
2693 : }
2694 : #endif
2695 :
2696 : #ifdef EXEC_BACKEND
2697 : /* Update the starting-point file for future children */
2698 : write_nondefault_variables(PGC_SIGHUP);
2699 : #endif
2700 : }
2701 208 : }
2702 :
2703 : /*
2704 : * pg_ctl uses SIGTERM, SIGINT and SIGQUIT to request different types of
2705 : * shutdown.
2706 : */
2707 : static void
2708 1268 : handle_pm_shutdown_request_signal(SIGNAL_ARGS)
2709 : {
2710 1268 : int save_errno = errno;
2711 :
2712 1268 : switch (postgres_signal_arg)
2713 : {
2714 24 : case SIGTERM:
2715 : /* smart is implied if the other two flags aren't set */
2716 24 : pending_pm_shutdown_request = true;
2717 24 : break;
2718 710 : case SIGINT:
2719 710 : pending_pm_fast_shutdown_request = true;
2720 710 : pending_pm_shutdown_request = true;
2721 710 : break;
2722 534 : case SIGQUIT:
2723 534 : pending_pm_immediate_shutdown_request = true;
2724 534 : pending_pm_shutdown_request = true;
2725 534 : break;
2726 : }
2727 1268 : SetLatch(MyLatch);
2728 :
2729 1268 : errno = save_errno;
2730 1268 : }
2731 :
2732 : /*
2733 : * Process shutdown request.
2734 : */
2735 : static void
2736 1268 : process_pm_shutdown_request(void)
2737 : {
2738 : int mode;
2739 :
2740 1268 : ereport(DEBUG2,
2741 : (errmsg_internal("postmaster received shutdown request signal")));
2742 :
2743 1268 : pending_pm_shutdown_request = false;
2744 :
2745 : /*
2746 : * If more than one shutdown request signal arrived since the last server
2747 : * loop, take the one that is the most immediate. That matches the
2748 : * priority that would apply if we processed them one by one in any order.
2749 : */
2750 1268 : if (pending_pm_immediate_shutdown_request)
2751 : {
2752 534 : pending_pm_immediate_shutdown_request = false;
2753 534 : pending_pm_fast_shutdown_request = false;
2754 534 : mode = ImmediateShutdown;
2755 : }
2756 734 : else if (pending_pm_fast_shutdown_request)
2757 : {
2758 710 : pending_pm_fast_shutdown_request = false;
2759 710 : mode = FastShutdown;
2760 : }
2761 : else
2762 24 : mode = SmartShutdown;
2763 :
2764 1268 : switch (mode)
2765 : {
2766 24 : case SmartShutdown:
2767 :
2768 : /*
2769 : * Smart Shutdown:
2770 : *
2771 : * Wait for children to end their work, then shut down.
2772 : */
2773 24 : if (Shutdown >= SmartShutdown)
2774 0 : break;
2775 24 : Shutdown = SmartShutdown;
2776 24 : ereport(LOG,
2777 : (errmsg("received smart shutdown request")));
2778 :
2779 : /* Report status */
2780 24 : AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STOPPING);
2781 : #ifdef USE_SYSTEMD
2782 : sd_notify(0, "STOPPING=1");
2783 : #endif
2784 :
2785 : /*
2786 : * If we reached normal running, we go straight to waiting for
2787 : * client backends to exit. If already in PM_STOP_BACKENDS or a
2788 : * later state, do not change it.
2789 : */
2790 24 : if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
2791 24 : connsAllowed = false;
2792 0 : else if (pmState == PM_STARTUP || pmState == PM_RECOVERY)
2793 : {
2794 : /* There should be no clients, so proceed to stop children */
2795 0 : pmState = PM_STOP_BACKENDS;
2796 : }
2797 :
2798 : /*
2799 : * Now wait for online backup mode to end and backends to exit. If
2800 : * that is already the case, PostmasterStateMachine will take the
2801 : * next step.
2802 : */
2803 24 : PostmasterStateMachine();
2804 24 : break;
2805 :
2806 710 : case FastShutdown:
2807 :
2808 : /*
2809 : * Fast Shutdown:
2810 : *
2811 : * Abort all children with SIGTERM (rollback active transactions
2812 : * and exit) and shut down when they are gone.
2813 : */
2814 710 : if (Shutdown >= FastShutdown)
2815 0 : break;
2816 710 : Shutdown = FastShutdown;
2817 710 : ereport(LOG,
2818 : (errmsg("received fast shutdown request")));
2819 :
2820 : /* Report status */
2821 710 : AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STOPPING);
2822 : #ifdef USE_SYSTEMD
2823 : sd_notify(0, "STOPPING=1");
2824 : #endif
2825 :
2826 710 : if (pmState == PM_STARTUP || pmState == PM_RECOVERY)
2827 : {
2828 : /* Just shut down background processes silently */
2829 0 : pmState = PM_STOP_BACKENDS;
2830 : }
2831 710 : else if (pmState == PM_RUN ||
2832 62 : pmState == PM_HOT_STANDBY)
2833 : {
2834 : /* Report that we're about to zap live client sessions */
2835 710 : ereport(LOG,
2836 : (errmsg("aborting any active transactions")));
2837 710 : pmState = PM_STOP_BACKENDS;
2838 : }
2839 :
2840 : /*
2841 : * PostmasterStateMachine will issue any necessary signals, or
2842 : * take the next step if no child processes need to be killed.
2843 : */
2844 710 : PostmasterStateMachine();
2845 710 : break;
2846 :
2847 534 : case ImmediateShutdown:
2848 :
2849 : /*
2850 : * Immediate Shutdown:
2851 : *
2852 : * abort all children with SIGQUIT, wait for them to exit,
2853 : * terminate remaining ones with SIGKILL, then exit without
2854 : * attempt to properly shut down the data base system.
2855 : */
2856 534 : if (Shutdown >= ImmediateShutdown)
2857 0 : break;
2858 534 : Shutdown = ImmediateShutdown;
2859 534 : ereport(LOG,
2860 : (errmsg("received immediate shutdown request")));
2861 :
2862 : /* Report status */
2863 534 : AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STOPPING);
2864 : #ifdef USE_SYSTEMD
2865 : sd_notify(0, "STOPPING=1");
2866 : #endif
2867 :
2868 : /* tell children to shut down ASAP */
2869 : /* (note we don't apply send_abort_for_crash here) */
2870 534 : SetQuitSignalReason(PMQUIT_FOR_STOP);
2871 534 : TerminateChildren(SIGQUIT);
2872 534 : pmState = PM_WAIT_BACKENDS;
2873 :
2874 : /* set stopwatch for them to die */
2875 534 : AbortStartTime = time(NULL);
2876 :
2877 : /*
2878 : * Now wait for backends to exit. If there are none,
2879 : * PostmasterStateMachine will take the next step.
2880 : */
2881 534 : PostmasterStateMachine();
2882 534 : break;
2883 : }
2884 1268 : }
2885 :
2886 : static void
2887 29046 : handle_pm_child_exit_signal(SIGNAL_ARGS)
2888 : {
2889 29046 : int save_errno = errno;
2890 :
2891 29046 : pending_pm_child_exit = true;
2892 29046 : SetLatch(MyLatch);
2893 :
2894 29046 : errno = save_errno;
2895 29046 : }
2896 :
2897 : /*
2898 : * Cleanup after a child process dies.
2899 : */
2900 : static void
2901 28856 : process_pm_child_exit(void)
2902 : {
2903 : int pid; /* process id of dead child process */
2904 : int exitstatus; /* its exit status */
2905 :
2906 28856 : pending_pm_child_exit = false;
2907 :
2908 28856 : ereport(DEBUG4,
2909 : (errmsg_internal("reaping dead processes")));
2910 :
2911 59636 : while ((pid = waitpid(-1, &exitstatus, WNOHANG)) > 0)
2912 : {
2913 : /*
2914 : * Check if this child was a startup process.
2915 : */
2916 30784 : if (pid == StartupPID)
2917 : {
2918 1288 : StartupPID = 0;
2919 :
2920 : /*
2921 : * Startup process exited in response to a shutdown request (or it
2922 : * completed normally regardless of the shutdown request).
2923 : */
2924 1288 : if (Shutdown > NoShutdown &&
2925 148 : (EXIT_STATUS_0(exitstatus) || EXIT_STATUS_1(exitstatus)))
2926 : {
2927 62 : StartupStatus = STARTUP_NOT_RUNNING;
2928 62 : pmState = PM_WAIT_BACKENDS;
2929 : /* PostmasterStateMachine logic does the rest */
2930 62 : continue;
2931 : }
2932 :
2933 1226 : if (EXIT_STATUS_3(exitstatus))
2934 : {
2935 0 : ereport(LOG,
2936 : (errmsg("shutdown at recovery target")));
2937 0 : StartupStatus = STARTUP_NOT_RUNNING;
2938 0 : Shutdown = Max(Shutdown, SmartShutdown);
2939 0 : TerminateChildren(SIGTERM);
2940 0 : pmState = PM_WAIT_BACKENDS;
2941 : /* PostmasterStateMachine logic does the rest */
2942 0 : continue;
2943 : }
2944 :
2945 : /*
2946 : * Unexpected exit of startup process (including FATAL exit)
2947 : * during PM_STARTUP is treated as catastrophic. There are no
2948 : * other processes running yet, so we can just exit.
2949 : */
2950 1226 : if (pmState == PM_STARTUP &&
2951 878 : StartupStatus != STARTUP_SIGNALED &&
2952 878 : !EXIT_STATUS_0(exitstatus))
2953 : {
2954 0 : LogChildExit(LOG, _("startup process"),
2955 : pid, exitstatus);
2956 0 : ereport(LOG,
2957 : (errmsg("aborting startup due to startup process failure")));
2958 0 : ExitPostmaster(1);
2959 : }
2960 :
2961 : /*
2962 : * After PM_STARTUP, any unexpected exit (including FATAL exit) of
2963 : * the startup process is catastrophic, so kill other children,
2964 : * and set StartupStatus so we don't try to reinitialize after
2965 : * they're gone. Exception: if StartupStatus is STARTUP_SIGNALED,
2966 : * then we previously sent the startup process a SIGQUIT; so
2967 : * that's probably the reason it died, and we do want to try to
2968 : * restart in that case.
2969 : *
2970 : * This stanza also handles the case where we sent a SIGQUIT
2971 : * during PM_STARTUP due to some dead_end child crashing: in that
2972 : * situation, if the startup process dies on the SIGQUIT, we need
2973 : * to transition to PM_WAIT_BACKENDS state which will allow
2974 : * PostmasterStateMachine to restart the startup process. (On the
2975 : * other hand, the startup process might complete normally, if we
2976 : * were too late with the SIGQUIT. In that case we'll fall
2977 : * through and commence normal operations.)
2978 : */
2979 1226 : if (!EXIT_STATUS_0(exitstatus))
2980 : {
2981 92 : if (StartupStatus == STARTUP_SIGNALED)
2982 : {
2983 86 : StartupStatus = STARTUP_NOT_RUNNING;
2984 86 : if (pmState == PM_STARTUP)
2985 0 : pmState = PM_WAIT_BACKENDS;
2986 : }
2987 : else
2988 6 : StartupStatus = STARTUP_CRASHED;
2989 92 : HandleChildCrash(pid, exitstatus,
2990 92 : _("startup process"));
2991 92 : continue;
2992 : }
2993 :
2994 : /*
2995 : * Startup succeeded, commence normal operations
2996 : */
2997 1134 : StartupStatus = STARTUP_NOT_RUNNING;
2998 1134 : FatalError = false;
2999 1134 : AbortStartTime = 0;
3000 1134 : ReachedNormalRunning = true;
3001 1134 : pmState = PM_RUN;
3002 1134 : connsAllowed = true;
3003 :
3004 : /*
3005 : * Crank up the background tasks, if we didn't do that already
3006 : * when we entered consistent recovery state. It doesn't matter
3007 : * if this fails, we'll just try again later.
3008 : */
3009 1134 : if (CheckpointerPID == 0)
3010 0 : CheckpointerPID = StartCheckpointer();
3011 1134 : if (BgWriterPID == 0)
3012 0 : BgWriterPID = StartBackgroundWriter();
3013 1134 : if (WalWriterPID == 0)
3014 1134 : WalWriterPID = StartWalWriter();
3015 :
3016 : /*
3017 : * Likewise, start other special children as needed. In a restart
3018 : * situation, some of them may be alive already.
3019 : */
3020 1134 : if (!IsBinaryUpgrade && AutoVacuumingActive() && AutoVacPID == 0)
3021 1004 : AutoVacPID = StartAutoVacLauncher();
3022 1134 : if (PgArchStartupAllowed() && PgArchPID == 0)
3023 60 : PgArchPID = StartArchiver();
3024 :
3025 : /* workers may be scheduled to start now */
3026 1134 : maybe_start_bgworkers();
3027 :
3028 : /* at this point we are really open for business */
3029 1130 : ereport(LOG,
3030 : (errmsg("database system is ready to accept connections")));
3031 :
3032 : /* Report status */
3033 1130 : AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_READY);
3034 : #ifdef USE_SYSTEMD
3035 : sd_notify(0, "READY=1");
3036 : #endif
3037 :
3038 1130 : continue;
3039 : }
3040 :
3041 : /*
3042 : * Was it the bgwriter? Normal exit can be ignored; we'll start a new
3043 : * one at the next iteration of the postmaster's main loop, if
3044 : * necessary. Any other exit condition is treated as a crash.
3045 : */
3046 29496 : if (pid == BgWriterPID)
3047 : {
3048 1282 : BgWriterPID = 0;
3049 1282 : if (!EXIT_STATUS_0(exitstatus))
3050 548 : HandleChildCrash(pid, exitstatus,
3051 548 : _("background writer process"));
3052 1282 : continue;
3053 : }
3054 :
3055 : /*
3056 : * Was it the checkpointer?
3057 : */
3058 28214 : if (pid == CheckpointerPID)
3059 : {
3060 1282 : CheckpointerPID = 0;
3061 1282 : if (EXIT_STATUS_0(exitstatus) && pmState == PM_SHUTDOWN)
3062 : {
3063 : /*
3064 : * OK, we saw normal exit of the checkpointer after it's been
3065 : * told to shut down. We expect that it wrote a shutdown
3066 : * checkpoint. (If for some reason it didn't, recovery will
3067 : * occur on next postmaster start.)
3068 : *
3069 : * At this point we should have no normal backend children
3070 : * left (else we'd not be in PM_SHUTDOWN state) but we might
3071 : * have dead_end children to wait for.
3072 : *
3073 : * If we have an archiver subprocess, tell it to do a last
3074 : * archive cycle and quit. Likewise, if we have walsender
3075 : * processes, tell them to send any remaining WAL and quit.
3076 : */
3077 : Assert(Shutdown > NoShutdown);
3078 :
3079 : /* Waken archiver for the last time */
3080 734 : if (PgArchPID != 0)
3081 24 : signal_child(PgArchPID, SIGUSR2);
3082 :
3083 : /*
3084 : * Waken walsenders for the last time. No regular backends
3085 : * should be around anymore.
3086 : */
3087 734 : SignalChildren(SIGUSR2);
3088 :
3089 734 : pmState = PM_SHUTDOWN_2;
3090 : }
3091 : else
3092 : {
3093 : /*
3094 : * Any unexpected exit of the checkpointer (including FATAL
3095 : * exit) is treated as a crash.
3096 : */
3097 548 : HandleChildCrash(pid, exitstatus,
3098 548 : _("checkpointer process"));
3099 : }
3100 :
3101 1282 : continue;
3102 : }
3103 :
3104 : /*
3105 : * Was it the wal writer? Normal exit can be ignored; we'll start a
3106 : * new one at the next iteration of the postmaster's main loop, if
3107 : * necessary. Any other exit condition is treated as a crash.
3108 : */
3109 26932 : if (pid == WalWriterPID)
3110 : {
3111 1128 : WalWriterPID = 0;
3112 1128 : if (!EXIT_STATUS_0(exitstatus))
3113 456 : HandleChildCrash(pid, exitstatus,
3114 456 : _("WAL writer process"));
3115 1128 : continue;
3116 : }
3117 :
3118 : /*
3119 : * Was it the wal receiver? If exit status is zero (normal) or one
3120 : * (FATAL exit), we assume everything is all right just like normal
3121 : * backends. (If we need a new wal receiver, we'll start one at the
3122 : * next iteration of the postmaster's main loop.)
3123 : */
3124 25804 : if (pid == WalReceiverPID)
3125 : {
3126 368 : WalReceiverPID = 0;
3127 368 : if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3128 42 : HandleChildCrash(pid, exitstatus,
3129 42 : _("WAL receiver process"));
3130 368 : continue;
3131 : }
3132 :
3133 : /*
3134 : * Was it the autovacuum launcher? Normal exit can be ignored; we'll
3135 : * start a new one at the next iteration of the postmaster's main
3136 : * loop, if necessary. Any other exit condition is treated as a
3137 : * crash.
3138 : */
3139 25436 : if (pid == AutoVacPID)
3140 : {
3141 998 : AutoVacPID = 0;
3142 998 : if (!EXIT_STATUS_0(exitstatus))
3143 390 : HandleChildCrash(pid, exitstatus,
3144 390 : _("autovacuum launcher process"));
3145 998 : continue;
3146 : }
3147 :
3148 : /*
3149 : * Was it the archiver? If exit status is zero (normal) or one (FATAL
3150 : * exit), we assume everything is all right just like normal backends
3151 : * and just try to restart a new one so that we immediately retry
3152 : * archiving remaining files. (If fail, we'll try again in future
3153 : * cycles of the postmaster's main loop.) Unless we were waiting for
3154 : * it to shut down; don't restart it in that case, and
3155 : * PostmasterStateMachine() will advance to the next shutdown step.
3156 : */
3157 24438 : if (pid == PgArchPID)
3158 : {
3159 66 : PgArchPID = 0;
3160 66 : if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3161 42 : HandleChildCrash(pid, exitstatus,
3162 42 : _("archiver process"));
3163 66 : if (PgArchStartupAllowed())
3164 0 : PgArchPID = StartArchiver();
3165 66 : continue;
3166 : }
3167 :
3168 : /* Was it the system logger? If so, try to start a new one */
3169 24372 : if (pid == SysLoggerPID)
3170 : {
3171 0 : SysLoggerPID = 0;
3172 : /* for safety's sake, launch new logger *first* */
3173 0 : SysLoggerPID = SysLogger_Start();
3174 0 : if (!EXIT_STATUS_0(exitstatus))
3175 0 : LogChildExit(LOG, _("system logger process"),
3176 : pid, exitstatus);
3177 0 : continue;
3178 : }
3179 :
3180 : /* Was it one of our background workers? */
3181 24372 : if (CleanupBackgroundWorker(pid, exitstatus))
3182 : {
3183 : /* have it be restarted */
3184 4430 : HaveCrashedWorker = true;
3185 4430 : continue;
3186 : }
3187 :
3188 : /*
3189 : * Else do standard backend child cleanup.
3190 : */
3191 19942 : CleanupBackend(pid, exitstatus);
3192 : } /* loop over pending child-death reports */
3193 :
3194 : /*
3195 : * After cleaning out the SIGCHLD queue, see if we have any state changes
3196 : * or actions to make.
3197 : */
3198 28852 : PostmasterStateMachine();
3199 27578 : }
3200 :
3201 : /*
3202 : * Scan the bgworkers list and see if the given PID (which has just stopped
3203 : * or crashed) is in it. Handle its shutdown if so, and return true. If not a
3204 : * bgworker, return false.
3205 : *
3206 : * This is heavily based on CleanupBackend. One important difference is that
3207 : * we don't know yet that the dying process is a bgworker, so we must be silent
3208 : * until we're sure it is.
3209 : */
3210 : static bool
3211 24372 : CleanupBackgroundWorker(int pid,
3212 : int exitstatus) /* child's exit status */
3213 : {
3214 : char namebuf[MAXPGPATH];
3215 : slist_mutable_iter iter;
3216 :
3217 47990 : slist_foreach_modify(iter, &BackgroundWorkerList)
3218 : {
3219 : RegisteredBgWorker *rw;
3220 :
3221 28048 : rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
3222 :
3223 28048 : if (rw->rw_pid != pid)
3224 23618 : continue;
3225 :
3226 : #ifdef WIN32
3227 : /* see CleanupBackend */
3228 : if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3229 : exitstatus = 0;
3230 : #endif
3231 :
3232 4430 : snprintf(namebuf, MAXPGPATH, _("background worker \"%s\""),
3233 4430 : rw->rw_worker.bgw_type);
3234 :
3235 :
3236 4430 : if (!EXIT_STATUS_0(exitstatus))
3237 : {
3238 : /* Record timestamp, so we know when to restart the worker. */
3239 1420 : rw->rw_crashed_at = GetCurrentTimestamp();
3240 : }
3241 : else
3242 : {
3243 : /* Zero exit status means terminate */
3244 3010 : rw->rw_crashed_at = 0;
3245 3010 : rw->rw_terminate = true;
3246 : }
3247 :
3248 : /*
3249 : * Additionally, just like a backend, any exit status other than 0 or
3250 : * 1 is considered a crash and causes a system-wide restart.
3251 : */
3252 4430 : if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3253 : {
3254 488 : HandleChildCrash(pid, exitstatus, namebuf);
3255 488 : return true;
3256 : }
3257 :
3258 : /*
3259 : * We must release the postmaster child slot. If the worker failed to
3260 : * do so, it did not clean up after itself, requiring a crash-restart
3261 : * cycle.
3262 : */
3263 3942 : if (!ReleasePostmasterChildSlot(rw->rw_child_slot))
3264 : {
3265 0 : HandleChildCrash(pid, exitstatus, namebuf);
3266 0 : return true;
3267 : }
3268 :
3269 : /* Get it out of the BackendList and clear out remaining data */
3270 3942 : dlist_delete(&rw->rw_backend->elem);
3271 : #ifdef EXEC_BACKEND
3272 : ShmemBackendArrayRemove(rw->rw_backend);
3273 : #endif
3274 :
3275 : /*
3276 : * It's possible that this background worker started some OTHER
3277 : * background worker and asked to be notified when that worker started
3278 : * or stopped. If so, cancel any notifications destined for the
3279 : * now-dead backend.
3280 : */
3281 3942 : if (rw->rw_backend->bgworker_notify)
3282 272 : BackgroundWorkerStopNotifications(rw->rw_pid);
3283 3942 : pfree(rw->rw_backend);
3284 3942 : rw->rw_backend = NULL;
3285 3942 : rw->rw_pid = 0;
3286 3942 : rw->rw_child_slot = 0;
3287 3942 : ReportBackgroundWorkerExit(&iter); /* report child death */
3288 :
3289 3942 : LogChildExit(EXIT_STATUS_0(exitstatus) ? DEBUG1 : LOG,
3290 : namebuf, pid, exitstatus);
3291 :
3292 3942 : return true;
3293 : }
3294 :
3295 19942 : return false;
3296 : }
3297 :
3298 : /*
3299 : * CleanupBackend -- cleanup after terminated backend.
3300 : *
3301 : * Remove all local state associated with backend.
3302 : *
3303 : * If you change this, see also CleanupBackgroundWorker.
3304 : */
3305 : static void
3306 19942 : CleanupBackend(int pid,
3307 : int exitstatus) /* child's exit status. */
3308 : {
3309 : dlist_mutable_iter iter;
3310 :
3311 19942 : LogChildExit(DEBUG2, _("server process"), pid, exitstatus);
3312 :
3313 : /*
3314 : * If a backend dies in an ugly way then we must signal all other backends
3315 : * to quickdie. If exit status is zero (normal) or one (FATAL exit), we
3316 : * assume everything is all right and proceed to remove the backend from
3317 : * the active backend list.
3318 : */
3319 :
3320 : #ifdef WIN32
3321 :
3322 : /*
3323 : * On win32, also treat ERROR_WAIT_NO_CHILDREN (128) as nonfatal case,
3324 : * since that sometimes happens under load when the process fails to start
3325 : * properly (long before it starts using shared memory). Microsoft reports
3326 : * it is related to mutex failure:
3327 : * http://archives.postgresql.org/pgsql-hackers/2010-09/msg00790.php
3328 : */
3329 : if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3330 : {
3331 : LogChildExit(LOG, _("server process"), pid, exitstatus);
3332 : exitstatus = 0;
3333 : }
3334 : #endif
3335 :
3336 19942 : if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3337 : {
3338 364 : HandleChildCrash(pid, exitstatus, _("server process"));
3339 364 : return;
3340 : }
3341 :
3342 38242 : dlist_foreach_modify(iter, &BackendList)
3343 : {
3344 38242 : Backend *bp = dlist_container(Backend, elem, iter.cur);
3345 :
3346 38242 : if (bp->pid == pid)
3347 : {
3348 19578 : if (!bp->dead_end)
3349 : {
3350 19384 : if (!ReleasePostmasterChildSlot(bp->child_slot))
3351 : {
3352 : /*
3353 : * Uh-oh, the child failed to clean itself up. Treat as a
3354 : * crash after all.
3355 : */
3356 0 : HandleChildCrash(pid, exitstatus, _("server process"));
3357 0 : return;
3358 : }
3359 : #ifdef EXEC_BACKEND
3360 : ShmemBackendArrayRemove(bp);
3361 : #endif
3362 : }
3363 19578 : if (bp->bgworker_notify)
3364 : {
3365 : /*
3366 : * This backend may have been slated to receive SIGUSR1 when
3367 : * some background worker started or stopped. Cancel those
3368 : * notifications, as we don't want to signal PIDs that are not
3369 : * PostgreSQL backends. This gets skipped in the (probably
3370 : * very common) case where the backend has never requested any
3371 : * such notifications.
3372 : */
3373 124 : BackgroundWorkerStopNotifications(bp->pid);
3374 : }
3375 19578 : dlist_delete(iter.cur);
3376 19578 : pfree(bp);
3377 19578 : break;
3378 : }
3379 : }
3380 : }
3381 :
3382 : /*
3383 : * HandleChildCrash -- cleanup after failed backend, bgwriter, checkpointer,
3384 : * walwriter, autovacuum, archiver or background worker.
3385 : *
3386 : * The objectives here are to clean up our local state about the child
3387 : * process, and to signal all other remaining children to quickdie.
3388 : */
3389 : static void
3390 2970 : HandleChildCrash(int pid, int exitstatus, const char *procname)
3391 : {
3392 : dlist_mutable_iter iter;
3393 : slist_iter siter;
3394 : Backend *bp;
3395 : bool take_action;
3396 :
3397 : /*
3398 : * We only log messages and send signals if this is the first process
3399 : * crash and we're not doing an immediate shutdown; otherwise, we're only
3400 : * here to update postmaster's idea of live processes. If we have already
3401 : * signaled children, nonzero exit status is to be expected, so don't
3402 : * clutter log.
3403 : */
3404 2970 : take_action = !FatalError && Shutdown != ImmediateShutdown;
3405 :
3406 2970 : if (take_action)
3407 : {
3408 14 : LogChildExit(LOG, procname, pid, exitstatus);
3409 14 : ereport(LOG,
3410 : (errmsg("terminating any other active server processes")));
3411 14 : SetQuitSignalReason(PMQUIT_FOR_CRASH);
3412 : }
3413 :
3414 : /* Process background workers. */
3415 6212 : slist_foreach(siter, &BackgroundWorkerList)
3416 : {
3417 : RegisteredBgWorker *rw;
3418 :
3419 3242 : rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
3420 3242 : if (rw->rw_pid == 0)
3421 1342 : continue; /* not running */
3422 1900 : if (rw->rw_pid == pid)
3423 : {
3424 : /*
3425 : * Found entry for freshly-dead worker, so remove it.
3426 : */
3427 488 : (void) ReleasePostmasterChildSlot(rw->rw_child_slot);
3428 488 : dlist_delete(&rw->rw_backend->elem);
3429 : #ifdef EXEC_BACKEND
3430 : ShmemBackendArrayRemove(rw->rw_backend);
3431 : #endif
3432 488 : pfree(rw->rw_backend);
3433 488 : rw->rw_backend = NULL;
3434 488 : rw->rw_pid = 0;
3435 488 : rw->rw_child_slot = 0;
3436 : /* don't reset crashed_at */
3437 : /* don't report child stop, either */
3438 : /* Keep looping so we can signal remaining workers */
3439 : }
3440 : else
3441 : {
3442 : /*
3443 : * This worker is still alive. Unless we did so already, tell it
3444 : * to commit hara-kiri.
3445 : */
3446 1412 : if (take_action)
3447 8 : sigquit_child(rw->rw_pid);
3448 : }
3449 : }
3450 :
3451 : /* Process regular backends */
3452 6248 : dlist_foreach_modify(iter, &BackendList)
3453 : {
3454 3278 : bp = dlist_container(Backend, elem, iter.cur);
3455 :
3456 3278 : if (bp->pid == pid)
3457 : {
3458 : /*
3459 : * Found entry for freshly-dead backend, so remove it.
3460 : */
3461 364 : if (!bp->dead_end)
3462 : {
3463 364 : (void) ReleasePostmasterChildSlot(bp->child_slot);
3464 : #ifdef EXEC_BACKEND
3465 : ShmemBackendArrayRemove(bp);
3466 : #endif
3467 : }
3468 364 : dlist_delete(iter.cur);
3469 364 : pfree(bp);
3470 : /* Keep looping so we can signal remaining backends */
3471 : }
3472 : else
3473 : {
3474 : /*
3475 : * This backend is still alive. Unless we did so already, tell it
3476 : * to commit hara-kiri.
3477 : *
3478 : * We could exclude dead_end children here, but at least when
3479 : * sending SIGABRT it seems better to include them.
3480 : *
3481 : * Background workers were already processed above; ignore them
3482 : * here.
3483 : */
3484 2914 : if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
3485 1412 : continue;
3486 :
3487 1502 : if (take_action)
3488 8 : sigquit_child(bp->pid);
3489 : }
3490 : }
3491 :
3492 : /* Take care of the startup process too */
3493 2970 : if (pid == StartupPID)
3494 : {
3495 0 : StartupPID = 0;
3496 : /* Caller adjusts StartupStatus, so don't touch it here */
3497 : }
3498 2970 : else if (StartupPID != 0 && take_action)
3499 : {
3500 0 : sigquit_child(StartupPID);
3501 0 : StartupStatus = STARTUP_SIGNALED;
3502 : }
3503 :
3504 : /* Take care of the bgwriter too */
3505 2970 : if (pid == BgWriterPID)
3506 0 : BgWriterPID = 0;
3507 2970 : else if (BgWriterPID != 0 && take_action)
3508 14 : sigquit_child(BgWriterPID);
3509 :
3510 : /* Take care of the checkpointer too */
3511 2970 : if (pid == CheckpointerPID)
3512 0 : CheckpointerPID = 0;
3513 2970 : else if (CheckpointerPID != 0 && take_action)
3514 14 : sigquit_child(CheckpointerPID);
3515 :
3516 : /* Take care of the walwriter too */
3517 2970 : if (pid == WalWriterPID)
3518 0 : WalWriterPID = 0;
3519 2970 : else if (WalWriterPID != 0 && take_action)
3520 8 : sigquit_child(WalWriterPID);
3521 :
3522 : /* Take care of the walreceiver too */
3523 2970 : if (pid == WalReceiverPID)
3524 0 : WalReceiverPID = 0;
3525 2970 : else if (WalReceiverPID != 0 && take_action)
3526 0 : sigquit_child(WalReceiverPID);
3527 :
3528 : /* Take care of the autovacuum launcher too */
3529 2970 : if (pid == AutoVacPID)
3530 0 : AutoVacPID = 0;
3531 2970 : else if (AutoVacPID != 0 && take_action)
3532 8 : sigquit_child(AutoVacPID);
3533 :
3534 : /* Take care of the archiver too */
3535 2970 : if (pid == PgArchPID)
3536 0 : PgArchPID = 0;
3537 2970 : else if (PgArchPID != 0 && take_action)
3538 0 : sigquit_child(PgArchPID);
3539 :
3540 : /* We do NOT restart the syslogger */
3541 :
3542 2970 : if (Shutdown != ImmediateShutdown)
3543 74 : FatalError = true;
3544 :
3545 : /* We now transit into a state of waiting for children to die */
3546 2970 : if (pmState == PM_RECOVERY ||
3547 2966 : pmState == PM_HOT_STANDBY ||
3548 2964 : pmState == PM_RUN ||
3549 2956 : pmState == PM_STOP_BACKENDS ||
3550 2956 : pmState == PM_SHUTDOWN)
3551 14 : pmState = PM_WAIT_BACKENDS;
3552 :
3553 : /*
3554 : * .. and if this doesn't happen quickly enough, now the clock is ticking
3555 : * for us to kill them without mercy.
3556 : */
3557 2970 : if (AbortStartTime == 0)
3558 14 : AbortStartTime = time(NULL);
3559 2970 : }
3560 :
/*
 * Log the death of a child process.
 *
 *	lev			elevel at which to report
 *	procname	noun phrase describing the child, e.g. "server process"
 *	pid			PID of the child
 *	exitstatus	its wait status
 *
 * The message wording depends on whether the child exited normally, was
 * terminated by a signal (an exception on Windows), or reported a status
 * matching neither.  For a nonzero status, the activity string obtained
 * from pgstat_get_crashed_backend_activity() is attached as a detail when
 * one is available.
 */
static void
LogChildExit(int lev, const char *procname, int pid, int exitstatus)
{
	/*
	 * The size of activity_buffer is arbitrary, but it is set equal to the
	 * default track_activity_query_size.
	 */
	char		activity_buffer[1024];
	const char *activity = NULL;

	if (!EXIT_STATUS_0(exitstatus))
		activity = pgstat_get_crashed_backend_activity(pid,
													   activity_buffer,
													   sizeof(activity_buffer));

	/* Ordinary exit: report the exit code. */
	if (WIFEXITED(exitstatus))
	{
		ereport(lev,

		/*------
		  translator: %s is a noun phrase describing a child process, such as
		  "server process" */
				(errmsg("%s (PID %d) exited with exit code %d",
						procname, pid, WEXITSTATUS(exitstatus)),
				 activity ? errdetail("Failed process was running: %s", activity) : 0));
		return;
	}

	/* Neither a normal exit nor a signal: report the raw status. */
	if (!WIFSIGNALED(exitstatus))
	{
		ereport(lev,

		/*------
		  translator: %s is a noun phrase describing a child process, such as
		  "server process" */
				(errmsg("%s (PID %d) exited with unrecognized status %d",
						procname, pid, exitstatus),
				 activity ? errdetail("Failed process was running: %s", activity) : 0));
		return;
	}

	/* Terminated by a signal (reported as an exception on Windows). */
#if defined(WIN32)
	ereport(lev,

	/*------
	  translator: %s is a noun phrase describing a child process, such as
	  "server process" */
			(errmsg("%s (PID %d) was terminated by exception 0x%X",
					procname, pid, WTERMSIG(exitstatus)),
			 errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
			 activity ? errdetail("Failed process was running: %s", activity) : 0));
#else
	ereport(lev,

	/*------
	  translator: %s is a noun phrase describing a child process, such as
	  "server process" */
			(errmsg("%s (PID %d) was terminated by signal %d: %s",
					procname, pid, WTERMSIG(exitstatus),
					pg_strsignal(WTERMSIG(exitstatus))),
			 activity ? errdetail("Failed process was running: %s", activity) : 0));
#endif
}
3622 :
3623 : /*
3624 : * Advance the postmaster's state machine and take actions as appropriate
3625 : *
3626 : * This is common code for process_pm_shutdown_request(),
3627 : * process_pm_child_exit() and process_pm_pmsignal(), which process the signals
3628 : * that might mean we need to change state.
3629 : */
3630 : static void
3631 31922 : PostmasterStateMachine(void)
3632 : {
3633 : /* If we're doing a smart shutdown, try to advance that state. */
3634 31922 : if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
3635 : {
3636 24676 : if (!connsAllowed)
3637 : {
3638 : /*
3639 : * This state ends when we have no normal client backends running.
3640 : * Then we're ready to stop other children.
3641 : */
3642 48 : if (CountChildren(BACKEND_TYPE_NORMAL) == 0)
3643 24 : pmState = PM_STOP_BACKENDS;
3644 : }
3645 : }
3646 :
3647 : /*
3648 : * If we're ready to do so, signal child processes to shut down. (This
3649 : * isn't a persistent state, but treating it as a distinct pmState allows
3650 : * us to share this code across multiple shutdown code paths.)
3651 : */
3652 31922 : if (pmState == PM_STOP_BACKENDS)
3653 : {
3654 : /*
3655 : * Forget any pending requests for background workers, since we're no
3656 : * longer willing to launch any new workers. (If additional requests
3657 : * arrive, BackgroundWorkerStateChange will reject them.)
3658 : */
3659 734 : ForgetUnstartedBackgroundWorkers();
3660 :
3661 : /* Signal all backend children except walsenders */
3662 734 : SignalSomeChildren(SIGTERM,
3663 : BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND);
3664 : /* and the autovac launcher too */
3665 734 : if (AutoVacPID != 0)
3666 608 : signal_child(AutoVacPID, SIGTERM);
3667 : /* and the bgwriter too */
3668 734 : if (BgWriterPID != 0)
3669 734 : signal_child(BgWriterPID, SIGTERM);
3670 : /* and the walwriter too */
3671 734 : if (WalWriterPID != 0)
3672 672 : signal_child(WalWriterPID, SIGTERM);
3673 : /* If we're in recovery, also stop startup and walreceiver procs */
3674 734 : if (StartupPID != 0)
3675 62 : signal_child(StartupPID, SIGTERM);
3676 734 : if (WalReceiverPID != 0)
3677 48 : signal_child(WalReceiverPID, SIGTERM);
3678 : /* checkpointer, archiver, stats, and syslogger may continue for now */
3679 :
3680 : /* Now transition to PM_WAIT_BACKENDS state to wait for them to die */
3681 734 : pmState = PM_WAIT_BACKENDS;
3682 : }
3683 :
3684 : /*
3685 : * If we are in a state-machine state that implies waiting for backends to
3686 : * exit, see if they're all gone, and change state if so.
3687 : */
3688 31922 : if (pmState == PM_WAIT_BACKENDS)
3689 : {
3690 : /*
3691 : * PM_WAIT_BACKENDS state ends when we have no regular backends
3692 : * (including autovac workers), no bgworkers (including unconnected
3693 : * ones), and no walwriter, autovac launcher or bgwriter. If we are
3694 : * doing crash recovery or an immediate shutdown then we expect the
3695 : * checkpointer to exit as well, otherwise not. The stats and
3696 : * syslogger processes are disregarded since they are not connected to
3697 : * shared memory; we also disregard dead_end children here. Walsenders
3698 : * and archiver are also disregarded, they will be terminated later
3699 : * after writing the checkpoint record.
3700 : */
3701 6364 : if (CountChildren(BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND) == 0 &&
3702 2960 : StartupPID == 0 &&
3703 2728 : WalReceiverPID == 0 &&
3704 2666 : BgWriterPID == 0 &&
3705 1866 : (CheckpointerPID == 0 ||
3706 1208 : (!FatalError && Shutdown < ImmediateShutdown)) &&
3707 1746 : WalWriterPID == 0 &&
3708 1436 : AutoVacPID == 0)
3709 : {
3710 1282 : if (Shutdown >= ImmediateShutdown || FatalError)
3711 : {
3712 : /*
3713 : * Start waiting for dead_end children to die. This state
3714 : * change causes ServerLoop to stop creating new ones.
3715 : */
3716 548 : pmState = PM_WAIT_DEAD_END;
3717 :
3718 : /*
3719 : * We already SIGQUIT'd the archiver and stats processes, if
3720 : * any, when we started immediate shutdown or entered
3721 : * FatalError state.
3722 : */
3723 : }
3724 : else
3725 : {
3726 : /*
3727 : * If we get here, we are proceeding with normal shutdown. All
3728 : * the regular children are gone, and it's time to tell the
3729 : * checkpointer to do a shutdown checkpoint.
3730 : */
3731 : Assert(Shutdown > NoShutdown);
3732 : /* Start the checkpointer if not running */
3733 734 : if (CheckpointerPID == 0)
3734 0 : CheckpointerPID = StartCheckpointer();
3735 : /* And tell it to shut down */
3736 734 : if (CheckpointerPID != 0)
3737 : {
3738 734 : signal_child(CheckpointerPID, SIGUSR2);
3739 734 : pmState = PM_SHUTDOWN;
3740 : }
3741 : else
3742 : {
3743 : /*
3744 : * If we failed to fork a checkpointer, just shut down.
3745 : * Any required cleanup will happen at next restart. We
3746 : * set FatalError so that an "abnormal shutdown" message
3747 : * gets logged when we exit.
3748 : *
3749 : * We don't consult send_abort_for_crash here, as it's
3750 : * unlikely that dumping cores would illuminate the reason
3751 : * for checkpointer fork failure.
3752 : */
3753 0 : FatalError = true;
3754 0 : pmState = PM_WAIT_DEAD_END;
3755 :
3756 : /* Kill the walsenders and archiver too */
3757 0 : SignalChildren(SIGQUIT);
3758 0 : if (PgArchPID != 0)
3759 0 : signal_child(PgArchPID, SIGQUIT);
3760 : }
3761 : }
3762 : }
3763 : }
3764 :
3765 31922 : if (pmState == PM_SHUTDOWN_2)
3766 : {
3767 : /*
3768 : * PM_SHUTDOWN_2 state ends when there's no other children than
3769 : * dead_end children left. There shouldn't be any regular backends
3770 : * left by now anyway; what we're really waiting for is walsenders and
3771 : * archiver.
3772 : */
3773 800 : if (PgArchPID == 0 && CountChildren(BACKEND_TYPE_ALL) == 0)
3774 : {
3775 734 : pmState = PM_WAIT_DEAD_END;
3776 : }
3777 : }
3778 :
3779 31922 : if (pmState == PM_WAIT_DEAD_END)
3780 : {
3781 : /* Don't allow any new socket connection events. */
3782 1300 : ConfigurePostmasterWaitSet(false);
3783 :
3784 : /*
3785 : * PM_WAIT_DEAD_END state ends when the BackendList is entirely empty
3786 : * (ie, no dead_end children remain), and the archiver is gone too.
3787 : *
3788 : * The reason we wait for those two is to protect them against a new
3789 : * postmaster starting conflicting subprocesses; this isn't an
3790 : * ironclad protection, but it at least helps in the
3791 : * shutdown-and-immediately-restart scenario. Note that they have
3792 : * already been sent appropriate shutdown signals, either during a
3793 : * normal state transition leading up to PM_WAIT_DEAD_END, or during
3794 : * FatalError processing.
3795 : */
3796 1300 : if (dlist_is_empty(&BackendList) && PgArchPID == 0)
3797 : {
3798 : /* These other guys should be dead already */
3799 : Assert(StartupPID == 0);
3800 : Assert(WalReceiverPID == 0);
3801 : Assert(BgWriterPID == 0);
3802 : Assert(CheckpointerPID == 0);
3803 : Assert(WalWriterPID == 0);
3804 : Assert(AutoVacPID == 0);
3805 : /* syslogger is not considered here */
3806 1282 : pmState = PM_NO_CHILDREN;
3807 : }
3808 : }
3809 :
3810 : /*
3811 : * If we've been told to shut down, we exit as soon as there are no
3812 : * remaining children. If there was a crash, cleanup will occur at the
3813 : * next startup. (Before PostgreSQL 8.3, we tried to recover from the
3814 : * crash before exiting, but that seems unwise if we are quitting because
3815 : * we got SIGTERM from init --- there may well not be time for recovery
3816 : * before init decides to SIGKILL us.)
3817 : *
3818 : * Note that the syslogger continues to run. It will exit when it sees
3819 : * EOF on its input pipe, which happens when there are no more upstream
3820 : * processes.
3821 : */
3822 31922 : if (Shutdown > NoShutdown && pmState == PM_NO_CHILDREN)
3823 : {
3824 1268 : if (FatalError)
3825 : {
3826 0 : ereport(LOG, (errmsg("abnormal database system shutdown")));
3827 0 : ExitPostmaster(1);
3828 : }
3829 : else
3830 : {
3831 : /*
3832 : * Normal exit from the postmaster is here. We don't need to log
3833 : * anything here, since the UnlinkLockFiles proc_exit callback
3834 : * will do so, and that should be the last user-visible action.
3835 : */
3836 1268 : ExitPostmaster(0);
3837 : }
3838 : }
3839 :
3840 : /*
3841 : * If the startup process failed, or the user does not want an automatic
3842 : * restart after backend crashes, wait for all non-syslogger children to
3843 : * exit, and then exit postmaster. We don't try to reinitialize when the
3844 : * startup process fails, because more than likely it will just fail again
3845 : * and we will keep trying forever.
3846 : */
3847 30654 : if (pmState == PM_NO_CHILDREN)
3848 : {
3849 14 : if (StartupStatus == STARTUP_CRASHED)
3850 : {
3851 6 : ereport(LOG,
3852 : (errmsg("shutting down due to startup process failure")));
3853 6 : ExitPostmaster(1);
3854 : }
3855 8 : if (!restart_after_crash)
3856 : {
3857 0 : ereport(LOG,
3858 : (errmsg("shutting down because restart_after_crash is off")));
3859 0 : ExitPostmaster(1);
3860 : }
3861 : }
3862 :
3863 : /*
3864 : * If we need to recover from a crash, wait for all non-syslogger children
3865 : * to exit, then reset shmem and StartupDataBase.
3866 : */
3867 30648 : if (FatalError && pmState == PM_NO_CHILDREN)
3868 : {
3869 8 : ereport(LOG,
3870 : (errmsg("all server processes terminated; reinitializing")));
3871 :
3872 : /* remove leftover temporary files after a crash */
3873 8 : if (remove_temp_files_after_crash)
3874 6 : RemovePgTempFiles();
3875 :
3876 : /* allow background workers to immediately restart */
3877 8 : ResetBackgroundWorkerCrashTimes();
3878 :
3879 8 : shmem_exit(1);
3880 :
3881 : /* re-read control file into local memory */
3882 8 : LocalProcessControlFile(true);
3883 :
3884 : /* re-create shared memory and semaphores */
3885 8 : CreateSharedMemoryAndSemaphores();
3886 :
3887 8 : StartupPID = StartupDataBase();
3888 : Assert(StartupPID != 0);
3889 8 : StartupStatus = STARTUP_RUNNING;
3890 8 : pmState = PM_STARTUP;
3891 : /* crash recovery started, reset SIGKILL flag */
3892 8 : AbortStartTime = 0;
3893 :
3894 : /* start accepting server socket connection events again */
3895 8 : ConfigurePostmasterWaitSet(true);
3896 : }
3897 30648 : }
3898 :
3899 :
3900 : /*
3901 : * Send a signal to a postmaster child process
3902 : *
3903 : * On systems that have setsid(), each child process sets itself up as a
3904 : * process group leader. For signals that are generally interpreted in the
3905 : * appropriate fashion, we signal the entire process group not just the
3906 : * direct child process. This allows us to, for example, SIGQUIT a blocked
3907 : * archive_recovery script, or SIGINT a script being run by a backend via
3908 : * system().
3909 : *
3910 : * There is a race condition for recently-forked children: they might not
3911 : * have executed setsid() yet. So we signal the child directly as well as
3912 : * the group. We assume such a child will handle the signal before trying
3913 : * to spawn any grandchild processes. We also assume that signaling the
3914 : * child twice will not cause any problems.
3915 : */
3916 : static void
3917 8368 : signal_child(pid_t pid, int signal)
3918 : {
3919 8368 : if (kill(pid, signal) < 0)
3920 0 : elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) pid, signal);
3921 : #ifdef HAVE_SETSID
3922 8368 : switch (signal)
3923 : {
3924 6198 : case SIGINT:
3925 : case SIGTERM:
3926 : case SIGQUIT:
3927 : case SIGKILL:
3928 : case SIGABRT:
3929 6198 : if (kill(-pid, signal) < 0)
3930 22 : elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) (-pid), signal);
3931 6198 : break;
3932 2170 : default:
3933 2170 : break;
3934 : }
3935 : #endif
3936 8368 : }
3937 :
3938 : /*
3939 : * Convenience function for killing a child process after a crash of some
3940 : * other child process. We log the action at a higher level than we would
3941 : * otherwise do, and we apply send_abort_for_crash to decide which signal
3942 : * to send. Normally it's SIGQUIT -- and most other comments in this file
3943 : * are written on the assumption that it is -- but developers might prefer
3944 : * to use SIGABRT to collect per-child core dumps.
3945 : */
3946 : static void
3947 60 : sigquit_child(pid_t pid)
3948 : {
3949 60 : ereport(DEBUG2,
3950 : (errmsg_internal("sending %s to process %d",
3951 : (send_abort_for_crash ? "SIGABRT" : "SIGQUIT"),
3952 : (int) pid)));
3953 60 : signal_child(pid, (send_abort_for_crash ? SIGABRT : SIGQUIT));
3954 60 : }
3955 :
3956 : /*
3957 : * Send a signal to the targeted children (but NOT special children;
3958 : * dead_end children are never signaled, either).
3959 : */
3960 : static bool
3961 2210 : SignalSomeChildren(int signal, int target)
3962 : {
3963 : dlist_iter iter;
3964 2210 : bool signaled = false;
3965 :
3966 4712 : dlist_foreach(iter, &BackendList)
3967 : {
3968 2502 : Backend *bp = dlist_container(Backend, elem, iter.cur);
3969 :
3970 2502 : if (bp->dead_end)
3971 2 : continue;
3972 :
3973 : /*
3974 : * Since target == BACKEND_TYPE_ALL is the most common case, we test
3975 : * it first and avoid touching shared memory for every child.
3976 : */
3977 2500 : if (target != BACKEND_TYPE_ALL)
3978 : {
3979 : /*
3980 : * Assign bkend_type for any recently announced WAL Sender
3981 : * processes.
3982 : */
3983 1618 : if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
3984 458 : IsPostmasterChildWalSender(bp->child_slot))
3985 56 : bp->bkend_type = BACKEND_TYPE_WALSND;
3986 :
3987 1160 : if (!(target & bp->bkend_type))
3988 56 : continue;
3989 : }
3990 :
3991 2444 : ereport(DEBUG4,
3992 : (errmsg_internal("sending signal %d to process %d",
3993 : signal, (int) bp->pid)));
3994 2444 : signal_child(bp->pid, signal);
3995 2444 : signaled = true;
3996 : }
3997 2210 : return signaled;
3998 : }
3999 :
4000 : /*
4001 : * Send a termination signal to children. This considers all of our children
4002 : * processes, except syslogger and dead_end backends.
4003 : */
4004 : static void
4005 534 : TerminateChildren(int signal)
4006 : {
4007 534 : SignalChildren(signal);
4008 534 : if (StartupPID != 0)
4009 : {
4010 86 : signal_child(StartupPID, signal);
4011 86 : if (signal == SIGQUIT || signal == SIGKILL || signal == SIGABRT)
4012 86 : StartupStatus = STARTUP_SIGNALED;
4013 : }
4014 534 : if (BgWriterPID != 0)
4015 534 : signal_child(BgWriterPID, signal);
4016 534 : if (CheckpointerPID != 0)
4017 534 : signal_child(CheckpointerPID, signal);
4018 534 : if (WalWriterPID != 0)
4019 448 : signal_child(WalWriterPID, signal);
4020 534 : if (WalReceiverPID != 0)
4021 42 : signal_child(WalReceiverPID, signal);
4022 534 : if (AutoVacPID != 0)
4023 382 : signal_child(AutoVacPID, signal);
4024 534 : if (PgArchPID != 0)
4025 42 : signal_child(PgArchPID, signal);
4026 534 : }
4027 :
4028 : /*
4029 : * BackendStartup -- start backend process
4030 : *
4031 : * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
4032 : *
4033 : * Note: if you change this code, also consider StartAutovacuumWorker.
4034 : */
4035 : static int
4036 19926 : BackendStartup(Port *port)
4037 : {
4038 : Backend *bn; /* for backend cleanup */
4039 : pid_t pid;
4040 :
4041 : /*
4042 : * Create backend data structure. Better before the fork() so we can
4043 : * handle failure cleanly.
4044 : */
4045 19926 : bn = (Backend *) palloc_extended(sizeof(Backend), MCXT_ALLOC_NO_OOM);
4046 19926 : if (!bn)
4047 : {
4048 0 : ereport(LOG,
4049 : (errcode(ERRCODE_OUT_OF_MEMORY),
4050 : errmsg("out of memory")));
4051 0 : return STATUS_ERROR;
4052 : }
4053 :
4054 : /*
4055 : * Compute the cancel key that will be assigned to this backend. The
4056 : * backend will have its own copy in the forked-off process' value of
4057 : * MyCancelKey, so that it can transmit the key to the frontend.
4058 : */
4059 19926 : if (!RandomCancelKey(&MyCancelKey))
4060 : {
4061 0 : pfree(bn);
4062 0 : ereport(LOG,
4063 : (errcode(ERRCODE_INTERNAL_ERROR),
4064 : errmsg("could not generate random cancel key")));
4065 0 : return STATUS_ERROR;
4066 : }
4067 :
4068 19926 : bn->cancel_key = MyCancelKey;
4069 :
4070 : /* Pass down canAcceptConnections state */
4071 19926 : port->canAcceptConnections = canAcceptConnections(BACKEND_TYPE_NORMAL);
4072 19926 : bn->dead_end = (port->canAcceptConnections != CAC_OK);
4073 :
4074 : /*
4075 : * Unless it's a dead_end child, assign it a child slot number
4076 : */
4077 19926 : if (!bn->dead_end)
4078 19732 : bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
4079 : else
4080 194 : bn->child_slot = 0;
4081 :
4082 : /* Hasn't asked to be notified about any bgworkers yet */
4083 19926 : bn->bgworker_notify = false;
4084 :
4085 : #ifdef EXEC_BACKEND
4086 : pid = backend_forkexec(port);
4087 : #else /* !EXEC_BACKEND */
4088 19926 : pid = fork_process();
4089 39532 : if (pid == 0) /* child */
4090 : {
4091 : /* Detangle from postmaster */
4092 19608 : InitPostmasterChild();
4093 :
4094 : /* Close the postmaster's sockets */
4095 19608 : ClosePostmasterPorts(false);
4096 :
4097 : /* Perform additional initialization and collect startup packet */
4098 19608 : BackendInitialize(port);
4099 :
4100 : /* And run the backend */
4101 19346 : BackendRun(port);
4102 : }
4103 : #endif /* EXEC_BACKEND */
4104 :
4105 19924 : if (pid < 0)
4106 : {
4107 : /* in parent, fork failed */
4108 0 : int save_errno = errno;
4109 :
4110 0 : if (!bn->dead_end)
4111 0 : (void) ReleasePostmasterChildSlot(bn->child_slot);
4112 0 : pfree(bn);
4113 0 : errno = save_errno;
4114 0 : ereport(LOG,
4115 : (errmsg("could not fork new process for connection: %m")));
4116 0 : report_fork_failure_to_client(port, save_errno);
4117 0 : return STATUS_ERROR;
4118 : }
4119 :
4120 : /* in parent, successful fork */
4121 19924 : ereport(DEBUG2,
4122 : (errmsg_internal("forked new backend, pid=%d socket=%d",
4123 : (int) pid, (int) port->sock)));
4124 :
4125 : /*
4126 : * Everything's been successful, it's safe to add this backend to our list
4127 : * of backends.
4128 : */
4129 19924 : bn->pid = pid;
4130 19924 : bn->bkend_type = BACKEND_TYPE_NORMAL; /* Can change later to WALSND */
4131 19924 : dlist_push_head(&BackendList, &bn->elem);
4132 :
4133 : #ifdef EXEC_BACKEND
4134 : if (!bn->dead_end)
4135 : ShmemBackendArrayAdd(bn);
4136 : #endif
4137 :
4138 19924 : return STATUS_OK;
4139 : }
4140 :
4141 : /*
4142 : * Try to report backend fork() failure to client before we close the
4143 : * connection. Since we do not care to risk blocking the postmaster on
4144 : * this connection, we set the connection to non-blocking and try only once.
4145 : *
4146 : * This is grungy special-purpose code; we cannot use backend libpq since
4147 : * it's not up and running.
4148 : */
4149 : static void
4150 0 : report_fork_failure_to_client(Port *port, int errnum)
4151 : {
4152 : char buffer[1000];
4153 : int rc;
4154 :
4155 : /* Format the error message packet (always V2 protocol) */
4156 0 : snprintf(buffer, sizeof(buffer), "E%s%s\n",
4157 : _("could not fork new process for connection: "),
4158 : strerror(errnum));
4159 :
4160 : /* Set port to non-blocking. Don't do send() if this fails */
4161 0 : if (!pg_set_noblock(port->sock))
4162 0 : return;
4163 :
4164 : /* We'll retry after EINTR, but ignore all other failures */
4165 : do
4166 : {
4167 0 : rc = send(port->sock, buffer, strlen(buffer) + 1, 0);
4168 0 : } while (rc < 0 && errno == EINTR);
4169 : }
4170 :
4171 :
4172 : /*
4173 : * BackendInitialize -- initialize an interactive (postmaster-child)
4174 : * backend process, and collect the client's startup packet.
4175 : *
4176 : * returns: nothing. Will not return at all if there's any failure.
4177 : *
4178 : * Note: this code does not depend on having any access to shared memory.
4179 : * Indeed, our approach to SIGTERM/timeout handling *requires* that
4180 : * shared memory not have been touched yet; see comments within.
4181 : * In the EXEC_BACKEND case, we are physically attached to shared memory
4182 : * but have not yet set up most of our local pointers to shmem structures.
4183 : */
4184 : static void
4185 19608 : BackendInitialize(Port *port)
4186 : {
4187 : int status;
4188 : int ret;
4189 : char remote_host[NI_MAXHOST];
4190 : char remote_port[NI_MAXSERV];
4191 : StringInfoData ps_data;
4192 :
4193 : /* Save port etc. for ps status */
4194 19608 : MyProcPort = port;
4195 :
4196 : /* Tell fd.c about the long-lived FD associated with the port */
4197 19608 : ReserveExternalFD();
4198 :
4199 : /*
4200 : * PreAuthDelay is a debugging aid for investigating problems in the
4201 : * authentication cycle: it can be set in postgresql.conf to allow time to
4202 : * attach to the newly-forked backend with a debugger. (See also
4203 : * PostAuthDelay, which we allow clients to pass through PGOPTIONS, but it
4204 : * is not honored until after authentication.)
4205 : */
4206 19608 : if (PreAuthDelay > 0)
4207 0 : pg_usleep(PreAuthDelay * 1000000L);
4208 :
4209 : /* This flag will remain set until InitPostgres finishes authentication */
4210 19608 : ClientAuthInProgress = true; /* limit visibility of log messages */
4211 :
4212 : /* set these to empty in case they are needed before we set them up */
4213 19608 : port->remote_host = "";
4214 19608 : port->remote_port = "";
4215 :
4216 : /*
4217 : * Initialize libpq and enable reporting of ereport errors to the client.
4218 : * Must do this now because authentication uses libpq to send messages.
4219 : */
4220 19608 : pq_init(); /* initialize libpq to talk to client */
4221 19608 : whereToSendOutput = DestRemote; /* now safe to ereport to client */
4222 :
4223 : /*
4224 : * We arrange to do _exit(1) if we receive SIGTERM or timeout while trying
4225 : * to collect the startup packet; while SIGQUIT results in _exit(2).
4226 : * Otherwise the postmaster cannot shutdown the database FAST or IMMED
4227 : * cleanly if a buggy client fails to send the packet promptly.
4228 : *
4229 : * Exiting with _exit(1) is only possible because we have not yet touched
4230 : * shared memory; therefore no outside-the-process state needs to get
4231 : * cleaned up.
4232 : */
4233 19608 : pqsignal(SIGTERM, process_startup_packet_die);
4234 : /* SIGQUIT handler was already set up by InitPostmasterChild */
4235 19608 : InitializeTimeouts(); /* establishes SIGALRM handler */
4236 19608 : sigprocmask(SIG_SETMASK, &StartupBlockSig, NULL);
4237 :
4238 : /*
4239 : * Get the remote host name and port for logging and status display.
4240 : */
4241 19608 : remote_host[0] = '\0';
4242 19608 : remote_port[0] = '\0';
4243 19608 : if ((ret = pg_getnameinfo_all(&port->raddr.addr, port->raddr.salen,
4244 : remote_host, sizeof(remote_host),
4245 : remote_port, sizeof(remote_port),
4246 : (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV)) != 0)
4247 0 : ereport(WARNING,
4248 : (errmsg_internal("pg_getnameinfo_all() failed: %s",
4249 : gai_strerror(ret))));
4250 :
4251 : /*
4252 : * Save remote_host and remote_port in port structure (after this, they
4253 : * will appear in log_line_prefix data for log messages).
4254 : */
4255 19608 : port->remote_host = strdup(remote_host);
4256 19608 : port->remote_port = strdup(remote_port);
4257 :
4258 : /* And now we can issue the Log_connections message, if wanted */
4259 19608 : if (Log_connections)
4260 : {
4261 688 : if (remote_port[0])
4262 230 : ereport(LOG,
4263 : (errmsg("connection received: host=%s port=%s",
4264 : remote_host,
4265 : remote_port)));
4266 : else
4267 458 : ereport(LOG,
4268 : (errmsg("connection received: host=%s",
4269 : remote_host)));
4270 : }
4271 :
4272 : /*
4273 : * If we did a reverse lookup to name, we might as well save the results
4274 : * rather than possibly repeating the lookup during authentication.
4275 : *
4276 : * Note that we don't want to specify NI_NAMEREQD above, because then we'd
4277 : * get nothing useful for a client without an rDNS entry. Therefore, we
4278 : * must check whether we got a numeric IPv4 or IPv6 address, and not save
4279 : * it into remote_hostname if so. (This test is conservative and might
4280 : * sometimes classify a hostname as numeric, but an error in that
4281 : * direction is safe; it only results in a possible extra lookup.)
4282 : */
4283 19608 : if (log_hostname &&
4284 230 : ret == 0 &&
4285 230 : strspn(remote_host, "0123456789.") < strlen(remote_host) &&
4286 230 : strspn(remote_host, "0123456789ABCDEFabcdef:") < strlen(remote_host))
4287 230 : port->remote_hostname = strdup(remote_host);
4288 :
4289 : /*
4290 : * Ready to begin client interaction. We will give up and _exit(1) after
4291 : * a time delay, so that a broken client can't hog a connection
4292 : * indefinitely. PreAuthDelay and any DNS interactions above don't count
4293 : * against the time limit.
4294 : *
4295 : * Note: AuthenticationTimeout is applied here while waiting for the
4296 : * startup packet, and then again in InitPostgres for the duration of any
4297 : * authentication operations. So a hostile client could tie up the
4298 : * process for nearly twice AuthenticationTimeout before we kick him off.
4299 : *
4300 : * Note: because PostgresMain will call InitializeTimeouts again, the
4301 : * registration of STARTUP_PACKET_TIMEOUT will be lost. This is okay
4302 : * since we never use it again after this function.
4303 : */
4304 19608 : RegisterTimeout(STARTUP_PACKET_TIMEOUT, StartupPacketTimeoutHandler);
4305 19608 : enable_timeout_after(STARTUP_PACKET_TIMEOUT, AuthenticationTimeout * 1000);
4306 :
4307 : /*
4308 : * Receive the startup packet (which might turn out to be a cancel request
4309 : * packet).
4310 : */
4311 19608 : status = ProcessStartupPacket(port, false, false);
4312 :
4313 : /*
4314 : * If we're going to reject the connection due to database state, say so
4315 : * now instead of wasting cycles on an authentication exchange. (This also
4316 : * allows a pg_ping utility to be written.)
4317 : */
4318 19608 : switch (port->canAcceptConnections)
4319 : {
4320 184 : case CAC_STARTUP:
4321 184 : ereport(FATAL,
4322 : (errcode(ERRCODE_CANNOT_CONNECT_NOW),
4323 : errmsg("the database system is starting up")));
4324 : break;
4325 8 : case CAC_NOTCONSISTENT:
4326 8 : if (EnableHotStandby)
4327 8 : ereport(FATAL,
4328 : (errcode(ERRCODE_CANNOT_CONNECT_NOW),
4329 : errmsg("the database system is not yet accepting connections"),
4330 : errdetail("Consistent recovery state has not been yet reached.")));
4331 : else
4332 0 : ereport(FATAL,
4333 : (errcode(ERRCODE_CANNOT_CONNECT_NOW),
4334 : errmsg("the database system is not accepting connections"),
4335 : errdetail("Hot standby mode is disabled.")));
4336 : break;
4337 2 : case CAC_SHUTDOWN:
4338 2 : ereport(FATAL,
4339 : (errcode(ERRCODE_CANNOT_CONNECT_NOW),
4340 : errmsg("the database system is shutting down")));
4341 : break;
4342 0 : case CAC_RECOVERY:
4343 0 : ereport(FATAL,
4344 : (errcode(ERRCODE_CANNOT_CONNECT_NOW),
4345 : errmsg("the database system is in recovery mode")));
4346 : break;
4347 0 : case CAC_TOOMANY:
4348 0 : ereport(FATAL,
4349 : (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
4350 : errmsg("sorry, too many clients already")));
4351 : break;
4352 19414 : case CAC_OK:
4353 19414 : break;
4354 : }
4355 :
4356 : /*
4357 : * Disable the timeout, and prevent SIGTERM again.
4358 : */
4359 19414 : disable_timeout(STARTUP_PACKET_TIMEOUT, false);
4360 19414 : sigprocmask(SIG_SETMASK, &BlockSig, NULL);
4361 :
4362 : /*
4363 : * As a safety check that nothing in startup has yet performed
4364 : * shared-memory modifications that would need to be undone if we had
4365 : * exited through SIGTERM or timeout above, check that no on_shmem_exit
4366 : * handlers have been registered yet. (This isn't terribly bulletproof,
4367 : * since someone might misuse an on_proc_exit handler for shmem cleanup,
4368 : * but it's a cheap and helpful check. We cannot disallow on_proc_exit
4369 : * handlers unfortunately, since pq_init() already registered one.)
4370 : */
4371 19414 : check_on_shmem_exit_lists_are_empty();
4372 :
4373 : /*
4374 : * Stop here if it was bad or a cancel packet. ProcessStartupPacket
4375 : * already did any appropriate error reporting.
4376 : */
4377 19414 : if (status != STATUS_OK)
4378 68 : proc_exit(0);
4379 :
4380 : /*
4381 : * Now that we have the user and database name, we can set the process
4382 : * title for ps. It's good to do this as early as possible in startup.
4383 : */
4384 19346 : initStringInfo(&ps_data);
4385 19346 : if (am_walsender)
4386 1720 : appendStringInfo(&ps_data, "%s ", GetBackendTypeDesc(B_WAL_SENDER));
4387 19346 : appendStringInfo(&ps_data, "%s ", port->user_name);
4388 19346 : if (port->database_name[0] != '\0')
4389 18656 : appendStringInfo(&ps_data, "%s ", port->database_name);
4390 19346 : appendStringInfoString(&ps_data, port->remote_host);
4391 19346 : if (port->remote_port[0] != '\0')
4392 452 : appendStringInfo(&ps_data, "(%s)", port->remote_port);
4393 :
4394 19346 : init_ps_display(ps_data.data);
4395 19346 : pfree(ps_data.data);
4396 :
4397 19346 : set_ps_display("initializing");
4398 19346 : }
4399 :
4400 :
4401 : /*
4402 : * BackendRun -- set up the backend's argument list and invoke PostgresMain()
4403 : *
4404 : * returns:
4405 : * Doesn't return at all.
4406 : */
4407 : static void
4408 19346 : BackendRun(Port *port)
4409 : {
4410 : /*
4411 : * Create a per-backend PGPROC struct in shared memory. We must do this
4412 : * before we can use LWLocks or access any shared memory.
4413 : */
4414 19346 : InitProcess();
4415 :
4416 : /*
4417 : * Make sure we aren't in PostmasterContext anymore. (We can't delete it
4418 : * just yet, though, because InitPostgres will need the HBA data.)
4419 : */
4420 19342 : MemoryContextSwitchTo(TopMemoryContext);
4421 :
4422 19342 : PostgresMain(port->database_name, port->user_name);
4423 : }
4424 :
4425 :
4426 : #ifdef EXEC_BACKEND
4427 :
/*
 * postmaster_forkexec -- fork and exec a postmaster subprocess
 *
 * The caller supplies a fully prepared argv array, except that argv[2] is
 * left for the name of the temp variable file to be filled in.
 *
 * Returns the child process PID, or -1 on fork failure (in which case a
 * suitable error message has already been logged).
 *
 * Every use of this routine ends up in SubPostmasterMain in the child
 * process.
 */
pid_t
postmaster_forkexec(int argc, char *argv[])
{
	pid_t		child_pid;

	/* No Port and no BackgroundWorker are associated with this child */
	child_pid = internal_forkexec(argc, argv, NULL, NULL);

	return child_pid;
}
4445 :
4446 : /*
4447 : * backend_forkexec -- fork/exec off a backend process
4448 : *
4449 : * Some operating systems (WIN32) don't have fork() so we have to simulate
4450 : * it by storing parameters that need to be passed to the child and
4451 : * then create a new child process.
4452 : *
4453 : * returns the pid of the fork/exec'd process, or -1 on failure
4454 : */
4455 : static pid_t
4456 : backend_forkexec(Port *port)
4457 : {
4458 : char *av[4];
4459 : int ac = 0;
4460 :
4461 : av[ac++] = "postgres";
4462 : av[ac++] = "--forkbackend";
4463 : av[ac++] = NULL; /* filled in by internal_forkexec */
4464 :
4465 : av[ac] = NULL;
4466 : Assert(ac < lengthof(av));
4467 :
4468 : return internal_forkexec(ac, av, port, NULL);
4469 : }
4470 :
4471 : #ifndef WIN32
4472 :
4473 : /*
4474 : * internal_forkexec non-win32 implementation
4475 : *
4476 : * - writes out backend variables to the parameter file
4477 : * - fork():s, and then exec():s the child process
4478 : */
4479 : static pid_t
4480 : internal_forkexec(int argc, char *argv[], Port *port, BackgroundWorker *worker)
4481 : {
4482 : static unsigned long tmpBackendFileNum = 0;
4483 : pid_t pid;
4484 : char tmpfilename[MAXPGPATH];
4485 : BackendParameters param;
4486 : FILE *fp;
4487 :
4488 : /*
4489 : * Make sure padding bytes are initialized, to prevent Valgrind from
4490 : * complaining about writing uninitialized bytes to the file. This isn't
4491 : * performance critical, and the win32 implementation initializes the
4492 : * padding bytes to zeros, so do it even when not using Valgrind.
4493 : */
4494 : memset(¶m, 0, sizeof(BackendParameters));
4495 :
4496 : if (!save_backend_variables(¶m, port, worker))
4497 : return -1; /* log made by save_backend_variables */
4498 :
4499 : /* Calculate name for temp file */
4500 : snprintf(tmpfilename, MAXPGPATH, "%s/%s.backend_var.%d.%lu",
4501 : PG_TEMP_FILES_DIR, PG_TEMP_FILE_PREFIX,
4502 : MyProcPid, ++tmpBackendFileNum);
4503 :
4504 : /* Open file */
4505 : fp = AllocateFile(tmpfilename, PG_BINARY_W);
4506 : if (!fp)
4507 : {
4508 : /*
4509 : * As in OpenTemporaryFileInTablespace, try to make the temp-file
4510 : * directory, ignoring errors.
4511 : */
4512 : (void) MakePGDirectory(PG_TEMP_FILES_DIR);
4513 :
4514 : fp = AllocateFile(tmpfilename, PG_BINARY_W);
4515 : if (!fp)
4516 : {
4517 : ereport(LOG,
4518 : (errcode_for_file_access(),
4519 : errmsg("could not create file \"%s\": %m",
4520 : tmpfilename)));
4521 : return -1;
4522 : }
4523 : }
4524 :
4525 : if (fwrite(¶m, sizeof(param), 1, fp) != 1)
4526 : {
4527 : ereport(LOG,
4528 : (errcode_for_file_access(),
4529 : errmsg("could not write to file \"%s\": %m", tmpfilename)));
4530 : FreeFile(fp);
4531 : return -1;
4532 : }
4533 :
4534 : /* Release file */
4535 : if (FreeFile(fp))
4536 : {
4537 : ereport(LOG,
4538 : (errcode_for_file_access(),
4539 : errmsg("could not write to file \"%s\": %m", tmpfilename)));
4540 : return -1;
4541 : }
4542 :
4543 : /* Make sure caller set up argv properly */
4544 : Assert(argc >= 3);
4545 : Assert(argv[argc] == NULL);
4546 : Assert(strncmp(argv[1], "--fork", 6) == 0);
4547 : Assert(argv[2] == NULL);
4548 :
4549 : /* Insert temp file name after --fork argument */
4550 : argv[2] = tmpfilename;
4551 :
4552 : /* Fire off execv in child */
4553 : if ((pid = fork_process()) == 0)
4554 : {
4555 : if (execv(postgres_exec_path, argv) < 0)
4556 : {
4557 : ereport(LOG,
4558 : (errmsg("could not execute server process \"%s\": %m",
4559 : postgres_exec_path)));
4560 : /* We're already in the child process here, can't return */
4561 : exit(1);
4562 : }
4563 : }
4564 :
4565 : return pid; /* Parent returns pid, or -1 on fork failure */
4566 : }
4567 : #else /* WIN32 */
4568 :
4569 : /*
4570 : * internal_forkexec win32 implementation
4571 : *
4572 : * - starts backend using CreateProcess(), in suspended state
4573 : * - writes out backend variables to the parameter file
4574 : * - during this, duplicates handles and sockets required for
4575 : * inheritance into the new process
4576 : * - resumes execution of the new process once the backend parameter
4577 : * file is complete.
4578 : */
4579 : static pid_t
4580 : internal_forkexec(int argc, char *argv[], Port *port, BackgroundWorker *worker)
4581 : {
4582 : int retry_count = 0;
4583 : STARTUPINFO si;
4584 : PROCESS_INFORMATION pi;
4585 : int i;
4586 : int j;
4587 : char cmdLine[MAXPGPATH * 2];
4588 : HANDLE paramHandle;
4589 : BackendParameters *param;
4590 : SECURITY_ATTRIBUTES sa;
4591 : char paramHandleStr[32];
4592 : win32_deadchild_waitinfo *childinfo;
4593 :
4594 : /* Make sure caller set up argv properly */
4595 : Assert(argc >= 3);
4596 : Assert(argv[argc] == NULL);
4597 : Assert(strncmp(argv[1], "--fork", 6) == 0);
4598 : Assert(argv[2] == NULL);
4599 :
4600 : /* Resume here if we need to retry */
4601 : retry:
4602 :
4603 : /* Set up shared memory for parameter passing */
4604 : ZeroMemory(&sa, sizeof(sa));
4605 : sa.nLength = sizeof(sa);
4606 : sa.bInheritHandle = TRUE;
4607 : paramHandle = CreateFileMapping(INVALID_HANDLE_VALUE,
4608 : &sa,
4609 : PAGE_READWRITE,
4610 : 0,
4611 : sizeof(BackendParameters),
4612 : NULL);
4613 : if (paramHandle == INVALID_HANDLE_VALUE)
4614 : {
4615 : ereport(LOG,
4616 : (errmsg("could not create backend parameter file mapping: error code %lu",
4617 : GetLastError())));
4618 : return -1;
4619 : }
4620 :
4621 : param = MapViewOfFile(paramHandle, FILE_MAP_WRITE, 0, 0, sizeof(BackendParameters));
4622 : if (!param)
4623 : {
4624 : ereport(LOG,
4625 : (errmsg("could not map backend parameter memory: error code %lu",
4626 : GetLastError())));
4627 : CloseHandle(paramHandle);
4628 : return -1;
4629 : }
4630 :
4631 : /* Insert temp file name after --fork argument */
4632 : #ifdef _WIN64
4633 : sprintf(paramHandleStr, "%llu", (LONG_PTR) paramHandle);
4634 : #else
4635 : sprintf(paramHandleStr, "%lu", (DWORD) paramHandle);
4636 : #endif
4637 : argv[2] = paramHandleStr;
4638 :
4639 : /* Format the cmd line */
4640 : cmdLine[sizeof(cmdLine) - 1] = '\0';
4641 : cmdLine[sizeof(cmdLine) - 2] = '\0';
4642 : snprintf(cmdLine, sizeof(cmdLine) - 1, "\"%s\"", postgres_exec_path);
4643 : i = 0;
4644 : while (argv[++i] != NULL)
4645 : {
4646 : j = strlen(cmdLine);
4647 : snprintf(cmdLine + j, sizeof(cmdLine) - 1 - j, " \"%s\"", argv[i]);
4648 : }
4649 : if (cmdLine[sizeof(cmdLine) - 2] != '\0')
4650 : {
4651 : ereport(LOG,
4652 : (errmsg("subprocess command line too long")));
4653 : UnmapViewOfFile(param);
4654 : CloseHandle(paramHandle);
4655 : return -1;
4656 : }
4657 :
4658 : memset(&pi, 0, sizeof(pi));
4659 : memset(&si, 0, sizeof(si));
4660 : si.cb = sizeof(si);
4661 :
4662 : /*
4663 : * Create the subprocess in a suspended state. This will be resumed later,
4664 : * once we have written out the parameter file.
4665 : */
4666 : if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, CREATE_SUSPENDED,
4667 : NULL, NULL, &si, &pi))
4668 : {
4669 : ereport(LOG,
4670 : (errmsg("CreateProcess() call failed: %m (error code %lu)",
4671 : GetLastError())));
4672 : UnmapViewOfFile(param);
4673 : CloseHandle(paramHandle);
4674 : return -1;
4675 : }
4676 :
4677 : if (!save_backend_variables(param, port, worker, pi.hProcess, pi.dwProcessId))
4678 : {
4679 : /*
4680 : * log made by save_backend_variables, but we have to clean up the
4681 : * mess with the half-started process
4682 : */
4683 : if (!TerminateProcess(pi.hProcess, 255))
4684 : ereport(LOG,
4685 : (errmsg_internal("could not terminate unstarted process: error code %lu",
4686 : GetLastError())));
4687 : CloseHandle(pi.hProcess);
4688 : CloseHandle(pi.hThread);
4689 : UnmapViewOfFile(param);
4690 : CloseHandle(paramHandle);
4691 : return -1; /* log made by save_backend_variables */
4692 : }
4693 :
4694 : /* Drop the parameter shared memory that is now inherited to the backend */
4695 : if (!UnmapViewOfFile(param))
4696 : ereport(LOG,
4697 : (errmsg("could not unmap view of backend parameter file: error code %lu",
4698 : GetLastError())));
4699 : if (!CloseHandle(paramHandle))
4700 : ereport(LOG,
4701 : (errmsg("could not close handle to backend parameter file: error code %lu",
4702 : GetLastError())));
4703 :
4704 : /*
4705 : * Reserve the memory region used by our main shared memory segment before
4706 : * we resume the child process. Normally this should succeed, but if ASLR
4707 : * is active then it might sometimes fail due to the stack or heap having
4708 : * gotten mapped into that range. In that case, just terminate the
4709 : * process and retry.
4710 : */
4711 : if (!pgwin32_ReserveSharedMemoryRegion(pi.hProcess))
4712 : {
4713 : /* pgwin32_ReserveSharedMemoryRegion already made a log entry */
4714 : if (!TerminateProcess(pi.hProcess, 255))
4715 : ereport(LOG,
4716 : (errmsg_internal("could not terminate process that failed to reserve memory: error code %lu",
4717 : GetLastError())));
4718 : CloseHandle(pi.hProcess);
4719 : CloseHandle(pi.hThread);
4720 : if (++retry_count < 100)
4721 : goto retry;
4722 : ereport(LOG,
4723 : (errmsg("giving up after too many tries to reserve shared memory"),
4724 : errhint("This might be caused by ASLR or antivirus software.")));
4725 : return -1;
4726 : }
4727 :
4728 : /*
4729 : * Now that the backend variables are written out, we start the child
4730 : * thread so it can start initializing while we set up the rest of the
4731 : * parent state.
4732 : */
4733 : if (ResumeThread(pi.hThread) == -1)
4734 : {
4735 : if (!TerminateProcess(pi.hProcess, 255))
4736 : {
4737 : ereport(LOG,
4738 : (errmsg_internal("could not terminate unstartable process: error code %lu",
4739 : GetLastError())));
4740 : CloseHandle(pi.hProcess);
4741 : CloseHandle(pi.hThread);
4742 : return -1;
4743 : }
4744 : CloseHandle(pi.hProcess);
4745 : CloseHandle(pi.hThread);
4746 : ereport(LOG,
4747 : (errmsg_internal("could not resume thread of unstarted process: error code %lu",
4748 : GetLastError())));
4749 : return -1;
4750 : }
4751 :
4752 : /*
4753 : * Queue a waiter to signal when this child dies. The wait will be handled
4754 : * automatically by an operating system thread pool. The memory will be
4755 : * freed by a later call to waitpid().
4756 : */
4757 : childinfo = palloc(sizeof(win32_deadchild_waitinfo));
4758 : childinfo->procHandle = pi.hProcess;
4759 : childinfo->procId = pi.dwProcessId;
4760 :
4761 : if (!RegisterWaitForSingleObject(&childinfo->waitHandle,
4762 : pi.hProcess,
4763 : pgwin32_deadchild_callback,
4764 : childinfo,
4765 : INFINITE,
4766 : WT_EXECUTEONLYONCE | WT_EXECUTEINWAITTHREAD))
4767 : ereport(FATAL,
4768 : (errmsg_internal("could not register process for wait: error code %lu",
4769 : GetLastError())));
4770 :
4771 : /* Don't close pi.hProcess here - waitpid() needs access to it */
4772 :
4773 : CloseHandle(pi.hThread);
4774 :
4775 : return pi.dwProcessId;
4776 : }
4777 : #endif /* WIN32 */
4778 :
4779 :
4780 : /*
4781 : * SubPostmasterMain -- Get the fork/exec'd process into a state equivalent
4782 : * to what it would be if we'd simply forked on Unix, and then
4783 : * dispatch to the appropriate place.
4784 : *
4785 : * The first two command line arguments are expected to be "--forkFOO"
4786 : * (where FOO indicates which postmaster child we are to become), and
4787 : * the name of a variables file that we can read to load data that would
4788 : * have been inherited by fork() on Unix. Remaining arguments go to the
4789 : * subprocess FooMain() routine.
4790 : */
4791 : void
4792 : SubPostmasterMain(int argc, char *argv[])
4793 : {
4794 : Port *port;
4795 : BackgroundWorker *worker;
4796 :
4797 : /* In EXEC_BACKEND case we will not have inherited these settings */
4798 : IsPostmasterEnvironment = true;
4799 : whereToSendOutput = DestNone;
4800 :
4801 : /* Setup essential subsystems (to ensure elog() behaves sanely) */
4802 : InitializeGUCOptions();
4803 :
4804 : /* Check we got appropriate args */
4805 : if (argc < 3)
4806 : elog(FATAL, "invalid subpostmaster invocation");
4807 :
4808 : /* Read in the variables file */
4809 : read_backend_variables(argv[2], &port, &worker);
4810 :
4811 : /* Close the postmaster's sockets (as soon as we know them) */
4812 : ClosePostmasterPorts(strcmp(argv[1], "--forklog") == 0);
4813 :
4814 : /* Setup as postmaster child */
4815 : InitPostmasterChild();
4816 :
4817 : /*
4818 : * If appropriate, physically re-attach to shared memory segment. We want
4819 : * to do this before going any further to ensure that we can attach at the
4820 : * same address the postmaster used. On the other hand, if we choose not
4821 : * to re-attach, we may have other cleanup to do.
4822 : *
4823 : * If testing EXEC_BACKEND on Linux, you should run this as root before
4824 : * starting the postmaster:
4825 : *
4826 : * sysctl -w kernel.randomize_va_space=0
4827 : *
4828 : * This prevents using randomized stack and code addresses that cause the
4829 : * child process's memory map to be different from the parent's, making it
4830 : * sometimes impossible to attach to shared memory at the desired address.
4831 : * Return the setting to its old value (usually '1' or '2') when finished.
4832 : */
4833 : if (strcmp(argv[1], "--forkbackend") == 0 ||
4834 : strcmp(argv[1], "--forkavlauncher") == 0 ||
4835 : strcmp(argv[1], "--forkavworker") == 0 ||
4836 : strcmp(argv[1], "--forkaux") == 0 ||
4837 : strcmp(argv[1], "--forkbgworker") == 0)
4838 : PGSharedMemoryReAttach();
4839 : else
4840 : PGSharedMemoryNoReAttach();
4841 :
4842 : /* Read in remaining GUC variables */
4843 : read_nondefault_variables();
4844 :
4845 : /*
4846 : * Check that the data directory looks valid, which will also check the
4847 : * privileges on the data directory and update our umask and file/group
4848 : * variables for creating files later. Note: this should really be done
4849 : * before we create any files or directories.
4850 : */
4851 : checkDataDir();
4852 :
4853 : /*
4854 : * (re-)read control file, as it contains config. The postmaster will
4855 : * already have read this, but this process doesn't know about that.
4856 : */
4857 : LocalProcessControlFile(false);
4858 :
4859 : /*
4860 : * Reload any libraries that were preloaded by the postmaster. Since we
4861 : * exec'd this process, those libraries didn't come along with us; but we
4862 : * should load them into all child processes to be consistent with the
4863 : * non-EXEC_BACKEND behavior.
4864 : */
4865 : process_shared_preload_libraries();
4866 :
4867 : /* Run backend or appropriate child */
4868 : if (strcmp(argv[1], "--forkbackend") == 0)
4869 : {
4870 : Assert(argc == 3); /* shouldn't be any more args */
4871 :
4872 : /*
4873 : * Need to reinitialize the SSL library in the backend, since the
4874 : * context structures contain function pointers and cannot be passed
4875 : * through the parameter file.
4876 : *
4877 : * If for some reason reload fails (maybe the user installed broken
4878 : * key files), soldier on without SSL; that's better than all
4879 : * connections becoming impossible.
4880 : *
4881 : * XXX should we do this in all child processes? For the moment it's
4882 : * enough to do it in backend children.
4883 : */
4884 : #ifdef USE_SSL
4885 : if (EnableSSL)
4886 : {
4887 : if (secure_initialize(false) == 0)
4888 : LoadedSSL = true;
4889 : else
4890 : ereport(LOG,
4891 : (errmsg("SSL configuration could not be loaded in child process")));
4892 : }
4893 : #endif
4894 :
4895 : /*
4896 : * Perform additional initialization and collect startup packet.
4897 : *
4898 : * We want to do this before InitProcess() for a couple of reasons: 1.
4899 : * so that we aren't eating up a PGPROC slot while waiting on the
4900 : * client. 2. so that if InitProcess() fails due to being out of
4901 : * PGPROC slots, we have already initialized libpq and are able to
4902 : * report the error to the client.
4903 : */
4904 : BackendInitialize(port);
4905 :
4906 : /* Restore basic shared memory pointers */
4907 : InitShmemAccess(UsedShmemSegAddr);
4908 :
4909 : /* And run the backend */
4910 : BackendRun(port); /* does not return */
4911 : }
4912 : if (strcmp(argv[1], "--forkaux") == 0)
4913 : {
4914 : AuxProcType auxtype;
4915 :
4916 : Assert(argc == 4);
4917 :
4918 : /* Restore basic shared memory pointers */
4919 : InitShmemAccess(UsedShmemSegAddr);
4920 :
4921 : auxtype = atoi(argv[3]);
4922 : AuxiliaryProcessMain(auxtype); /* does not return */
4923 : }
4924 : if (strcmp(argv[1], "--forkavlauncher") == 0)
4925 : {
4926 : /* Restore basic shared memory pointers */
4927 : InitShmemAccess(UsedShmemSegAddr);
4928 :
4929 : AutoVacLauncherMain(argc - 2, argv + 2); /* does not return */
4930 : }
4931 : if (strcmp(argv[1], "--forkavworker") == 0)
4932 : {
4933 : /* Restore basic shared memory pointers */
4934 : InitShmemAccess(UsedShmemSegAddr);
4935 :
4936 : AutoVacWorkerMain(argc - 2, argv + 2); /* does not return */
4937 : }
4938 : if (strcmp(argv[1], "--forkbgworker") == 0)
4939 : {
4940 : /* do this as early as possible; in particular, before InitProcess() */
4941 : IsBackgroundWorker = true;
4942 :
4943 : /* Restore basic shared memory pointers */
4944 : InitShmemAccess(UsedShmemSegAddr);
4945 :
4946 : MyBgworkerEntry = worker;
4947 : BackgroundWorkerMain();
4948 : }
4949 : if (strcmp(argv[1], "--forklog") == 0)
4950 : {
4951 : /* Do not want to attach to shared memory */
4952 :
4953 : SysLoggerMain(argc, argv); /* does not return */
4954 : }
4955 :
4956 : abort(); /* shouldn't get here */
4957 : }
4958 : #endif /* EXEC_BACKEND */
4959 :
4960 :
/*
 * ExitPostmaster -- common exit path for the postmaster
 *
 * Never call exit() directly --- always come through here, so that the
 * final checks below are made and proc_exit() is used.
 */
static void
ExitPostmaster(int status)
{
#ifdef HAVE_PTHREAD_IS_THREADED_NP

	/*
	 * No cause is known for a postmaster turning multithreaded after
	 * startup, but recheck anyway in case of unknown ones.  LOG level is
	 * used because an unclean shutdown at this point would usually look
	 * much like a clean one.
	 */
	if (pthread_is_threaded_np() != 0)
		ereport(LOG,
				(errcode(ERRCODE_INTERNAL_ERROR),
				 errmsg_internal("postmaster became multithreaded"),
				 errdetail("Please report this to <%s>.", PACKAGE_BUGREPORT)));
#endif

	/*
	 * Historical note: this is where shared memory cleanup and killing of
	 * all backends was once meant to go.  Whether backends ought to be
	 * killed when the postmaster dies was left as an open question; none of
	 * that is done in this function.
	 */

	proc_exit(status);
}
4995 :
4996 : /*
4997 : * Handle pmsignal conditions representing requests from backends,
4998 : * and check for promote and logrotate requests from pg_ctl.
4999 : */
5000 : static void
5001 4756 : process_pm_pmsignal(void)
5002 : {
5003 4756 : pending_pm_pmsignal = false;
5004 :
5005 4756 : ereport(DEBUG2,
5006 : (errmsg_internal("postmaster received pmsignal signal")));
5007 :
5008 : /*
5009 : * RECOVERY_STARTED and BEGIN_HOT_STANDBY signals are ignored in
5010 : * unexpected states. If the startup process quickly starts up, completes
5011 : * recovery, exits, we might process the death of the startup process
5012 : * first. We don't want to go back to recovery in that case.
5013 : */
5014 4756 : if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_STARTED) &&
5015 410 : pmState == PM_STARTUP && Shutdown == NoShutdown)
5016 : {
5017 : /* WAL redo has started. We're out of reinitialization. */
5018 410 : FatalError = false;
5019 410 : AbortStartTime = 0;
5020 :
5021 : /*
5022 : * Start the archiver if we're responsible for (re-)archiving received
5023 : * files.
5024 : */
5025 : Assert(PgArchPID == 0);
5026 410 : if (XLogArchivingAlways())
5027 6 : PgArchPID = StartArchiver();
5028 :
5029 : /*
5030 : * If we aren't planning to enter hot standby mode later, treat
5031 : * RECOVERY_STARTED as meaning we're out of startup, and report status
5032 : * accordingly.
5033 : */
5034 410 : if (!EnableHotStandby)
5035 : {
5036 4 : AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STANDBY);
5037 : #ifdef USE_SYSTEMD
5038 : sd_notify(0, "READY=1");
5039 : #endif
5040 : }
5041 :
5042 410 : pmState = PM_RECOVERY;
5043 : }
5044 :
5045 4756 : if (CheckPostmasterSignal(PMSIGNAL_BEGIN_HOT_STANDBY) &&
5046 230 : pmState == PM_RECOVERY && Shutdown == NoShutdown)
5047 : {
5048 230 : ereport(LOG,
5049 : (errmsg("database system is ready to accept read-only connections")));
5050 :
5051 : /* Report status */
5052 230 : AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_READY);
5053 : #ifdef USE_SYSTEMD
5054 : sd_notify(0, "READY=1");
5055 : #endif
5056 :
5057 230 : pmState = PM_HOT_STANDBY;
5058 230 : connsAllowed = true;
5059 :
5060 : /* Some workers may be scheduled to start now */
5061 230 : StartWorkerNeeded = true;
5062 : }
5063 :
5064 : /* Process background worker state changes. */
5065 4756 : if (CheckPostmasterSignal(PMSIGNAL_BACKGROUND_WORKER_CHANGE))
5066 : {
5067 : /* Accept new worker requests only if not stopping. */
5068 1900 : BackgroundWorkerStateChange(pmState < PM_STOP_BACKENDS);
5069 1900 : StartWorkerNeeded = true;
5070 : }
5071 :
5072 4756 : if (StartWorkerNeeded || HaveCrashedWorker)
5073 2134 : maybe_start_bgworkers();
5074 :
5075 : /* Tell syslogger to rotate logfile if requested */
5076 4756 : if (SysLoggerPID != 0)
5077 : {
5078 2 : if (CheckLogrotateSignal())
5079 : {
5080 2 : signal_child(SysLoggerPID, SIGUSR1);
5081 2 : RemoveLogrotateSignalFiles();
5082 : }
5083 0 : else if (CheckPostmasterSignal(PMSIGNAL_ROTATE_LOGFILE))
5084 : {
5085 0 : signal_child(SysLoggerPID, SIGUSR1);
5086 : }
5087 : }
5088 :
5089 4756 : if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER) &&
5090 0 : Shutdown <= SmartShutdown && pmState < PM_STOP_BACKENDS)
5091 : {
5092 : /*
5093 : * Start one iteration of the autovacuum daemon, even if autovacuuming
5094 : * is nominally not enabled. This is so we can have an active defense
5095 : * against transaction ID wraparound. We set a flag for the main loop
5096 : * to do it rather than trying to do it here --- this is because the
5097 : * autovac process itself may send the signal, and we want to handle
5098 : * that by launching another iteration as soon as the current one
5099 : * completes.
5100 : */
5101 0 : start_autovac_launcher = true;
5102 : }
5103 :
5104 4756 : if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER) &&
5105 22 : Shutdown <= SmartShutdown && pmState < PM_STOP_BACKENDS)
5106 : {
5107 : /* The autovacuum launcher wants us to start a worker process. */
5108 22 : StartAutovacuumWorker();
5109 : }
5110 :
5111 4756 : if (CheckPostmasterSignal(PMSIGNAL_START_WALRECEIVER))
5112 : {
5113 : /* Startup Process wants us to start the walreceiver process. */
5114 : /* Start immediately if possible, else remember request for later. */
5115 374 : WalReceiverRequested = true;
5116 374 : MaybeStartWalReceiver();
5117 : }
5118 :
5119 : /*
5120 : * Try to advance postmaster's state machine, if a child requests it.
5121 : *
5122 : * Be careful about the order of this action relative to this function's
5123 : * other actions. Generally, this should be after other actions, in case
5124 : * they have effects PostmasterStateMachine would need to know about.
5125 : * However, we should do it before the CheckPromoteSignal step, which
5126 : * cannot have any (immediate) effect on the state machine, but does
5127 : * depend on what state we're in now.
5128 : */
5129 4756 : if (CheckPostmasterSignal(PMSIGNAL_ADVANCE_STATE_MACHINE))
5130 : {
5131 1802 : PostmasterStateMachine();
5132 : }
5133 :
5134 4756 : if (StartupPID != 0 &&
5135 1142 : (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
5136 1912 : pmState == PM_HOT_STANDBY) &&
5137 1142 : CheckPromoteSignal())
5138 : {
5139 : /*
5140 : * Tell startup process to finish recovery.
5141 : *
5142 : * Leave the promote signal file in place and let the Startup process
5143 : * do the unlink.
5144 : */
5145 74 : signal_child(StartupPID, SIGUSR2);
5146 : }
5147 4756 : }
5148 :
/*
 * process_startup_packet_die -- SIGTERM handler used while the startup
 * packet is being processed.
 *
 * Calling proc_exit() from a signal handler would be quite unsafe.  Shared
 * memory has not been touched yet at this stage, though, so it is fine to
 * pull the plug with _exit(), skipping any atexit handlers.
 *
 * Sending or logging a message that explains the disconnect may look
 * tempting, but would be unsafe as well.  Nor is it desirable to give hints
 * about the database's state to a client that has not completed
 * authentication, or has not even sent a startup packet.
 */
static void
process_startup_packet_die(SIGNAL_ARGS)
{
	/* immediate exit, no cleanup */
	_exit(1);
}
5167 :
/*
 * dummy_handler -- signal handler that deliberately does nothing
 *
 * Installed for signals the postmaster itself has no use for but backends
 * do.  Were such signals set to SIG_IGN in the postmaster, a freshly
 * started backend could lose one that arrives before it has reconfigured
 * its own signal processing.  (See notes in tcop/postgres.c.)
 */
static void
dummy_handler(SIGNAL_ARGS)
{
	/* intentionally empty */
}
5181 :
/*
 * StartupPacketTimeoutHandler -- called when processing of the startup
 * packet takes too long.
 *
 * Exits through _exit(1), exactly as process_startup_packet_die() does.
 */
static void
StartupPacketTimeoutHandler(void)
{
	/* immediate exit, no cleanup */
	_exit(1);
}
5191 :
5192 :
5193 : /*
5194 : * Generate a random cancel key.
5195 : */
5196 : static bool
5197 24384 : RandomCancelKey(int32 *cancel_key)
5198 : {
5199 24384 : return pg_strong_random(cancel_key, sizeof(int32));
5200 : }
5201 :
5202 : /*
5203 : * Count up number of child processes of specified types (dead_end children
5204 : * are always excluded).
5205 : */
5206 : static int
5207 31374 : CountChildren(int target)
5208 : {
5209 : dlist_iter iter;
5210 31374 : int cnt = 0;
5211 :
5212 102174 : dlist_foreach(iter, &BackendList)
5213 : {
5214 70800 : Backend *bp = dlist_container(Backend, elem, iter.cur);
5215 :
5216 70800 : if (bp->dead_end)
5217 176 : continue;
5218 :
5219 : /*
5220 : * Since target == BACKEND_TYPE_ALL is the most common case, we test
5221 : * it first and avoid touching shared memory for every child.
5222 : */
5223 70624 : if (target != BACKEND_TYPE_ALL)
5224 : {
5225 : /*
5226 : * Assign bkend_type for any recently announced WAL Sender
5227 : * processes.
5228 : */
5229 6932 : if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
5230 1726 : IsPostmasterChildWalSender(bp->child_slot))
5231 82 : bp->bkend_type = BACKEND_TYPE_WALSND;
5232 :
5233 5206 : if (!(target & bp->bkend_type))
5234 522 : continue;
5235 : }
5236 :
5237 70102 : cnt++;
5238 : }
5239 31374 : return cnt;
5240 : }
5241 :
5242 :
5243 : /*
5244 : * StartChildProcess -- start an auxiliary process for the postmaster
5245 : *
5246 : * "type" determines what kind of child will be started. All child types
5247 : * initially go to AuxiliaryProcessMain, which will handle common setup.
5248 : *
5249 : * Return value of StartChildProcess is subprocess' PID, or 0 if failed
5250 : * to start subprocess.
5251 : */
5252 : static pid_t
5253 5432 : StartChildProcess(AuxProcType type)
5254 : {
5255 : pid_t pid;
5256 :
5257 : #ifdef EXEC_BACKEND
5258 : {
5259 : char *av[10];
5260 : int ac = 0;
5261 : char typebuf[32];
5262 :
5263 : /*
5264 : * Set up command-line arguments for subprocess
5265 : */
5266 : av[ac++] = "postgres";
5267 : av[ac++] = "--forkaux";
5268 : av[ac++] = NULL; /* filled in by postmaster_forkexec */
5269 :
5270 : snprintf(typebuf, sizeof(typebuf), "%d", type);
5271 : av[ac++] = typebuf;
5272 :
5273 : av[ac] = NULL;
5274 : Assert(ac < lengthof(av));
5275 :
5276 : pid = postmaster_forkexec(ac, av);
5277 : }
5278 : #else /* !EXEC_BACKEND */
5279 5432 : pid = fork_process();
5280 :
5281 9146 : if (pid == 0) /* child */
5282 : {
5283 3714 : InitPostmasterChild();
5284 :
5285 : /* Close the postmaster's sockets */
5286 3714 : ClosePostmasterPorts(false);
5287 :
5288 : /* Release postmaster's working memory context */
5289 3714 : MemoryContextSwitchTo(TopMemoryContext);
5290 3714 : MemoryContextDelete(PostmasterContext);
5291 3714 : PostmasterContext = NULL;
5292 :
5293 3714 : AuxiliaryProcessMain(type); /* does not return */
5294 : }
5295 : #endif /* EXEC_BACKEND */
5296 :
5297 5432 : if (pid < 0)
5298 : {
5299 : /* in parent, fork failed */
5300 0 : int save_errno = errno;
5301 :
5302 0 : errno = save_errno;
5303 0 : switch (type)
5304 : {
5305 0 : case StartupProcess:
5306 0 : ereport(LOG,
5307 : (errmsg("could not fork startup process: %m")));
5308 0 : break;
5309 0 : case ArchiverProcess:
5310 0 : ereport(LOG,
5311 : (errmsg("could not fork archiver process: %m")));
5312 0 : break;
5313 0 : case BgWriterProcess:
5314 0 : ereport(LOG,
5315 : (errmsg("could not fork background writer process: %m")));
5316 0 : break;
5317 0 : case CheckpointerProcess:
5318 0 : ereport(LOG,
5319 : (errmsg("could not fork checkpointer process: %m")));
5320 0 : break;
5321 0 : case WalWriterProcess:
5322 0 : ereport(LOG,
5323 : (errmsg("could not fork WAL writer process: %m")));
5324 0 : break;
5325 0 : case WalReceiverProcess:
5326 0 : ereport(LOG,
5327 : (errmsg("could not fork WAL receiver process: %m")));
5328 0 : break;
5329 0 : default:
5330 0 : ereport(LOG,
5331 : (errmsg("could not fork process: %m")));
5332 0 : break;
5333 : }
5334 :
5335 : /*
5336 : * fork failure is fatal during startup, but there's no need to choke
5337 : * immediately if starting other child types fails.
5338 : */
5339 0 : if (type == StartupProcess)
5340 0 : ExitPostmaster(1);
5341 0 : return 0;
5342 : }
5343 :
5344 : /*
5345 : * in parent, successful fork
5346 : */
5347 5432 : return pid;
5348 : }
5349 :
5350 : /*
5351 : * StartAutovacuumWorker
5352 : * Start an autovac worker process.
5353 : *
5354 : * This function is here because it enters the resulting PID into the
5355 : * postmaster's private backends list.
5356 : *
5357 : * NB -- this code very roughly matches BackendStartup.
5358 : */
5359 : static void
5360 22 : StartAutovacuumWorker(void)
5361 : {
5362 : Backend *bn;
5363 :
5364 : /*
5365 : * If not in condition to run a process, don't try, but handle it like a
5366 : * fork failure. This does not normally happen, since the signal is only
5367 : * supposed to be sent by autovacuum launcher when it's OK to do it, but
5368 : * we have to check to avoid race-condition problems during DB state
5369 : * changes.
5370 : */
5371 22 : if (canAcceptConnections(BACKEND_TYPE_AUTOVAC) == CAC_OK)
5372 : {
5373 : /*
5374 : * Compute the cancel key that will be assigned to this session. We
5375 : * probably don't need cancel keys for autovac workers, but we'd
5376 : * better have something random in the field to prevent unfriendly
5377 : * people from sending cancels to them.
5378 : */
5379 22 : if (!RandomCancelKey(&MyCancelKey))
5380 : {
5381 0 : ereport(LOG,
5382 : (errcode(ERRCODE_INTERNAL_ERROR),
5383 : errmsg("could not generate random cancel key")));
5384 0 : return;
5385 : }
5386 :
5387 22 : bn = (Backend *) palloc_extended(sizeof(Backend), MCXT_ALLOC_NO_OOM);
5388 22 : if (bn)
5389 : {
5390 22 : bn->cancel_key = MyCancelKey;
5391 :
5392 : /* Autovac workers are not dead_end and need a child slot */
5393 22 : bn->dead_end = false;
5394 22 : bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
5395 22 : bn->bgworker_notify = false;
5396 :
5397 22 : bn->pid = StartAutoVacWorker();
5398 22 : if (bn->pid > 0)
5399 : {
5400 22 : bn->bkend_type = BACKEND_TYPE_AUTOVAC;
5401 22 : dlist_push_head(&BackendList, &bn->elem);
5402 : #ifdef EXEC_BACKEND
5403 : ShmemBackendArrayAdd(bn);
5404 : #endif
5405 : /* all OK */
5406 22 : return;
5407 : }
5408 :
5409 : /*
5410 : * fork failed, fall through to report -- actual error message was
5411 : * logged by StartAutoVacWorker
5412 : */
5413 0 : (void) ReleasePostmasterChildSlot(bn->child_slot);
5414 0 : pfree(bn);
5415 : }
5416 : else
5417 0 : ereport(LOG,
5418 : (errcode(ERRCODE_OUT_OF_MEMORY),
5419 : errmsg("out of memory")));
5420 : }
5421 :
5422 : /*
5423 : * Report the failure to the launcher, if it's running. (If it's not, we
5424 : * might not even be connected to shared memory, so don't try to call
5425 : * AutoVacWorkerFailed.) Note that we also need to signal it so that it
5426 : * responds to the condition, but we don't do that here, instead waiting
5427 : * for ServerLoop to do it. This way we avoid a ping-pong signaling in
5428 : * quick succession between the autovac launcher and postmaster in case
5429 : * things get ugly.
5430 : */
5431 0 : if (AutoVacPID != 0)
5432 : {
5433 0 : AutoVacWorkerFailed();
5434 0 : avlauncher_needs_signal = true;
5435 : }
5436 : }
5437 :
5438 : /*
5439 : * MaybeStartWalReceiver
5440 : * Start the WAL receiver process, if not running and our state allows.
5441 : *
5442 : * Note: if WalReceiverPID is already nonzero, it might seem that we should
5443 : * clear WalReceiverRequested. However, there's a race condition if the
5444 : * walreceiver terminates and the startup process immediately requests a new
5445 : * one: it's quite possible to get the signal for the request before reaping
5446 : * the dead walreceiver process. Better to risk launching an extra
5447 : * walreceiver than to miss launching one we need. (The walreceiver code
5448 : * has logic to recognize that it should go away if not needed.)
5449 : */
5450 : static void
5451 698 : MaybeStartWalReceiver(void)
5452 : {
5453 698 : if (WalReceiverPID == 0 &&
5454 368 : (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
5455 366 : pmState == PM_HOT_STANDBY) &&
5456 368 : Shutdown <= SmartShutdown)
5457 : {
5458 368 : WalReceiverPID = StartWalReceiver();
5459 368 : if (WalReceiverPID != 0)
5460 368 : WalReceiverRequested = false;
5461 : /* else leave the flag set, so we'll try again later */
5462 : }
5463 698 : }
5464 :
5465 :
5466 : /*
5467 : * Create the opts file
5468 : */
5469 : static bool
5470 1280 : CreateOptsFile(int argc, char *argv[], char *fullprogname)
5471 : {
5472 : FILE *fp;
5473 : int i;
5474 :
5475 : #define OPTS_FILE "postmaster.opts"
5476 :
5477 1280 : if ((fp = fopen(OPTS_FILE, "w")) == NULL)
5478 : {
5479 0 : ereport(LOG,
5480 : (errcode_for_file_access(),
5481 : errmsg("could not create file \"%s\": %m", OPTS_FILE)));
5482 0 : return false;
5483 : }
5484 :
5485 1280 : fprintf(fp, "%s", fullprogname);
5486 6220 : for (i = 1; i < argc; i++)
5487 4940 : fprintf(fp, " \"%s\"", argv[i]);
5488 1280 : fputs("\n", fp);
5489 :
5490 1280 : if (fclose(fp))
5491 : {
5492 0 : ereport(LOG,
5493 : (errcode_for_file_access(),
5494 : errmsg("could not write file \"%s\": %m", OPTS_FILE)));
5495 0 : return false;
5496 : }
5497 :
5498 1280 : return true;
5499 : }
5500 :
5501 :
5502 : /*
5503 : * MaxLivePostmasterChildren
5504 : *
5505 : * This reports the number of entries needed in per-child-process arrays
5506 : * (the PMChildFlags array, and if EXEC_BACKEND the ShmemBackendArray).
5507 : * These arrays include regular backends, autovac workers, walsenders
5508 : * and background workers, but not special children nor dead_end children.
5509 : * This allows the arrays to have a fixed maximum size, to wit the same
5510 : * too-many-children limit enforced by canAcceptConnections(). The exact value
5511 : * isn't too critical as long as it's more than MaxBackends.
5512 : */
5513 : int
5514 31810 : MaxLivePostmasterChildren(void)
5515 : {
5516 63620 : return 2 * (MaxConnections + autovacuum_max_workers + 1 +
5517 31810 : max_wal_senders + max_worker_processes);
5518 : }
5519 :
5520 : /*
5521 : * Connect background worker to a database.
5522 : */
5523 : void
5524 648 : BackgroundWorkerInitializeConnection(const char *dbname, const char *username, uint32 flags)
5525 : {
5526 648 : BackgroundWorker *worker = MyBgworkerEntry;
5527 648 : bits32 init_flags = 0; /* never honor session_preload_libraries */
5528 :
5529 : /* ignore datallowconn? */
5530 648 : if (flags & BGWORKER_BYPASS_ALLOWCONN)
5531 0 : init_flags |= INIT_PG_OVERRIDE_ALLOW_CONNS;
5532 : /* ignore rolcanlogin? */
5533 648 : if (flags & BGWORKER_BYPASS_ROLELOGINCHECK)
5534 0 : init_flags |= INIT_PG_OVERRIDE_ROLE_LOGIN;
5535 :
5536 : /* XXX is this the right errcode? */
5537 648 : if (!(worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION))
5538 0 : ereport(FATAL,
5539 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5540 : errmsg("database connection requirement not indicated during registration")));
5541 :
5542 648 : InitPostgres(dbname, InvalidOid, /* database to connect to */
5543 : username, InvalidOid, /* role to connect as */
5544 : init_flags,
5545 : NULL); /* no out_dbname */
5546 :
5547 : /* it had better not gotten out of "init" mode yet */
5548 648 : if (!IsInitProcessingMode())
5549 0 : ereport(ERROR,
5550 : (errmsg("invalid processing mode in background worker")));
5551 648 : SetProcessingMode(NormalProcessing);
5552 648 : }
5553 :
5554 : /*
5555 : * Connect background worker to a database using OIDs.
5556 : */
5557 : void
5558 3284 : BackgroundWorkerInitializeConnectionByOid(Oid dboid, Oid useroid, uint32 flags)
5559 : {
5560 3284 : BackgroundWorker *worker = MyBgworkerEntry;
5561 3284 : bits32 init_flags = 0; /* never honor session_preload_libraries */
5562 :
5563 : /* ignore datallowconn? */
5564 3284 : if (flags & BGWORKER_BYPASS_ALLOWCONN)
5565 0 : init_flags |= INIT_PG_OVERRIDE_ALLOW_CONNS;
5566 : /* ignore rolcanlogin? */
5567 3284 : if (flags & BGWORKER_BYPASS_ROLELOGINCHECK)
5568 0 : init_flags |= INIT_PG_OVERRIDE_ROLE_LOGIN;
5569 :
5570 : /* XXX is this the right errcode? */
5571 3284 : if (!(worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION))
5572 0 : ereport(FATAL,
5573 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5574 : errmsg("database connection requirement not indicated during registration")));
5575 :
5576 3284 : InitPostgres(NULL, dboid, /* database to connect to */
5577 : NULL, useroid, /* role to connect as */
5578 : init_flags,
5579 : NULL); /* no out_dbname */
5580 :
5581 : /* it had better not gotten out of "init" mode yet */
5582 3272 : if (!IsInitProcessingMode())
5583 0 : ereport(ERROR,
5584 : (errmsg("invalid processing mode in background worker")));
5585 3272 : SetProcessingMode(NormalProcessing);
5586 3272 : }
5587 :
5588 : /*
5589 : * Block/unblock signals in a background worker
5590 : */
5591 : void
5592 0 : BackgroundWorkerBlockSignals(void)
5593 : {
5594 0 : sigprocmask(SIG_SETMASK, &BlockSig, NULL);
5595 0 : }
5596 :
5597 : void
5598 4086 : BackgroundWorkerUnblockSignals(void)
5599 : {
5600 4086 : sigprocmask(SIG_SETMASK, &UnBlockSig, NULL);
5601 4086 : }
5602 :
#ifdef EXEC_BACKEND
/*
 * bgworker_forkexec
 *		Build the argument vector for a background worker child and hand it,
 *		together with the worker definition, to internal_forkexec().
 *
 * Returns whatever internal_forkexec() returns.
 */
static pid_t
bgworker_forkexec(BackgroundWorker *worker)
{
	/* Third slot is a placeholder that internal_forkexec fills in. */
	char	   *child_argv[10] = {
		"postgres",
		"--forkbgworker",
		NULL,
		NULL					/* terminator */
	};
	int			child_argc = 3;

	Assert(child_argc < lengthof(child_argv));

	return internal_forkexec(child_argc, child_argv, NULL, worker);
}
#endif
5620 :
5621 : /*
5622 : * Start a new bgworker.
5623 : * Starting time conditions must have been checked already.
5624 : *
5625 : * Returns true on success, false on failure.
5626 : * In either case, update the RegisteredBgWorker's state appropriately.
5627 : *
5628 : * This code is heavily based on autovacuum.c, q.v.
5629 : */
5630 : static bool
5631 4436 : do_start_bgworker(RegisteredBgWorker *rw)
5632 : {
5633 : pid_t worker_pid;
5634 :
5635 : Assert(rw->rw_pid == 0);
5636 :
5637 : /*
5638 : * Allocate and assign the Backend element. Note we must do this before
5639 : * forking, so that we can handle failures (out of memory or child-process
5640 : * slots) cleanly.
5641 : *
5642 : * Treat failure as though the worker had crashed. That way, the
5643 : * postmaster will wait a bit before attempting to start it again; if we
5644 : * tried again right away, most likely we'd find ourselves hitting the
5645 : * same resource-exhaustion condition.
5646 : */
5647 4436 : if (!assign_backendlist_entry(rw))
5648 : {
5649 0 : rw->rw_crashed_at = GetCurrentTimestamp();
5650 0 : return false;
5651 : }
5652 :
5653 4436 : ereport(DEBUG1,
5654 : (errmsg_internal("starting background worker process \"%s\"",
5655 : rw->rw_worker.bgw_name)));
5656 :
5657 : #ifdef EXEC_BACKEND
5658 : switch ((worker_pid = bgworker_forkexec(&rw->rw_worker)))
5659 : #else
5660 4436 : switch ((worker_pid = fork_process()))
5661 : #endif
5662 : {
5663 0 : case -1:
5664 : /* in postmaster, fork failed ... */
5665 0 : ereport(LOG,
5666 : (errmsg("could not fork worker process: %m")));
5667 : /* undo what assign_backendlist_entry did */
5668 0 : ReleasePostmasterChildSlot(rw->rw_child_slot);
5669 0 : rw->rw_child_slot = 0;
5670 0 : pfree(rw->rw_backend);
5671 0 : rw->rw_backend = NULL;
5672 : /* mark entry as crashed, so we'll try again later */
5673 0 : rw->rw_crashed_at = GetCurrentTimestamp();
5674 0 : break;
5675 :
5676 : #ifndef EXEC_BACKEND
5677 3950 : case 0:
5678 : /* in postmaster child ... */
5679 3950 : InitPostmasterChild();
5680 :
5681 : /* Close the postmaster's sockets */
5682 3950 : ClosePostmasterPorts(false);
5683 :
5684 : /*
5685 : * Before blowing away PostmasterContext, save this bgworker's
5686 : * data where it can find it.
5687 : */
5688 3950 : MyBgworkerEntry = (BackgroundWorker *)
5689 3950 : MemoryContextAlloc(TopMemoryContext, sizeof(BackgroundWorker));
5690 3950 : memcpy(MyBgworkerEntry, &rw->rw_worker, sizeof(BackgroundWorker));
5691 :
5692 : /* Release postmaster's working memory context */
5693 3950 : MemoryContextSwitchTo(TopMemoryContext);
5694 3950 : MemoryContextDelete(PostmasterContext);
5695 3950 : PostmasterContext = NULL;
5696 :
5697 3950 : BackgroundWorkerMain();
5698 :
5699 : exit(1); /* should not get here */
5700 : break;
5701 : #endif
5702 4432 : default:
5703 : /* in postmaster, fork successful ... */
5704 4432 : rw->rw_pid = worker_pid;
5705 4432 : rw->rw_backend->pid = rw->rw_pid;
5706 4432 : ReportBackgroundWorkerPID(rw);
5707 : /* add new worker to lists of backends */
5708 4432 : dlist_push_head(&BackendList, &rw->rw_backend->elem);
5709 : #ifdef EXEC_BACKEND
5710 : ShmemBackendArrayAdd(rw->rw_backend);
5711 : #endif
5712 4432 : return true;
5713 : }
5714 :
5715 0 : return false;
5716 : }
5717 :
5718 : /*
5719 : * Does the current postmaster state require starting a worker with the
5720 : * specified start_time?
5721 : */
5722 : static bool
5723 5926 : bgworker_should_start_now(BgWorkerStartTime start_time)
5724 : {
5725 5926 : switch (pmState)
5726 : {
5727 0 : case PM_NO_CHILDREN:
5728 : case PM_WAIT_DEAD_END:
5729 : case PM_SHUTDOWN_2:
5730 : case PM_SHUTDOWN:
5731 : case PM_WAIT_BACKENDS:
5732 : case PM_STOP_BACKENDS:
5733 0 : break;
5734 :
5735 4436 : case PM_RUN:
5736 4436 : if (start_time == BgWorkerStart_RecoveryFinished)
5737 1822 : return true;
5738 : /* fall through */
5739 :
5740 : case PM_HOT_STANDBY:
5741 2844 : if (start_time == BgWorkerStart_ConsistentState)
5742 2614 : return true;
5743 : /* fall through */
5744 :
5745 : case PM_RECOVERY:
5746 : case PM_STARTUP:
5747 : case PM_INIT:
5748 1490 : if (start_time == BgWorkerStart_PostmasterStart)
5749 0 : return true;
5750 : /* fall through */
5751 : }
5752 :
5753 1490 : return false;
5754 : }
5755 :
5756 : /*
5757 : * Allocate the Backend struct for a connected background worker, but don't
5758 : * add it to the list of backends just yet.
5759 : *
5760 : * On failure, return false without changing any worker state.
5761 : *
5762 : * Some info from the Backend is copied into the passed rw.
5763 : */
5764 : static bool
5765 4436 : assign_backendlist_entry(RegisteredBgWorker *rw)
5766 : {
5767 : Backend *bn;
5768 :
5769 : /*
5770 : * Check that database state allows another connection. Currently the
5771 : * only possible failure is CAC_TOOMANY, so we just log an error message
5772 : * based on that rather than checking the error code precisely.
5773 : */
5774 4436 : if (canAcceptConnections(BACKEND_TYPE_BGWORKER) != CAC_OK)
5775 : {
5776 0 : ereport(LOG,
5777 : (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
5778 : errmsg("no slot available for new worker process")));
5779 0 : return false;
5780 : }
5781 :
5782 : /*
5783 : * Compute the cancel key that will be assigned to this session. We
5784 : * probably don't need cancel keys for background workers, but we'd better
5785 : * have something random in the field to prevent unfriendly people from
5786 : * sending cancels to them.
5787 : */
5788 4436 : if (!RandomCancelKey(&MyCancelKey))
5789 : {
5790 0 : ereport(LOG,
5791 : (errcode(ERRCODE_INTERNAL_ERROR),
5792 : errmsg("could not generate random cancel key")));
5793 0 : return false;
5794 : }
5795 :
5796 4436 : bn = palloc_extended(sizeof(Backend), MCXT_ALLOC_NO_OOM);
5797 4436 : if (bn == NULL)
5798 : {
5799 0 : ereport(LOG,
5800 : (errcode(ERRCODE_OUT_OF_MEMORY),
5801 : errmsg("out of memory")));
5802 0 : return false;
5803 : }
5804 :
5805 4436 : bn->cancel_key = MyCancelKey;
5806 4436 : bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
5807 4436 : bn->bkend_type = BACKEND_TYPE_BGWORKER;
5808 4436 : bn->dead_end = false;
5809 4436 : bn->bgworker_notify = false;
5810 :
5811 4436 : rw->rw_backend = bn;
5812 4436 : rw->rw_child_slot = bn->child_slot;
5813 :
5814 4436 : return true;
5815 : }
5816 :
5817 : /*
5818 : * If the time is right, start background worker(s).
5819 : *
5820 : * As a side effect, the bgworker control variables are set or reset
5821 : * depending on whether more workers may need to be started.
5822 : *
5823 : * We limit the number of workers started per call, to avoid consuming the
5824 : * postmaster's attention for too long when many such requests are pending.
5825 : * As long as StartWorkerNeeded is true, ServerLoop will not block and will
5826 : * call this function again after dealing with any other issues.
5827 : */
5828 : static void
5829 10252 : maybe_start_bgworkers(void)
5830 : {
5831 : #define MAX_BGWORKERS_TO_LAUNCH 100
5832 10252 : int num_launched = 0;
5833 10252 : TimestampTz now = 0;
5834 : slist_mutable_iter iter;
5835 :
5836 : /*
5837 : * During crash recovery, we have no need to be called until the state
5838 : * transition out of recovery.
5839 : */
5840 10252 : if (FatalError)
5841 : {
5842 8 : StartWorkerNeeded = false;
5843 8 : HaveCrashedWorker = false;
5844 8 : return;
5845 : }
5846 :
5847 : /* Don't need to be called again unless we find a reason for it below */
5848 10244 : StartWorkerNeeded = false;
5849 10244 : HaveCrashedWorker = false;
5850 :
5851 28372 : slist_foreach_modify(iter, &BackgroundWorkerList)
5852 : {
5853 : RegisteredBgWorker *rw;
5854 :
5855 18132 : rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
5856 :
5857 : /* ignore if already running */
5858 18132 : if (rw->rw_pid != 0)
5859 9580 : continue;
5860 :
5861 : /* if marked for death, clean up and remove from list */
5862 8552 : if (rw->rw_terminate)
5863 : {
5864 0 : ForgetBackgroundWorker(&iter);
5865 0 : continue;
5866 : }
5867 :
5868 : /*
5869 : * If this worker has crashed previously, maybe it needs to be
5870 : * restarted (unless on registration it specified it doesn't want to
5871 : * be restarted at all). Check how long ago did a crash last happen.
5872 : * If the last crash is too recent, don't start it right away; let it
5873 : * be restarted once enough time has passed.
5874 : */
5875 8552 : if (rw->rw_crashed_at != 0)
5876 : {
5877 2626 : if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART)
5878 : {
5879 : int notify_pid;
5880 :
5881 18 : notify_pid = rw->rw_worker.bgw_notify_pid;
5882 :
5883 18 : ForgetBackgroundWorker(&iter);
5884 :
5885 : /* Report worker is gone now. */
5886 18 : if (notify_pid != 0)
5887 4 : kill(notify_pid, SIGUSR1);
5888 :
5889 18 : continue;
5890 : }
5891 :
5892 : /* read system time only when needed */
5893 2608 : if (now == 0)
5894 2608 : now = GetCurrentTimestamp();
5895 :
5896 2608 : if (!TimestampDifferenceExceeds(rw->rw_crashed_at, now,
5897 2608 : rw->rw_worker.bgw_restart_time * 1000))
5898 : {
5899 : /* Set flag to remember that we have workers to start later */
5900 2608 : HaveCrashedWorker = true;
5901 2608 : continue;
5902 : }
5903 : }
5904 :
5905 5926 : if (bgworker_should_start_now(rw->rw_worker.bgw_start_time))
5906 : {
5907 : /* reset crash time before trying to start worker */
5908 4436 : rw->rw_crashed_at = 0;
5909 :
5910 : /*
5911 : * Try to start the worker.
5912 : *
5913 : * On failure, give up processing workers for now, but set
5914 : * StartWorkerNeeded so we'll come back here on the next iteration
5915 : * of ServerLoop to try again. (We don't want to wait, because
5916 : * there might be additional ready-to-run workers.) We could set
5917 : * HaveCrashedWorker as well, since this worker is now marked
5918 : * crashed, but there's no need because the next run of this
5919 : * function will do that.
5920 : */
5921 4436 : if (!do_start_bgworker(rw))
5922 : {
5923 0 : StartWorkerNeeded = true;
5924 0 : return;
5925 : }
5926 :
5927 : /*
5928 : * If we've launched as many workers as allowed, quit, but have
5929 : * ServerLoop call us again to look for additional ready-to-run
5930 : * workers. There might not be any, but we'll find out the next
5931 : * time we run.
5932 : */
5933 4432 : if (++num_launched >= MAX_BGWORKERS_TO_LAUNCH)
5934 : {
5935 0 : StartWorkerNeeded = true;
5936 0 : return;
5937 : }
5938 : }
5939 : }
5940 : }
5941 :
5942 : /*
5943 : * When a backend asks to be notified about worker state changes, we
5944 : * set a flag in its backend entry. The background worker machinery needs
5945 : * to know when such backends exit.
5946 : */
5947 : bool
5948 3322 : PostmasterMarkPIDForWorkerNotify(int pid)
5949 : {
5950 : dlist_iter iter;
5951 : Backend *bp;
5952 :
5953 7026 : dlist_foreach(iter, &BackendList)
5954 : {
5955 7026 : bp = dlist_container(Backend, elem, iter.cur);
5956 7026 : if (bp->pid == pid)
5957 : {
5958 3322 : bp->bgworker_notify = true;
5959 3322 : return true;
5960 : }
5961 : }
5962 0 : return false;
5963 : }
5964 :
5965 : #ifdef EXEC_BACKEND
5966 :
5967 : /*
5968 : * The following need to be available to the save/restore_backend_variables
5969 : * functions. They are marked NON_EXEC_STATIC in their home modules.
5970 : */
5971 : extern slock_t *ShmemLock;
5972 : extern slock_t *ProcStructLock;
5973 : extern PGPROC *AuxiliaryProcs;
5974 : extern PMSignalData *PMSignalState;
5975 : extern pg_time_t first_syslogger_file_time;
5976 :
/*
 * Socket hand-off helpers used by save/restore_backend_variables.
 *
 * Without WIN32 they are plain macros: write_inheritable_socket() stores
 * src into *dest, ignores childpid, and always evaluates to true;
 * read_inheritable_socket() copies *src into *dest.
 *
 * With WIN32 they are real functions (defined further down), declared here
 * together with write_duplicated_handle().
 */
5977 : #ifndef WIN32
5978 : #define write_inheritable_socket(dest, src, childpid) ((*(dest) = (src)), true)
5979 : #define read_inheritable_socket(dest, src) (*(dest) = *(src))
5980 : #else
5981 : static bool write_duplicated_handle(HANDLE *dest, HANDLE src, HANDLE child);
5982 : static bool write_inheritable_socket(InheritableSocket *dest, SOCKET src,
5983 : pid_t childPid);
5984 : static void read_inheritable_socket(SOCKET *dest, InheritableSocket *src);
5985 : #endif
5986 :
5987 :
5988 : /* Save critical backend variables into the BackendParameters struct */
5989 : #ifndef WIN32
5990 : static bool
5991 : save_backend_variables(BackendParameters *param, Port *port, BackgroundWorker *worker)
5992 : #else
5993 : static bool
5994 : save_backend_variables(BackendParameters *param, Port *port, BackgroundWorker *worker,
5995 : HANDLE childProcess, pid_t childPid)
5996 : #endif
5997 : {
5998 : if (port)
5999 : {
6000 : memcpy(¶m->port, port, sizeof(Port));
6001 : if (!write_inheritable_socket(¶m->portsocket, port->sock, childPid))
6002 : return false;
6003 : param->has_port = true;
6004 : }
6005 : else
6006 : {
6007 : memset(¶m->port, 0, sizeof(Port));
6008 : param->has_port = false;
6009 : }
6010 :
6011 : if (worker)
6012 : {
6013 : memcpy(¶m->bgworker, worker, sizeof(BackgroundWorker));
6014 : param->has_bgworker = true;
6015 : }
6016 : else
6017 : {
6018 : memset(¶m->bgworker, 0, sizeof(BackgroundWorker));
6019 : param->has_bgworker = false;
6020 : }
6021 :
6022 : strlcpy(param->DataDir, DataDir, MAXPGPATH);
6023 :
6024 : param->MyCancelKey = MyCancelKey;
6025 : param->MyPMChildSlot = MyPMChildSlot;
6026 :
6027 : #ifdef WIN32
6028 : param->ShmemProtectiveRegion = ShmemProtectiveRegion;
6029 : #endif
6030 : param->UsedShmemSegID = UsedShmemSegID;
6031 : param->UsedShmemSegAddr = UsedShmemSegAddr;
6032 :
6033 : param->ShmemLock = ShmemLock;
6034 : param->ShmemBackendArray = ShmemBackendArray;
6035 :
6036 : #ifndef HAVE_SPINLOCKS
6037 : param->SpinlockSemaArray = SpinlockSemaArray;
6038 : #endif
6039 : param->NamedLWLockTrancheRequests = NamedLWLockTrancheRequests;
6040 : param->NamedLWLockTrancheArray = NamedLWLockTrancheArray;
6041 : param->MainLWLockArray = MainLWLockArray;
6042 : param->ProcStructLock = ProcStructLock;
6043 : param->ProcGlobal = ProcGlobal;
6044 : param->AuxiliaryProcs = AuxiliaryProcs;
6045 : param->PreparedXactProcs = PreparedXactProcs;
6046 : param->PMSignalState = PMSignalState;
6047 :
6048 : param->PostmasterPid = PostmasterPid;
6049 : param->PgStartTime = PgStartTime;
6050 : param->PgReloadTime = PgReloadTime;
6051 : param->first_syslogger_file_time = first_syslogger_file_time;
6052 :
6053 : param->redirection_done = redirection_done;
6054 : param->IsBinaryUpgrade = IsBinaryUpgrade;
6055 : param->query_id_enabled = query_id_enabled;
6056 : param->max_safe_fds = max_safe_fds;
6057 :
6058 : param->MaxBackends = MaxBackends;
6059 :
6060 : #ifdef WIN32
6061 : param->PostmasterHandle = PostmasterHandle;
6062 : if (!write_duplicated_handle(¶m->initial_signal_pipe,
6063 : pgwin32_create_signal_listener(childPid),
6064 : childProcess))
6065 : return false;
6066 : #else
6067 : memcpy(¶m->postmaster_alive_fds, &postmaster_alive_fds,
6068 : sizeof(postmaster_alive_fds));
6069 : #endif
6070 :
6071 : memcpy(¶m->syslogPipe, &syslogPipe, sizeof(syslogPipe));
6072 :
6073 : strlcpy(param->my_exec_path, my_exec_path, MAXPGPATH);
6074 :
6075 : strlcpy(param->pkglib_path, pkglib_path, MAXPGPATH);
6076 :
6077 : return true;
6078 : }
6079 :
6080 :
6081 : #ifdef WIN32
6082 : /*
6083 : * Duplicate a handle for usage in a child process, and write the child
6084 : * process instance of the handle to the parameter file.
6085 : */
6086 : static bool
6087 : write_duplicated_handle(HANDLE *dest, HANDLE src, HANDLE childProcess)
6088 : {
6089 : HANDLE hChild = INVALID_HANDLE_VALUE;
6090 :
6091 : if (!DuplicateHandle(GetCurrentProcess(),
6092 : src,
6093 : childProcess,
6094 : &hChild,
6095 : 0,
6096 : TRUE,
6097 : DUPLICATE_CLOSE_SOURCE | DUPLICATE_SAME_ACCESS))
6098 : {
6099 : ereport(LOG,
6100 : (errmsg_internal("could not duplicate handle to be written to backend parameter file: error code %lu",
6101 : GetLastError())));
6102 : return false;
6103 : }
6104 :
6105 : *dest = hChild;
6106 : return true;
6107 : }
6108 :
6109 : /*
6110 : * Duplicate a socket for usage in a child process, and write the resulting
6111 : * structure to the parameter file.
6112 : * This is required because a number of LSPs (Layered Service Providers) very
6113 : * common on Windows (antivirus, firewalls, download managers etc) break
6114 : * straight socket inheritance.
6115 : */
6116 : static bool
6117 : write_inheritable_socket(InheritableSocket *dest, SOCKET src, pid_t childpid)
6118 : {
6119 : dest->origsocket = src;
6120 : if (src != 0 && src != PGINVALID_SOCKET)
6121 : {
6122 : /* Actual socket */
6123 : if (WSADuplicateSocket(src, childpid, &dest->wsainfo) != 0)
6124 : {
6125 : ereport(LOG,
6126 : (errmsg("could not duplicate socket %d for use in backend: error code %d",
6127 : (int) src, WSAGetLastError())));
6128 : return false;
6129 : }
6130 : }
6131 : return true;
6132 : }
6133 :
6134 : /*
6135 : * Read a duplicate socket structure back, and get the socket descriptor.
6136 : */
6137 : static void
6138 : read_inheritable_socket(SOCKET *dest, InheritableSocket *src)
6139 : {
6140 : SOCKET s;
6141 :
6142 : if (src->origsocket == PGINVALID_SOCKET || src->origsocket == 0)
6143 : {
6144 : /* Not a real socket! */
6145 : *dest = src->origsocket;
6146 : }
6147 : else
6148 : {
6149 : /* Actual socket, so create from structure */
6150 : s = WSASocket(FROM_PROTOCOL_INFO,
6151 : FROM_PROTOCOL_INFO,
6152 : FROM_PROTOCOL_INFO,
6153 : &src->wsainfo,
6154 : 0,
6155 : 0);
6156 : if (s == INVALID_SOCKET)
6157 : {
6158 : write_stderr("could not create inherited socket: error code %d\n",
6159 : WSAGetLastError());
6160 : exit(1);
6161 : }
6162 : *dest = s;
6163 :
6164 : /*
6165 : * To make sure we don't get two references to the same socket, close
6166 : * the original one. (This would happen when inheritance actually
6167 : * works..
6168 : */
6169 : closesocket(src->origsocket);
6170 : }
6171 : }
6172 : #endif
6173 :
6174 : static void
6175 : read_backend_variables(char *id, Port **port, BackgroundWorker **worker)
6176 : {
6177 : BackendParameters param;
6178 :
6179 : #ifndef WIN32
6180 : /* Non-win32 implementation reads from file */
6181 : FILE *fp;
6182 :
6183 : /* Open file */
6184 : fp = AllocateFile(id, PG_BINARY_R);
6185 : if (!fp)
6186 : {
6187 : write_stderr("could not open backend variables file \"%s\": %s\n",
6188 : id, strerror(errno));
6189 : exit(1);
6190 : }
6191 :
6192 : if (fread(¶m, sizeof(param), 1, fp) != 1)
6193 : {
6194 : write_stderr("could not read from backend variables file \"%s\": %s\n",
6195 : id, strerror(errno));
6196 : exit(1);
6197 : }
6198 :
6199 : /* Release file */
6200 : FreeFile(fp);
6201 : if (unlink(id) != 0)
6202 : {
6203 : write_stderr("could not remove file \"%s\": %s\n",
6204 : id, strerror(errno));
6205 : exit(1);
6206 : }
6207 : #else
6208 : /* Win32 version uses mapped file */
6209 : HANDLE paramHandle;
6210 : BackendParameters *paramp;
6211 :
6212 : #ifdef _WIN64
6213 : paramHandle = (HANDLE) _atoi64(id);
6214 : #else
6215 : paramHandle = (HANDLE) atol(id);
6216 : #endif
6217 : paramp = MapViewOfFile(paramHandle, FILE_MAP_READ, 0, 0, 0);
6218 : if (!paramp)
6219 : {
6220 : write_stderr("could not map view of backend variables: error code %lu\n",
6221 : GetLastError());
6222 : exit(1);
6223 : }
6224 :
6225 : memcpy(¶m, paramp, sizeof(BackendParameters));
6226 :
6227 : if (!UnmapViewOfFile(paramp))
6228 : {
6229 : write_stderr("could not unmap view of backend variables: error code %lu\n",
6230 : GetLastError());
6231 : exit(1);
6232 : }
6233 :
6234 : if (!CloseHandle(paramHandle))
6235 : {
6236 : write_stderr("could not close handle to backend parameter variables: error code %lu\n",
6237 : GetLastError());
6238 : exit(1);
6239 : }
6240 : #endif
6241 :
6242 : restore_backend_variables(¶m, port, worker);
6243 : }
6244 :
6245 : /* Restore critical backend variables from the BackendParameters struct */
6246 : static void
6247 : restore_backend_variables(BackendParameters *param, Port **port, BackgroundWorker **worker)
6248 : {
6249 : if (param->has_port)
6250 : {
6251 : *port = (Port *) MemoryContextAlloc(TopMemoryContext, sizeof(Port));
6252 : memcpy(*port, ¶m->port, sizeof(Port));
6253 : read_inheritable_socket(&(*port)->sock, ¶m->portsocket);
6254 : }
6255 : else
6256 : *port = NULL;
6257 :
6258 : if (param->has_bgworker)
6259 : {
6260 : *worker = (BackgroundWorker *)
6261 : MemoryContextAlloc(TopMemoryContext, sizeof(BackgroundWorker));
6262 : memcpy(*worker, ¶m->bgworker, sizeof(BackgroundWorker));
6263 : }
6264 : else
6265 : *worker = NULL;
6266 :
6267 : SetDataDir(param->DataDir);
6268 :
6269 : MyCancelKey = param->MyCancelKey;
6270 : MyPMChildSlot = param->MyPMChildSlot;
6271 :
6272 : #ifdef WIN32
6273 : ShmemProtectiveRegion = param->ShmemProtectiveRegion;
6274 : #endif
6275 : UsedShmemSegID = param->UsedShmemSegID;
6276 : UsedShmemSegAddr = param->UsedShmemSegAddr;
6277 :
6278 : ShmemLock = param->ShmemLock;
6279 : ShmemBackendArray = param->ShmemBackendArray;
6280 :
6281 : #ifndef HAVE_SPINLOCKS
6282 : SpinlockSemaArray = param->SpinlockSemaArray;
6283 : #endif
6284 : NamedLWLockTrancheRequests = param->NamedLWLockTrancheRequests;
6285 : NamedLWLockTrancheArray = param->NamedLWLockTrancheArray;
6286 : MainLWLockArray = param->MainLWLockArray;
6287 : ProcStructLock = param->ProcStructLock;
6288 : ProcGlobal = param->ProcGlobal;
6289 : AuxiliaryProcs = param->AuxiliaryProcs;
6290 : PreparedXactProcs = param->PreparedXactProcs;
6291 : PMSignalState = param->PMSignalState;
6292 :
6293 : PostmasterPid = param->PostmasterPid;
6294 : PgStartTime = param->PgStartTime;
6295 : PgReloadTime = param->PgReloadTime;
6296 : first_syslogger_file_time = param->first_syslogger_file_time;
6297 :
6298 : redirection_done = param->redirection_done;
6299 : IsBinaryUpgrade = param->IsBinaryUpgrade;
6300 : query_id_enabled = param->query_id_enabled;
6301 : max_safe_fds = param->max_safe_fds;
6302 :
6303 : MaxBackends = param->MaxBackends;
6304 :
6305 : #ifdef WIN32
6306 : PostmasterHandle = param->PostmasterHandle;
6307 : pgwin32_initial_signal_pipe = param->initial_signal_pipe;
6308 : #else
6309 : memcpy(&postmaster_alive_fds, ¶m->postmaster_alive_fds,
6310 : sizeof(postmaster_alive_fds));
6311 : #endif
6312 :
6313 : memcpy(&syslogPipe, ¶m->syslogPipe, sizeof(syslogPipe));
6314 :
6315 : strlcpy(my_exec_path, param->my_exec_path, MAXPGPATH);
6316 :
6317 : strlcpy(pkglib_path, param->pkglib_path, MAXPGPATH);
6318 :
6319 : /*
6320 : * We need to restore fd.c's counts of externally-opened FDs; to avoid
6321 : * confusion, be sure to do this after restoring max_safe_fds. (Note:
6322 : * BackendInitialize will handle this for port->sock.)
6323 : */
6324 : #ifndef WIN32
6325 : if (postmaster_alive_fds[0] >= 0)
6326 : ReserveExternalFD();
6327 : if (postmaster_alive_fds[1] >= 0)
6328 : ReserveExternalFD();
6329 : #endif
6330 : }
6331 :
6332 :
6333 : Size
6334 : ShmemBackendArraySize(void)
6335 : {
6336 : return mul_size(MaxLivePostmasterChildren(), sizeof(Backend));
6337 : }
6338 :
6339 : void
6340 : ShmemBackendArrayAllocation(void)
6341 : {
6342 : Size size = ShmemBackendArraySize();
6343 :
6344 : ShmemBackendArray = (Backend *) ShmemAlloc(size);
6345 : /* Mark all slots as empty */
6346 : memset(ShmemBackendArray, 0, size);
6347 : }
6348 :
6349 : static void
6350 : ShmemBackendArrayAdd(Backend *bn)
6351 : {
6352 : /* The array slot corresponding to my PMChildSlot should be free */
6353 : int i = bn->child_slot - 1;
6354 :
6355 : Assert(ShmemBackendArray[i].pid == 0);
6356 : ShmemBackendArray[i] = *bn;
6357 : }
6358 :
6359 : static void
6360 : ShmemBackendArrayRemove(Backend *bn)
6361 : {
6362 : int i = bn->child_slot - 1;
6363 :
6364 : Assert(ShmemBackendArray[i].pid == bn->pid);
6365 : /* Mark the slot as empty */
6366 : ShmemBackendArray[i].pid = 0;
6367 : }
6368 : #endif /* EXEC_BACKEND */
6369 :
6370 :
6371 : #ifdef WIN32
6372 :
6373 : /*
6374 : * Subset implementation of waitpid() for Windows. We assume pid is -1
6375 : * (that is, check all child processes) and options is WNOHANG (don't wait).
6376 : */
6377 : static pid_t
6378 : waitpid(pid_t pid, int *exitstatus, int options)
6379 : {
6380 : win32_deadchild_waitinfo *childinfo;
6381 : DWORD exitcode;
6382 : DWORD dwd;
6383 : ULONG_PTR key;
6384 : OVERLAPPED *ovl;
6385 :
6386 : /* Try to consume one win32_deadchild_waitinfo from the queue. */
6387 : if (!GetQueuedCompletionStatus(win32ChildQueue, &dwd, &key, &ovl, 0))
6388 : {
6389 : errno = EAGAIN;
6390 : return -1;
6391 : }
6392 :
6393 : childinfo = (win32_deadchild_waitinfo *) key;
6394 : pid = childinfo->procId;
6395 :
6396 : /*
6397 : * Remove handle from wait - required even though it's set to wait only
6398 : * once
6399 : */
6400 : UnregisterWaitEx(childinfo->waitHandle, NULL);
6401 :
6402 : if (!GetExitCodeProcess(childinfo->procHandle, &exitcode))
6403 : {
6404 : /*
6405 : * Should never happen. Inform user and set a fixed exitcode.
6406 : */
6407 : write_stderr("could not read exit code for process\n");
6408 : exitcode = 255;
6409 : }
6410 : *exitstatus = exitcode;
6411 :
6412 : /*
6413 : * Close the process handle. Only after this point can the PID can be
6414 : * recycled by the kernel.
6415 : */
6416 : CloseHandle(childinfo->procHandle);
6417 :
6418 : /*
6419 : * Free struct that was allocated before the call to
6420 : * RegisterWaitForSingleObject()
6421 : */
6422 : pfree(childinfo);
6423 :
6424 : return pid;
6425 : }
6426 :
6427 : /*
6428 : * Note! Code below executes on a thread pool! All operations must
6429 : * be thread safe! Note that elog() and friends must *not* be used.
6430 : */
6431 : static void WINAPI
6432 : pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired)
6433 : {
6434 : /* Should never happen, since we use INFINITE as timeout value. */
6435 : if (TimerOrWaitFired)
6436 : return;
6437 :
6438 : /*
6439 : * Post the win32_deadchild_waitinfo object for waitpid() to deal with. If
6440 : * that fails, we leak the object, but we also leak a whole process and
6441 : * get into an unrecoverable state, so there's not much point in worrying
6442 : * about that. We'd like to panic, but we can't use that infrastructure
6443 : * from this thread.
6444 : */
6445 : if (!PostQueuedCompletionStatus(win32ChildQueue,
6446 : 0,
6447 : (ULONG_PTR) lpParameter,
6448 : NULL))
6449 : write_stderr("could not post child completion status\n");
6450 :
6451 : /* Queue SIGCHLD signal. */
6452 : pg_queue_signal(SIGCHLD);
6453 : }
6454 : #endif /* WIN32 */
6455 :
6456 : /*
6457 : * Initialize one and only handle for monitoring postmaster death.
6458 : *
6459 : * Called once in the postmaster, so that child processes can subsequently
6460 : * monitor if their parent is dead.
6461 : */
6462 : static void
6463 1280 : InitPostmasterDeathWatchHandle(void)
6464 : {
6465 : #ifndef WIN32
6466 :
6467 : /*
6468 : * Create a pipe. Postmaster holds the write end of the pipe open
6469 : * (POSTMASTER_FD_OWN), and children hold the read end. Children can pass
6470 : * the read file descriptor to select() to wake up in case postmaster
6471 : * dies, or check for postmaster death with a (read() == 0). Children must
6472 : * close the write end as soon as possible after forking, because EOF
6473 : * won't be signaled in the read end until all processes have closed the
6474 : * write fd. That is taken care of in ClosePostmasterPorts().
6475 : */
6476 : Assert(MyProcPid == PostmasterPid);
6477 1280 : if (pipe(postmaster_alive_fds) < 0)
6478 0 : ereport(FATAL,
6479 : (errcode_for_file_access(),
6480 : errmsg_internal("could not create pipe to monitor postmaster death: %m")));
6481 :
6482 : /* Notify fd.c that we've eaten two FDs for the pipe. */
6483 1280 : ReserveExternalFD();
6484 1280 : ReserveExternalFD();
6485 :
6486 : /*
6487 : * Set O_NONBLOCK to allow testing for the fd's presence with a read()
6488 : * call.
6489 : */
6490 1280 : if (fcntl(postmaster_alive_fds[POSTMASTER_FD_WATCH], F_SETFL, O_NONBLOCK) == -1)
6491 0 : ereport(FATAL,
6492 : (errcode_for_socket_access(),
6493 : errmsg_internal("could not set postmaster death monitoring pipe to nonblocking mode: %m")));
6494 : #else
6495 :
6496 : /*
6497 : * On Windows, we use a process handle for the same purpose.
6498 : */
6499 : if (DuplicateHandle(GetCurrentProcess(),
6500 : GetCurrentProcess(),
6501 : GetCurrentProcess(),
6502 : &PostmasterHandle,
6503 : 0,
6504 : TRUE,
6505 : DUPLICATE_SAME_ACCESS) == 0)
6506 : ereport(FATAL,
6507 : (errmsg_internal("could not duplicate postmaster handle: error code %lu",
6508 : GetLastError())));
6509 : #endif /* WIN32 */
6510 1280 : }
|