Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * postmaster.c
4 : * This program acts as a clearing house for requests to the
5 : * POSTGRES system. Frontend programs send a startup message
6 : * to the Postmaster and the postmaster uses the info in the
7 : * message to setup a backend process.
8 : *
9 : * The postmaster also manages system-wide operations such as
10 : * startup and shutdown. The postmaster itself doesn't do those
11 : * operations, mind you --- it just forks off a subprocess to do them
12 : * at the right times. It also takes care of resetting the system
13 : * if a backend crashes.
14 : *
15 : * The postmaster process creates the shared memory and semaphore
16 : * pools during startup, but as a rule does not touch them itself.
17 : * In particular, it is not a member of the PGPROC array of backends
18 : * and so it cannot participate in lock-manager operations. Keeping
19 : * the postmaster away from shared memory operations makes it simpler
20 : * and more reliable. The postmaster is almost always able to recover
21 : * from crashes of individual backends by resetting shared memory;
22 : * if it did much with shared memory then it would be prone to crashing
23 : * along with the backends.
24 : *
25 : * When a request message is received, we now fork() immediately.
26 : * The child process performs authentication of the request, and
27 : * then becomes a backend if successful. This allows the auth code
28 : * to be written in a simple single-threaded style (as opposed to the
29 : * crufty "poor man's multitasking" code that used to be needed).
30 : * More importantly, it ensures that blockages in non-multithreaded
31 : * libraries like SSL or PAM cannot cause denial of service to other
32 : * clients.
33 : *
34 : *
35 : * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
36 : * Portions Copyright (c) 1994, Regents of the University of California
37 : *
38 : *
39 : * IDENTIFICATION
40 : * src/backend/postmaster/postmaster.c
41 : *
42 : * NOTES
43 : *
44 : * Initialization:
45 : * The Postmaster sets up shared memory data structures
46 : * for the backends.
47 : *
48 : * Synchronization:
49 : * The Postmaster shares memory with the backends but should avoid
50 : * touching shared memory, so as not to become stuck if a crashing
51 : * backend screws up locks or shared memory. Likewise, the Postmaster
52 : * should never block on messages from frontend clients.
53 : *
54 : * Garbage Collection:
55 : * The Postmaster cleans up after backends if they have an emergency
56 : * exit and/or core dump.
57 : *
58 : * Error Reporting:
59 : * Use write_stderr() only for reporting "interactive" errors
60 : * (essentially, bogus arguments on the command line). Once the
61 : * postmaster is launched, use ereport().
62 : *
63 : *-------------------------------------------------------------------------
64 : */
65 :
66 : #include "postgres.h"
67 :
68 : #include <unistd.h>
69 : #include <signal.h>
70 : #include <time.h>
71 : #include <sys/wait.h>
72 : #include <ctype.h>
73 : #include <sys/stat.h>
74 : #include <sys/socket.h>
75 : #include <fcntl.h>
76 : #include <sys/param.h>
77 : #include <netdb.h>
78 : #include <limits.h>
79 :
80 : #ifdef HAVE_SYS_SELECT_H
81 : #include <sys/select.h>
82 : #endif
83 :
84 : #ifdef USE_BONJOUR
85 : #include <dns_sd.h>
86 : #endif
87 :
88 : #ifdef USE_SYSTEMD
89 : #include <systemd/sd-daemon.h>
90 : #endif
91 :
92 : #ifdef HAVE_PTHREAD_IS_THREADED_NP
93 : #include <pthread.h>
94 : #endif
95 :
96 : #include "access/transam.h"
97 : #include "access/xlog.h"
98 : #include "access/xlogrecovery.h"
99 : #include "catalog/pg_control.h"
100 : #include "common/file_perm.h"
101 : #include "common/ip.h"
102 : #include "common/pg_prng.h"
103 : #include "common/string.h"
104 : #include "lib/ilist.h"
105 : #include "libpq/auth.h"
106 : #include "libpq/libpq.h"
107 : #include "libpq/pqformat.h"
108 : #include "libpq/pqsignal.h"
109 : #include "pg_getopt.h"
110 : #include "pgstat.h"
111 : #include "port/pg_bswap.h"
112 : #include "postmaster/autovacuum.h"
113 : #include "postmaster/auxprocess.h"
114 : #include "postmaster/bgworker_internals.h"
115 : #include "postmaster/fork_process.h"
116 : #include "postmaster/interrupt.h"
117 : #include "postmaster/pgarch.h"
118 : #include "postmaster/postmaster.h"
119 : #include "postmaster/syslogger.h"
120 : #include "replication/logicallauncher.h"
121 : #include "replication/walsender.h"
122 : #include "storage/fd.h"
123 : #include "storage/ipc.h"
124 : #include "storage/pg_shmem.h"
125 : #include "storage/pmsignal.h"
126 : #include "storage/proc.h"
127 : #include "tcop/tcopprot.h"
128 : #include "utils/builtins.h"
129 : #include "utils/datetime.h"
130 : #include "utils/memutils.h"
131 : #include "utils/pidfile.h"
132 : #include "utils/ps_status.h"
133 : #include "utils/queryjumble.h"
134 : #include "utils/timeout.h"
135 : #include "utils/timestamp.h"
136 : #include "utils/varlena.h"
137 :
138 : #ifdef EXEC_BACKEND
139 : #include "storage/spin.h"
140 : #endif
141 :
142 :
143 : /*
144 : * Possible types of a backend. Beyond being the possible bkend_type values in
145 : * struct bkend, these are OR-able request flag bits for SignalSomeChildren()
146 : * and CountChildren().
147 : */
148 : #define BACKEND_TYPE_NORMAL 0x0001 /* normal backend */
149 : #define BACKEND_TYPE_AUTOVAC 0x0002 /* autovacuum worker process */
150 : #define BACKEND_TYPE_WALSND 0x0004 /* walsender process */
151 : #define BACKEND_TYPE_BGWORKER 0x0008 /* bgworker process */
152 : #define BACKEND_TYPE_ALL 0x000F /* OR of all the above */
153 :
154 : /*
155 : * List of active backends (or child processes anyway; we don't actually
156 : * know whether a given child has become a backend or is still in the
157 : * authorization phase). This is used mainly to keep track of how many
158 : * children we have and send them appropriate signals when necessary.
159 : *
160 : * As shown in the above set of backend types, this list includes not only
161 : * "normal" client sessions, but also autovacuum workers, walsenders, and
162 : * background workers. (Note that at the time of launch, walsenders are
163 : * labeled BACKEND_TYPE_NORMAL; we relabel them to BACKEND_TYPE_WALSND
164 : * upon noticing they've changed their PMChildFlags entry. Hence that check
165 : * must be done before any operation that needs to distinguish walsenders
166 : * from normal backends.)
167 : *
168 : * Also, "dead_end" children are in it: these are children launched just for
169 : * the purpose of sending a friendly rejection message to a would-be client.
170 : * We must track them because they are attached to shared memory, but we know
171 : * they will never become live backends. dead_end children are not assigned a
172 : * PMChildSlot. dead_end children have bkend_type NORMAL.
173 : *
174 : * "Special" children such as the startup, bgwriter and autovacuum launcher
175 : * tasks are not in this list. They are tracked via StartupPID and other
176 : * pid_t variables below. (Thus, there can't be more than one of any given
177 : * "special" child process type. We use BackendList entries for any child
178 : * process there can be more than one of.)
179 : */
180 : typedef struct bkend
181 : {
182 : pid_t pid; /* process id of this child process */
183 : int32 cancel_key; /* cancel key for cancel requests aimed at this backend */
184 : int child_slot; /* PMChildSlot for this backend, if any (dead_end children get none) */
185 : int bkend_type; /* child process flavor: a BACKEND_TYPE_* value, see above */
186 : bool dead_end; /* launched only to send an error message and quit? */
187 : bool bgworker_notify; /* gets bgworker start/stop notifications */
188 : dlist_node elem; /* list link in BackendList */
189 : } Backend;
190 :
191 : static dlist_head BackendList = DLIST_STATIC_INIT(BackendList);
192 :
193 : #ifdef EXEC_BACKEND
194 : static Backend *ShmemBackendArray;
195 : #endif
196 :
197 : BackgroundWorker *MyBgworkerEntry = NULL;
198 :
199 :
200 :
201 : /* The socket number we are listening for connections on */
202 : int PostPortNumber;
203 :
204 : /* The directory names for Unix socket(s) */
205 : char *Unix_socket_directories;
206 :
207 : /* The TCP listen address(es) */
208 : char *ListenAddresses;
209 :
210 : /*
211 : * ReservedBackends is the number of backends reserved for superuser use.
212 : * This number is taken out of the pool size given by MaxConnections so
213 : * number of backend slots available to non-superusers is
214 : * (MaxConnections - ReservedBackends). Note what this really means is
215 : * "if there are <= ReservedBackends connections available, only superusers
216 : * can make new connections" --- pre-existing superuser connections don't
217 : * count against the limit.
218 : */
219 : int ReservedBackends;
220 :
221 : /* The socket(s) we're listening to. */
222 : #define MAXLISTEN 64
223 : static pgsocket ListenSocket[MAXLISTEN];
224 :
225 : /*
226 : * These globals control the behavior of the postmaster in case some
227 : * backend dumps core. Normally, it kills all peers of the dead backend
228 : * and reinitializes shared memory. By specifying -T or -n, we can have
229 : * the postmaster stop (rather than kill) peers and not reinitialize
230 : * shared data structures. (Reinit is currently dead code, though.)
231 : */
232 : static bool Reinit = true;
233 : static int SendStop = false;
234 :
235 : /* still more option variables */
236 : bool EnableSSL = false;
237 :
238 : int PreAuthDelay = 0;
239 : int AuthenticationTimeout = 60;
240 :
241 : bool log_hostname; /* for ps display and logging */
242 : bool Log_connections = false;
243 : bool Db_user_namespace = false;
244 :
245 : bool enable_bonjour = false;
246 : char *bonjour_name;
247 : bool restart_after_crash = true;
248 : bool remove_temp_files_after_crash = true;
249 :
250 : /* PIDs of special child processes; 0 when not running */
251 : static pid_t StartupPID = 0,
252 : BgWriterPID = 0,
253 : CheckpointerPID = 0,
254 : WalWriterPID = 0,
255 : WalReceiverPID = 0,
256 : AutoVacPID = 0,
257 : PgArchPID = 0,
258 : SysLoggerPID = 0;
259 :
260 : /* Startup process's status */
261 : typedef enum
262 : {
263 : STARTUP_NOT_RUNNING, /* initial value of StartupStatus */
264 : STARTUP_RUNNING,
265 : STARTUP_SIGNALED, /* we sent the startup process a SIGQUIT or SIGKILL */
266 : STARTUP_CRASHED
267 : } StartupStatusEnum;
268 :
269 : static StartupStatusEnum StartupStatus = STARTUP_NOT_RUNNING;
270 :
271 : /* Startup/shutdown state */
272 : #define NoShutdown 0
273 : #define SmartShutdown 1
274 : #define FastShutdown 2
275 : #define ImmediateShutdown 3
276 :
277 : static int Shutdown = NoShutdown;
278 :
279 : static bool FatalError = false; /* T if recovering from backend crash */
280 :
281 : /*
282 : * We use a simple state machine to control startup, shutdown, and
283 : * crash recovery (which is rather like shutdown followed by startup).
284 : *
285 : * After doing all the postmaster initialization work, we enter PM_STARTUP
286 : * state and the startup process is launched. The startup process begins by
287 : * reading the control file and other preliminary initialization steps.
288 : * In a normal startup, or after crash recovery, the startup process exits
289 : * with exit code 0 and we switch to PM_RUN state. However, archive recovery
290 : * is handled specially since it takes much longer and we would like to support
291 : * hot standby during archive recovery.
292 : *
293 : * When the startup process is ready to start archive recovery, it signals the
294 : * postmaster, and we switch to PM_RECOVERY state. The background writer and
295 : * checkpointer are launched, while the startup process continues applying WAL.
296 : * If Hot Standby is enabled, then, after reaching a consistent point in WAL
297 : * redo, startup process signals us again, and we switch to PM_HOT_STANDBY
298 : * state and begin accepting connections to perform read-only queries. When
299 : * archive recovery is finished, the startup process exits with exit code 0
300 : * and we switch to PM_RUN state.
301 : *
302 : * Normal child backends can only be launched when we are in PM_RUN or
303 : * PM_HOT_STANDBY state. (connsAllowed can also restrict launching.)
304 : * In other states we handle connection requests by launching "dead_end"
305 : * child processes, which will simply send the client an error message and
306 : * quit. (We track these in the BackendList so that we can know when they
307 : * are all gone; this is important because they're still connected to shared
308 : * memory, and would interfere with an attempt to destroy the shmem segment,
309 : * possibly leading to SHMALL failure when we try to make a new one.)
310 : * In PM_WAIT_DEAD_END state we are waiting for all the dead_end children
311 : * to drain out of the system, and therefore stop accepting connection
312 : * requests at all until the last existing child has quit (which hopefully
313 : * will not be very long).
314 : *
315 : * Notice that this state variable does not distinguish *why* we entered
316 : * states later than PM_RUN --- Shutdown and FatalError must be consulted
317 : * to find that out. FatalError is never true in PM_RECOVERY, PM_HOT_STANDBY,
318 : * or PM_RUN states, nor in PM_SHUTDOWN states (because we don't enter those
319 : * states when trying to recover from a crash). It can be true in PM_STARTUP
320 : * state, because we don't clear it until we've successfully started WAL redo.
321 : */
322 : typedef enum
323 : {
324 : PM_INIT, /* postmaster starting (initial pmState value) */
325 : PM_STARTUP, /* waiting for startup subprocess */
326 : PM_RECOVERY, /* in archive recovery mode */
327 : PM_HOT_STANDBY, /* in hot standby mode; read-only queries allowed */
328 : PM_RUN, /* normal "database is alive" state */
329 : PM_STOP_BACKENDS, /* need to stop remaining backends */
330 : PM_WAIT_BACKENDS, /* waiting for live backends to exit */
331 : PM_SHUTDOWN, /* waiting for checkpointer to do the shutdown
332 : * checkpoint */
333 : PM_SHUTDOWN_2, /* waiting for archiver and walsenders to
334 : * finish */
335 : PM_WAIT_DEAD_END, /* waiting for dead_end children to exit; no new connections accepted */
336 : PM_NO_CHILDREN /* all important children have exited */
337 : } PMState;
338 :
339 : static PMState pmState = PM_INIT;
340 :
341 : /*
342 : * While performing a "smart shutdown", we restrict new connections but stay
343 : * in PM_RUN or PM_HOT_STANDBY state until all the client backends are gone.
344 : * connsAllowed is a sub-state indicator showing the active restriction.
345 : * It is of no interest unless pmState is PM_RUN or PM_HOT_STANDBY.
346 : */
347 : static bool connsAllowed = true;
348 :
349 : /* Start time of SIGKILL timeout during immediate shutdown or child crash */
350 : /* Zero means timeout is not running */
351 : static time_t AbortStartTime = 0;
352 :
353 : /* Length of said timeout */
354 : #define SIGKILL_CHILDREN_AFTER_SECS 5
355 :
356 : static bool ReachedNormalRunning = false; /* T if we've reached PM_RUN */
357 :
358 : bool ClientAuthInProgress = false; /* T during new-client
359 : * authentication */
360 :
361 : bool redirection_done = false; /* stderr redirected for syslogger? */
362 :
363 : /* received START_AUTOVAC_LAUNCHER signal */
364 : static volatile sig_atomic_t start_autovac_launcher = false;
365 :
366 : /* the launcher needs to be signaled to communicate some condition */
367 : static volatile bool avlauncher_needs_signal = false;
368 :
369 : /* received START_WALRECEIVER signal */
370 : static volatile sig_atomic_t WalReceiverRequested = false;
371 :
372 : /* set when there's a worker that needs to be started up */
373 : static volatile bool StartWorkerNeeded = true;
374 : static volatile bool HaveCrashedWorker = false;
375 :
376 : #ifdef USE_SSL
377 : /* Set when and if SSL has been initialized properly */
378 : static bool LoadedSSL = false;
379 : #endif
380 :
381 : #ifdef USE_BONJOUR
382 : static DNSServiceRef bonjour_sdref = NULL;
383 : #endif
384 :
385 : /*
386 : * postmaster.c - function prototypes
387 : */
388 : static void CloseServerPorts(int status, Datum arg);
389 : static void unlink_external_pid_file(int status, Datum arg);
390 : static void getInstallationPaths(const char *argv0);
391 : static void checkControlFile(void);
392 : static Port *ConnCreate(int serverFd);
393 : static void ConnFree(Port *port);
394 : static void reset_shared(void);
395 : static void SIGHUP_handler(SIGNAL_ARGS);
396 : static void pmdie(SIGNAL_ARGS);
397 : static void reaper(SIGNAL_ARGS);
398 : static void sigusr1_handler(SIGNAL_ARGS);
399 : static void process_startup_packet_die(SIGNAL_ARGS);
400 : static void dummy_handler(SIGNAL_ARGS);
401 : static void StartupPacketTimeoutHandler(void);
402 : static void CleanupBackend(int pid, int exitstatus);
403 : static bool CleanupBackgroundWorker(int pid, int exitstatus);
404 : static void HandleChildCrash(int pid, int exitstatus, const char *procname);
405 : static void LogChildExit(int lev, const char *procname,
406 : int pid, int exitstatus);
407 : static void PostmasterStateMachine(void);
408 : static void BackendInitialize(Port *port);
409 : static void BackendRun(Port *port) pg_attribute_noreturn();
410 : static void ExitPostmaster(int status) pg_attribute_noreturn();
411 : static int ServerLoop(void);
412 : static int BackendStartup(Port *port);
413 : static int ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done);
414 : static void SendNegotiateProtocolVersion(List *unrecognized_protocol_options);
415 : static void processCancelRequest(Port *port, void *pkt);
416 : static int initMasks(fd_set *rmask);
417 : static void report_fork_failure_to_client(Port *port, int errnum);
418 : static CAC_state canAcceptConnections(int backend_type);
419 : static bool RandomCancelKey(int32 *cancel_key);
420 : static void signal_child(pid_t pid, int signal);
421 : static bool SignalSomeChildren(int signal, int targets);
422 : static void TerminateChildren(int signal);
423 :
424 : #define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL)
425 :
426 : static int CountChildren(int target);
427 : static bool assign_backendlist_entry(RegisteredBgWorker *rw);
428 : static void maybe_start_bgworkers(void);
429 : static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
430 : static pid_t StartChildProcess(AuxProcType type);
431 : static void StartAutovacuumWorker(void);
432 : static void MaybeStartWalReceiver(void);
433 : static void InitPostmasterDeathWatchHandle(void);
434 :
435 : /*
436 : * Is the archiver allowed to start up in the current postmaster state?
437 : *
438 : * If WAL archiving is always enabled, we are allowed to start the archiver
439 : * even during recovery.
440 : */
441 : #define PgArchStartupAllowed() \
442 : (((XLogArchivingActive() && pmState == PM_RUN) || \
443 : (XLogArchivingAlways() && \
444 : (pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY))) && \
445 : PgArchCanRestart())
446 :
447 : #ifdef EXEC_BACKEND
448 :
449 : #ifdef WIN32
450 : #define WNOHANG 0 /* ignored, so any integer value will do */
451 :
452 : static pid_t waitpid(pid_t pid, int *exitstatus, int options);
453 : static void WINAPI pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired);
454 :
455 : static HANDLE win32ChildQueue;
456 :
457 : typedef struct
458 : {
459 : HANDLE waitHandle;
460 : HANDLE procHandle;
461 : DWORD procId;
462 : } win32_deadchild_waitinfo;
463 : #endif /* WIN32 */
464 :
465 : static pid_t backend_forkexec(Port *port);
466 : static pid_t internal_forkexec(int argc, char *argv[], Port *port);
467 :
468 : /* Type for a socket that can be inherited to a client process */
469 : #ifdef WIN32
470 : typedef struct
471 : {
472 : SOCKET origsocket; /* Original socket value, or PGINVALID_SOCKET
473 : * if not a socket */
474 : WSAPROTOCOL_INFO wsainfo;
475 : } InheritableSocket;
476 : #else
477 : typedef int InheritableSocket;
478 : #endif
479 :
480 : /*
481 : * Structure contains all variables passed to exec:ed backends
482 : */
483 : typedef struct
484 : {
485 : Port port;
486 : InheritableSocket portsocket;
487 : char DataDir[MAXPGPATH];
488 : pgsocket ListenSocket[MAXLISTEN];
489 : int32 MyCancelKey;
490 : int MyPMChildSlot;
491 : #ifndef WIN32
492 : unsigned long UsedShmemSegID;
493 : #else
494 : void *ShmemProtectiveRegion;
495 : HANDLE UsedShmemSegID;
496 : #endif
497 : void *UsedShmemSegAddr;
498 : slock_t *ShmemLock;
499 : VariableCache ShmemVariableCache;
500 : Backend *ShmemBackendArray;
501 : #ifndef HAVE_SPINLOCKS
502 : PGSemaphore *SpinlockSemaArray;
503 : #endif
504 : int NamedLWLockTrancheRequests;
505 : NamedLWLockTranche *NamedLWLockTrancheArray;
506 : LWLockPadded *MainLWLockArray;
507 : slock_t *ProcStructLock;
508 : PROC_HDR *ProcGlobal;
509 : PGPROC *AuxiliaryProcs;
510 : PGPROC *PreparedXactProcs;
511 : PMSignalData *PMSignalState;
512 : pid_t PostmasterPid;
513 : TimestampTz PgStartTime;
514 : TimestampTz PgReloadTime;
515 : pg_time_t first_syslogger_file_time;
516 : bool redirection_done;
517 : bool IsBinaryUpgrade;
518 : bool query_id_enabled;
519 : int max_safe_fds;
520 : int MaxBackends;
521 : #ifdef WIN32
522 : HANDLE PostmasterHandle;
523 : HANDLE initial_signal_pipe;
524 : HANDLE syslogPipe[2];
525 : #else
526 : int postmaster_alive_fds[2];
527 : int syslogPipe[2];
528 : #endif
529 : char my_exec_path[MAXPGPATH];
530 : char pkglib_path[MAXPGPATH];
531 : } BackendParameters;
532 :
533 : static void read_backend_variables(char *id, Port *port);
534 : static void restore_backend_variables(BackendParameters *param, Port *port);
535 :
536 : #ifndef WIN32
537 : static bool save_backend_variables(BackendParameters *param, Port *port);
538 : #else
539 : static bool save_backend_variables(BackendParameters *param, Port *port,
540 : HANDLE childProcess, pid_t childPid);
541 : #endif
542 :
543 : static void ShmemBackendArrayAdd(Backend *bn);
544 : static void ShmemBackendArrayRemove(Backend *bn);
545 : #endif /* EXEC_BACKEND */
546 :
547 : #define StartupDataBase() StartChildProcess(StartupProcess)
548 : #define StartArchiver() StartChildProcess(ArchiverProcess)
549 : #define StartBackgroundWriter() StartChildProcess(BgWriterProcess)
550 : #define StartCheckpointer() StartChildProcess(CheckpointerProcess)
551 : #define StartWalWriter() StartChildProcess(WalWriterProcess)
552 : #define StartWalReceiver() StartChildProcess(WalReceiverProcess)
553 :
554 : /* Macros to check exit status of a child process */
555 : #define EXIT_STATUS_0(st) ((st) == 0)
556 : #define EXIT_STATUS_1(st) (WIFEXITED(st) && WEXITSTATUS(st) == 1)
557 : #define EXIT_STATUS_3(st) (WIFEXITED(st) && WEXITSTATUS(st) == 3)
558 :
559 : #ifndef WIN32
560 : /*
561 : * File descriptors for pipe used to monitor if postmaster is alive.
562 : * First is POSTMASTER_FD_WATCH, second is POSTMASTER_FD_OWN.
563 : */
564 : int postmaster_alive_fds[2] = {-1, -1};
565 : #else
566 : /* Process handle of postmaster used for the same purpose on Windows */
567 : HANDLE PostmasterHandle;
568 : #endif
569 :
570 : /*
571 : * Postmaster main entry point
572 : */
573 : void
574 1060 : PostmasterMain(int argc, char *argv[])
575 : {
576 : int opt;
577 : int status;
578 1060 : char *userDoption = NULL;
579 1060 : bool listen_addr_saved = false;
580 : int i;
581 1060 : char *output_config_variable = NULL;
582 :
583 1060 : InitProcessGlobals();
584 :
585 1060 : PostmasterPid = MyProcPid;
586 :
587 1060 : IsPostmasterEnvironment = true;
588 :
589 : /*
590 : * Start our win32 signal implementation
591 : */
592 : #ifdef WIN32
593 : pgwin32_signal_initialize();
594 : #endif
595 :
596 : /*
597 : * We should not be creating any files or directories before we check the
598 : * data directory (see checkDataDir()), but just in case set the umask to
599 : * the most restrictive (owner-only) permissions.
600 : *
601 : * checkDataDir() will reset the umask based on the data directory
602 : * permissions.
603 : */
604 1060 : umask(PG_MODE_MASK_OWNER);
605 :
606 : /*
607 : * By default, palloc() requests in the postmaster will be allocated in
608 : * the PostmasterContext, which is space that can be recycled by backends.
609 : * Allocated data that needs to be available to backends should be
610 : * allocated in TopMemoryContext.
611 : */
612 1060 : PostmasterContext = AllocSetContextCreate(TopMemoryContext,
613 : "Postmaster",
614 : ALLOCSET_DEFAULT_SIZES);
615 1060 : MemoryContextSwitchTo(PostmasterContext);
616 :
617 : /* Initialize paths to installation files */
618 1060 : getInstallationPaths(argv[0]);
619 :
620 : /*
621 : * Set up signal handlers for the postmaster process.
622 : *
623 : * In the postmaster, we use pqsignal_pm() rather than pqsignal() (which
624 : * is used by all child processes and client processes). That has a
625 : * couple of special behaviors:
626 : *
627 : * 1. Except on Windows, we tell sigaction() to block all signals for the
628 : * duration of the signal handler. This is faster than our old approach
629 : * of blocking/unblocking explicitly in the signal handler, and it should
630 : * also prevent excessive stack consumption if signals arrive quickly.
631 : *
632 : * 2. We do not set the SA_RESTART flag. This is because signals will be
633 : * blocked at all times except when ServerLoop is waiting for something to
634 : * happen, and during that window, we want signals to exit the select(2)
635 : * wait so that ServerLoop can respond if anything interesting happened.
636 : * On some platforms, signals marked SA_RESTART would not cause the
637 : * select() wait to end.
638 : *
639 : * Child processes will generally want SA_RESTART, so pqsignal() sets that
640 : * flag. We expect children to set up their own handlers before
641 : * unblocking signals.
642 : *
643 : * CAUTION: when changing this list, check for side-effects on the signal
644 : * handling setup of child processes. See tcop/postgres.c,
645 : * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c,
646 : * postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/syslogger.c,
647 : * postmaster/bgworker.c and postmaster/checkpointer.c.
648 : */
649 1060 : pqinitmask();
650 1060 : PG_SETMASK(&BlockSig);
651 :
652 1060 : pqsignal_pm(SIGHUP, SIGHUP_handler); /* reread config file and have
653 : * children do same */
654 1060 : pqsignal_pm(SIGINT, pmdie); /* send SIGTERM and shut down */
655 1060 : pqsignal_pm(SIGQUIT, pmdie); /* send SIGQUIT and die */
656 1060 : pqsignal_pm(SIGTERM, pmdie); /* wait for children and shut down */
657 1060 : pqsignal_pm(SIGALRM, SIG_IGN); /* ignored */
658 1060 : pqsignal_pm(SIGPIPE, SIG_IGN); /* ignored */
659 1060 : pqsignal_pm(SIGUSR1, sigusr1_handler); /* message from child process */
660 1060 : pqsignal_pm(SIGUSR2, dummy_handler); /* unused, reserve for children */
661 1060 : pqsignal_pm(SIGCHLD, reaper); /* handle child termination */
662 :
663 : #ifdef SIGURG
664 :
665 : /*
666 : * Ignore SIGURG for now. Child processes may change this (see
667 : * InitializeLatchSupport), but they will not receive any such signals
668 : * until they wait on a latch.
669 : */
670 1060 : pqsignal_pm(SIGURG, SIG_IGN); /* ignored */
671 : #endif
672 :
673 : /*
674 : * No other place in Postgres should touch SIGTTIN/SIGTTOU handling. We
675 : * ignore those signals in a postmaster environment, so that there is no
676 : * risk of a child process freezing up due to writing to stderr. But for
677 : * a standalone backend, their default handling is reasonable. Hence, all
678 : * child processes should just allow the inherited settings to stand.
679 : */
680 : #ifdef SIGTTIN
681 1060 : pqsignal_pm(SIGTTIN, SIG_IGN); /* ignored */
682 : #endif
683 : #ifdef SIGTTOU
684 1060 : pqsignal_pm(SIGTTOU, SIG_IGN); /* ignored */
685 : #endif
686 :
687 : /* ignore SIGXFSZ, so that ulimit violations work like disk full */
688 : #ifdef SIGXFSZ
689 1060 : pqsignal_pm(SIGXFSZ, SIG_IGN); /* ignored */
690 : #endif
691 :
692 : /*
693 : * Options setup
694 : */
695 1060 : InitializeGUCOptions();
696 :
697 1060 : opterr = 1;
698 :
699 : /*
700 : * Parse command-line options. CAUTION: keep this in sync with
701 : * tcop/postgres.c (the option sets should not conflict) and with the
702 : * common help() function in main/main.c.
703 : */
704 3518 : while ((opt = getopt(argc, argv, "B:bc:C:D:d:EeFf:h:ijk:lN:nOPp:r:S:sTt:W:-:")) != -1)
705 : {
706 2458 : switch (opt)
707 : {
708 0 : case 'B':
709 0 : SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
710 0 : break;
711 :
712 8 : case 'b':
713 : /* Undocumented flag used for binary upgrades */
714 8 : IsBinaryUpgrade = true;
715 8 : break;
716 :
717 4 : case 'C':
718 4 : output_config_variable = strdup(optarg);
719 4 : break;
720 :
721 1060 : case 'D':
722 1060 : userDoption = strdup(optarg);
723 1060 : break;
724 :
725 0 : case 'd':
726 0 : set_debug_options(atoi(optarg), PGC_POSTMASTER, PGC_S_ARGV);
727 0 : break;
728 :
729 0 : case 'E':
730 0 : SetConfigOption("log_statement", "all", PGC_POSTMASTER, PGC_S_ARGV);
731 0 : break;
732 :
733 0 : case 'e':
734 0 : SetConfigOption("datestyle", "euro", PGC_POSTMASTER, PGC_S_ARGV);
735 0 : break;
736 :
737 150 : case 'F':
738 150 : SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
739 150 : break;
740 :
741 0 : case 'f':
742 0 : if (!set_plan_disabling_options(optarg, PGC_POSTMASTER, PGC_S_ARGV))
743 : {
744 0 : write_stderr("%s: invalid argument for option -f: \"%s\"\n",
745 : progname, optarg);
746 0 : ExitPostmaster(1);
747 : }
748 0 : break;
749 :
750 0 : case 'h':
751 0 : SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
752 0 : break;
753 :
754 0 : case 'i':
755 0 : SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
756 0 : break;
757 :
758 0 : case 'j':
759 : /* only used by interactive backend */
760 0 : break;
761 :
762 150 : case 'k':
763 150 : SetConfigOption("unix_socket_directories", optarg, PGC_POSTMASTER, PGC_S_ARGV);
764 150 : break;
765 :
766 0 : case 'l':
767 0 : SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
768 0 : break;
769 :
770 0 : case 'N':
771 0 : SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
772 0 : break;
773 :
774 0 : case 'n':
775 : /* Don't reinit shared mem after abnormal exit */
776 0 : Reinit = false;
777 0 : break;
778 :
779 0 : case 'O':
780 0 : SetConfigOption("allow_system_table_mods", "true", PGC_POSTMASTER, PGC_S_ARGV);
781 0 : break;
782 :
783 0 : case 'P':
784 0 : SetConfigOption("ignore_system_indexes", "true", PGC_POSTMASTER, PGC_S_ARGV);
785 0 : break;
786 :
787 8 : case 'p':
788 8 : SetConfigOption("port", optarg, PGC_POSTMASTER, PGC_S_ARGV);
789 8 : break;
790 :
791 0 : case 'r':
792 : /* only used by single-user backend */
793 0 : break;
794 :
795 0 : case 'S':
796 0 : SetConfigOption("work_mem", optarg, PGC_POSTMASTER, PGC_S_ARGV);
797 0 : break;
798 :
799 0 : case 's':
800 0 : SetConfigOption("log_statement_stats", "true", PGC_POSTMASTER, PGC_S_ARGV);
801 0 : break;
802 :
803 0 : case 'T':
804 :
805 : /*
806 : * In the event that some backend dumps core, send SIGSTOP,
807 : * rather than SIGQUIT, to all its peers. This lets the wily
808 : * post_hacker collect core dumps from everyone.
809 : */
810 0 : SendStop = true;
811 0 : break;
812 :
813 0 : case 't':
814 : {
815 0 : const char *tmp = get_stats_option_name(optarg);
816 :
817 0 : if (tmp)
818 : {
819 0 : SetConfigOption(tmp, "true", PGC_POSTMASTER, PGC_S_ARGV);
820 : }
821 : else
822 : {
823 0 : write_stderr("%s: invalid argument for option -t: \"%s\"\n",
824 : progname, optarg);
825 0 : ExitPostmaster(1);
826 : }
827 0 : break;
828 : }
829 :
830 0 : case 'W':
831 0 : SetConfigOption("post_auth_delay", optarg, PGC_POSTMASTER, PGC_S_ARGV);
832 0 : break;
833 :
834 1078 : case 'c':
835 : case '-':
836 : {
837 : char *name,
838 : *value;
839 :
840 1078 : ParseLongOption(optarg, &name, &value);
841 1078 : if (!value)
842 : {
843 0 : if (opt == '-')
844 0 : ereport(ERROR,
845 : (errcode(ERRCODE_SYNTAX_ERROR),
846 : errmsg("--%s requires a value",
847 : optarg)));
848 : else
849 0 : ereport(ERROR,
850 : (errcode(ERRCODE_SYNTAX_ERROR),
851 : errmsg("-c %s requires a value",
852 : optarg)));
853 : }
854 :
855 1078 : SetConfigOption(name, value, PGC_POSTMASTER, PGC_S_ARGV);
856 1078 : free(name);
857 1078 : if (value)
858 1078 : free(value);
859 1078 : break;
860 : }
861 :
862 0 : default:
863 0 : write_stderr("Try \"%s --help\" for more information.\n",
864 : progname);
865 0 : ExitPostmaster(1);
866 : }
867 : }
868 :
869 : /*
870 : * Postmaster accepts no non-option switch arguments.
871 : */
872 1060 : if (optind < argc)
873 : {
874 0 : write_stderr("%s: invalid argument: \"%s\"\n",
875 0 : progname, argv[optind]);
876 0 : write_stderr("Try \"%s --help\" for more information.\n",
877 : progname);
878 0 : ExitPostmaster(1);
879 : }
880 :
881 : /*
882 : * Locate the proper configuration files and data directory, and read
883 : * postgresql.conf for the first time.
884 : */
885 1060 : if (!SelectConfigFiles(userDoption, progname))
886 0 : ExitPostmaster(2);
887 :
888 1058 : if (output_config_variable != NULL)
889 : {
890 : /*
891 : * If this is a runtime-computed GUC, it hasn't yet been initialized,
892 : * and the present value is not useful. However, this is a convenient
893 : * place to print the value for most GUCs because it is safe to run
894 : * postmaster startup to this point even if the server is already
895 : * running. For the handful of runtime-computed GUCs that we cannot
896 : * provide meaningful values for yet, we wait until later in
897 : * postmaster startup to print the value. We won't be able to use -C
898 : * on running servers for those GUCs, but using this option now would
899 : * lead to incorrect results for them.
900 : */
901 4 : int flags = GetConfigOptionFlags(output_config_variable, true);
902 :
903 4 : if ((flags & GUC_RUNTIME_COMPUTED) == 0)
904 : {
905 : /*
906 : * "-C guc" was specified, so print GUC's value and exit. No
907 : * extra permission check is needed because the user is reading
908 : * inside the data dir.
909 : */
910 2 : const char *config_val = GetConfigOption(output_config_variable,
911 : false, false);
912 :
913 2 : puts(config_val ? config_val : "");
914 2 : ExitPostmaster(0);
915 : }
916 :
917 : /*
918 : * A runtime-computed GUC will be printed later on. As we initialize
919 : * a server startup sequence, silence any log messages that may show
920 : * up in the output generated. FATAL and more severe messages are
921 : * useful to show, even if one would only expect at least PANIC. LOG
922 : * entries are hidden.
923 : */
924 2 : SetConfigOption("log_min_messages", "FATAL", PGC_INTERNAL,
925 : PGC_S_OVERRIDE);
926 : }
927 :
928 : /* Verify that DataDir looks reasonable */
929 1056 : checkDataDir();
930 :
931 : /* Check that pg_control exists */
932 1056 : checkControlFile();
933 :
934 : /* And switch working directory into it */
935 1056 : ChangeToDataDir();
936 :
937 : /*
938 : * Check for invalid combinations of GUC settings.
939 : */
940 1056 : if (ReservedBackends >= MaxConnections)
941 : {
942 0 : write_stderr("%s: superuser_reserved_connections (%d) must be less than max_connections (%d)\n",
943 : progname,
944 : ReservedBackends, MaxConnections);
945 0 : ExitPostmaster(1);
946 : }
947 1056 : if (XLogArchiveMode > ARCHIVE_MODE_OFF && wal_level == WAL_LEVEL_MINIMAL)
948 0 : ereport(ERROR,
949 : (errmsg("WAL archival cannot be enabled when wal_level is \"minimal\"")));
950 1056 : if (max_wal_senders > 0 && wal_level == WAL_LEVEL_MINIMAL)
951 0 : ereport(ERROR,
952 : (errmsg("WAL streaming (max_wal_senders > 0) requires wal_level \"replica\" or \"logical\"")));
953 :
954 : /*
955 : * Other one-time internal sanity checks can go here, if they are fast.
956 : * (Put any slow processing further down, after postmaster.pid creation.)
957 : */
958 1056 : if (!CheckDateTokenTables())
959 : {
960 0 : write_stderr("%s: invalid datetoken tables, please fix\n", progname);
961 0 : ExitPostmaster(1);
962 : }
963 :
964 : /*
965 : * Now that we are done processing the postmaster arguments, reset
966 : * getopt(3) library so that it will work correctly in subprocesses.
967 : */
968 1056 : optind = 1;
969 : #ifdef HAVE_INT_OPTRESET
970 : optreset = 1; /* some systems need this too */
971 : #endif
972 :
973 : /* For debugging: display postmaster environment */
974 : {
975 : extern char **environ;
976 : char **p;
977 :
978 1056 : ereport(DEBUG3,
979 : (errmsg_internal("%s: PostmasterMain: initial environment dump:",
980 : progname)));
981 1056 : ereport(DEBUG3,
982 : (errmsg_internal("-----------------------------------------")));
983 39544 : for (p = environ; *p; ++p)
984 38488 : ereport(DEBUG3,
985 : (errmsg_internal("\t%s", *p)));
986 1056 : ereport(DEBUG3,
987 : (errmsg_internal("-----------------------------------------")));
988 : }
989 :
990 : /*
991 : * Create lockfile for data directory.
992 : *
993 : * We want to do this before we try to grab the input sockets, because the
994 : * data directory interlock is more reliable than the socket-file
995 : * interlock (thanks to whoever decided to put socket files in /tmp :-().
996 : * For the same reason, it's best to grab the TCP socket(s) before the
997 : * Unix socket(s).
998 : *
999 : * Also note that this internally sets up the on_proc_exit function that
1000 : * is responsible for removing both data directory and socket lockfiles;
1001 : * so it must happen before opening sockets so that at exit, the socket
1002 : * lockfiles go away after CloseServerPorts runs.
1003 : */
1004 1056 : CreateDataDirLockFile(true);
1005 :
1006 : /*
1007 : * Read the control file (for error checking and config info).
1008 : *
1009 : * Since we verify the control file's CRC, this has a useful side effect
1010 : * on machines where we need a run-time test for CRC support instructions.
1011 : * The postmaster will do the test once at startup, and then its child
1012 : * processes will inherit the correct function pointer and not need to
1013 : * repeat the test.
1014 : */
1015 1054 : LocalProcessControlFile(false);
1016 :
1017 : /*
1018 : * Register the apply launcher. It's probably a good idea to call this
1019 : * before any modules had a chance to take the background worker slots.
1020 : */
1021 1054 : ApplyLauncherRegister();
1022 :
1023 : /*
1024 : * process any libraries that should be preloaded at postmaster start
1025 : */
1026 1054 : process_shared_preload_libraries();
1027 :
1028 : /*
1029 : * Initialize SSL library, if specified.
1030 : */
1031 : #ifdef USE_SSL
1032 1054 : if (EnableSSL)
1033 : {
1034 46 : (void) secure_initialize(true);
1035 40 : LoadedSSL = true;
1036 : }
1037 : #endif
1038 :
1039 : /*
1040 : * Now that loadable modules have had their chance to alter any GUCs,
1041 : * calculate MaxBackends.
1042 : */
1043 1048 : InitializeMaxBackends();
1044 :
1045 : /*
1046 : * Give preloaded libraries a chance to request additional shared memory.
1047 : */
1048 1048 : process_shmem_requests();
1049 :
1050 : /*
1051 : * Now that loadable modules have had their chance to request additional
1052 : * shared memory, determine the value of any runtime-computed GUCs that
1053 : * depend on the amount of shared memory required.
1054 : */
1055 1048 : InitializeShmemGUCs();
1056 :
1057 : /*
1058 : * Now that modules have been loaded, we can process any custom resource
1059 : * managers specified in the wal_consistency_checking GUC.
1060 : */
1061 1048 : InitializeWalConsistencyChecking();
1062 :
1063 : /*
1064 : * If -C was specified with a runtime-computed GUC, we held off printing
1065 : * the value earlier, as the GUC was not yet initialized. We handle -C
1066 : * for most GUCs before we lock the data directory so that the option may
1067 : * be used on a running server. However, a handful of GUCs are runtime-
1068 : * computed and do not have meaningful values until after locking the data
1069 : * directory, and we cannot safely calculate their values earlier on a
1070 : * running server. At this point, such GUCs should be properly
1071 : * initialized, and we haven't yet set up shared memory, so this is a good
1072 : * time to handle the -C option for these special GUCs.
1073 : */
1074 1048 : if (output_config_variable != NULL)
1075 : {
1076 2 : const char *config_val = GetConfigOption(output_config_variable,
1077 : false, false);
1078 :
1079 2 : puts(config_val ? config_val : "");
1080 2 : ExitPostmaster(0);
1081 : }
1082 :
1083 : /*
1084 : * Set up shared memory and semaphores.
1085 : */
1086 1046 : reset_shared();
1087 :
1088 : /*
1089 : * Estimate number of openable files. This must happen after setting up
1090 : * semaphores, because on some platforms semaphores count as open files.
1091 : */
1092 1044 : set_max_safe_fds();
1093 :
1094 : /*
1095 : * Set reference point for stack-depth checking.
1096 : */
1097 1044 : (void) set_stack_base();
1098 :
1099 : /*
1100 : * Initialize pipe (or process handle on Windows) that allows children to
1101 : * wake up from sleep on postmaster death.
1102 : */
1103 1044 : InitPostmasterDeathWatchHandle();
1104 :
1105 : #ifdef WIN32
1106 :
1107 : /*
1108 : * Initialize I/O completion port used to deliver list of dead children.
1109 : */
1110 : win32ChildQueue = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 1);
1111 : if (win32ChildQueue == NULL)
1112 : ereport(FATAL,
1113 : (errmsg("could not create I/O completion port for child queue")));
1114 : #endif
1115 :
1116 : #ifdef EXEC_BACKEND
1117 : /* Write out nondefault GUC settings for child processes to use */
1118 : write_nondefault_variables(PGC_POSTMASTER);
1119 :
1120 : /*
1121 : * Clean out the temp directory used to transmit parameters to child
1122 : * processes (see internal_forkexec, below). We must do this before
1123 : * launching any child processes, else we have a race condition: we could
1124 : * remove a parameter file before the child can read it. It should be
1125 : * safe to do so now, because we verified earlier that there are no
1126 : * conflicting Postgres processes in this data directory.
1127 : */
1128 : RemovePgTempFilesInDir(PG_TEMP_FILES_DIR, true, false);
1129 : #endif
1130 :
1131 : /*
1132 : * Forcibly remove the files signaling a standby promotion request.
1133 : * Otherwise, the existence of those files triggers a promotion too early,
1134 : * whether a user wants that or not.
1135 : *
1136 : * This removal of files is usually unnecessary because they can exist
1137 : * only during a few moments during a standby promotion. However there is
1138 : * a race condition: if pg_ctl promote is executed and creates the files
1139 : * during a promotion, the files can stay around even after the server is
1140 : * brought up to be the primary. Then, if a new standby starts by using
1141 : * the backup taken from the new primary, the files can exist at server
1142 : * startup and must be removed in order to avoid an unexpected promotion.
1143 : *
1144 : * Note that promotion signal files need to be removed before the startup
1145 : * process is invoked. Because, after that, they can be used by
1146 : * postmaster's SIGUSR1 signal handler.
1147 : */
1148 1044 : RemovePromoteSignalFiles();
1149 :
1150 : /* Do the same for logrotate signal file */
1151 1044 : RemoveLogrotateSignalFiles();
1152 :
1153 : /* Remove any outdated file holding the current log filenames. */
1154 1044 : if (unlink(LOG_METAINFO_DATAFILE) < 0 && errno != ENOENT)
1155 0 : ereport(LOG,
1156 : (errcode_for_file_access(),
1157 : errmsg("could not remove file \"%s\": %m",
1158 : LOG_METAINFO_DATAFILE)));
1159 :
1160 : /*
1161 : * If enabled, start up syslogger collection subprocess
1162 : */
1163 1044 : SysLoggerPID = SysLogger_Start();
1164 :
1165 : /*
1166 : * Reset whereToSendOutput from DestDebug (its starting state) to
1167 : * DestNone. This stops ereport from sending log messages to stderr unless
1168 : * Log_destination permits. We don't do this until the postmaster is
1169 : * fully launched, since startup failures may as well be reported to
1170 : * stderr.
1171 : *
1172 : * If we are in fact disabling logging to stderr, first emit a log message
1173 : * saying so, to provide a breadcrumb trail for users who may not remember
1174 : * that their logging is configured to go somewhere else.
1175 : */
1176 1044 : if (!(Log_destination & LOG_DESTINATION_STDERR))
1177 0 : ereport(LOG,
1178 : (errmsg("ending log output to stderr"),
1179 : errhint("Future log output will go to log destination \"%s\".",
1180 : Log_destination_string)));
1181 :
1182 1044 : whereToSendOutput = DestNone;
1183 :
1184 : /*
1185 : * Report server startup in log. While we could emit this much earlier,
1186 : * it seems best to do so after starting the log collector, if we intend
1187 : * to use one.
1188 : */
1189 1044 : ereport(LOG,
1190 : (errmsg("starting %s", PG_VERSION_STR)));
1191 :
1192 : /*
1193 : * Establish input sockets.
1194 : *
1195 : * First, mark them all closed, and set up an on_proc_exit function that's
1196 : * charged with closing the sockets again at postmaster shutdown.
1197 : */
1198 67860 : for (i = 0; i < MAXLISTEN; i++)
1199 66816 : ListenSocket[i] = PGINVALID_SOCKET;
1200 :
1201 1044 : on_proc_exit(CloseServerPorts, 0);
1202 :
1203 1044 : if (ListenAddresses)
1204 : {
1205 : char *rawstring;
1206 : List *elemlist;
1207 : ListCell *l;
1208 1044 : int success = 0;
1209 :
1210 : /* Need a modifiable copy of ListenAddresses */
1211 1044 : rawstring = pstrdup(ListenAddresses);
1212 :
1213 : /* Parse string into list of hostnames */
1214 1044 : if (!SplitGUCList(rawstring, ',', &elemlist))
1215 : {
1216 : /* syntax error in list */
1217 0 : ereport(FATAL,
1218 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1219 : errmsg("invalid list syntax in parameter \"%s\"",
1220 : "listen_addresses")));
1221 : }
1222 :
1223 1088 : foreach(l, elemlist)
1224 : {
1225 44 : char *curhost = (char *) lfirst(l);
1226 :
1227 44 : if (strcmp(curhost, "*") == 0)
1228 0 : status = StreamServerPort(AF_UNSPEC, NULL,
1229 0 : (unsigned short) PostPortNumber,
1230 : NULL,
1231 : ListenSocket, MAXLISTEN);
1232 : else
1233 44 : status = StreamServerPort(AF_UNSPEC, curhost,
1234 44 : (unsigned short) PostPortNumber,
1235 : NULL,
1236 : ListenSocket, MAXLISTEN);
1237 :
1238 44 : if (status == STATUS_OK)
1239 : {
1240 44 : success++;
1241 : /* record the first successful host addr in lockfile */
1242 44 : if (!listen_addr_saved)
1243 : {
1244 44 : AddToDataDirLockFile(LOCK_FILE_LINE_LISTEN_ADDR, curhost);
1245 44 : listen_addr_saved = true;
1246 : }
1247 : }
1248 : else
1249 0 : ereport(WARNING,
1250 : (errmsg("could not create listen socket for \"%s\"",
1251 : curhost)));
1252 : }
1253 :
1254 1044 : if (!success && elemlist != NIL)
1255 0 : ereport(FATAL,
1256 : (errmsg("could not create any TCP/IP sockets")));
1257 :
1258 1044 : list_free(elemlist);
1259 1044 : pfree(rawstring);
1260 : }
1261 :
1262 : #ifdef USE_BONJOUR
1263 : /* Register for Bonjour only if we opened TCP socket(s) */
1264 : if (enable_bonjour && ListenSocket[0] != PGINVALID_SOCKET)
1265 : {
1266 : DNSServiceErrorType err;
1267 :
1268 : /*
1269 : * We pass 0 for interface_index, which will result in registering on
1270 : * all "applicable" interfaces. It's not entirely clear from the
1271 : * DNS-SD docs whether this would be appropriate if we have bound to
1272 : * just a subset of the available network interfaces.
1273 : */
1274 : err = DNSServiceRegister(&bonjour_sdref,
1275 : 0,
1276 : 0,
1277 : bonjour_name,
1278 : "_postgresql._tcp.",
1279 : NULL,
1280 : NULL,
1281 : pg_hton16(PostPortNumber),
1282 : 0,
1283 : NULL,
1284 : NULL,
1285 : NULL);
1286 : if (err != kDNSServiceErr_NoError)
1287 : ereport(LOG,
1288 : (errmsg("DNSServiceRegister() failed: error code %ld",
1289 : (long) err)));
1290 :
1291 : /*
1292 : * We don't bother to read the mDNS daemon's reply, and we expect that
1293 : * it will automatically terminate our registration when the socket is
1294 : * closed at postmaster termination. So there's nothing more to be
1295 : * done here. However, the bonjour_sdref is kept around so that
1296 : * forked children can close their copies of the socket.
1297 : */
1298 : }
1299 : #endif
1300 :
1301 : #ifdef HAVE_UNIX_SOCKETS
1302 1044 : if (Unix_socket_directories)
1303 : {
1304 : char *rawstring;
1305 : List *elemlist;
1306 : ListCell *l;
1307 1044 : int success = 0;
1308 :
1309 : /* Need a modifiable copy of Unix_socket_directories */
1310 1044 : rawstring = pstrdup(Unix_socket_directories);
1311 :
1312 : /* Parse string into list of directories */
1313 1044 : if (!SplitDirectoriesString(rawstring, ',', &elemlist))
1314 : {
1315 : /* syntax error in list */
1316 0 : ereport(FATAL,
1317 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1318 : errmsg("invalid list syntax in parameter \"%s\"",
1319 : "unix_socket_directories")));
1320 : }
1321 :
1322 2086 : foreach(l, elemlist)
1323 : {
1324 1042 : char *socketdir = (char *) lfirst(l);
1325 :
1326 1042 : status = StreamServerPort(AF_UNIX, NULL,
1327 1042 : (unsigned short) PostPortNumber,
1328 : socketdir,
1329 : ListenSocket, MAXLISTEN);
1330 :
1331 1042 : if (status == STATUS_OK)
1332 : {
1333 1042 : success++;
1334 : /* record the first successful Unix socket in lockfile */
1335 1042 : if (success == 1)
1336 1042 : AddToDataDirLockFile(LOCK_FILE_LINE_SOCKET_DIR, socketdir);
1337 : }
1338 : else
1339 0 : ereport(WARNING,
1340 : (errmsg("could not create Unix-domain socket in directory \"%s\"",
1341 : socketdir)));
1342 : }
1343 :
1344 1044 : if (!success && elemlist != NIL)
1345 0 : ereport(FATAL,
1346 : (errmsg("could not create any Unix-domain sockets")));
1347 :
1348 1044 : list_free_deep(elemlist);
1349 1044 : pfree(rawstring);
1350 : }
1351 : #endif
1352 :
1353 : /*
1354 : * check that we have some socket to listen on
1355 : */
1356 1044 : if (ListenSocket[0] == PGINVALID_SOCKET)
1357 0 : ereport(FATAL,
1358 : (errmsg("no socket created for listening")));
1359 :
1360 : /*
1361 : * If no valid TCP ports, write an empty line for listen address,
1362 : * indicating the Unix socket must be used. Note that this line is not
1363 : * added to the lock file until there is a socket backing it.
1364 : */
1365 1044 : if (!listen_addr_saved)
1366 1000 : AddToDataDirLockFile(LOCK_FILE_LINE_LISTEN_ADDR, "");
1367 :
1368 : /*
1369 : * Record postmaster options. We delay this till now to avoid recording
1370 : * bogus options (eg, unusable port number).
1371 : */
1372 1044 : if (!CreateOptsFile(argc, argv, my_exec_path))
1373 0 : ExitPostmaster(1);
1374 :
1375 : /*
1376 : * Write the external PID file if requested
1377 : */
1378 1044 : if (external_pid_file)
1379 : {
1380 0 : FILE *fpidfile = fopen(external_pid_file, "w");
1381 :
1382 0 : if (fpidfile)
1383 : {
1384 0 : fprintf(fpidfile, "%d\n", MyProcPid);
1385 0 : fclose(fpidfile);
1386 :
1387 : /* Make PID file world readable */
1388 0 : if (chmod(external_pid_file, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) != 0)
1389 0 : write_stderr("%s: could not change permissions of external PID file \"%s\": %s\n",
1390 0 : progname, external_pid_file, strerror(errno));
1391 : }
1392 : else
1393 0 : write_stderr("%s: could not write external PID file \"%s\": %s\n",
1394 0 : progname, external_pid_file, strerror(errno));
1395 :
1396 0 : on_proc_exit(unlink_external_pid_file, 0);
1397 : }
1398 :
1399 : /*
1400 : * Remove old temporary files. At this point there can be no other
1401 : * Postgres processes running in this directory, so this should be safe.
1402 : */
1403 1044 : RemovePgTempFiles();
1404 :
1405 : /*
1406 : * Initialize the autovacuum subsystem (again, no process start yet)
1407 : */
1408 1044 : autovac_init();
1409 :
1410 : /*
1411 : * Load configuration files for client authentication.
1412 : */
1413 1044 : if (!load_hba())
1414 : {
1415 : /*
1416 : * It makes no sense to continue if we fail to load the HBA file,
1417 : * since there is no way to connect to the database in this case.
1418 : */
1419 0 : ereport(FATAL,
1420 : (errmsg("could not load pg_hba.conf")));
1421 : }
1422 1044 : if (!load_ident())
1423 : {
1424 : /*
1425 : * We can start up without the IDENT file, although it means that you
1426 : * cannot log in using any of the authentication methods that need a
1427 : * user name mapping. load_ident() already logged the details of error
1428 : * to the log.
1429 : */
1430 : }
1431 :
1432 : #ifdef HAVE_PTHREAD_IS_THREADED_NP
1433 :
1434 : /*
1435 : * On macOS, libintl replaces setlocale() with a version that calls
1436 : * CFLocaleCopyCurrent() when its second argument is "" and every relevant
1437 : * environment variable is unset or empty. CFLocaleCopyCurrent() makes
1438 : * the process multithreaded. The postmaster calls sigprocmask() and
1439 : * calls fork() without an immediate exec(), both of which have undefined
1440 : * behavior in a multithreaded program. A multithreaded postmaster is the
1441 : * normal case on Windows, which offers neither fork() nor sigprocmask().
1442 : */
1443 : if (pthread_is_threaded_np() != 0)
1444 : ereport(FATAL,
1445 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1446 : errmsg("postmaster became multithreaded during startup"),
1447 : errhint("Set the LC_ALL environment variable to a valid locale.")));
1448 : #endif
1449 :
1450 : /*
1451 : * Remember postmaster startup time
1452 : */
1453 1044 : PgStartTime = GetCurrentTimestamp();
1454 :
1455 : /*
1456 : * Report postmaster status in the postmaster.pid file, to allow pg_ctl to
1457 : * see what's happening.
1458 : */
1459 1044 : AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STARTING);
1460 :
1461 : /* Start bgwriter and checkpointer so they can help with recovery */
1462 1044 : if (CheckpointerPID == 0)
1463 1044 : CheckpointerPID = StartCheckpointer();
1464 1044 : if (BgWriterPID == 0)
1465 1044 : BgWriterPID = StartBackgroundWriter();
1466 :
1467 : /*
1468 : * We're ready to rock and roll...
1469 : */
1470 1044 : StartupPID = StartupDataBase();
1471 : Assert(StartupPID != 0);
1472 1044 : StartupStatus = STARTUP_RUNNING;
1473 1044 : pmState = PM_STARTUP;
1474 :
1475 : /* Some workers may be scheduled to start now */
1476 1044 : maybe_start_bgworkers();
1477 :
1478 1044 : status = ServerLoop();
1479 :
1480 : /*
1481 : * ServerLoop probably shouldn't ever return, but if it does, close down.
1482 : */
1483 0 : ExitPostmaster(status != STATUS_OK);
1484 :
1485 : abort(); /* not reached */
1486 : }
1487 :
1488 :
1489 : /*
1490 : * on_proc_exit callback to close server's listen sockets
1491 : */
1492 : static void
1493 1038 : CloseServerPorts(int status, Datum arg)
1494 : {
1495 : int i;
1496 :
1497 : /*
1498 : * First, explicitly close all the socket FDs. We used to just let this
1499 : * happen implicitly at postmaster exit, but it's better to close them
1500 : * before we remove the postmaster.pid lockfile; otherwise there's a race
1501 : * condition if a new postmaster wants to re-use the TCP port number.
1502 : */
1503 67470 : for (i = 0; i < MAXLISTEN; i++)
1504 : {
1505 66432 : if (ListenSocket[i] != PGINVALID_SOCKET)
1506 : {
1507 1082 : StreamClose(ListenSocket[i]);
1508 1082 : ListenSocket[i] = PGINVALID_SOCKET;
1509 : }
1510 : }
1511 :
1512 : /*
1513 : * Next, remove any filesystem entries for Unix sockets. To avoid race
1514 : * conditions against incoming postmasters, this must happen after closing
1515 : * the sockets and before removing lock files.
1516 : */
1517 1038 : RemoveSocketFiles();
1518 :
1519 : /*
1520 : * We don't do anything about socket lock files here; those will be
1521 : * removed in a later on_proc_exit callback.
1522 : */
1523 1038 : }
1524 :
1525 : /*
1526 : * on_proc_exit callback to delete external_pid_file
1527 : */
1528 : static void
1529 0 : unlink_external_pid_file(int status, Datum arg)
1530 : {
1531 0 : if (external_pid_file)
1532 0 : unlink(external_pid_file);
1533 0 : }
1534 :
1535 :
1536 : /*
1537 : * Compute and check the directory paths to files that are part of the
1538 : * installation (as deduced from the postgres executable's own location)
1539 : */
1540 : static void
1541 1060 : getInstallationPaths(const char *argv0)
1542 : {
1543 : DIR *pdir;
1544 :
1545 : /* Locate the postgres executable itself */
1546 1060 : if (find_my_exec(argv0, my_exec_path) < 0)
1547 0 : ereport(FATAL,
1548 : (errmsg("%s: could not locate my own executable path", argv0)));
1549 :
1550 : #ifdef EXEC_BACKEND
1551 : /* Locate executable backend before we change working directory */
1552 : if (find_other_exec(argv0, "postgres", PG_BACKEND_VERSIONSTR,
1553 : postgres_exec_path) < 0)
1554 : ereport(FATAL,
1555 : (errmsg("%s: could not locate matching postgres executable",
1556 : argv0)));
1557 : #endif
1558 :
1559 : /*
1560 : * Locate the pkglib directory --- this has to be set early in case we try
1561 : * to load any modules from it in response to postgresql.conf entries.
1562 : */
1563 1060 : get_pkglib_path(my_exec_path, pkglib_path);
1564 :
1565 : /*
1566 : * Verify that there's a readable directory there; otherwise the Postgres
1567 : * installation is incomplete or corrupt. (A typical cause of this
1568 : * failure is that the postgres executable has been moved or hardlinked to
1569 : * some directory that's not a sibling of the installation lib/
1570 : * directory.)
1571 : */
1572 1060 : pdir = AllocateDir(pkglib_path);
1573 1060 : if (pdir == NULL)
1574 0 : ereport(ERROR,
1575 : (errcode_for_file_access(),
1576 : errmsg("could not open directory \"%s\": %m",
1577 : pkglib_path),
1578 : errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
1579 : my_exec_path)));
1580 1060 : FreeDir(pdir);
1581 :
1582 : /*
1583 : * XXX is it worth similarly checking the share/ directory? If the lib/
1584 : * directory is there, then share/ probably is too.
1585 : */
1586 1060 : }
1587 :
1588 : /*
1589 : * Check that pg_control exists in the correct location in the data directory.
1590 : *
1591 : * No attempt is made to validate the contents of pg_control here. This is
1592 : * just a sanity check to see if we are looking at a real data directory.
1593 : */
1594 : static void
1595 1056 : checkControlFile(void)
1596 : {
1597 : char path[MAXPGPATH];
1598 : FILE *fp;
1599 :
1600 1056 : snprintf(path, sizeof(path), "%s/global/pg_control", DataDir);
1601 :
1602 1056 : fp = AllocateFile(path, PG_BINARY_R);
1603 1056 : if (fp == NULL)
1604 : {
1605 0 : write_stderr("%s: could not find the database system\n"
1606 : "Expected to find it in the directory \"%s\",\n"
1607 : "but could not open file \"%s\": %s\n",
1608 0 : progname, DataDir, path, strerror(errno));
1609 0 : ExitPostmaster(2);
1610 : }
1611 1056 : FreeFile(fp);
1612 1056 : }
1613 :
1614 : /*
1615 : * Determine how long should we let ServerLoop sleep.
1616 : *
1617 : * In normal conditions we wait at most one minute, to ensure that the other
1618 : * background tasks handled by ServerLoop get done even when no requests are
1619 : * arriving. However, if there are background workers waiting to be started,
1620 : * we don't actually sleep so that they are quickly serviced. Other exception
1621 : * cases are as shown in the code.
1622 : */
1623 : static void
1624 36800 : DetermineSleepTime(struct timeval *timeout)
1625 : {
1626 36800 : TimestampTz next_wakeup = 0;
1627 :
1628 : /*
1629 : * Normal case: either there are no background workers at all, or we're in
1630 : * a shutdown sequence (during which we ignore bgworkers altogether).
1631 : */
1632 36800 : if (Shutdown > NoShutdown ||
1633 32140 : (!StartWorkerNeeded && !HaveCrashedWorker))
1634 : {
1635 36800 : if (AbortStartTime != 0)
1636 : {
1637 : /* time left to abort; clamp to 0 in case it already expired */
1638 1452 : timeout->tv_sec = SIGKILL_CHILDREN_AFTER_SECS -
1639 1452 : (time(NULL) - AbortStartTime);
1640 1452 : timeout->tv_sec = Max(timeout->tv_sec, 0);
1641 1452 : timeout->tv_usec = 0;
1642 : }
1643 : else
1644 : {
1645 35348 : timeout->tv_sec = 60;
1646 35348 : timeout->tv_usec = 0;
1647 : }
1648 36800 : return;
1649 : }
1650 :
1651 0 : if (StartWorkerNeeded)
1652 : {
1653 0 : timeout->tv_sec = 0;
1654 0 : timeout->tv_usec = 0;
1655 0 : return;
1656 : }
1657 :
1658 0 : if (HaveCrashedWorker)
1659 : {
1660 : slist_mutable_iter siter;
1661 :
1662 : /*
1663 : * When there are crashed bgworkers, we sleep just long enough that
1664 : * they are restarted when they request to be. Scan the list to
1665 : * determine the minimum of all wakeup times according to most recent
1666 : * crash time and requested restart interval.
1667 : */
1668 0 : slist_foreach_modify(siter, &BackgroundWorkerList)
1669 : {
1670 : RegisteredBgWorker *rw;
1671 : TimestampTz this_wakeup;
1672 :
1673 0 : rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
1674 :
1675 0 : if (rw->rw_crashed_at == 0)
1676 0 : continue;
1677 :
1678 0 : if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART
1679 0 : || rw->rw_terminate)
1680 : {
1681 0 : ForgetBackgroundWorker(&siter);
1682 0 : continue;
1683 : }
1684 :
1685 0 : this_wakeup = TimestampTzPlusMilliseconds(rw->rw_crashed_at,
1686 : 1000L * rw->rw_worker.bgw_restart_time);
1687 0 : if (next_wakeup == 0 || this_wakeup < next_wakeup)
1688 0 : next_wakeup = this_wakeup;
1689 : }
1690 : }
1691 :
1692 0 : if (next_wakeup != 0)
1693 : {
1694 : long secs;
1695 : int microsecs;
1696 :
1697 0 : TimestampDifference(GetCurrentTimestamp(), next_wakeup,
1698 : &secs, µsecs);
1699 0 : timeout->tv_sec = secs;
1700 0 : timeout->tv_usec = microsecs;
1701 :
1702 : /* Ensure we don't exceed one minute */
1703 0 : if (timeout->tv_sec > 60)
1704 : {
1705 0 : timeout->tv_sec = 60;
1706 0 : timeout->tv_usec = 0;
1707 : }
1708 : }
1709 : else
1710 : {
1711 0 : timeout->tv_sec = 60;
1712 0 : timeout->tv_usec = 0;
1713 : }
1714 : }
1715 :
1716 : /*
1717 : * Main idle loop of postmaster
1718 : *
1719 : * NB: Needs to be called with signals blocked
1720 : */
1721 : static int
1722 1044 : ServerLoop(void)
1723 : {
1724 : fd_set readmask;
1725 : int nSockets;
1726 : time_t last_lockfile_recheck_time,
1727 : last_touch_time;
1728 :
1729 1044 : last_lockfile_recheck_time = last_touch_time = time(NULL);
1730 :
1731 1044 : nSockets = initMasks(&readmask);
1732 :
1733 : for (;;)
1734 35768 : {
1735 : fd_set rmask;
1736 : int selres;
1737 : time_t now;
1738 :
1739 : /*
1740 : * Wait for a connection request to arrive.
1741 : *
1742 : * We block all signals except while sleeping. That makes it safe for
1743 : * signal handlers, which again block all signals while executing, to
1744 : * do nontrivial work.
1745 : *
1746 : * If we are in PM_WAIT_DEAD_END state, then we don't want to accept
1747 : * any new connections, so we don't call select(), and just sleep.
1748 : */
1749 36812 : memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
1750 :
1751 36812 : if (pmState == PM_WAIT_DEAD_END)
1752 : {
1753 12 : PG_SETMASK(&UnBlockSig);
1754 :
1755 10 : pg_usleep(100000L); /* 100 msec seems reasonable */
1756 0 : selres = 0;
1757 :
1758 0 : PG_SETMASK(&BlockSig);
1759 : }
1760 : else
1761 : {
1762 : /* must set timeout each time; some OSes change it! */
1763 : struct timeval timeout;
1764 :
1765 : /* Needs to run with blocked signals! */
1766 36800 : DetermineSleepTime(&timeout);
1767 :
1768 36800 : PG_SETMASK(&UnBlockSig);
1769 :
1770 36800 : selres = select(nSockets, &rmask, NULL, NULL, &timeout);
1771 :
1772 35770 : PG_SETMASK(&BlockSig);
1773 : }
1774 :
1775 : /* Now check the select() result */
1776 35770 : if (selres < 0)
1777 : {
1778 20128 : if (errno != EINTR && errno != EWOULDBLOCK)
1779 : {
1780 0 : ereport(LOG,
1781 : (errcode_for_socket_access(),
1782 : errmsg("select() failed in postmaster: %m")));
1783 0 : return STATUS_ERROR;
1784 : }
1785 : }
1786 :
1787 : /*
1788 : * New connection pending on any of our sockets? If so, fork a child
1789 : * process to deal with it.
1790 : */
1791 35770 : if (selres > 0)
1792 : {
1793 : int i;
1794 :
1795 31760 : for (i = 0; i < MAXLISTEN; i++)
1796 : {
1797 31760 : if (ListenSocket[i] == PGINVALID_SOCKET)
1798 15640 : break;
1799 16120 : if (FD_ISSET(ListenSocket[i], &rmask))
1800 : {
1801 : Port *port;
1802 :
1803 15642 : port = ConnCreate(ListenSocket[i]);
1804 15642 : if (port)
1805 : {
1806 15642 : BackendStartup(port);
1807 :
1808 : /*
1809 : * We no longer need the open socket or port structure
1810 : * in this process
1811 : */
1812 15640 : StreamClose(port->sock);
1813 15640 : ConnFree(port);
1814 : }
1815 : }
1816 : }
1817 : }
1818 :
1819 : /* If we have lost the log collector, try to start a new one */
1820 35768 : if (SysLoggerPID == 0 && Logging_collector)
1821 0 : SysLoggerPID = SysLogger_Start();
1822 :
1823 : /*
1824 : * If no background writer process is running, and we are not in a
1825 : * state that prevents it, start one. It doesn't matter if this
1826 : * fails, we'll just try again later. Likewise for the checkpointer.
1827 : */
1828 35768 : if (pmState == PM_RUN || pmState == PM_RECOVERY ||
1829 6374 : pmState == PM_HOT_STANDBY || pmState == PM_STARTUP)
1830 : {
1831 31058 : if (CheckpointerPID == 0)
1832 8 : CheckpointerPID = StartCheckpointer();
1833 31058 : if (BgWriterPID == 0)
1834 8 : BgWriterPID = StartBackgroundWriter();
1835 : }
1836 :
1837 : /*
1838 : * Likewise, if we have lost the walwriter process, try to start a new
1839 : * one. But this is needed only in normal operation (else we cannot
1840 : * be writing any new WAL).
1841 : */
1842 35768 : if (WalWriterPID == 0 && pmState == PM_RUN)
1843 0 : WalWriterPID = StartWalWriter();
1844 :
1845 : /*
1846 : * If we have lost the autovacuum launcher, try to start a new one. We
1847 : * don't want autovacuum to run in binary upgrade mode because
1848 : * autovacuum might update relfrozenxid for empty tables before the
1849 : * physical files are put in place.
1850 : */
1851 41618 : if (!IsBinaryUpgrade && AutoVacPID == 0 &&
1852 7598 : (AutoVacuumingActive() || start_autovac_launcher) &&
1853 4102 : pmState == PM_RUN)
1854 : {
1855 0 : AutoVacPID = StartAutoVacLauncher();
1856 0 : if (AutoVacPID != 0)
1857 0 : start_autovac_launcher = false; /* signal processed */
1858 : }
1859 :
1860 : /* If we have lost the archiver, try to start a new one. */
1861 35768 : if (PgArchPID == 0 && PgArchStartupAllowed())
1862 0 : PgArchPID = StartArchiver();
1863 :
1864 : /* If we need to signal the autovacuum launcher, do so now */
1865 35768 : if (avlauncher_needs_signal)
1866 : {
1867 0 : avlauncher_needs_signal = false;
1868 0 : if (AutoVacPID != 0)
1869 0 : kill(AutoVacPID, SIGUSR2);
1870 : }
1871 :
1872 : /* If we need to start a WAL receiver, try to do that now */
1873 35768 : if (WalReceiverRequested)
1874 206 : MaybeStartWalReceiver();
1875 :
1876 : /* Get other worker processes running, if needed */
1877 35768 : if (StartWorkerNeeded || HaveCrashedWorker)
1878 5000 : maybe_start_bgworkers();
1879 :
1880 : #ifdef HAVE_PTHREAD_IS_THREADED_NP
1881 :
1882 : /*
1883 : * With assertions enabled, check regularly for appearance of
1884 : * additional threads. All builds check at start and exit.
1885 : */
1886 : Assert(pthread_is_threaded_np() == 0);
1887 : #endif
1888 :
1889 : /*
1890 : * Lastly, check to see if it's time to do some things that we don't
1891 : * want to do every single time through the loop, because they're a
1892 : * bit expensive. Note that there's up to a minute of slop in when
1893 : * these tasks will be performed, since DetermineSleepTime() will let
1894 : * us sleep at most that long; except for SIGKILL timeout which has
1895 : * special-case logic there.
1896 : */
1897 35768 : now = time(NULL);
1898 :
1899 : /*
1900 : * If we already sent SIGQUIT to children and they are slow to shut
1901 : * down, it's time to send them SIGKILL. This doesn't happen
1902 : * normally, but under certain conditions backends can get stuck while
1903 : * shutting down. This is a last measure to get them unwedged.
1904 : *
1905 : * Note we also do this during recovery from a process crash.
1906 : */
1907 35768 : if ((Shutdown >= ImmediateShutdown || (FatalError && !SendStop)) &&
1908 1472 : AbortStartTime != 0 &&
1909 1464 : (now - AbortStartTime) >= SIGKILL_CHILDREN_AFTER_SECS)
1910 : {
1911 : /* We were gentle with them before. Not anymore */
1912 0 : ereport(LOG,
1913 : (errmsg("issuing SIGKILL to recalcitrant children")));
1914 0 : TerminateChildren(SIGKILL);
1915 : /* reset flag so we don't SIGKILL again */
1916 0 : AbortStartTime = 0;
1917 : }
1918 :
1919 : /*
1920 : * Once a minute, verify that postmaster.pid hasn't been removed or
1921 : * overwritten. If it has, we force a shutdown. This avoids having
1922 : * postmasters and child processes hanging around after their database
1923 : * is gone, and maybe causing problems if a new database cluster is
1924 : * created in the same place. It also provides some protection
1925 : * against a DBA foolishly removing postmaster.pid and manually
1926 : * starting a new postmaster. Data corruption is likely to ensue from
1927 : * that anyway, but we can minimize the damage by aborting ASAP.
1928 : */
1929 35768 : if (now - last_lockfile_recheck_time >= 1 * SECS_PER_MINUTE)
1930 : {
1931 8 : if (!RecheckDataDirLockFile())
1932 : {
1933 0 : ereport(LOG,
1934 : (errmsg("performing immediate shutdown because data directory lock file is invalid")));
1935 0 : kill(MyProcPid, SIGQUIT);
1936 : }
1937 8 : last_lockfile_recheck_time = now;
1938 : }
1939 :
1940 : /*
1941 : * Touch Unix socket and lock files every 58 minutes, to ensure that
1942 : * they are not removed by overzealous /tmp-cleaning tasks. We assume
1943 : * no one runs cleaners with cutoff times of less than an hour ...
1944 : */
1945 35768 : if (now - last_touch_time >= 58 * SECS_PER_MINUTE)
1946 : {
1947 0 : TouchSocketFiles();
1948 0 : TouchSocketLockFiles();
1949 0 : last_touch_time = now;
1950 : }
1951 : }
1952 : }
1953 :
1954 : /*
1955 : * Initialise the masks for select() for the ports we are listening on.
1956 : * Return the number of sockets to listen on.
1957 : */
1958 : static int
1959 1044 : initMasks(fd_set *rmask)
1960 : {
1961 1044 : int maxsock = -1;
1962 : int i;
1963 :
1964 1044 : FD_ZERO(rmask);
1965 :
1966 2132 : for (i = 0; i < MAXLISTEN; i++)
1967 : {
1968 2132 : int fd = ListenSocket[i];
1969 :
1970 2132 : if (fd == PGINVALID_SOCKET)
1971 1044 : break;
1972 1088 : FD_SET(fd, rmask);
1973 :
1974 1088 : if (fd > maxsock)
1975 1088 : maxsock = fd;
1976 : }
1977 :
1978 1044 : return maxsock + 1;
1979 : }
1980 :
1981 :
1982 : /*
1983 : * Read a client's startup packet and do something according to it.
1984 : *
1985 : * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
1986 : * not return at all.
1987 : *
1988 : * (Note that ereport(FATAL) stuff is sent to the client, so only use it
1989 : * if that's what you want. Return STATUS_ERROR if you don't want to
1990 : * send anything to the client, which would typically be appropriate
1991 : * if we detect a communications failure.)
1992 : *
1993 : * Set ssl_done and/or gss_done when negotiation of an encrypted layer
1994 : * (currently, TLS or GSSAPI) is completed. A successful negotiation of either
1995 : * encryption layer sets both flags, but a rejected negotiation sets only the
1996 : * flag for that layer, since the client may wish to try the other one. We
1997 : * should make no assumption here about the order in which the client may make
1998 : * requests.
1999 : */
2000 : static int
2001 15860 : ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done)
2002 : {
2003 : int32 len;
2004 : char *buf;
2005 : ProtocolVersion proto;
2006 : MemoryContext oldcontext;
2007 :
2008 15860 : pq_startmsgread();
2009 :
2010 : /*
2011 : * Grab the first byte of the length word separately, so that we can tell
2012 : * whether we have no data at all or an incomplete packet. (This might
2013 : * sound inefficient, but it's not really, because of buffering in
2014 : * pqcomm.c.)
2015 : */
2016 15860 : if (pq_getbytes((char *) &len, 1) == EOF)
2017 : {
2018 : /*
2019 : * If we get no data at all, don't clutter the log with a complaint;
2020 : * such cases often occur for legitimate reasons. An example is that
2021 : * we might be here after responding to NEGOTIATE_SSL_CODE, and if the
2022 : * client didn't like our response, it'll probably just drop the
2023 : * connection. Service-monitoring software also often just opens and
2024 : * closes a connection without sending anything. (So do port
2025 : * scanners, which may be less benign, but it's not really our job to
2026 : * notice those.)
2027 : */
2028 26 : return STATUS_ERROR;
2029 : }
2030 :
2031 15834 : if (pq_getbytes(((char *) &len) + 1, 3) == EOF)
2032 : {
2033 : /* Got a partial length word, so bleat about that */
2034 0 : if (!ssl_done && !gss_done)
2035 0 : ereport(COMMERROR,
2036 : (errcode(ERRCODE_PROTOCOL_VIOLATION),
2037 : errmsg("incomplete startup packet")));
2038 0 : return STATUS_ERROR;
2039 : }
2040 :
2041 15834 : len = pg_ntoh32(len);
2042 15834 : len -= 4;
2043 :
2044 15834 : if (len < (int32) sizeof(ProtocolVersion) ||
2045 15834 : len > MAX_STARTUP_PACKET_LENGTH)
2046 : {
2047 0 : ereport(COMMERROR,
2048 : (errcode(ERRCODE_PROTOCOL_VIOLATION),
2049 : errmsg("invalid length of startup packet")));
2050 0 : return STATUS_ERROR;
2051 : }
2052 :
2053 : /*
2054 : * Allocate space to hold the startup packet, plus one extra byte that's
2055 : * initialized to be zero. This ensures we will have null termination of
2056 : * all strings inside the packet.
2057 : */
2058 15834 : buf = palloc(len + 1);
2059 15834 : buf[len] = '\0';
2060 :
2061 15834 : if (pq_getbytes(buf, len) == EOF)
2062 : {
2063 0 : ereport(COMMERROR,
2064 : (errcode(ERRCODE_PROTOCOL_VIOLATION),
2065 : errmsg("incomplete startup packet")));
2066 0 : return STATUS_ERROR;
2067 : }
2068 15834 : pq_endmsgread();
2069 :
2070 : /*
2071 : * The first field is either a protocol version number or a special
2072 : * request code.
2073 : */
2074 15834 : port->proto = proto = pg_ntoh32(*((ProtocolVersion *) buf));
2075 :
2076 15834 : if (proto == CANCEL_REQUEST_CODE)
2077 : {
2078 4 : processCancelRequest(port, buf);
2079 : /* Not really an error, but we don't want to proceed further */
2080 4 : return STATUS_ERROR;
2081 : }
2082 :
2083 15830 : if (proto == NEGOTIATE_SSL_CODE && !ssl_done)
2084 : {
2085 : char SSLok;
2086 :
2087 : #ifdef USE_SSL
2088 : /* No SSL when disabled or on Unix sockets */
2089 472 : if (!LoadedSSL || port->laddr.addr.ss_family == AF_UNIX)
2090 282 : SSLok = 'N';
2091 : else
2092 190 : SSLok = 'S'; /* Support for SSL */
2093 : #else
2094 : SSLok = 'N'; /* No support for SSL */
2095 : #endif
2096 :
2097 472 : retry1:
2098 472 : if (send(port->sock, &SSLok, 1, 0) != 1)
2099 : {
2100 0 : if (errno == EINTR)
2101 0 : goto retry1; /* if interrupted, just retry */
2102 0 : ereport(COMMERROR,
2103 : (errcode_for_socket_access(),
2104 : errmsg("failed to send SSL negotiation response: %m")));
2105 0 : return STATUS_ERROR; /* close the connection */
2106 : }
2107 :
2108 : #ifdef USE_SSL
2109 472 : if (SSLok == 'S' && secure_open_server(port) == -1)
2110 28 : return STATUS_ERROR;
2111 : #endif
2112 :
2113 : /*
2114 : * At this point we should have no data already buffered. If we do,
2115 : * it was received before we performed the SSL handshake, so it wasn't
2116 : * encrypted and indeed may have been injected by a man-in-the-middle.
2117 : * We report this case to the client.
2118 : */
2119 444 : if (pq_buffer_has_data())
2120 0 : ereport(FATAL,
2121 : (errcode(ERRCODE_PROTOCOL_VIOLATION),
2122 : errmsg("received unencrypted data after SSL request"),
2123 : errdetail("This could be either a client-software bug or evidence of an attempted man-in-the-middle attack.")));
2124 :
2125 : /*
2126 : * regular startup packet, cancel, etc packet should follow, but not
2127 : * another SSL negotiation request, and a GSS request should only
2128 : * follow if SSL was rejected (client may negotiate in either order)
2129 : */
2130 444 : return ProcessStartupPacket(port, true, SSLok == 'S');
2131 : }
2132 15358 : else if (proto == NEGOTIATE_GSS_CODE && !gss_done)
2133 : {
2134 0 : char GSSok = 'N';
2135 :
2136 : #ifdef ENABLE_GSS
2137 : /* No GSSAPI encryption when on Unix socket */
2138 : if (port->laddr.addr.ss_family != AF_UNIX)
2139 : GSSok = 'G';
2140 : #endif
2141 :
2142 0 : while (send(port->sock, &GSSok, 1, 0) != 1)
2143 : {
2144 0 : if (errno == EINTR)
2145 0 : continue;
2146 0 : ereport(COMMERROR,
2147 : (errcode_for_socket_access(),
2148 : errmsg("failed to send GSSAPI negotiation response: %m")));
2149 0 : return STATUS_ERROR; /* close the connection */
2150 : }
2151 :
2152 : #ifdef ENABLE_GSS
2153 : if (GSSok == 'G' && secure_open_gssapi(port) == -1)
2154 : return STATUS_ERROR;
2155 : #endif
2156 :
2157 : /*
2158 : * At this point we should have no data already buffered. If we do,
2159 : * it was received before we performed the GSS handshake, so it wasn't
2160 : * encrypted and indeed may have been injected by a man-in-the-middle.
2161 : * We report this case to the client.
2162 : */
2163 0 : if (pq_buffer_has_data())
2164 0 : ereport(FATAL,
2165 : (errcode(ERRCODE_PROTOCOL_VIOLATION),
2166 : errmsg("received unencrypted data after GSSAPI encryption request"),
2167 : errdetail("This could be either a client-software bug or evidence of an attempted man-in-the-middle attack.")));
2168 :
2169 : /*
2170 : * regular startup packet, cancel, etc packet should follow, but not
2171 : * another GSS negotiation request, and an SSL request should only
2172 : * follow if GSS was rejected (client may negotiate in either order)
2173 : */
2174 0 : return ProcessStartupPacket(port, GSSok == 'G', true);
2175 : }
2176 :
2177 : /* Could add additional special packet types here */
2178 :
2179 : /*
2180 : * Set FrontendProtocol now so that ereport() knows what format to send if
2181 : * we fail during startup.
2182 : */
2183 15358 : FrontendProtocol = proto;
2184 :
2185 : /* Check that the major protocol version is in range. */
2186 15358 : if (PG_PROTOCOL_MAJOR(proto) < PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST) ||
2187 15358 : PG_PROTOCOL_MAJOR(proto) > PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST))
2188 0 : ereport(FATAL,
2189 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2190 : errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u",
2191 : PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto),
2192 : PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST),
2193 : PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST),
2194 : PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST))));
2195 :
2196 : /*
2197 : * Now fetch parameters out of startup packet and save them into the Port
2198 : * structure. All data structures attached to the Port struct must be
2199 : * allocated in TopMemoryContext so that they will remain available in a
2200 : * running backend (even after PostmasterContext is destroyed). We need
2201 : * not worry about leaking this storage on failure, since we aren't in the
2202 : * postmaster process anymore.
2203 : */
2204 15358 : oldcontext = MemoryContextSwitchTo(TopMemoryContext);
2205 :
2206 : /* Handle protocol version 3 startup packet */
2207 : {
2208 15358 : int32 offset = sizeof(ProtocolVersion);
2209 15358 : List *unrecognized_protocol_options = NIL;
2210 :
2211 : /*
2212 : * Scan packet body for name/option pairs. We can assume any string
2213 : * beginning within the packet body is null-terminated, thanks to
2214 : * zeroing extra byte above.
2215 : */
2216 15358 : port->guc_options = NIL;
2217 :
2218 76848 : while (offset < len)
2219 : {
2220 76848 : char *nameptr = buf + offset;
2221 : int32 valoffset;
2222 : char *valptr;
2223 :
2224 76848 : if (*nameptr == '\0')
2225 15358 : break; /* found packet terminator */
2226 61490 : valoffset = offset + strlen(nameptr) + 1;
2227 61490 : if (valoffset >= len)
2228 0 : break; /* missing value, will complain below */
2229 61490 : valptr = buf + valoffset;
2230 :
2231 61490 : if (strcmp(nameptr, "database") == 0)
2232 15358 : port->database_name = pstrdup(valptr);
2233 46132 : else if (strcmp(nameptr, "user") == 0)
2234 15358 : port->user_name = pstrdup(valptr);
2235 30774 : else if (strcmp(nameptr, "options") == 0)
2236 4828 : port->cmdline_options = pstrdup(valptr);
2237 25946 : else if (strcmp(nameptr, "replication") == 0)
2238 : {
2239 : /*
2240 : * Due to backward compatibility concerns the replication
2241 : * parameter is a hybrid beast which allows the value to be
2242 : * either boolean or the string 'database'. The latter
2243 : * connects to a specific database which is e.g. required for
2244 : * logical decoding while.
2245 : */
2246 1314 : if (strcmp(valptr, "database") == 0)
2247 : {
2248 702 : am_walsender = true;
2249 702 : am_db_walsender = true;
2250 : }
2251 612 : else if (!parse_bool(valptr, &am_walsender))
2252 0 : ereport(FATAL,
2253 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2254 : errmsg("invalid value for parameter \"%s\": \"%s\"",
2255 : "replication",
2256 : valptr),
2257 : errhint("Valid values are: \"false\", 0, \"true\", 1, \"database\".")));
2258 : }
2259 24632 : else if (strncmp(nameptr, "_pq_.", 5) == 0)
2260 : {
2261 : /*
2262 : * Any option beginning with _pq_. is reserved for use as a
2263 : * protocol-level option, but at present no such options are
2264 : * defined.
2265 : */
2266 : unrecognized_protocol_options =
2267 0 : lappend(unrecognized_protocol_options, pstrdup(nameptr));
2268 : }
2269 : else
2270 : {
2271 : /* Assume it's a generic GUC option */
2272 24632 : port->guc_options = lappend(port->guc_options,
2273 24632 : pstrdup(nameptr));
2274 24632 : port->guc_options = lappend(port->guc_options,
2275 24632 : pstrdup(valptr));
2276 :
2277 : /*
2278 : * Copy application_name to port if we come across it. This
2279 : * is done so we can log the application_name in the
2280 : * connection authorization message. Note that the GUC would
2281 : * be used but we haven't gone through GUC setup yet.
2282 : */
2283 24632 : if (strcmp(nameptr, "application_name") == 0)
2284 : {
2285 15356 : char *tmp_app_name = pstrdup(valptr);
2286 :
2287 15356 : pg_clean_ascii(tmp_app_name);
2288 :
2289 15356 : port->application_name = tmp_app_name;
2290 : }
2291 : }
2292 61490 : offset = valoffset + strlen(valptr) + 1;
2293 : }
2294 :
2295 : /*
2296 : * If we didn't find a packet terminator exactly at the end of the
2297 : * given packet length, complain.
2298 : */
2299 15358 : if (offset != len - 1)
2300 0 : ereport(FATAL,
2301 : (errcode(ERRCODE_PROTOCOL_VIOLATION),
2302 : errmsg("invalid startup packet layout: expected terminator as last byte")));
2303 :
2304 : /*
2305 : * If the client requested a newer protocol version or if the client
2306 : * requested any protocol options we didn't recognize, let them know
2307 : * the newest minor protocol version we do support and the names of
2308 : * any unrecognized options.
2309 : */
2310 15358 : if (PG_PROTOCOL_MINOR(proto) > PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST) ||
2311 : unrecognized_protocol_options != NIL)
2312 0 : SendNegotiateProtocolVersion(unrecognized_protocol_options);
2313 : }
2314 :
2315 : /* Check a user name was given. */
2316 15358 : if (port->user_name == NULL || port->user_name[0] == '\0')
2317 0 : ereport(FATAL,
2318 : (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
2319 : errmsg("no PostgreSQL user name specified in startup packet")));
2320 :
2321 : /* The database defaults to the user name. */
2322 15358 : if (port->database_name == NULL || port->database_name[0] == '\0')
2323 0 : port->database_name = pstrdup(port->user_name);
2324 :
2325 15358 : if (Db_user_namespace)
2326 : {
2327 : /*
2328 : * If user@, it is a global user, remove '@'. We only want to do this
2329 : * if there is an '@' at the end and no earlier in the user string or
2330 : * they may fake as a local user of another database attaching to this
2331 : * database.
2332 : */
2333 0 : if (strchr(port->user_name, '@') ==
2334 0 : port->user_name + strlen(port->user_name) - 1)
2335 0 : *strchr(port->user_name, '@') = '\0';
2336 : else
2337 : {
2338 : /* Append '@' and dbname */
2339 0 : port->user_name = psprintf("%s@%s", port->user_name, port->database_name);
2340 : }
2341 : }
2342 :
2343 : /*
2344 : * Truncate given database and user names to length of a Postgres name.
2345 : * This avoids lookup failures when overlength names are given.
2346 : */
2347 15358 : if (strlen(port->database_name) >= NAMEDATALEN)
2348 0 : port->database_name[NAMEDATALEN - 1] = '\0';
2349 15358 : if (strlen(port->user_name) >= NAMEDATALEN)
2350 0 : port->user_name[NAMEDATALEN - 1] = '\0';
2351 :
2352 15358 : if (am_walsender)
2353 1314 : MyBackendType = B_WAL_SENDER;
2354 : else
2355 14044 : MyBackendType = B_BACKEND;
2356 :
2357 : /*
2358 : * Normal walsender backends, e.g. for streaming replication, are not
2359 : * connected to a particular database. But walsenders used for logical
2360 : * replication need to connect to a specific database. We allow streaming
2361 : * replication commands to be issued even if connected to a database as it
2362 : * can make sense to first make a basebackup and then stream changes
2363 : * starting from that.
2364 : */
2365 15358 : if (am_walsender && !am_db_walsender)
2366 612 : port->database_name[0] = '\0';
2367 :
2368 : /*
2369 : * Done putting stuff in TopMemoryContext.
2370 : */
2371 15358 : MemoryContextSwitchTo(oldcontext);
2372 :
2373 : /*
2374 : * If we're going to reject the connection due to database state, say so
2375 : * now instead of wasting cycles on an authentication exchange. (This also
2376 : * allows a pg_ping utility to be written.)
2377 : */
2378 15358 : switch (port->canAcceptConnections)
2379 : {
2380 34 : case CAC_STARTUP:
2381 34 : ereport(FATAL,
2382 : (errcode(ERRCODE_CANNOT_CONNECT_NOW),
2383 : errmsg("the database system is starting up")));
2384 : break;
2385 8 : case CAC_NOTCONSISTENT:
2386 8 : if (EnableHotStandby)
2387 8 : ereport(FATAL,
2388 : (errcode(ERRCODE_CANNOT_CONNECT_NOW),
2389 : errmsg("the database system is not yet accepting connections"),
2390 : errdetail("Consistent recovery state has not been yet reached.")));
2391 : else
2392 0 : ereport(FATAL,
2393 : (errcode(ERRCODE_CANNOT_CONNECT_NOW),
2394 : errmsg("the database system is not accepting connections"),
2395 : errdetail("Hot standby mode is disabled.")));
2396 : break;
2397 4 : case CAC_SHUTDOWN:
2398 4 : ereport(FATAL,
2399 : (errcode(ERRCODE_CANNOT_CONNECT_NOW),
2400 : errmsg("the database system is shutting down")));
2401 : break;
2402 0 : case CAC_RECOVERY:
2403 0 : ereport(FATAL,
2404 : (errcode(ERRCODE_CANNOT_CONNECT_NOW),
2405 : errmsg("the database system is in recovery mode")));
2406 : break;
2407 0 : case CAC_TOOMANY:
2408 0 : ereport(FATAL,
2409 : (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
2410 : errmsg("sorry, too many clients already")));
2411 : break;
2412 15312 : case CAC_OK:
2413 15312 : break;
2414 : }
2415 :
2416 15312 : return STATUS_OK;
2417 : }
2418 :
2419 : /*
2420 : * Send a NegotiateProtocolVersion to the client. This lets the client know
2421 : * that they have requested a newer minor protocol version than we are able
2422 : * to speak. We'll speak the highest version we know about; the client can,
2423 : * of course, abandon the connection if that's a problem.
2424 : *
2425 : * We also include in the response a list of protocol options we didn't
2426 : * understand. This allows clients to include optional parameters that might
2427 : * be present either in newer protocol versions or third-party protocol
2428 : * extensions without fear of having to reconnect if those options are not
2429 : * understood, while at the same time making certain that the client is aware
2430 : * of which options were actually accepted.
2431 : */
2432 : static void
2433 0 : SendNegotiateProtocolVersion(List *unrecognized_protocol_options)
2434 : {
2435 : StringInfoData buf;
2436 : ListCell *lc;
2437 :
2438 0 : pq_beginmessage(&buf, 'v'); /* NegotiateProtocolVersion */
2439 0 : pq_sendint32(&buf, PG_PROTOCOL_LATEST);
2440 0 : pq_sendint32(&buf, list_length(unrecognized_protocol_options));
2441 0 : foreach(lc, unrecognized_protocol_options)
2442 0 : pq_sendstring(&buf, lfirst(lc));
2443 0 : pq_endmessage(&buf);
2444 :
2445 : /* no need to flush, some other message will follow */
2446 0 : }
2447 :
2448 : /*
2449 : * The client has sent a cancel request packet, not a normal
2450 : * start-a-new-connection packet. Perform the necessary processing.
2451 : * Nothing is sent back to the client.
2452 : */
2453 : static void
2454 4 : processCancelRequest(Port *port, void *pkt)
2455 : {
2456 4 : CancelRequestPacket *canc = (CancelRequestPacket *) pkt;
2457 : int backendPID;
2458 : int32 cancelAuthCode;
2459 : Backend *bp;
2460 :
2461 : #ifndef EXEC_BACKEND
2462 : dlist_iter iter;
2463 : #else
2464 : int i;
2465 : #endif
2466 :
2467 4 : backendPID = (int) pg_ntoh32(canc->backendPID);
2468 4 : cancelAuthCode = (int32) pg_ntoh32(canc->cancelAuthCode);
2469 :
2470 : /*
2471 : * See if we have a matching backend. In the EXEC_BACKEND case, we can no
2472 : * longer access the postmaster's own backend list, and must rely on the
2473 : * duplicate array in shared memory.
2474 : */
2475 : #ifndef EXEC_BACKEND
2476 6 : dlist_foreach(iter, &BackendList)
2477 : {
2478 6 : bp = dlist_container(Backend, elem, iter.cur);
2479 : #else
2480 : for (i = MaxLivePostmasterChildren() - 1; i >= 0; i--)
2481 : {
2482 : bp = (Backend *) &ShmemBackendArray[i];
2483 : #endif
2484 6 : if (bp->pid == backendPID)
2485 : {
2486 4 : if (bp->cancel_key == cancelAuthCode)
2487 : {
2488 : /* Found a match; signal that backend to cancel current op */
2489 4 : ereport(DEBUG2,
2490 : (errmsg_internal("processing cancel request: sending SIGINT to process %d",
2491 : backendPID)));
2492 4 : signal_child(bp->pid, SIGINT);
2493 : }
2494 : else
2495 : /* Right PID, wrong key: no way, Jose */
2496 0 : ereport(LOG,
2497 : (errmsg("wrong key in cancel request for process %d",
2498 : backendPID)));
2499 4 : return;
2500 : }
2501 : #ifndef EXEC_BACKEND /* make GNU Emacs 26.1 see brace balance */
2502 : }
2503 : #else
2504 : }
2505 : #endif
2506 :
2507 : /* No matching backend */
2508 0 : ereport(LOG,
2509 : (errmsg("PID %d in cancel request did not match any process",
2510 : backendPID)));
2511 : }
2512 :
2513 : /*
2514 : * canAcceptConnections --- check to see if database state allows connections
2515 : * of the specified type. backend_type can be BACKEND_TYPE_NORMAL,
2516 : * BACKEND_TYPE_AUTOVAC, or BACKEND_TYPE_BGWORKER. (Note that we don't yet
2517 : * know whether a NORMAL connection might turn into a walsender.)
2518 : */
2519 : static CAC_state
2520 19650 : canAcceptConnections(int backend_type)
2521 : {
2522 19650 : CAC_state result = CAC_OK;
2523 :
2524 : /*
2525 : * Can't start backends when in startup/shutdown/inconsistent recovery
2526 : * state. We treat autovac workers the same as user backends for this
2527 : * purpose. However, bgworkers are excluded from this test; we expect
2528 : * bgworker_should_start_now() decided whether the DB state allows them.
2529 : */
2530 19650 : if (pmState != PM_RUN && pmState != PM_HOT_STANDBY &&
2531 : backend_type != BACKEND_TYPE_BGWORKER)
2532 : {
2533 46 : if (Shutdown > NoShutdown)
2534 4 : return CAC_SHUTDOWN; /* shutdown is pending */
2535 42 : else if (!FatalError && pmState == PM_STARTUP)
2536 34 : return CAC_STARTUP; /* normal startup */
2537 8 : else if (!FatalError && pmState == PM_RECOVERY)
2538 8 : return CAC_NOTCONSISTENT; /* not yet at consistent recovery
2539 : * state */
2540 : else
2541 0 : return CAC_RECOVERY; /* else must be crash recovery */
2542 : }
2543 :
2544 : /*
2545 : * "Smart shutdown" restrictions are applied only to normal connections,
2546 : * not to autovac workers or bgworkers.
2547 : */
2548 19604 : if (!connsAllowed && backend_type == BACKEND_TYPE_NORMAL)
2549 0 : return CAC_SHUTDOWN; /* shutdown is pending */
2550 :
2551 : /*
2552 : * Don't start too many children.
2553 : *
2554 : * We allow more connections here than we can have backends because some
2555 : * might still be authenticating; they might fail auth, or some existing
2556 : * backend might exit before the auth cycle is completed. The exact
2557 : * MaxBackends limit is enforced when a new backend tries to join the
2558 : * shared-inval backend array.
2559 : *
2560 : * The limit here must match the sizes of the per-child-process arrays;
2561 : * see comments for MaxLivePostmasterChildren().
2562 : */
2563 19604 : if (CountChildren(BACKEND_TYPE_ALL) >= MaxLivePostmasterChildren())
2564 0 : result = CAC_TOOMANY;
2565 :
2566 19604 : return result;
2567 : }
2568 :
2569 :
2570 : /*
2571 : * ConnCreate -- create a local connection data structure
2572 : *
2573 : * Returns NULL on failure, other than out-of-memory which is fatal.
2574 : */
2575 : static Port *
2576 15642 : ConnCreate(int serverFd)
2577 : {
2578 : Port *port;
2579 :
2580 15642 : if (!(port = (Port *) calloc(1, sizeof(Port))))
2581 : {
2582 0 : ereport(LOG,
2583 : (errcode(ERRCODE_OUT_OF_MEMORY),
2584 : errmsg("out of memory")));
2585 0 : ExitPostmaster(1);
2586 : }
2587 :
2588 15642 : if (StreamConnection(serverFd, port) != STATUS_OK)
2589 : {
2590 0 : if (port->sock != PGINVALID_SOCKET)
2591 0 : StreamClose(port->sock);
2592 0 : ConnFree(port);
2593 0 : return NULL;
2594 : }
2595 :
2596 15642 : return port;
2597 : }
2598 :
2599 :
2600 : /*
2601 : * ConnFree -- free a local connection data structure
2602 : *
2603 : * Caller has already closed the socket if any, so there's not much
2604 : * to do here.
2605 : */
2606 : static void
2607 15640 : ConnFree(Port *conn)
2608 : {
2609 15640 : free(conn);
2610 15640 : }
2611 :
2612 :
2613 : /*
2614 : * ClosePostmasterPorts -- close all the postmaster's open sockets
2615 : *
2616 : * This is called during child process startup to release file descriptors
2617 : * that are not needed by that child process. The postmaster still has
2618 : * them open, of course.
2619 : *
2620 : * Note: we pass am_syslogger as a boolean because we don't want to set
2621 : * the global variable yet when this is called.
2622 : */
2623 : void
2624 22688 : ClosePostmasterPorts(bool am_syslogger)
2625 : {
2626 : int i;
2627 :
2628 : #ifndef WIN32
2629 :
2630 : /*
2631 : * Close the write end of postmaster death watch pipe. It's important to
2632 : * do this as early as possible, so that if postmaster dies, others won't
2633 : * think that it's still running because we're holding the pipe open.
2634 : */
2635 22688 : if (close(postmaster_alive_fds[POSTMASTER_FD_OWN]) != 0)
2636 0 : ereport(FATAL,
2637 : (errcode_for_file_access(),
2638 : errmsg_internal("could not close postmaster death monitoring pipe in child process: %m")));
2639 22688 : postmaster_alive_fds[POSTMASTER_FD_OWN] = -1;
2640 : /* Notify fd.c that we released one pipe FD. */
2641 22688 : ReleaseExternalFD();
2642 : #endif
2643 :
2644 : /*
2645 : * Close the postmaster's listen sockets. These aren't tracked by fd.c,
2646 : * so we don't call ReleaseExternalFD() here.
2647 : */
2648 1474720 : for (i = 0; i < MAXLISTEN; i++)
2649 : {
2650 1452032 : if (ListenSocket[i] != PGINVALID_SOCKET)
2651 : {
2652 23522 : StreamClose(ListenSocket[i]);
2653 23522 : ListenSocket[i] = PGINVALID_SOCKET;
2654 : }
2655 : }
2656 :
2657 : /*
2658 : * If using syslogger, close the read side of the pipe. We don't bother
2659 : * tracking this in fd.c, either.
2660 : */
2661 22688 : if (!am_syslogger)
2662 : {
2663 : #ifndef WIN32
2664 22686 : if (syslogPipe[0] >= 0)
2665 28 : close(syslogPipe[0]);
2666 22686 : syslogPipe[0] = -1;
2667 : #else
2668 : if (syslogPipe[0])
2669 : CloseHandle(syslogPipe[0]);
2670 : syslogPipe[0] = 0;
2671 : #endif
2672 : }
2673 :
2674 : #ifdef USE_BONJOUR
2675 : /* If using Bonjour, close the connection to the mDNS daemon */
2676 : if (bonjour_sdref)
2677 : close(DNSServiceRefSockFD(bonjour_sdref));
2678 : #endif
2679 22688 : }
2680 :
2681 :
2682 : /*
2683 : * InitProcessGlobals -- set MyProcPid, MyStartTime[stamp], random seeds
2684 : *
2685 : * Called early in the postmaster and every backend.
2686 : */
2687 : void
2688 25898 : InitProcessGlobals(void)
2689 : {
2690 25898 : MyProcPid = getpid();
2691 25898 : MyStartTimestamp = GetCurrentTimestamp();
2692 25898 : MyStartTime = timestamptz_to_time_t(MyStartTimestamp);
2693 :
2694 : /*
2695 : * Set a different global seed in every process. We want something
2696 : * unpredictable, so if possible, use high-quality random bits for the
2697 : * seed. Otherwise, fall back to a seed based on timestamp and PID.
2698 : */
2699 25898 : if (unlikely(!pg_prng_strong_seed(&pg_global_prng_state)))
2700 : {
2701 : uint64 rseed;
2702 :
2703 : /*
2704 : * Since PIDs and timestamps tend to change more frequently in their
2705 : * least significant bits, shift the timestamp left to allow a larger
2706 : * total number of seeds in a given time period. Since that would
2707 : * leave only 20 bits of the timestamp that cycle every ~1 second,
2708 : * also mix in some higher bits.
2709 : */
2710 0 : rseed = ((uint64) MyProcPid) ^
2711 0 : ((uint64) MyStartTimestamp << 12) ^
2712 0 : ((uint64) MyStartTimestamp >> 20);
2713 :
2714 0 : pg_prng_seed(&pg_global_prng_state, rseed);
2715 : }
2716 :
2717 : /*
2718 : * Also make sure that we've set a good seed for random(3). Use of that
2719 : * is deprecated in core Postgres, but extensions might use it.
2720 : */
2721 : #ifndef WIN32
2722 25898 : srandom(pg_prng_uint32(&pg_global_prng_state));
2723 : #endif
2724 25898 : }
2725 :
2726 :
/*
 * reset_shared -- (re)build shared memory and semaphores
 *
 * Thin wrapper that delegates all the work to
 * CreateSharedMemoryAndSemaphores().
 */
static void
reset_shared(void)
{
	/*
	 * Create the shared memory segment and semaphores, or re-create them if
	 * they already existed.
	 *
	 * Note: each "cycle of life" normally ends up with the same IPC keys (when
	 * SysV shmem and/or semaphores are in use).  That helps ensure dead IPC
	 * objects get cleaned up if the postmaster crashes and is restarted.
	 */
	CreateSharedMemoryAndSemaphores();
}
2742 :
2743 :
2744 : /*
2745 : * SIGHUP -- reread config files, and tell children to do same
2746 : */
2747 : static void
2748 112 : SIGHUP_handler(SIGNAL_ARGS)
2749 : {
2750 112 : int save_errno = errno;
2751 :
2752 : /*
2753 : * We rely on the signal mechanism to have blocked all signals ... except
2754 : * on Windows, which lacks sigaction(), so we have to do it manually.
2755 : */
2756 : #ifdef WIN32
2757 : PG_SETMASK(&BlockSig);
2758 : #endif
2759 :
2760 112 : if (Shutdown <= SmartShutdown)
2761 : {
2762 112 : ereport(LOG,
2763 : (errmsg("received SIGHUP, reloading configuration files")));
2764 112 : ProcessConfigFile(PGC_SIGHUP);
2765 112 : SignalChildren(SIGHUP);
2766 112 : if (StartupPID != 0)
2767 16 : signal_child(StartupPID, SIGHUP);
2768 112 : if (BgWriterPID != 0)
2769 112 : signal_child(BgWriterPID, SIGHUP);
2770 112 : if (CheckpointerPID != 0)
2771 112 : signal_child(CheckpointerPID, SIGHUP);
2772 112 : if (WalWriterPID != 0)
2773 96 : signal_child(WalWriterPID, SIGHUP);
2774 112 : if (WalReceiverPID != 0)
2775 12 : signal_child(WalReceiverPID, SIGHUP);
2776 112 : if (AutoVacPID != 0)
2777 92 : signal_child(AutoVacPID, SIGHUP);
2778 112 : if (PgArchPID != 0)
2779 6 : signal_child(PgArchPID, SIGHUP);
2780 112 : if (SysLoggerPID != 0)
2781 0 : signal_child(SysLoggerPID, SIGHUP);
2782 :
2783 : /* Reload authentication config files too */
2784 112 : if (!load_hba())
2785 0 : ereport(LOG,
2786 : /* translator: %s is a configuration file */
2787 : (errmsg("%s was not reloaded", "pg_hba.conf")));
2788 :
2789 112 : if (!load_ident())
2790 0 : ereport(LOG,
2791 : (errmsg("%s was not reloaded", "pg_ident.conf")));
2792 :
2793 : #ifdef USE_SSL
2794 : /* Reload SSL configuration as well */
2795 112 : if (EnableSSL)
2796 : {
2797 0 : if (secure_initialize(false) == 0)
2798 0 : LoadedSSL = true;
2799 : else
2800 0 : ereport(LOG,
2801 : (errmsg("SSL configuration was not reloaded")));
2802 : }
2803 : else
2804 : {
2805 112 : secure_destroy();
2806 112 : LoadedSSL = false;
2807 : }
2808 : #endif
2809 :
2810 : #ifdef EXEC_BACKEND
2811 : /* Update the starting-point file for future children */
2812 : write_nondefault_variables(PGC_SIGHUP);
2813 : #endif
2814 : }
2815 :
2816 : #ifdef WIN32
2817 : PG_SETMASK(&UnBlockSig);
2818 : #endif
2819 :
2820 112 : errno = save_errno;
2821 112 : }
2822 :
2823 :
2824 : /*
2825 : * pmdie -- signal handler for processing various postmaster signals.
2826 : */
2827 : static void
2828 1032 : pmdie(SIGNAL_ARGS)
2829 : {
2830 1032 : int save_errno = errno;
2831 :
2832 : /*
2833 : * We rely on the signal mechanism to have blocked all signals ... except
2834 : * on Windows, which lacks sigaction(), so we have to do it manually.
2835 : */
2836 : #ifdef WIN32
2837 : PG_SETMASK(&BlockSig);
2838 : #endif
2839 :
2840 1032 : ereport(DEBUG2,
2841 : (errmsg_internal("postmaster received signal %d",
2842 : postgres_signal_arg)));
2843 :
2844 1032 : switch (postgres_signal_arg)
2845 : {
2846 8 : case SIGTERM:
2847 :
2848 : /*
2849 : * Smart Shutdown:
2850 : *
2851 : * Wait for children to end their work, then shut down.
2852 : */
2853 8 : if (Shutdown >= SmartShutdown)
2854 0 : break;
2855 8 : Shutdown = SmartShutdown;
2856 8 : ereport(LOG,
2857 : (errmsg("received smart shutdown request")));
2858 :
2859 : /* Report status */
2860 8 : AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STOPPING);
2861 : #ifdef USE_SYSTEMD
2862 : sd_notify(0, "STOPPING=1");
2863 : #endif
2864 :
2865 : /*
2866 : * If we reached normal running, we go straight to waiting for
2867 : * client backends to exit. If already in PM_STOP_BACKENDS or a
2868 : * later state, do not change it.
2869 : */
2870 8 : if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
2871 8 : connsAllowed = false;
2872 0 : else if (pmState == PM_STARTUP || pmState == PM_RECOVERY)
2873 : {
2874 : /* There should be no clients, so proceed to stop children */
2875 0 : pmState = PM_STOP_BACKENDS;
2876 : }
2877 :
2878 : /*
2879 : * Now wait for online backup mode to end and backends to exit. If
2880 : * that is already the case, PostmasterStateMachine will take the
2881 : * next step.
2882 : */
2883 8 : PostmasterStateMachine();
2884 8 : break;
2885 :
2886 610 : case SIGINT:
2887 :
2888 : /*
2889 : * Fast Shutdown:
2890 : *
2891 : * Abort all children with SIGTERM (rollback active transactions
2892 : * and exit) and shut down when they are gone.
2893 : */
2894 610 : if (Shutdown >= FastShutdown)
2895 0 : break;
2896 610 : Shutdown = FastShutdown;
2897 610 : ereport(LOG,
2898 : (errmsg("received fast shutdown request")));
2899 :
2900 : /* Report status */
2901 610 : AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STOPPING);
2902 : #ifdef USE_SYSTEMD
2903 : sd_notify(0, "STOPPING=1");
2904 : #endif
2905 :
2906 610 : if (pmState == PM_STARTUP || pmState == PM_RECOVERY)
2907 : {
2908 : /* Just shut down background processes silently */
2909 0 : pmState = PM_STOP_BACKENDS;
2910 : }
2911 610 : else if (pmState == PM_RUN ||
2912 56 : pmState == PM_HOT_STANDBY)
2913 : {
2914 : /* Report that we're about to zap live client sessions */
2915 610 : ereport(LOG,
2916 : (errmsg("aborting any active transactions")));
2917 610 : pmState = PM_STOP_BACKENDS;
2918 : }
2919 :
2920 : /*
2921 : * PostmasterStateMachine will issue any necessary signals, or
2922 : * take the next step if no child processes need to be killed.
2923 : */
2924 610 : PostmasterStateMachine();
2925 610 : break;
2926 :
2927 414 : case SIGQUIT:
2928 :
2929 : /*
2930 : * Immediate Shutdown:
2931 : *
2932 : * abort all children with SIGQUIT, wait for them to exit,
2933 : * terminate remaining ones with SIGKILL, then exit without
2934 : * attempt to properly shut down the data base system.
2935 : */
2936 414 : if (Shutdown >= ImmediateShutdown)
2937 0 : break;
2938 414 : Shutdown = ImmediateShutdown;
2939 414 : ereport(LOG,
2940 : (errmsg("received immediate shutdown request")));
2941 :
2942 : /* Report status */
2943 414 : AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STOPPING);
2944 : #ifdef USE_SYSTEMD
2945 : sd_notify(0, "STOPPING=1");
2946 : #endif
2947 :
2948 : /* tell children to shut down ASAP */
2949 414 : SetQuitSignalReason(PMQUIT_FOR_STOP);
2950 414 : TerminateChildren(SIGQUIT);
2951 414 : pmState = PM_WAIT_BACKENDS;
2952 :
2953 : /* set stopwatch for them to die */
2954 414 : AbortStartTime = time(NULL);
2955 :
2956 : /*
2957 : * Now wait for backends to exit. If there are none,
2958 : * PostmasterStateMachine will take the next step.
2959 : */
2960 414 : PostmasterStateMachine();
2961 414 : break;
2962 : }
2963 :
2964 : #ifdef WIN32
2965 : PG_SETMASK(&UnBlockSig);
2966 : #endif
2967 :
2968 1032 : errno = save_errno;
2969 1032 : }
2970 :
2971 : /*
2972 : * Reaper -- signal handler to cleanup after a child process dies.
 *
 * Collects every exited child that waitpid(-1, ..., WNOHANG) currently
 * reports.  Each PID is matched, in order, against the startup process,
 * bgwriter, checkpointer, WAL writer, WAL receiver, autovacuum launcher,
 * archiver and system logger PIDs, then against the background worker list;
 * anything left over is handed to CleanupBackend().  Every branch ends with
 * "continue", so a given PID is handled by exactly one of them.
 *
 * Once no more exited children are pending, PostmasterStateMachine() is
 * called once to act on whatever state changes resulted.
 *
 * errno is saved on entry and restored before returning.
2973 : */
2974 : static void
2975 23456 : reaper(SIGNAL_ARGS)
2976 : {
2977 23456 : int save_errno = errno;
2978 : int pid; /* process id of dead child process */
2979 : int exitstatus; /* its exit status */
2980 :
2981 : /*
2982 : * We rely on the signal mechanism to have blocked all signals ... except
2983 : * on Windows, which lacks sigaction(), so we have to do it manually.
2984 : */
2985 : #ifdef WIN32
2986 : PG_SETMASK(&BlockSig);
2987 : #endif
2988 :
2989 23456 : ereport(DEBUG4,
2990 : (errmsg_internal("reaping dead processes")));
2991 :
/* WNOHANG: keep going only while an already-exited child is reportable. */
2992 48356 : while ((pid = waitpid(-1, &exitstatus, WNOHANG)) > 0)
2993 : {
2994 : /*
2995 : * Check if this child was a startup process.
2996 : */
2997 24904 : if (pid == StartupPID)
2998 : {
2999 1052 : StartupPID = 0;
3000 :
3001 : /*
3002 : * Startup process exited in response to a shutdown request (or it
3003 : * completed normally regardless of the shutdown request).
3004 : */
3005 1052 : if (Shutdown > NoShutdown &&
3006 130 : (EXIT_STATUS_0(exitstatus) || EXIT_STATUS_1(exitstatus)))
3007 : {
3008 56 : StartupStatus = STARTUP_NOT_RUNNING;
3009 56 : pmState = PM_WAIT_BACKENDS;
3010 : /* PostmasterStateMachine logic does the rest */
3011 56 : continue;
3012 : }
3013 :
/*
 * Exit status 3: logged as a shutdown at the recovery target.  Raise the
 * shutdown level to at least SmartShutdown and SIGTERM the children.
 */
3014 996 : if (EXIT_STATUS_3(exitstatus))
3015 : {
3016 0 : ereport(LOG,
3017 : (errmsg("shutdown at recovery target")));
3018 0 : StartupStatus = STARTUP_NOT_RUNNING;
3019 0 : Shutdown = Max(Shutdown, SmartShutdown);
3020 0 : TerminateChildren(SIGTERM);
3021 0 : pmState = PM_WAIT_BACKENDS;
3022 : /* PostmasterStateMachine logic does the rest */
3023 0 : continue;
3024 : }
3025 :
3026 : /*
3027 : * Unexpected exit of startup process (including FATAL exit)
3028 : * during PM_STARTUP is treated as catastrophic. There are no
3029 : * other processes running yet, so we can just exit.
3030 : */
3031 996 : if (pmState == PM_STARTUP &&
3032 708 : StartupStatus != STARTUP_SIGNALED &&
3033 708 : !EXIT_STATUS_0(exitstatus))
3034 : {
3035 0 : LogChildExit(LOG, _("startup process"),
3036 : pid, exitstatus);
3037 0 : ereport(LOG,
3038 : (errmsg("aborting startup due to startup process failure")));
3039 0 : ExitPostmaster(1);
3040 : }
3041 :
3042 : /*
3043 : * After PM_STARTUP, any unexpected exit (including FATAL exit) of
3044 : * the startup process is catastrophic, so kill other children,
3045 : * and set StartupStatus so we don't try to reinitialize after
3046 : * they're gone. Exception: if StartupStatus is STARTUP_SIGNALED,
3047 : * then we previously sent the startup process a SIGQUIT; so
3048 : * that's probably the reason it died, and we do want to try to
3049 : * restart in that case.
3050 : *
3051 : * This stanza also handles the case where we sent a SIGQUIT
3052 : * during PM_STARTUP due to some dead_end child crashing: in that
3053 : * situation, if the startup process dies on the SIGQUIT, we need
3054 : * to transition to PM_WAIT_BACKENDS state which will allow
3055 : * PostmasterStateMachine to restart the startup process. (On the
3056 : * other hand, the startup process might complete normally, if we
3057 : * were too late with the SIGQUIT. In that case we'll fall
3058 : * through and commence normal operations.)
3059 : */
3060 996 : if (!EXIT_STATUS_0(exitstatus))
3061 : {
3062 80 : if (StartupStatus == STARTUP_SIGNALED)
3063 : {
3064 74 : StartupStatus = STARTUP_NOT_RUNNING;
3065 74 : if (pmState == PM_STARTUP)
3066 0 : pmState = PM_WAIT_BACKENDS;
3067 : }
3068 : else
3069 6 : StartupStatus = STARTUP_CRASHED;
3070 80 : HandleChildCrash(pid, exitstatus,
3071 80 : _("startup process"));
3072 80 : continue;
3073 : }
3074 :
3075 : /*
3076 : * Startup succeeded, commence normal operations
3077 : */
3078 916 : StartupStatus = STARTUP_NOT_RUNNING;
3079 916 : FatalError = false;
3080 916 : AbortStartTime = 0;
3081 916 : ReachedNormalRunning = true;
3082 916 : pmState = PM_RUN;
3083 916 : connsAllowed = true;
3084 :
3085 : /*
3086 : * Crank up the background tasks, if we didn't do that already
3087 : * when we entered consistent recovery state. It doesn't matter
3088 : * if this fails, we'll just try again later.
3089 : */
3090 916 : if (CheckpointerPID == 0)
3091 0 : CheckpointerPID = StartCheckpointer();
3092 916 : if (BgWriterPID == 0)
3093 0 : BgWriterPID = StartBackgroundWriter();
3094 916 : if (WalWriterPID == 0)
3095 916 : WalWriterPID = StartWalWriter();
3096 :
3097 : /*
3098 : * Likewise, start other special children as needed. In a restart
3099 : * situation, some of them may be alive already.
3100 : */
3101 916 : if (!IsBinaryUpgrade && AutoVacuumingActive() && AutoVacPID == 0)
3102 868 : AutoVacPID = StartAutoVacLauncher();
3103 916 : if (PgArchStartupAllowed() && PgArchPID == 0)
3104 46 : PgArchPID = StartArchiver();
3105 :
3106 : /* workers may be scheduled to start now */
3107 916 : maybe_start_bgworkers();
3108 :
3109 : /* at this point we are really open for business */
3110 912 : ereport(LOG,
3111 : (errmsg("database system is ready to accept connections")));
3112 :
3113 : /* Report status */
3114 912 : AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_READY);
3115 : #ifdef USE_SYSTEMD
3116 : sd_notify(0, "READY=1");
3117 : #endif
3118 :
3119 912 : continue;
3120 : }
3121 :
3122 : /*
3123 : * Was it the bgwriter? Normal exit can be ignored; we'll start a new
3124 : * one at the next iteration of the postmaster's main loop, if
3125 : * necessary. Any other exit condition is treated as a crash.
 *
 * Here and in the branches below, the saved PID is zeroed before
 * HandleChildCrash() is called, so HandleChildCrash() sees that process as
 * not running and neither signals it nor clears the PID again.
3126 : */
3127 23852 : if (pid == BgWriterPID)
3128 : {
3129 1046 : BgWriterPID = 0;
3130 1046 : if (!EXIT_STATUS_0(exitstatus))
3131 428 : HandleChildCrash(pid, exitstatus,
3132 428 : _("background writer process"));
3133 1046 : continue;
3134 : }
3135 :
3136 : /*
3137 : * Was it the checkpointer?
3138 : */
3139 22806 : if (pid == CheckpointerPID)
3140 : {
3141 1046 : CheckpointerPID = 0;
3142 1046 : if (EXIT_STATUS_0(exitstatus) && pmState == PM_SHUTDOWN)
3143 : {
3144 : /*
3145 : * OK, we saw normal exit of the checkpointer after it's been
3146 : * told to shut down. We expect that it wrote a shutdown
3147 : * checkpoint. (If for some reason it didn't, recovery will
3148 : * occur on next postmaster start.)
3149 : *
3150 : * At this point we should have no normal backend children
3151 : * left (else we'd not be in PM_SHUTDOWN state) but we might
3152 : * have dead_end children to wait for.
3153 : *
3154 : * If we have an archiver subprocess, tell it to do a last
3155 : * archive cycle and quit. Likewise, if we have walsender
3156 : * processes, tell them to send any remaining WAL and quit.
3157 : */
3158 : Assert(Shutdown > NoShutdown);
3159 :
3160 : /* Waken archiver for the last time */
3161 618 : if (PgArchPID != 0)
3162 14 : signal_child(PgArchPID, SIGUSR2);
3163 :
3164 : /*
3165 : * Waken walsenders for the last time. No regular backends
3166 : * should be around anymore.
3167 : */
3168 618 : SignalChildren(SIGUSR2);
3169 :
3170 618 : pmState = PM_SHUTDOWN_2;
3171 : }
3172 : else
3173 : {
3174 : /*
3175 : * Any unexpected exit of the checkpointer (including FATAL
3176 : * exit) is treated as a crash.
3177 : */
3178 428 : HandleChildCrash(pid, exitstatus,
3179 428 : _("checkpointer process"));
3180 : }
3181 :
3182 1046 : continue;
3183 : }
3184 :
3185 : /*
3186 : * Was it the wal writer? Normal exit can be ignored; we'll start a
3187 : * new one at the next iteration of the postmaster's main loop, if
3188 : * necessary. Any other exit condition is treated as a crash.
3189 : */
3190 21760 : if (pid == WalWriterPID)
3191 : {
3192 910 : WalWriterPID = 0;
3193 910 : if (!EXIT_STATUS_0(exitstatus))
3194 348 : HandleChildCrash(pid, exitstatus,
3195 348 : _("WAL writer process"));
3196 910 : continue;
3197 : }
3198 :
3199 : /*
3200 : * Was it the wal receiver? If exit status is zero (normal) or one
3201 : * (FATAL exit), we assume everything is all right just like normal
3202 : * backends. (If we need a new wal receiver, we'll start one at the
3203 : * next iteration of the postmaster's main loop.)
3204 : */
3205 20850 : if (pid == WalReceiverPID)
3206 : {
3207 298 : WalReceiverPID = 0;
3208 298 : if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3209 28 : HandleChildCrash(pid, exitstatus,
3210 28 : _("WAL receiver process"));
3211 298 : continue;
3212 : }
3213 :
3214 : /*
3215 : * Was it the autovacuum launcher? Normal exit can be ignored; we'll
3216 : * start a new one at the next iteration of the postmaster's main
3217 : * loop, if necessary. Any other exit condition is treated as a
3218 : * crash.
3219 : */
3220 20552 : if (pid == AutoVacPID)
3221 : {
3222 862 : AutoVacPID = 0;
3223 862 : if (!EXIT_STATUS_0(exitstatus))
3224 330 : HandleChildCrash(pid, exitstatus,
3225 330 : _("autovacuum launcher process"));
3226 862 : continue;
3227 : }
3228 :
3229 : /*
3230 : * Was it the archiver? If exit status is zero (normal) or one (FATAL
3231 : * exit), we assume everything is all right just like normal backends
3232 : * and just try to restart a new one so that we immediately retry
3233 : * archiving remaining files. (If fail, we'll try again in future
3234 : * cycles of the postmaster's main loop.) Unless we were waiting for
3235 : * it to shut down; don't restart it in that case, and
3236 : * PostmasterStateMachine() will advance to the next shutdown step.
3237 : */
3238 19690 : if (pid == PgArchPID)
3239 : {
3240 52 : PgArchPID = 0;
3241 52 : if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3242 38 : HandleChildCrash(pid, exitstatus,
3243 38 : _("archiver process"));
/* Evaluated after any HandleChildCrash() call above, so it sees the updated state. */
3244 52 : if (PgArchStartupAllowed())
3245 0 : PgArchPID = StartArchiver();
3246 52 : continue;
3247 : }
3248 :
3249 : /* Was it the system logger? If so, try to start a new one */
/* Note: a nonzero exit here is only logged; HandleChildCrash() is not called. */
3250 19638 : if (pid == SysLoggerPID)
3251 : {
3252 0 : SysLoggerPID = 0;
3253 : /* for safety's sake, launch new logger *first* */
3254 0 : SysLoggerPID = SysLogger_Start();
3255 0 : if (!EXIT_STATUS_0(exitstatus))
3256 0 : LogChildExit(LOG, _("system logger process"),
3257 : pid, exitstatus);
3258 0 : continue;
3259 : }
3260 :
3261 : /* Was it one of our background workers? */
3262 19638 : if (CleanupBackgroundWorker(pid, exitstatus))
3263 : {
3264 : /* have it be restarted */
3265 3990 : HaveCrashedWorker = true;
3266 3990 : continue;
3267 : }
3268 :
3269 : /*
3270 : * Else do standard backend child cleanup.
3271 : */
3272 15648 : CleanupBackend(pid, exitstatus);
3273 : } /* loop over pending child-death reports */
3274 :
3275 : /*
3276 : * After cleaning out the SIGCHLD queue, see if we have any state changes
3277 : * or actions to make.
3278 : */
3279 23452 : PostmasterStateMachine();
3280 :
3281 : /* Done with signal handler */
3282 : #ifdef WIN32
3283 : PG_SETMASK(&UnBlockSig);
3284 : #endif
3285 :
3286 22414 : errno = save_errno;
3287 22414 : }
3288 :
3289 : /*
3290 : * Scan the bgworkers list and see if the given PID (which has just stopped
3291 : * or crashed) is in it. Handle its shutdown if so, and return true. If not a
3292 : * bgworker, return false.
3293 : *
3294 : * This is heavily based on CleanupBackend. One important difference is that
3295 : * we don't know yet that the dying process is a bgworker, so we must be silent
3296 : * until we're sure it is.
3297 : */
3298 : static bool
3299 19638 : CleanupBackgroundWorker(int pid,
3300 : int exitstatus) /* child's exit status */
3301 : {
3302 : char namebuf[MAXPGPATH];
3303 : slist_mutable_iter iter;
3304 :
3305 38580 : slist_foreach_modify(iter, &BackgroundWorkerList)
3306 : {
3307 : RegisteredBgWorker *rw;
3308 :
3309 22932 : rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
3310 :
3311 22932 : if (rw->rw_pid != pid)
3312 18942 : continue;
3313 :
3314 : #ifdef WIN32
3315 : /* see CleanupBackend */
3316 : if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3317 : exitstatus = 0;
3318 : #endif
3319 :
3320 3990 : snprintf(namebuf, MAXPGPATH, _("background worker \"%s\""),
3321 3990 : rw->rw_worker.bgw_type);
3322 :
3323 :
3324 3990 : if (!EXIT_STATUS_0(exitstatus))
3325 : {
3326 : /* Record timestamp, so we know when to restart the worker. */
3327 1112 : rw->rw_crashed_at = GetCurrentTimestamp();
3328 : }
3329 : else
3330 : {
3331 : /* Zero exit status means terminate */
3332 2878 : rw->rw_crashed_at = 0;
3333 2878 : rw->rw_terminate = true;
3334 : }
3335 :
3336 : /*
3337 : * Additionally, just like a backend, any exit status other than 0 or
3338 : * 1 is considered a crash and causes a system-wide restart.
3339 : */
3340 3990 : if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3341 : {
3342 358 : HandleChildCrash(pid, exitstatus, namebuf);
3343 358 : return true;
3344 : }
3345 :
3346 : /*
3347 : * We must release the postmaster child slot. If the worker failed to
3348 : * do so, it did not clean up after itself, requiring a crash-restart
3349 : * cycle.
3350 : */
3351 3632 : if (!ReleasePostmasterChildSlot(rw->rw_child_slot))
3352 : {
3353 0 : HandleChildCrash(pid, exitstatus, namebuf);
3354 0 : return true;
3355 : }
3356 :
3357 : /* Get it out of the BackendList and clear out remaining data */
3358 3632 : dlist_delete(&rw->rw_backend->elem);
3359 : #ifdef EXEC_BACKEND
3360 : ShmemBackendArrayRemove(rw->rw_backend);
3361 : #endif
3362 :
3363 : /*
3364 : * It's possible that this background worker started some OTHER
3365 : * background worker and asked to be notified when that worker started
3366 : * or stopped. If so, cancel any notifications destined for the
3367 : * now-dead backend.
3368 : */
3369 3632 : if (rw->rw_backend->bgworker_notify)
3370 196 : BackgroundWorkerStopNotifications(rw->rw_pid);
3371 3632 : free(rw->rw_backend);
3372 3632 : rw->rw_backend = NULL;
3373 3632 : rw->rw_pid = 0;
3374 3632 : rw->rw_child_slot = 0;
3375 3632 : ReportBackgroundWorkerExit(&iter); /* report child death */
3376 :
3377 3632 : LogChildExit(EXIT_STATUS_0(exitstatus) ? DEBUG1 : LOG,
3378 : namebuf, pid, exitstatus);
3379 :
3380 3632 : return true;
3381 : }
3382 :
3383 15648 : return false;
3384 : }
3385 :
3386 : /*
3387 : * CleanupBackend -- cleanup after terminated backend.
3388 : *
3389 : * Remove all local state associated with backend.
3390 : *
3391 : * If you change this, see also CleanupBackgroundWorker.
3392 : */
3393 : static void
3394 15648 : CleanupBackend(int pid,
3395 : int exitstatus) /* child's exit status. */
3396 : {
3397 : dlist_mutable_iter iter;
3398 :
3399 15648 : LogChildExit(DEBUG2, _("server process"), pid, exitstatus);
3400 :
3401 : /*
3402 : * If a backend dies in an ugly way then we must signal all other backends
3403 : * to quickdie. If exit status is zero (normal) or one (FATAL exit), we
3404 : * assume everything is all right and proceed to remove the backend from
3405 : * the active backend list.
3406 : */
3407 :
3408 : #ifdef WIN32
3409 :
3410 : /*
3411 : * On win32, also treat ERROR_WAIT_NO_CHILDREN (128) as nonfatal case,
3412 : * since that sometimes happens under load when the process fails to start
3413 : * properly (long before it starts using shared memory). Microsoft reports
3414 : * it is related to mutex failure:
3415 : * http://archives.postgresql.org/pgsql-hackers/2010-09/msg00790.php
3416 : */
3417 : if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3418 : {
3419 : LogChildExit(LOG, _("server process"), pid, exitstatus);
3420 : exitstatus = 0;
3421 : }
3422 : #endif
3423 :
3424 15648 : if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3425 : {
3426 256 : HandleChildCrash(pid, exitstatus, _("server process"));
3427 256 : return;
3428 : }
3429 :
3430 29698 : dlist_foreach_modify(iter, &BackendList)
3431 : {
3432 29698 : Backend *bp = dlist_container(Backend, elem, iter.cur);
3433 :
3434 29698 : if (bp->pid == pid)
3435 : {
3436 15392 : if (!bp->dead_end)
3437 : {
3438 15346 : if (!ReleasePostmasterChildSlot(bp->child_slot))
3439 : {
3440 : /*
3441 : * Uh-oh, the child failed to clean itself up. Treat as a
3442 : * crash after all.
3443 : */
3444 0 : HandleChildCrash(pid, exitstatus, _("server process"));
3445 0 : return;
3446 : }
3447 : #ifdef EXEC_BACKEND
3448 : ShmemBackendArrayRemove(bp);
3449 : #endif
3450 : }
3451 15392 : if (bp->bgworker_notify)
3452 : {
3453 : /*
3454 : * This backend may have been slated to receive SIGUSR1 when
3455 : * some background worker started or stopped. Cancel those
3456 : * notifications, as we don't want to signal PIDs that are not
3457 : * PostgreSQL backends. This gets skipped in the (probably
3458 : * very common) case where the backend has never requested any
3459 : * such notifications.
3460 : */
3461 110 : BackgroundWorkerStopNotifications(bp->pid);
3462 : }
3463 15392 : dlist_delete(iter.cur);
3464 15392 : free(bp);
3465 15392 : break;
3466 : }
3467 : }
3468 : }
3469 :
3470 : /*
3471 : * HandleChildCrash -- cleanup after failed backend, bgwriter, checkpointer,
3472 : * walwriter, autovacuum, archiver or background worker.
3473 : *
3474 : * The objectives here are to clean up our local state about the child
3475 : * process, and to signal all other remaining children to quickdie.
3476 : */
3477 : static void
3478 2294 : HandleChildCrash(int pid, int exitstatus, const char *procname)
3479 : {
3480 : dlist_mutable_iter iter;
3481 : slist_iter siter;
3482 : Backend *bp;
3483 : bool take_action;
3484 :
3485 : /*
3486 : * We only log messages and send signals if this is the first process
3487 : * crash and we're not doing an immediate shutdown; otherwise, we're only
3488 : * here to update postmaster's idea of live processes. If we have already
3489 : * signaled children, nonzero exit status is to be expected, so don't
3490 : * clutter log.
3491 : */
3492 2294 : take_action = !FatalError && Shutdown != ImmediateShutdown;
3493 :
3494 2294 : if (take_action)
3495 : {
3496 14 : LogChildExit(LOG, procname, pid, exitstatus);
3497 14 : ereport(LOG,
3498 : (errmsg("terminating any other active server processes")));
3499 14 : SetQuitSignalReason(PMQUIT_FOR_CRASH);
3500 : }
3501 :
3502 : /* Process background workers. */
3503 4644 : slist_foreach(siter, &BackgroundWorkerList)
3504 : {
3505 : RegisteredBgWorker *rw;
3506 :
3507 2350 : rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
3508 2350 : if (rw->rw_pid == 0)
3509 998 : continue; /* not running */
3510 1352 : if (rw->rw_pid == pid)
3511 : {
3512 : /*
3513 : * Found entry for freshly-dead worker, so remove it.
3514 : */
3515 358 : (void) ReleasePostmasterChildSlot(rw->rw_child_slot);
3516 358 : dlist_delete(&rw->rw_backend->elem);
3517 : #ifdef EXEC_BACKEND
3518 : ShmemBackendArrayRemove(rw->rw_backend);
3519 : #endif
3520 358 : free(rw->rw_backend);
3521 358 : rw->rw_backend = NULL;
3522 358 : rw->rw_pid = 0;
3523 358 : rw->rw_child_slot = 0;
3524 : /* don't reset crashed_at */
3525 : /* don't report child stop, either */
3526 : /* Keep looping so we can signal remaining workers */
3527 : }
3528 : else
3529 : {
3530 : /*
3531 : * This worker is still alive. Unless we did so already, tell it
3532 : * to commit hara-kiri.
3533 : *
3534 : * SIGQUIT is the special signal that says exit without proc_exit
3535 : * and let the user know what's going on. But if SendStop is set
3536 : * (-T on command line), then we send SIGSTOP instead, so that we
3537 : * can get core dumps from all backends by hand.
3538 : */
3539 994 : if (take_action)
3540 : {
3541 8 : ereport(DEBUG2,
3542 : (errmsg_internal("sending %s to process %d",
3543 : (SendStop ? "SIGSTOP" : "SIGQUIT"),
3544 : (int) rw->rw_pid)));
3545 8 : signal_child(rw->rw_pid, (SendStop ? SIGSTOP : SIGQUIT));
3546 : }
3547 : }
3548 : }
3549 :
3550 : /* Process regular backends */
3551 4634 : dlist_foreach_modify(iter, &BackendList)
3552 : {
3553 2340 : bp = dlist_container(Backend, elem, iter.cur);
3554 :
3555 2340 : if (bp->pid == pid)
3556 : {
3557 : /*
3558 : * Found entry for freshly-dead backend, so remove it.
3559 : */
3560 256 : if (!bp->dead_end)
3561 : {
3562 256 : (void) ReleasePostmasterChildSlot(bp->child_slot);
3563 : #ifdef EXEC_BACKEND
3564 : ShmemBackendArrayRemove(bp);
3565 : #endif
3566 : }
3567 256 : dlist_delete(iter.cur);
3568 256 : free(bp);
3569 : /* Keep looping so we can signal remaining backends */
3570 : }
3571 : else
3572 : {
3573 : /*
3574 : * This backend is still alive. Unless we did so already, tell it
3575 : * to commit hara-kiri.
3576 : *
3577 : * SIGQUIT is the special signal that says exit without proc_exit
3578 : * and let the user know what's going on. But if SendStop is set
3579 : * (-T on command line), then we send SIGSTOP instead, so that we
3580 : * can get core dumps from all backends by hand.
3581 : *
3582 : * We could exclude dead_end children here, but at least in the
3583 : * SIGSTOP case it seems better to include them.
3584 : *
3585 : * Background workers were already processed above; ignore them
3586 : * here.
3587 : */
3588 2084 : if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
3589 994 : continue;
3590 :
3591 1090 : if (take_action)
3592 : {
3593 8 : ereport(DEBUG2,
3594 : (errmsg_internal("sending %s to process %d",
3595 : (SendStop ? "SIGSTOP" : "SIGQUIT"),
3596 : (int) bp->pid)));
3597 8 : signal_child(bp->pid, (SendStop ? SIGSTOP : SIGQUIT));
3598 : }
3599 : }
3600 : }
3601 :
3602 : /* Take care of the startup process too */
3603 2294 : if (pid == StartupPID)
3604 : {
3605 0 : StartupPID = 0;
3606 : /* Caller adjusts StartupStatus, so don't touch it here */
3607 : }
3608 2294 : else if (StartupPID != 0 && take_action)
3609 : {
3610 0 : ereport(DEBUG2,
3611 : (errmsg_internal("sending %s to process %d",
3612 : (SendStop ? "SIGSTOP" : "SIGQUIT"),
3613 : (int) StartupPID)));
3614 0 : signal_child(StartupPID, (SendStop ? SIGSTOP : SIGQUIT));
3615 0 : StartupStatus = STARTUP_SIGNALED;
3616 : }
3617 :
3618 : /* Take care of the bgwriter too */
3619 2294 : if (pid == BgWriterPID)
3620 0 : BgWriterPID = 0;
3621 2294 : else if (BgWriterPID != 0 && take_action)
3622 : {
3623 14 : ereport(DEBUG2,
3624 : (errmsg_internal("sending %s to process %d",
3625 : (SendStop ? "SIGSTOP" : "SIGQUIT"),
3626 : (int) BgWriterPID)));
3627 14 : signal_child(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
3628 : }
3629 :
3630 : /* Take care of the checkpointer too */
3631 2294 : if (pid == CheckpointerPID)
3632 0 : CheckpointerPID = 0;
3633 2294 : else if (CheckpointerPID != 0 && take_action)
3634 : {
3635 14 : ereport(DEBUG2,
3636 : (errmsg_internal("sending %s to process %d",
3637 : (SendStop ? "SIGSTOP" : "SIGQUIT"),
3638 : (int) CheckpointerPID)));
3639 14 : signal_child(CheckpointerPID, (SendStop ? SIGSTOP : SIGQUIT));
3640 : }
3641 :
3642 : /* Take care of the walwriter too */
3643 2294 : if (pid == WalWriterPID)
3644 0 : WalWriterPID = 0;
3645 2294 : else if (WalWriterPID != 0 && take_action)
3646 : {
3647 8 : ereport(DEBUG2,
3648 : (errmsg_internal("sending %s to process %d",
3649 : (SendStop ? "SIGSTOP" : "SIGQUIT"),
3650 : (int) WalWriterPID)));
3651 8 : signal_child(WalWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
3652 : }
3653 :
3654 : /* Take care of the walreceiver too */
3655 2294 : if (pid == WalReceiverPID)
3656 0 : WalReceiverPID = 0;
3657 2294 : else if (WalReceiverPID != 0 && take_action)
3658 : {
3659 0 : ereport(DEBUG2,
3660 : (errmsg_internal("sending %s to process %d",
3661 : (SendStop ? "SIGSTOP" : "SIGQUIT"),
3662 : (int) WalReceiverPID)));
3663 0 : signal_child(WalReceiverPID, (SendStop ? SIGSTOP : SIGQUIT));
3664 : }
3665 :
3666 : /* Take care of the autovacuum launcher too */
3667 2294 : if (pid == AutoVacPID)
3668 0 : AutoVacPID = 0;
3669 2294 : else if (AutoVacPID != 0 && take_action)
3670 : {
3671 8 : ereport(DEBUG2,
3672 : (errmsg_internal("sending %s to process %d",
3673 : (SendStop ? "SIGSTOP" : "SIGQUIT"),
3674 : (int) AutoVacPID)));
3675 8 : signal_child(AutoVacPID, (SendStop ? SIGSTOP : SIGQUIT));
3676 : }
3677 :
3678 : /* Take care of the archiver too */
3679 2294 : if (pid == PgArchPID)
3680 0 : PgArchPID = 0;
3681 2294 : else if (PgArchPID != 0 && take_action)
3682 : {
3683 0 : ereport(DEBUG2,
3684 : (errmsg_internal("sending %s to process %d",
3685 : (SendStop ? "SIGSTOP" : "SIGQUIT"),
3686 : (int) PgArchPID)));
3687 0 : signal_child(PgArchPID, (SendStop ? SIGSTOP : SIGQUIT));
3688 : }
3689 :
3690 : /* We do NOT restart the syslogger */
3691 :
3692 2294 : if (Shutdown != ImmediateShutdown)
3693 74 : FatalError = true;
3694 :
3695 : /* We now transit into a state of waiting for children to die */
3696 2294 : if (pmState == PM_RECOVERY ||
3697 2290 : pmState == PM_HOT_STANDBY ||
3698 2288 : pmState == PM_RUN ||
3699 2280 : pmState == PM_STOP_BACKENDS ||
3700 2280 : pmState == PM_SHUTDOWN)
3701 14 : pmState = PM_WAIT_BACKENDS;
3702 :
3703 : /*
3704 : * .. and if this doesn't happen quickly enough, now the clock is ticking
3705 : * for us to kill them without mercy.
3706 : */
3707 2294 : if (AbortStartTime == 0)
3708 14 : AbortStartTime = time(NULL);
3709 2294 : }
3710 :
3711 : /*
3712 : * Log the death of a child process.
3713 : */
3714 : static void
3715 19294 : LogChildExit(int lev, const char *procname, int pid, int exitstatus)
3716 : {
3717 : /*
3718 : * size of activity_buffer is arbitrary, but set equal to default
3719 : * track_activity_query_size
3720 : */
3721 : char activity_buffer[1024];
3722 19294 : const char *activity = NULL;
3723 :
3724 19294 : if (!EXIT_STATUS_0(exitstatus))
3725 1210 : activity = pgstat_get_crashed_backend_activity(pid,
3726 : activity_buffer,
3727 : sizeof(activity_buffer));
3728 :
3729 19294 : if (WIFEXITED(exitstatus))
3730 19282 : ereport(lev,
3731 :
3732 : /*------
3733 : translator: %s is a noun phrase describing a child process, such as
3734 : "server process" */
3735 : (errmsg("%s (PID %d) exited with exit code %d",
3736 : procname, pid, WEXITSTATUS(exitstatus)),
3737 : activity ? errdetail("Failed process was running: %s", activity) : 0));
3738 12 : else if (WIFSIGNALED(exitstatus))
3739 : {
3740 : #if defined(WIN32)
3741 : ereport(lev,
3742 :
3743 : /*------
3744 : translator: %s is a noun phrase describing a child process, such as
3745 : "server process" */
3746 : (errmsg("%s (PID %d) was terminated by exception 0x%X",
3747 : procname, pid, WTERMSIG(exitstatus)),
3748 : errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
3749 : activity ? errdetail("Failed process was running: %s", activity) : 0));
3750 : #else
3751 12 : ereport(lev,
3752 :
3753 : /*------
3754 : translator: %s is a noun phrase describing a child process, such as
3755 : "server process" */
3756 : (errmsg("%s (PID %d) was terminated by signal %d: %s",
3757 : procname, pid, WTERMSIG(exitstatus),
3758 : pg_strsignal(WTERMSIG(exitstatus))),
3759 : activity ? errdetail("Failed process was running: %s", activity) : 0));
3760 : #endif
3761 : }
3762 : else
3763 0 : ereport(lev,
3764 :
3765 : /*------
3766 : translator: %s is a noun phrase describing a child process, such as
3767 : "server process" */
3768 : (errmsg("%s (PID %d) exited with unrecognized status %d",
3769 : procname, pid, exitstatus),
3770 : activity ? errdetail("Failed process was running: %s", activity) : 0));
3771 19294 : }
3772 :
3773 : /*
3774 : * Advance the postmaster's state machine and take actions as appropriate
3775 : *
3776 : * This is common code for pmdie(), reaper() and sigusr1_handler(), which
3777 : * receive the signals that might mean we need to change state.
3778 : */
3779 : static void
3780 25856 : PostmasterStateMachine(void)
3781 : {
3782 : /* If we're doing a smart shutdown, try to advance that state. */
3783 25856 : if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
3784 : {
3785 19876 : if (!connsAllowed)
3786 : {
3787 : /*
3788 : * This state ends when we have no normal client backends running.
3789 : * Then we're ready to stop other children.
3790 : */
3791 16 : if (CountChildren(BACKEND_TYPE_NORMAL) == 0)
3792 8 : pmState = PM_STOP_BACKENDS;
3793 : }
3794 : }
3795 :
3796 : /*
3797 : * If we're ready to do so, signal child processes to shut down. (This
3798 : * isn't a persistent state, but treating it as a distinct pmState allows
3799 : * us to share this code across multiple shutdown code paths.)
3800 : */
3801 25856 : if (pmState == PM_STOP_BACKENDS)
3802 : {
3803 : /*
3804 : * Forget any pending requests for background workers, since we're no
3805 : * longer willing to launch any new workers. (If additional requests
3806 : * arrive, BackgroundWorkerStateChange will reject them.)
3807 : */
3808 618 : ForgetUnstartedBackgroundWorkers();
3809 :
3810 : /* Signal all backend children except walsenders */
3811 618 : SignalSomeChildren(SIGTERM,
3812 : BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND);
3813 : /* and the autovac launcher too */
3814 618 : if (AutoVacPID != 0)
3815 532 : signal_child(AutoVacPID, SIGTERM);
3816 : /* and the bgwriter too */
3817 618 : if (BgWriterPID != 0)
3818 618 : signal_child(BgWriterPID, SIGTERM);
3819 : /* and the walwriter too */
3820 618 : if (WalWriterPID != 0)
3821 562 : signal_child(WalWriterPID, SIGTERM);
3822 : /* If we're in recovery, also stop startup and walreceiver procs */
3823 618 : if (StartupPID != 0)
3824 56 : signal_child(StartupPID, SIGTERM);
3825 618 : if (WalReceiverPID != 0)
3826 40 : signal_child(WalReceiverPID, SIGTERM);
3827 : /* checkpointer, archiver, stats, and syslogger may continue for now */
3828 :
3829 : /* Now transition to PM_WAIT_BACKENDS state to wait for them to die */
3830 618 : pmState = PM_WAIT_BACKENDS;
3831 : }
3832 :
3833 : /*
3834 : * If we are in a state-machine state that implies waiting for backends to
3835 : * exit, see if they're all gone, and change state if so.
3836 : */
3837 25856 : if (pmState == PM_WAIT_BACKENDS)
3838 : {
3839 : /*
3840 : * PM_WAIT_BACKENDS state ends when we have no regular backends
3841 : * (including autovac workers), no bgworkers (including unconnected
3842 : * ones), and no walwriter, autovac launcher or bgwriter. If we are
3843 : * doing crash recovery or an immediate shutdown then we expect the
3844 : * checkpointer to exit as well, otherwise not. The stats and
3845 : * syslogger processes are disregarded since they are not connected to
3846 : * shared memory; we also disregard dead_end children here. Walsenders
3847 : * and archiver are also disregarded, they will be terminated later
3848 : * after writing the checkpoint record.
3849 : */
3850 5246 : if (CountChildren(BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND) == 0 &&
3851 2490 : StartupPID == 0 &&
3852 2304 : WalReceiverPID == 0 &&
3853 2262 : BgWriterPID == 0 &&
3854 1644 : (CheckpointerPID == 0 ||
3855 1080 : (!FatalError && Shutdown < ImmediateShutdown)) &&
3856 1532 : WalWriterPID == 0 &&
3857 1204 : AutoVacPID == 0)
3858 : {
3859 1046 : if (Shutdown >= ImmediateShutdown || FatalError)
3860 : {
3861 : /*
3862 : * Start waiting for dead_end children to die. This state
3863 : * change causes ServerLoop to stop creating new ones.
3864 : */
3865 428 : pmState = PM_WAIT_DEAD_END;
3866 :
3867 : /*
3868 : * We already SIGQUIT'd the archiver and stats processes, if
3869 : * any, when we started immediate shutdown or entered
3870 : * FatalError state.
3871 : */
3872 : }
3873 : else
3874 : {
3875 : /*
3876 : * If we get here, we are proceeding with normal shutdown. All
3877 : * the regular children are gone, and it's time to tell the
3878 : * checkpointer to do a shutdown checkpoint.
3879 : */
3880 : Assert(Shutdown > NoShutdown);
3881 : /* Start the checkpointer if not running */
3882 618 : if (CheckpointerPID == 0)
3883 0 : CheckpointerPID = StartCheckpointer();
3884 : /* And tell it to shut down */
3885 618 : if (CheckpointerPID != 0)
3886 : {
3887 618 : signal_child(CheckpointerPID, SIGUSR2);
3888 618 : pmState = PM_SHUTDOWN;
3889 : }
3890 : else
3891 : {
3892 : /*
3893 : * If we failed to fork a checkpointer, just shut down.
3894 : * Any required cleanup will happen at next restart. We
3895 : * set FatalError so that an "abnormal shutdown" message
3896 : * gets logged when we exit.
3897 : */
3898 0 : FatalError = true;
3899 0 : pmState = PM_WAIT_DEAD_END;
3900 :
3901 : /* Kill the walsenders and archiver too */
3902 0 : SignalChildren(SIGQUIT);
3903 0 : if (PgArchPID != 0)
3904 0 : signal_child(PgArchPID, SIGQUIT);
3905 : }
3906 : }
3907 : }
3908 : }
3909 :
3910 25856 : if (pmState == PM_SHUTDOWN_2)
3911 : {
3912 : /*
3913 : * PM_SHUTDOWN_2 state ends when there's no other children than
3914 : * dead_end children left. There shouldn't be any regular backends
3915 : * left by now anyway; what we're really waiting for is walsenders and
3916 : * archiver.
3917 : */
3918 670 : if (PgArchPID == 0 && CountChildren(BACKEND_TYPE_ALL) == 0)
3919 : {
3920 618 : pmState = PM_WAIT_DEAD_END;
3921 : }
3922 : }
3923 :
3924 25856 : if (pmState == PM_WAIT_DEAD_END)
3925 : {
3926 : /*
3927 : * PM_WAIT_DEAD_END state ends when the BackendList is entirely empty
3928 : * (ie, no dead_end children remain), and the archiver is gone too.
3929 : *
3930 : * The reason we wait for those two is to protect them against a new
3931 : * postmaster starting conflicting subprocesses; this isn't an
3932 : * ironclad protection, but it at least helps in the
3933 : * shutdown-and-immediately-restart scenario. Note that they have
3934 : * already been sent appropriate shutdown signals, either during a
3935 : * normal state transition leading up to PM_WAIT_DEAD_END, or during
3936 : * FatalError processing.
3937 : */
3938 1060 : if (dlist_is_empty(&BackendList) && PgArchPID == 0)
3939 : {
3940 : /* These other guys should be dead already */
3941 : Assert(StartupPID == 0);
3942 : Assert(WalReceiverPID == 0);
3943 : Assert(BgWriterPID == 0);
3944 : Assert(CheckpointerPID == 0);
3945 : Assert(WalWriterPID == 0);
3946 : Assert(AutoVacPID == 0);
3947 : /* syslogger is not considered here */
3948 1046 : pmState = PM_NO_CHILDREN;
3949 : }
3950 : }
3951 :
3952 : /*
3953 : * If we've been told to shut down, we exit as soon as there are no
3954 : * remaining children. If there was a crash, cleanup will occur at the
3955 : * next startup. (Before PostgreSQL 8.3, we tried to recover from the
3956 : * crash before exiting, but that seems unwise if we are quitting because
3957 : * we got SIGTERM from init --- there may well not be time for recovery
3958 : * before init decides to SIGKILL us.)
3959 : *
3960 : * Note that the syslogger continues to run. It will exit when it sees
3961 : * EOF on its input pipe, which happens when there are no more upstream
3962 : * processes.
3963 : */
3964 25856 : if (Shutdown > NoShutdown && pmState == PM_NO_CHILDREN)
3965 : {
3966 1032 : if (FatalError)
3967 : {
3968 0 : ereport(LOG, (errmsg("abnormal database system shutdown")));
3969 0 : ExitPostmaster(1);
3970 : }
3971 : else
3972 : {
3973 : /*
3974 : * Normal exit from the postmaster is here. We don't need to log
3975 : * anything here, since the UnlinkLockFiles proc_exit callback
3976 : * will do so, and that should be the last user-visible action.
3977 : */
3978 1032 : ExitPostmaster(0);
3979 : }
3980 : }
3981 :
3982 : /*
3983 : * If the startup process failed, or the user does not want an automatic
3984 : * restart after backend crashes, wait for all non-syslogger children to
3985 : * exit, and then exit postmaster. We don't try to reinitialize when the
3986 : * startup process fails, because more than likely it will just fail again
3987 : * and we will keep trying forever.
3988 : */
3989 24824 : if (pmState == PM_NO_CHILDREN)
3990 : {
3991 14 : if (StartupStatus == STARTUP_CRASHED)
3992 : {
3993 6 : ereport(LOG,
3994 : (errmsg("shutting down due to startup process failure")));
3995 6 : ExitPostmaster(1);
3996 : }
3997 8 : if (!restart_after_crash)
3998 : {
3999 0 : ereport(LOG,
4000 : (errmsg("shutting down because restart_after_crash is off")));
4001 0 : ExitPostmaster(1);
4002 : }
4003 : }
4004 :
4005 : /*
4006 : * If we need to recover from a crash, wait for all non-syslogger children
4007 : * to exit, then reset shmem and StartupDataBase.
4008 : */
4009 24818 : if (FatalError && pmState == PM_NO_CHILDREN)
4010 : {
4011 8 : ereport(LOG,
4012 : (errmsg("all server processes terminated; reinitializing")));
4013 :
4014 : /* remove leftover temporary files after a crash */
4015 8 : if (remove_temp_files_after_crash)
4016 6 : RemovePgTempFiles();
4017 :
4018 : /* allow background workers to immediately restart */
4019 8 : ResetBackgroundWorkerCrashTimes();
4020 :
4021 8 : shmem_exit(1);
4022 :
4023 : /* re-read control file into local memory */
4024 8 : LocalProcessControlFile(true);
4025 :
4026 8 : reset_shared();
4027 :
4028 8 : StartupPID = StartupDataBase();
4029 : Assert(StartupPID != 0);
4030 8 : StartupStatus = STARTUP_RUNNING;
4031 8 : pmState = PM_STARTUP;
4032 : /* crash recovery started, reset SIGKILL flag */
4033 8 : AbortStartTime = 0;
4034 : }
4035 24818 : }
4036 :
4037 :
4038 : /*
4039 : * Send a signal to a postmaster child process
4040 : *
4041 : * On systems that have setsid(), each child process sets itself up as a
4042 : * process group leader. For signals that are generally interpreted in the
4043 : * appropriate fashion, we signal the entire process group not just the
4044 : * direct child process. This allows us to, for example, SIGQUIT a blocked
4045 : * archive_recovery script, or SIGINT a script being run by a backend via
4046 : * system().
4047 : *
4048 : * There is a race condition for recently-forked children: they might not
4049 : * have executed setsid() yet. So we signal the child directly as well as
4050 : * the group. We assume such a child will handle the signal before trying
4051 : * to spawn any grandchild processes. We also assume that signaling the
4052 : * child twice will not cause any problems.
4053 : */
4054 : static void
4055 6486 : signal_child(pid_t pid, int signal)
4056 : {
4057 6486 : if (kill(pid, signal) < 0)
4058 0 : elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) pid, signal);
4059 : #ifdef HAVE_SETSID
4060 6486 : switch (signal)
4061 : {
4062 5014 : case SIGINT:
4063 : case SIGTERM:
4064 : case SIGQUIT:
4065 : case SIGSTOP:
4066 : case SIGKILL:
4067 5014 : if (kill(-pid, signal) < 0)
4068 8 : elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) (-pid), signal);
4069 5014 : break;
4070 1472 : default:
4071 1472 : break;
4072 : }
4073 : #endif
4074 6486 : }
4075 :
4076 : /*
4077 : * Send a signal to the targeted children (but NOT special children;
4078 : * dead_end children are never signaled, either).
4079 : */
4080 : static bool
4081 1762 : SignalSomeChildren(int signal, int target)
4082 : {
4083 : dlist_iter iter;
4084 1762 : bool signaled = false;
4085 :
4086 3642 : dlist_foreach(iter, &BackendList)
4087 : {
4088 1880 : Backend *bp = dlist_container(Backend, elem, iter.cur);
4089 :
4090 1880 : if (bp->dead_end)
4091 2 : continue;
4092 :
4093 : /*
4094 : * Since target == BACKEND_TYPE_ALL is the most common case, we test
4095 : * it first and avoid touching shared memory for every child.
4096 : */
4097 1878 : if (target != BACKEND_TYPE_ALL)
4098 : {
4099 : /*
4100 : * Assign bkend_type for any recently announced WAL Sender
4101 : * processes.
4102 : */
4103 1332 : if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
4104 366 : IsPostmasterChildWalSender(bp->child_slot))
4105 44 : bp->bkend_type = BACKEND_TYPE_WALSND;
4106 :
4107 966 : if (!(target & bp->bkend_type))
4108 44 : continue;
4109 : }
4110 :
4111 1834 : ereport(DEBUG4,
4112 : (errmsg_internal("sending signal %d to process %d",
4113 : signal, (int) bp->pid)));
4114 1834 : signal_child(bp->pid, signal);
4115 1834 : signaled = true;
4116 : }
4117 1762 : return signaled;
4118 : }
4119 :
4120 : /*
4121 : * Send a termination signal to children. This considers all of our children
4122 : * processes, except syslogger and dead_end backends.
4123 : */
4124 : static void
4125 414 : TerminateChildren(int signal)
4126 : {
4127 414 : SignalChildren(signal);
4128 414 : if (StartupPID != 0)
4129 : {
4130 74 : signal_child(StartupPID, signal);
4131 74 : if (signal == SIGQUIT || signal == SIGKILL)
4132 74 : StartupStatus = STARTUP_SIGNALED;
4133 : }
4134 414 : if (BgWriterPID != 0)
4135 414 : signal_child(BgWriterPID, signal);
4136 414 : if (CheckpointerPID != 0)
4137 414 : signal_child(CheckpointerPID, signal);
4138 414 : if (WalWriterPID != 0)
4139 340 : signal_child(WalWriterPID, signal);
4140 414 : if (WalReceiverPID != 0)
4141 28 : signal_child(WalReceiverPID, signal);
4142 414 : if (AutoVacPID != 0)
4143 322 : signal_child(AutoVacPID, signal);
4144 414 : if (PgArchPID != 0)
4145 38 : signal_child(PgArchPID, signal);
4146 414 : }
4147 :
4148 : /*
4149 : * BackendStartup -- start backend process
4150 : *
4151 : * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
4152 : *
4153 : * Note: if you change this code, also consider StartAutovacuumWorker.
4154 : */
4155 : static int
4156 15642 : BackendStartup(Port *port)
4157 : {
4158 : Backend *bn; /* for backend cleanup */
4159 : pid_t pid;
4160 :
4161 : /*
4162 : * Create backend data structure. Better before the fork() so we can
4163 : * handle failure cleanly.
4164 : */
4165 15642 : bn = (Backend *) malloc(sizeof(Backend));
4166 15642 : if (!bn)
4167 : {
4168 0 : ereport(LOG,
4169 : (errcode(ERRCODE_OUT_OF_MEMORY),
4170 : errmsg("out of memory")));
4171 0 : return STATUS_ERROR;
4172 : }
4173 :
4174 : /*
4175 : * Compute the cancel key that will be assigned to this backend. The
4176 : * backend will have its own copy in the forked-off process' value of
4177 : * MyCancelKey, so that it can transmit the key to the frontend.
4178 : */
4179 15642 : if (!RandomCancelKey(&MyCancelKey))
4180 : {
4181 0 : free(bn);
4182 0 : ereport(LOG,
4183 : (errcode(ERRCODE_INTERNAL_ERROR),
4184 : errmsg("could not generate random cancel key")));
4185 0 : return STATUS_ERROR;
4186 : }
4187 :
4188 15642 : bn->cancel_key = MyCancelKey;
4189 :
4190 : /* Pass down canAcceptConnections state */
4191 15642 : port->canAcceptConnections = canAcceptConnections(BACKEND_TYPE_NORMAL);
4192 15642 : bn->dead_end = (port->canAcceptConnections != CAC_OK);
4193 :
4194 : /*
4195 : * Unless it's a dead_end child, assign it a child slot number
4196 : */
4197 15642 : if (!bn->dead_end)
4198 15596 : bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
4199 : else
4200 46 : bn->child_slot = 0;
4201 :
4202 : /* Hasn't asked to be notified about any bgworkers yet */
4203 15642 : bn->bgworker_notify = false;
4204 :
4205 : #ifdef EXEC_BACKEND
4206 : pid = backend_forkexec(port);
4207 : #else /* !EXEC_BACKEND */
4208 15642 : pid = fork_process();
4209 31056 : if (pid == 0) /* child */
4210 : {
4211 15416 : free(bn);
4212 :
4213 : /* Detangle from postmaster */
4214 15416 : InitPostmasterChild();
4215 :
4216 : /* Close the postmaster's sockets */
4217 15416 : ClosePostmasterPorts(false);
4218 :
4219 : /* Perform additional initialization and collect startup packet */
4220 15416 : BackendInitialize(port);
4221 :
4222 : /*
4223 : * Create a per-backend PGPROC struct in shared memory. We must do
4224 : * this before we can use LWLocks. In the !EXEC_BACKEND case (here)
4225 : * this could be delayed a bit further, but EXEC_BACKEND needs to do
4226 : * stuff with LWLocks before PostgresMain(), so we do it here as well
4227 : * for symmetry.
4228 : */
4229 15312 : InitProcess();
4230 :
4231 : /* And run the backend */
4232 15308 : BackendRun(port);
4233 : }
4234 : #endif /* EXEC_BACKEND */
4235 :
4236 15640 : if (pid < 0)
4237 : {
4238 : /* in parent, fork failed */
4239 0 : int save_errno = errno;
4240 :
4241 0 : if (!bn->dead_end)
4242 0 : (void) ReleasePostmasterChildSlot(bn->child_slot);
4243 0 : free(bn);
4244 0 : errno = save_errno;
4245 0 : ereport(LOG,
4246 : (errmsg("could not fork new process for connection: %m")));
4247 0 : report_fork_failure_to_client(port, save_errno);
4248 0 : return STATUS_ERROR;
4249 : }
4250 :
4251 : /* in parent, successful fork */
4252 15640 : ereport(DEBUG2,
4253 : (errmsg_internal("forked new backend, pid=%d socket=%d",
4254 : (int) pid, (int) port->sock)));
4255 :
4256 : /*
4257 : * Everything's been successful, it's safe to add this backend to our list
4258 : * of backends.
4259 : */
4260 15640 : bn->pid = pid;
4261 15640 : bn->bkend_type = BACKEND_TYPE_NORMAL; /* Can change later to WALSND */
4262 15640 : dlist_push_head(&BackendList, &bn->elem);
4263 :
4264 : #ifdef EXEC_BACKEND
4265 : if (!bn->dead_end)
4266 : ShmemBackendArrayAdd(bn);
4267 : #endif
4268 :
4269 15640 : return STATUS_OK;
4270 : }
4271 :
4272 : /*
4273 : * Try to report backend fork() failure to client before we close the
4274 : * connection. Since we do not care to risk blocking the postmaster on
4275 : * this connection, we set the connection to non-blocking and try only once.
4276 : *
4277 : * This is grungy special-purpose code; we cannot use backend libpq since
4278 : * it's not up and running.
4279 : */
4280 : static void
4281 0 : report_fork_failure_to_client(Port *port, int errnum)
4282 : {
4283 : char buffer[1000];
4284 : int rc;
4285 :
4286 : /* Format the error message packet (always V2 protocol) */
4287 0 : snprintf(buffer, sizeof(buffer), "E%s%s\n",
4288 : _("could not fork new process for connection: "),
4289 : strerror(errnum));
4290 :
4291 : /* Set port to non-blocking. Don't do send() if this fails */
4292 0 : if (!pg_set_noblock(port->sock))
4293 0 : return;
4294 :
4295 : /* We'll retry after EINTR, but ignore all other failures */
4296 : do
4297 : {
4298 0 : rc = send(port->sock, buffer, strlen(buffer) + 1, 0);
4299 0 : } while (rc < 0 && errno == EINTR);
4300 : }
4301 :
4302 :
4303 : /*
4304 : * BackendInitialize -- initialize an interactive (postmaster-child)
4305 : * backend process, and collect the client's startup packet.
4306 : *
4307 : * returns: nothing. Will not return at all if there's any failure.
4308 : *
4309 : * Note: this code does not depend on having any access to shared memory.
4310 : * Indeed, our approach to SIGTERM/timeout handling *requires* that
4311 : * shared memory not have been touched yet; see comments within.
4312 : * In the EXEC_BACKEND case, we are physically attached to shared memory
4313 : * but have not yet set up most of our local pointers to shmem structures.
4314 : */
4315 : static void
4316 15416 : BackendInitialize(Port *port)
4317 : {
4318 : int status;
4319 : int ret;
4320 : char remote_host[NI_MAXHOST];
4321 : char remote_port[NI_MAXSERV];
4322 : StringInfoData ps_data;
4323 :
4324 : /* Save port etc. for ps status */
4325 15416 : MyProcPort = port;
4326 :
4327 : /* Tell fd.c about the long-lived FD associated with the port */
4328 15416 : ReserveExternalFD();
4329 :
4330 : /*
4331 : * PreAuthDelay is a debugging aid for investigating problems in the
4332 : * authentication cycle: it can be set in postgresql.conf to allow time to
4333 : * attach to the newly-forked backend with a debugger. (See also
4334 : * PostAuthDelay, which we allow clients to pass through PGOPTIONS, but it
4335 : * is not honored until after authentication.)
4336 : */
4337 15416 : if (PreAuthDelay > 0)
4338 0 : pg_usleep(PreAuthDelay * 1000000L);
4339 :
4340 : /* This flag will remain set until InitPostgres finishes authentication */
4341 15416 : ClientAuthInProgress = true; /* limit visibility of log messages */
4342 :
4343 : /* set these to empty in case they are needed before we set them up */
4344 15416 : port->remote_host = "";
4345 15416 : port->remote_port = "";
4346 :
4347 : /*
4348 : * Initialize libpq and enable reporting of ereport errors to the client.
4349 : * Must do this now because authentication uses libpq to send messages.
4350 : */
4351 15416 : pq_init(); /* initialize libpq to talk to client */
4352 15416 : whereToSendOutput = DestRemote; /* now safe to ereport to client */
4353 :
4354 : /*
4355 : * We arrange to do _exit(1) if we receive SIGTERM or timeout while trying
4356 : * to collect the startup packet; while SIGQUIT results in _exit(2).
4357 : * Otherwise the postmaster cannot shutdown the database FAST or IMMED
4358 : * cleanly if a buggy client fails to send the packet promptly.
4359 : *
4360 : * Exiting with _exit(1) is only possible because we have not yet touched
4361 : * shared memory; therefore no outside-the-process state needs to get
4362 : * cleaned up.
4363 : */
4364 15416 : pqsignal(SIGTERM, process_startup_packet_die);
4365 : /* SIGQUIT handler was already set up by InitPostmasterChild */
4366 15416 : InitializeTimeouts(); /* establishes SIGALRM handler */
4367 15416 : PG_SETMASK(&StartupBlockSig);
4368 :
4369 : /*
4370 : * Get the remote host name and port for logging and status display.
4371 : */
4372 15416 : remote_host[0] = '\0';
4373 15416 : remote_port[0] = '\0';
4374 15416 : if ((ret = pg_getnameinfo_all(&port->raddr.addr, port->raddr.salen,
4375 : remote_host, sizeof(remote_host),
4376 : remote_port, sizeof(remote_port),
4377 : (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV)) != 0)
4378 0 : ereport(WARNING,
4379 : (errmsg_internal("pg_getnameinfo_all() failed: %s",
4380 : gai_strerror(ret))));
4381 :
4382 : /*
4383 : * Save remote_host and remote_port in port structure (after this, they
4384 : * will appear in log_line_prefix data for log messages).
4385 : */
4386 15416 : port->remote_host = strdup(remote_host);
4387 15416 : port->remote_port = strdup(remote_port);
4388 :
4389 : /* And now we can issue the Log_connections message, if wanted */
4390 15416 : if (Log_connections)
4391 : {
4392 736 : if (remote_port[0])
4393 192 : ereport(LOG,
4394 : (errmsg("connection received: host=%s port=%s",
4395 : remote_host,
4396 : remote_port)));
4397 : else
4398 544 : ereport(LOG,
4399 : (errmsg("connection received: host=%s",
4400 : remote_host)));
4401 : }
4402 :
4403 : /*
4404 : * If we did a reverse lookup to name, we might as well save the results
4405 : * rather than possibly repeating the lookup during authentication.
4406 : *
4407 : * Note that we don't want to specify NI_NAMEREQD above, because then we'd
4408 : * get nothing useful for a client without an rDNS entry. Therefore, we
4409 : * must check whether we got a numeric IPv4 or IPv6 address, and not save
4410 : * it into remote_hostname if so. (This test is conservative and might
4411 : * sometimes classify a hostname as numeric, but an error in that
4412 : * direction is safe; it only results in a possible extra lookup.)
4413 : */
4414 15416 : if (log_hostname &&
4415 192 : ret == 0 &&
4416 192 : strspn(remote_host, "0123456789.") < strlen(remote_host) &&
4417 192 : strspn(remote_host, "0123456789ABCDEFabcdef:") < strlen(remote_host))
4418 192 : port->remote_hostname = strdup(remote_host);
4419 :
4420 : /*
4421 : * Ready to begin client interaction. We will give up and _exit(1) after
4422 : * a time delay, so that a broken client can't hog a connection
4423 : * indefinitely. PreAuthDelay and any DNS interactions above don't count
4424 : * against the time limit.
4425 : *
4426 : * Note: AuthenticationTimeout is applied here while waiting for the
4427 : * startup packet, and then again in InitPostgres for the duration of any
4428 : * authentication operations. So a hostile client could tie up the
4429 : * process for nearly twice AuthenticationTimeout before we kick him off.
4430 : *
4431 : * Note: because PostgresMain will call InitializeTimeouts again, the
4432 : * registration of STARTUP_PACKET_TIMEOUT will be lost. This is okay
4433 : * since we never use it again after this function.
4434 : */
4435 15416 : RegisterTimeout(STARTUP_PACKET_TIMEOUT, StartupPacketTimeoutHandler);
4436 15416 : enable_timeout_after(STARTUP_PACKET_TIMEOUT, AuthenticationTimeout * 1000);
4437 :
4438 : /*
4439 : * Receive the startup packet (which might turn out to be a cancel request
4440 : * packet).
4441 : */
4442 15416 : status = ProcessStartupPacket(port, false, false);
4443 :
4444 : /*
4445 : * Disable the timeout, and prevent SIGTERM again.
4446 : */
4447 15370 : disable_timeout(STARTUP_PACKET_TIMEOUT, false);
4448 15370 : PG_SETMASK(&BlockSig);
4449 :
4450 : /*
4451 : * As a safety check that nothing in startup has yet performed
4452 : * shared-memory modifications that would need to be undone if we had
4453 : * exited through SIGTERM or timeout above, check that no on_shmem_exit
4454 : * handlers have been registered yet. (This isn't terribly bulletproof,
4455 : * since someone might misuse an on_proc_exit handler for shmem cleanup,
4456 : * but it's a cheap and helpful check. We cannot disallow on_proc_exit
4457 : * handlers unfortunately, since pq_init() already registered one.)
4458 : */
4459 15370 : check_on_shmem_exit_lists_are_empty();
4460 :
4461 : /*
4462 : * Stop here if it was bad or a cancel packet. ProcessStartupPacket
4463 : * already did any appropriate error reporting.
4464 : */
4465 15370 : if (status != STATUS_OK)
4466 58 : proc_exit(0);
4467 :
4468 : /*
4469 : * Now that we have the user and database name, we can set the process
4470 : * title for ps. It's good to do this as early as possible in startup.
4471 : */
4472 15312 : initStringInfo(&ps_data);
4473 15312 : if (am_walsender)
4474 1306 : appendStringInfo(&ps_data, "%s ", GetBackendTypeDesc(B_WAL_SENDER));
4475 15312 : appendStringInfo(&ps_data, "%s ", port->user_name);
4476 15312 : if (!am_walsender)
4477 14006 : appendStringInfo(&ps_data, "%s ", port->database_name);
4478 15312 : appendStringInfo(&ps_data, "%s", port->remote_host);
4479 15312 : if (port->remote_port[0] != '\0')
4480 420 : appendStringInfo(&ps_data, "(%s)", port->remote_port);
4481 :
4482 15312 : init_ps_display(ps_data.data);
4483 15312 : pfree(ps_data.data);
4484 :
4485 15312 : set_ps_display("initializing");
4486 15312 : }
4487 :
4488 :
4489 : /*
4490 : * BackendRun -- set up the backend's argument list and invoke PostgresMain()
4491 : *
4492 : * returns:
4493 : * Doesn't return at all.
4494 : */
4495 : static void
4496 15308 : BackendRun(Port *port)
4497 : {
4498 : /*
4499 : * Make sure we aren't in PostmasterContext anymore. (We can't delete it
4500 : * just yet, though, because InitPostgres will need the HBA data.)
4501 : */
4502 15308 : MemoryContextSwitchTo(TopMemoryContext);
4503 :
4504 15308 : PostgresMain(port->database_name, port->user_name);
4505 : }
4506 :
4507 :
4508 : #ifdef EXEC_BACKEND
4509 :
4510 : /*
4511 : * postmaster_forkexec -- fork and exec a postmaster subprocess
4512 : *
4513 : * The caller must have set up the argv array already, except for argv[2]
4514 : * which will be filled with the name of the temp variable file.
4515 : *
4516 : * Returns the child process PID, or -1 on fork failure (a suitable error
4517 : * message has been logged on failure).
4518 : *
4519 : * All uses of this routine will dispatch to SubPostmasterMain in the
4520 : * child process.
4521 : */
4522 : pid_t
4523 : postmaster_forkexec(int argc, char *argv[])
4524 : {
4525 : Port port;
4526 :
4527 : /* This entry point passes dummy values for the Port variables */
4528 : memset(&port, 0, sizeof(port));
4529 : return internal_forkexec(argc, argv, &port);
4530 : }
4531 :
4532 : /*
4533 : * backend_forkexec -- fork/exec off a backend process
4534 : *
4535 : * Some operating systems (WIN32) don't have fork() so we have to simulate
4536 : * it by storing parameters that need to be passed to the child and
4537 : * then create a new child process.
4538 : *
4539 : * returns the pid of the fork/exec'd process, or -1 on failure
4540 : */
4541 : static pid_t
4542 : backend_forkexec(Port *port)
4543 : {
4544 : char *av[4];
4545 : int ac = 0;
4546 :
4547 : av[ac++] = "postgres";
4548 : av[ac++] = "--forkbackend";
4549 : av[ac++] = NULL; /* filled in by internal_forkexec */
4550 :
4551 : av[ac] = NULL;
4552 : Assert(ac < lengthof(av));
4553 :
4554 : return internal_forkexec(ac, av, port);
4555 : }
4556 :
4557 : #ifndef WIN32
4558 :
4559 : /*
4560 : * internal_forkexec non-win32 implementation
4561 : *
4562 : * - writes out backend variables to the parameter file
4563 : * - fork():s, and then exec():s the child process
4564 : */
4565 : static pid_t
4566 : internal_forkexec(int argc, char *argv[], Port *port)
4567 : {
4568 : static unsigned long tmpBackendFileNum = 0;
4569 : pid_t pid;
4570 : char tmpfilename[MAXPGPATH];
4571 : BackendParameters param;
4572 : FILE *fp;
4573 :
4574 : if (!save_backend_variables(¶m, port))
4575 : return -1; /* log made by save_backend_variables */
4576 :
4577 : /* Calculate name for temp file */
4578 : snprintf(tmpfilename, MAXPGPATH, "%s/%s.backend_var.%d.%lu",
4579 : PG_TEMP_FILES_DIR, PG_TEMP_FILE_PREFIX,
4580 : MyProcPid, ++tmpBackendFileNum);
4581 :
4582 : /* Open file */
4583 : fp = AllocateFile(tmpfilename, PG_BINARY_W);
4584 : if (!fp)
4585 : {
4586 : /*
4587 : * As in OpenTemporaryFileInTablespace, try to make the temp-file
4588 : * directory, ignoring errors.
4589 : */
4590 : (void) MakePGDirectory(PG_TEMP_FILES_DIR);
4591 :
4592 : fp = AllocateFile(tmpfilename, PG_BINARY_W);
4593 : if (!fp)
4594 : {
4595 : ereport(LOG,
4596 : (errcode_for_file_access(),
4597 : errmsg("could not create file \"%s\": %m",
4598 : tmpfilename)));
4599 : return -1;
4600 : }
4601 : }
4602 :
4603 : if (fwrite(¶m, sizeof(param), 1, fp) != 1)
4604 : {
4605 : ereport(LOG,
4606 : (errcode_for_file_access(),
4607 : errmsg("could not write to file \"%s\": %m", tmpfilename)));
4608 : FreeFile(fp);
4609 : return -1;
4610 : }
4611 :
4612 : /* Release file */
4613 : if (FreeFile(fp))
4614 : {
4615 : ereport(LOG,
4616 : (errcode_for_file_access(),
4617 : errmsg("could not write to file \"%s\": %m", tmpfilename)));
4618 : return -1;
4619 : }
4620 :
4621 : /* Make sure caller set up argv properly */
4622 : Assert(argc >= 3);
4623 : Assert(argv[argc] == NULL);
4624 : Assert(strncmp(argv[1], "--fork", 6) == 0);
4625 : Assert(argv[2] == NULL);
4626 :
4627 : /* Insert temp file name after --fork argument */
4628 : argv[2] = tmpfilename;
4629 :
4630 : /* Fire off execv in child */
4631 : if ((pid = fork_process()) == 0)
4632 : {
4633 : if (execv(postgres_exec_path, argv) < 0)
4634 : {
4635 : ereport(LOG,
4636 : (errmsg("could not execute server process \"%s\": %m",
4637 : postgres_exec_path)));
4638 : /* We're already in the child process here, can't return */
4639 : exit(1);
4640 : }
4641 : }
4642 :
4643 : return pid; /* Parent returns pid, or -1 on fork failure */
4644 : }
4645 : #else /* WIN32 */
4646 :
4647 : /*
4648 : * internal_forkexec win32 implementation
4649 : *
4650 : * - starts backend using CreateProcess(), in suspended state
4651 : * - writes out backend variables to the parameter file
4652 : * - during this, duplicates handles and sockets required for
4653 : * inheritance into the new process
4654 : * - resumes execution of the new process once the backend parameter
4655 : * file is complete.
4656 : */
4657 : static pid_t
4658 : internal_forkexec(int argc, char *argv[], Port *port)
4659 : {
4660 : int retry_count = 0;
4661 : STARTUPINFO si;
4662 : PROCESS_INFORMATION pi;
4663 : int i;
4664 : int j;
4665 : char cmdLine[MAXPGPATH * 2];
4666 : HANDLE paramHandle;
4667 : BackendParameters *param;
4668 : SECURITY_ATTRIBUTES sa;
4669 : char paramHandleStr[32];
4670 : win32_deadchild_waitinfo *childinfo;
4671 :
4672 : /* Make sure caller set up argv properly */
4673 : Assert(argc >= 3);
4674 : Assert(argv[argc] == NULL);
4675 : Assert(strncmp(argv[1], "--fork", 6) == 0);
4676 : Assert(argv[2] == NULL);
4677 :
4678 : /* Resume here if we need to retry */
4679 : retry:
4680 :
4681 : /* Set up shared memory for parameter passing */
4682 : ZeroMemory(&sa, sizeof(sa));
4683 : sa.nLength = sizeof(sa);
4684 : sa.bInheritHandle = TRUE;
4685 : paramHandle = CreateFileMapping(INVALID_HANDLE_VALUE,
4686 : &sa,
4687 : PAGE_READWRITE,
4688 : 0,
4689 : sizeof(BackendParameters),
4690 : NULL);
4691 : if (paramHandle == INVALID_HANDLE_VALUE)
4692 : {
4693 : ereport(LOG,
4694 : (errmsg("could not create backend parameter file mapping: error code %lu",
4695 : GetLastError())));
4696 : return -1;
4697 : }
4698 :
4699 : param = MapViewOfFile(paramHandle, FILE_MAP_WRITE, 0, 0, sizeof(BackendParameters));
4700 : if (!param)
4701 : {
4702 : ereport(LOG,
4703 : (errmsg("could not map backend parameter memory: error code %lu",
4704 : GetLastError())));
4705 : CloseHandle(paramHandle);
4706 : return -1;
4707 : }
4708 :
4709 : /* Insert temp file name after --fork argument */
4710 : #ifdef _WIN64
4711 : sprintf(paramHandleStr, "%llu", (LONG_PTR) paramHandle);
4712 : #else
4713 : sprintf(paramHandleStr, "%lu", (DWORD) paramHandle);
4714 : #endif
4715 : argv[2] = paramHandleStr;
4716 :
4717 : /* Format the cmd line */
4718 : cmdLine[sizeof(cmdLine) - 1] = '\0';
4719 : cmdLine[sizeof(cmdLine) - 2] = '\0';
4720 : snprintf(cmdLine, sizeof(cmdLine) - 1, "\"%s\"", postgres_exec_path);
4721 : i = 0;
4722 : while (argv[++i] != NULL)
4723 : {
4724 : j = strlen(cmdLine);
4725 : snprintf(cmdLine + j, sizeof(cmdLine) - 1 - j, " \"%s\"", argv[i]);
4726 : }
4727 : if (cmdLine[sizeof(cmdLine) - 2] != '\0')
4728 : {
4729 : ereport(LOG,
4730 : (errmsg("subprocess command line too long")));
4731 : UnmapViewOfFile(param);
4732 : CloseHandle(paramHandle);
4733 : return -1;
4734 : }
4735 :
4736 : memset(&pi, 0, sizeof(pi));
4737 : memset(&si, 0, sizeof(si));
4738 : si.cb = sizeof(si);
4739 :
4740 : /*
4741 : * Create the subprocess in a suspended state. This will be resumed later,
4742 : * once we have written out the parameter file.
4743 : */
4744 : if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, CREATE_SUSPENDED,
4745 : NULL, NULL, &si, &pi))
4746 : {
4747 : ereport(LOG,
4748 : (errmsg("CreateProcess() call failed: %m (error code %lu)",
4749 : GetLastError())));
4750 : UnmapViewOfFile(param);
4751 : CloseHandle(paramHandle);
4752 : return -1;
4753 : }
4754 :
4755 : if (!save_backend_variables(param, port, pi.hProcess, pi.dwProcessId))
4756 : {
4757 : /*
4758 : * log made by save_backend_variables, but we have to clean up the
4759 : * mess with the half-started process
4760 : */
4761 : if (!TerminateProcess(pi.hProcess, 255))
4762 : ereport(LOG,
4763 : (errmsg_internal("could not terminate unstarted process: error code %lu",
4764 : GetLastError())));
4765 : CloseHandle(pi.hProcess);
4766 : CloseHandle(pi.hThread);
4767 : UnmapViewOfFile(param);
4768 : CloseHandle(paramHandle);
4769 : return -1; /* log made by save_backend_variables */
4770 : }
4771 :
4772 : /* Drop the parameter shared memory that is now inherited to the backend */
4773 : if (!UnmapViewOfFile(param))
4774 : ereport(LOG,
4775 : (errmsg("could not unmap view of backend parameter file: error code %lu",
4776 : GetLastError())));
4777 : if (!CloseHandle(paramHandle))
4778 : ereport(LOG,
4779 : (errmsg("could not close handle to backend parameter file: error code %lu",
4780 : GetLastError())));
4781 :
4782 : /*
4783 : * Reserve the memory region used by our main shared memory segment before
4784 : * we resume the child process. Normally this should succeed, but if ASLR
4785 : * is active then it might sometimes fail due to the stack or heap having
4786 : * gotten mapped into that range. In that case, just terminate the
4787 : * process and retry.
4788 : */
4789 : if (!pgwin32_ReserveSharedMemoryRegion(pi.hProcess))
4790 : {
4791 : /* pgwin32_ReserveSharedMemoryRegion already made a log entry */
4792 : if (!TerminateProcess(pi.hProcess, 255))
4793 : ereport(LOG,
4794 : (errmsg_internal("could not terminate process that failed to reserve memory: error code %lu",
4795 : GetLastError())));
4796 : CloseHandle(pi.hProcess);
4797 : CloseHandle(pi.hThread);
4798 : if (++retry_count < 100)
4799 : goto retry;
4800 : ereport(LOG,
4801 : (errmsg("giving up after too many tries to reserve shared memory"),
4802 : errhint("This might be caused by ASLR or antivirus software.")));
4803 : return -1;
4804 : }
4805 :
4806 : /*
4807 : * Now that the backend variables are written out, we start the child
4808 : * thread so it can start initializing while we set up the rest of the
4809 : * parent state.
4810 : */
4811 : if (ResumeThread(pi.hThread) == -1)
4812 : {
4813 : if (!TerminateProcess(pi.hProcess, 255))
4814 : {
4815 : ereport(LOG,
4816 : (errmsg_internal("could not terminate unstartable process: error code %lu",
4817 : GetLastError())));
4818 : CloseHandle(pi.hProcess);
4819 : CloseHandle(pi.hThread);
4820 : return -1;
4821 : }
4822 : CloseHandle(pi.hProcess);
4823 : CloseHandle(pi.hThread);
4824 : ereport(LOG,
4825 : (errmsg_internal("could not resume thread of unstarted process: error code %lu",
4826 : GetLastError())));
4827 : return -1;
4828 : }
4829 :
4830 : /*
4831 : * Queue a waiter to signal when this child dies. The wait will be handled
4832 : * automatically by an operating system thread pool.
4833 : *
4834 : * Note: use malloc instead of palloc, since it needs to be thread-safe.
4835 : * Struct will be free():d from the callback function that runs on a
4836 : * different thread.
4837 : */
4838 : childinfo = malloc(sizeof(win32_deadchild_waitinfo));
4839 : if (!childinfo)
4840 : ereport(FATAL,
4841 : (errcode(ERRCODE_OUT_OF_MEMORY),
4842 : errmsg("out of memory")));
4843 :
4844 : childinfo->procHandle = pi.hProcess;
4845 : childinfo->procId = pi.dwProcessId;
4846 :
4847 : if (!RegisterWaitForSingleObject(&childinfo->waitHandle,
4848 : pi.hProcess,
4849 : pgwin32_deadchild_callback,
4850 : childinfo,
4851 : INFINITE,
4852 : WT_EXECUTEONLYONCE | WT_EXECUTEINWAITTHREAD))
4853 : ereport(FATAL,
4854 : (errmsg_internal("could not register process for wait: error code %lu",
4855 : GetLastError())));
4856 :
4857 : /* Don't close pi.hProcess here - the wait thread needs access to it */
4858 :
4859 : CloseHandle(pi.hThread);
4860 :
4861 : return pi.dwProcessId;
4862 : }
4863 : #endif /* WIN32 */
4864 :
4865 :
4866 : /*
4867 : * SubPostmasterMain -- Get the fork/exec'd process into a state equivalent
4868 : * to what it would be if we'd simply forked on Unix, and then
4869 : * dispatch to the appropriate place.
4870 : *
4871 : * The first two command line arguments are expected to be "--forkFOO"
4872 : * (where FOO indicates which postmaster child we are to become), and
4873 : * the name of a variables file that we can read to load data that would
4874 : * have been inherited by fork() on Unix. Remaining arguments go to the
4875 : * subprocess FooMain() routine.
4876 : */
4877 : void
4878 : SubPostmasterMain(int argc, char *argv[])
4879 : {
4880 : Port port;
4881 :
4882 : /* In EXEC_BACKEND case we will not have inherited these settings */
4883 : IsPostmasterEnvironment = true;
4884 : whereToSendOutput = DestNone;
4885 :
4886 : /* Setup essential subsystems (to ensure elog() behaves sanely) */
4887 : InitializeGUCOptions();
4888 :
4889 : /* Check we got appropriate args */
4890 : if (argc < 3)
4891 : elog(FATAL, "invalid subpostmaster invocation");
4892 :
4893 : /* Read in the variables file */
4894 : memset(&port, 0, sizeof(Port));
4895 : read_backend_variables(argv[2], &port);
4896 :
4897 : /* Close the postmaster's sockets (as soon as we know them) */
4898 : ClosePostmasterPorts(strcmp(argv[1], "--forklog") == 0);
4899 :
4900 : /* Setup as postmaster child */
4901 : InitPostmasterChild();
4902 :
4903 : /*
4904 : * If appropriate, physically re-attach to shared memory segment. We want
4905 : * to do this before going any further to ensure that we can attach at the
4906 : * same address the postmaster used. On the other hand, if we choose not
4907 : * to re-attach, we may have other cleanup to do.
4908 : *
4909 : * If testing EXEC_BACKEND on Linux, you should run this as root before
4910 : * starting the postmaster:
4911 : *
4912 : * echo 0 >/proc/sys/kernel/randomize_va_space
4913 : *
4914 : * This prevents using randomized stack and code addresses that cause the
4915 : * child process's memory map to be different from the parent's, making it
4916 : * sometimes impossible to attach to shared memory at the desired address.
4917 : * Return the setting to its old value (usually '1' or '2') when finished.
4918 : */
4919 : if (strcmp(argv[1], "--forkbackend") == 0 ||
4920 : strcmp(argv[1], "--forkavlauncher") == 0 ||
4921 : strcmp(argv[1], "--forkavworker") == 0 ||
4922 : strcmp(argv[1], "--forkaux") == 0 ||
4923 : strncmp(argv[1], "--forkbgworker=", 15) == 0)
4924 : PGSharedMemoryReAttach();
4925 : else
4926 : PGSharedMemoryNoReAttach();
4927 :
4928 : /* autovacuum needs this set before calling InitProcess */
4929 : if (strcmp(argv[1], "--forkavlauncher") == 0)
4930 : AutovacuumLauncherIAm();
4931 : if (strcmp(argv[1], "--forkavworker") == 0)
4932 : AutovacuumWorkerIAm();
4933 :
4934 : /* Read in remaining GUC variables */
4935 : read_nondefault_variables();
4936 :
4937 : /*
4938 : * Check that the data directory looks valid, which will also check the
4939 : * privileges on the data directory and update our umask and file/group
4940 : * variables for creating files later. Note: this should really be done
4941 : * before we create any files or directories.
4942 : */
4943 : checkDataDir();
4944 :
4945 : /*
4946 : * (re-)read control file, as it contains config. The postmaster will
4947 : * already have read this, but this process doesn't know about that.
4948 : */
4949 : LocalProcessControlFile(false);
4950 :
4951 : /*
4952 : * Reload any libraries that were preloaded by the postmaster. Since we
4953 : * exec'd this process, those libraries didn't come along with us; but we
4954 : * should load them into all child processes to be consistent with the
4955 : * non-EXEC_BACKEND behavior.
4956 : */
4957 : process_shared_preload_libraries();
4958 :
4959 : /* Run backend or appropriate child */
4960 : if (strcmp(argv[1], "--forkbackend") == 0)
4961 : {
4962 : Assert(argc == 3); /* shouldn't be any more args */
4963 :
4964 : /*
4965 : * Need to reinitialize the SSL library in the backend, since the
4966 : * context structures contain function pointers and cannot be passed
4967 : * through the parameter file.
4968 : *
4969 : * If for some reason reload fails (maybe the user installed broken
4970 : * key files), soldier on without SSL; that's better than all
4971 : * connections becoming impossible.
4972 : *
4973 : * XXX should we do this in all child processes? For the moment it's
4974 : * enough to do it in backend children.
4975 : */
4976 : #ifdef USE_SSL
4977 : if (EnableSSL)
4978 : {
4979 : if (secure_initialize(false) == 0)
4980 : LoadedSSL = true;
4981 : else
4982 : ereport(LOG,
4983 : (errmsg("SSL configuration could not be loaded in child process")));
4984 : }
4985 : #endif
4986 :
4987 : /*
4988 : * Perform additional initialization and collect startup packet.
4989 : *
4990 : * We want to do this before InitProcess() for a couple of reasons: 1.
4991 : * so that we aren't eating up a PGPROC slot while waiting on the
4992 : * client. 2. so that if InitProcess() fails due to being out of
4993 : * PGPROC slots, we have already initialized libpq and are able to
4994 : * report the error to the client.
4995 : */
4996 : BackendInitialize(&port);
4997 :
4998 : /* Restore basic shared memory pointers */
4999 : InitShmemAccess(UsedShmemSegAddr);
5000 :
5001 : /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5002 : InitProcess();
5003 :
5004 : /* Attach process to shared data structures */
5005 : CreateSharedMemoryAndSemaphores();
5006 :
5007 : /* And run the backend */
5008 : BackendRun(&port); /* does not return */
5009 : }
5010 : if (strcmp(argv[1], "--forkaux") == 0)
5011 : {
5012 : AuxProcType auxtype;
5013 :
5014 : Assert(argc == 4);
5015 :
5016 : /* Restore basic shared memory pointers */
5017 : InitShmemAccess(UsedShmemSegAddr);
5018 :
5019 : /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5020 : InitAuxiliaryProcess();
5021 :
5022 : /* Attach process to shared data structures */
5023 : CreateSharedMemoryAndSemaphores();
5024 :
5025 : auxtype = atoi(argv[3]);
5026 : AuxiliaryProcessMain(auxtype); /* does not return */
5027 : }
5028 : if (strcmp(argv[1], "--forkavlauncher") == 0)
5029 : {
5030 : /* Restore basic shared memory pointers */
5031 : InitShmemAccess(UsedShmemSegAddr);
5032 :
5033 : /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5034 : InitProcess();
5035 :
5036 : /* Attach process to shared data structures */
5037 : CreateSharedMemoryAndSemaphores();
5038 :
5039 : AutoVacLauncherMain(argc - 2, argv + 2); /* does not return */
5040 : }
5041 : if (strcmp(argv[1], "--forkavworker") == 0)
5042 : {
5043 : /* Restore basic shared memory pointers */
5044 : InitShmemAccess(UsedShmemSegAddr);
5045 :
5046 : /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5047 : InitProcess();
5048 :
5049 : /* Attach process to shared data structures */
5050 : CreateSharedMemoryAndSemaphores();
5051 :
5052 : AutoVacWorkerMain(argc - 2, argv + 2); /* does not return */
5053 : }
5054 : if (strncmp(argv[1], "--forkbgworker=", 15) == 0)
5055 : {
5056 : int shmem_slot;
5057 :
5058 : /* do this as early as possible; in particular, before InitProcess() */
5059 : IsBackgroundWorker = true;
5060 :
5061 : /* Restore basic shared memory pointers */
5062 : InitShmemAccess(UsedShmemSegAddr);
5063 :
5064 : /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5065 : InitProcess();
5066 :
5067 : /* Attach process to shared data structures */
5068 : CreateSharedMemoryAndSemaphores();
5069 :
5070 : /* Fetch MyBgworkerEntry from shared memory */
5071 : shmem_slot = atoi(argv[1] + 15);
5072 : MyBgworkerEntry = BackgroundWorkerEntry(shmem_slot);
5073 :
5074 : StartBackgroundWorker();
5075 : }
5076 : if (strcmp(argv[1], "--forklog") == 0)
5077 : {
5078 : /* Do not want to attach to shared memory */
5079 :
5080 : SysLoggerMain(argc, argv); /* does not return */
5081 : }
5082 :
5083 : abort(); /* shouldn't get here */
5084 : }
5085 : #endif /* EXEC_BACKEND */
5086 :
5087 :
/*
 * ExitPostmaster -- cleanup
 *
 * This is the one sanctioned way out of the postmaster: do NOT call exit()
 * directly, always come through here.
 */
static void
ExitPostmaster(int status)
{
#ifdef HAVE_PTHREAD_IS_THREADED_NP

	/*
	 * No cause is known for a postmaster turning multithreaded after
	 * startup, but recheck anyway in case of unknown causes.  LOG level is
	 * used since an unclean shutdown at this point would usually not look
	 * much different from a clean one.
	 */
	if (pthread_is_threaded_np() != 0)
	{
		ereport(LOG,
				(errcode(ERRCODE_INTERNAL_ERROR),
				 errmsg_internal("postmaster became multithreaded"),
				 errdetail("Please report this to <%s>.", PACKAGE_BUGREPORT)));
	}
#endif

	/*
	 * Long-standing notes kept from the original code: this routine "should
	 * cleanup shared memory and kill all backends".  Whether backends ought
	 * to be killed when the postmaster dies was left as an open question
	 * ("probably not"), answered with "MUST -- vadim 05-10-1999".
	 */

	proc_exit(status);
}
5122 :
5123 : /*
5124 : * sigusr1_handler - handle signal conditions from child processes
5125 : */
5126 : static void
5127 4002 : sigusr1_handler(SIGNAL_ARGS)
5128 : {
5129 4002 : int save_errno = errno;
5130 :
5131 : /*
5132 : * We rely on the signal mechanism to have blocked all signals ... except
5133 : * on Windows, which lacks sigaction(), so we have to do it manually.
5134 : */
5135 : #ifdef WIN32
5136 : PG_SETMASK(&BlockSig);
5137 : #endif
5138 :
5139 : /*
5140 : * RECOVERY_STARTED and BEGIN_HOT_STANDBY signals are ignored in
5141 : * unexpected states. If the startup process quickly starts up, completes
5142 : * recovery, exits, we might process the death of the startup process
5143 : * first. We don't want to go back to recovery in that case.
5144 : */
5145 4002 : if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_STARTED) &&
5146 344 : pmState == PM_STARTUP && Shutdown == NoShutdown)
5147 : {
5148 : /* WAL redo has started. We're out of reinitialization. */
5149 344 : FatalError = false;
5150 344 : AbortStartTime = 0;
5151 :
5152 : /*
5153 : * Start the archiver if we're responsible for (re-)archiving received
5154 : * files.
5155 : */
5156 : Assert(PgArchPID == 0);
5157 344 : if (XLogArchivingAlways())
5158 6 : PgArchPID = StartArchiver();
5159 :
5160 : /*
5161 : * If we aren't planning to enter hot standby mode later, treat
5162 : * RECOVERY_STARTED as meaning we're out of startup, and report status
5163 : * accordingly.
5164 : */
5165 344 : if (!EnableHotStandby)
5166 : {
5167 4 : AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STANDBY);
5168 : #ifdef USE_SYSTEMD
5169 : sd_notify(0, "READY=1");
5170 : #endif
5171 : }
5172 :
5173 344 : pmState = PM_RECOVERY;
5174 : }
5175 :
5176 4002 : if (CheckPostmasterSignal(PMSIGNAL_BEGIN_HOT_STANDBY) &&
5177 208 : pmState == PM_RECOVERY && Shutdown == NoShutdown)
5178 : {
5179 208 : ereport(LOG,
5180 : (errmsg("database system is ready to accept read-only connections")));
5181 :
5182 : /* Report status */
5183 208 : AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_READY);
5184 : #ifdef USE_SYSTEMD
5185 : sd_notify(0, "READY=1");
5186 : #endif
5187 :
5188 208 : pmState = PM_HOT_STANDBY;
5189 208 : connsAllowed = true;
5190 :
5191 : /* Some workers may be scheduled to start now */
5192 208 : StartWorkerNeeded = true;
5193 : }
5194 :
5195 : /* Process background worker state changes. */
5196 4002 : if (CheckPostmasterSignal(PMSIGNAL_BACKGROUND_WORKER_CHANGE))
5197 : {
5198 : /* Accept new worker requests only if not stopping. */
5199 1646 : BackgroundWorkerStateChange(pmState < PM_STOP_BACKENDS);
5200 1646 : StartWorkerNeeded = true;
5201 : }
5202 :
5203 4002 : if (StartWorkerNeeded || HaveCrashedWorker)
5204 1854 : maybe_start_bgworkers();
5205 :
5206 : /* Tell syslogger to rotate logfile if requested */
5207 4002 : if (SysLoggerPID != 0)
5208 : {
5209 2 : if (CheckLogrotateSignal())
5210 : {
5211 2 : signal_child(SysLoggerPID, SIGUSR1);
5212 2 : RemoveLogrotateSignalFiles();
5213 : }
5214 0 : else if (CheckPostmasterSignal(PMSIGNAL_ROTATE_LOGFILE))
5215 : {
5216 0 : signal_child(SysLoggerPID, SIGUSR1);
5217 : }
5218 : }
5219 :
5220 4002 : if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER) &&
5221 0 : Shutdown <= SmartShutdown && pmState < PM_STOP_BACKENDS)
5222 : {
5223 : /*
5224 : * Start one iteration of the autovacuum daemon, even if autovacuuming
5225 : * is nominally not enabled. This is so we can have an active defense
5226 : * against transaction ID wraparound. We set a flag for the main loop
5227 : * to do it rather than trying to do it here --- this is because the
5228 : * autovac process itself may send the signal, and we want to handle
5229 : * that by launching another iteration as soon as the current one
5230 : * completes.
5231 : */
5232 0 : start_autovac_launcher = true;
5233 : }
5234 :
5235 4002 : if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER) &&
5236 12 : Shutdown <= SmartShutdown && pmState < PM_STOP_BACKENDS)
5237 : {
5238 : /* The autovacuum launcher wants us to start a worker process. */
5239 12 : StartAutovacuumWorker();
5240 : }
5241 :
5242 4002 : if (CheckPostmasterSignal(PMSIGNAL_START_WALRECEIVER))
5243 : {
5244 : /* Startup Process wants us to start the walreceiver process. */
5245 : /* Start immediately if possible, else remember request for later. */
5246 302 : WalReceiverRequested = true;
5247 302 : MaybeStartWalReceiver();
5248 : }
5249 :
5250 : /*
5251 : * Try to advance postmaster's state machine, if a child requests it.
5252 : *
5253 : * Be careful about the order of this action relative to sigusr1_handler's
5254 : * other actions. Generally, this should be after other actions, in case
5255 : * they have effects PostmasterStateMachine would need to know about.
5256 : * However, we should do it before the CheckPromoteSignal step, which
5257 : * cannot have any (immediate) effect on the state machine, but does
5258 : * depend on what state we're in now.
5259 : */
5260 4002 : if (CheckPostmasterSignal(PMSIGNAL_ADVANCE_STATE_MACHINE))
5261 : {
5262 1372 : PostmasterStateMachine();
5263 : }
5264 :
5265 4002 : if (StartupPID != 0 &&
5266 926 : (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
5267 1520 : pmState == PM_HOT_STANDBY) &&
5268 926 : CheckPromoteSignal())
5269 : {
5270 : /*
5271 : * Tell startup process to finish recovery.
5272 : *
5273 : * Leave the promote signal file in place and let the Startup process
5274 : * do the unlink.
5275 : */
5276 70 : signal_child(StartupPID, SIGUSR2);
5277 : }
5278 :
5279 : #ifdef WIN32
5280 : PG_SETMASK(&UnBlockSig);
5281 : #endif
5282 :
5283 4002 : errno = save_errno;
5284 4002 : }
5285 :
/*
 * SIGTERM while processing startup packet.
 *
 * Calling proc_exit() from within a signal handler would be quite unsafe.
 * Since shared memory has not been touched yet at this stage, it is enough
 * to pull the plug and leave without running any atexit handlers.
 *
 * Sending or logging a message that explains the disconnect may look
 * tempting, but would be quite unsafe in itself.  It also seems undesirable
 * to give clues about the database's state to a client that has not yet
 * completed authentication, or even sent us a startup packet.
 */
static void
process_startup_packet_die(SIGNAL_ARGS)
{
	/* Immediate exit with status 1 */
	_exit(1);
}
5304 :
/*
 * Dummy signal handler
 *
 * Installed for signals that the postmaster itself has no use for, but that
 * backends do use.  Were the postmaster to SIG_IGN such signals, a newly
 * started backend might drop a signal that arrives before it has been able
 * to reconfigure its signal processing.  (See notes in tcop/postgres.c.)
 */
static void
dummy_handler(SIGNAL_ARGS)
{
	/* intentionally empty */
}
5318 :
/*
 * Timeout while processing startup packet.
 *
 * Same exit strategy as process_startup_packet_die(): leave via _exit(1).
 */
static void
StartupPacketTimeoutHandler(void)
{
	/* Immediate exit with status 1 */
	_exit(1);
}
5328 :
5329 :
5330 : /*
5331 : * Generate a random cancel key.
5332 : */
5333 : static bool
5334 19650 : RandomCancelKey(int32 *cancel_key)
5335 : {
5336 19650 : return pg_strong_random(cancel_key, sizeof(int32));
5337 : }
5338 :
5339 : /*
5340 : * Count up number of child processes of specified types (dead_end children
5341 : * are always excluded).
5342 : */
5343 : static int
5344 25522 : CountChildren(int target)
5345 : {
5346 : dlist_iter iter;
5347 25522 : int cnt = 0;
5348 :
5349 84228 : dlist_foreach(iter, &BackendList)
5350 : {
5351 58706 : Backend *bp = dlist_container(Backend, elem, iter.cur);
5352 :
5353 58706 : if (bp->dead_end)
5354 32 : continue;
5355 :
5356 : /*
5357 : * Since target == BACKEND_TYPE_ALL is the most common case, we test
5358 : * it first and avoid touching shared memory for every child.
5359 : */
5360 58674 : if (target != BACKEND_TYPE_ALL)
5361 : {
5362 : /*
5363 : * Assign bkend_type for any recently announced WAL Sender
5364 : * processes.
5365 : */
5366 5456 : if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
5367 1344 : IsPostmasterChildWalSender(bp->child_slot))
5368 68 : bp->bkend_type = BACKEND_TYPE_WALSND;
5369 :
5370 4112 : if (!(target & bp->bkend_type))
5371 442 : continue;
5372 : }
5373 :
5374 58232 : cnt++;
5375 : }
5376 25522 : return cnt;
5377 : }
5378 :
5379 :
5380 : /*
5381 : * StartChildProcess -- start an auxiliary process for the postmaster
5382 : *
5383 : * "type" determines what kind of child will be started. All child types
5384 : * initially go to AuxiliaryProcessMain, which will handle common setup.
5385 : *
5386 : * Return value of StartChildProcess is subprocess' PID, or 0 if failed
5387 : * to start subprocess.
5388 : */
5389 : static pid_t
5390 4422 : StartChildProcess(AuxProcType type)
5391 : {
5392 : pid_t pid;
5393 :
5394 : #ifdef EXEC_BACKEND
5395 : {
5396 : char *av[10];
5397 : int ac = 0;
5398 : char typebuf[32];
5399 :
5400 : /*
5401 : * Set up command-line arguments for subprocess
5402 : */
5403 : av[ac++] = "postgres";
5404 : av[ac++] = "--forkaux";
5405 : av[ac++] = NULL; /* filled in by postmaster_forkexec */
5406 :
5407 : snprintf(typebuf, sizeof(typebuf), "%d", type);
5408 : av[ac++] = typebuf;
5409 :
5410 : av[ac] = NULL;
5411 : Assert(ac < lengthof(av));
5412 :
5413 : pid = postmaster_forkexec(ac, av);
5414 : }
5415 : #else /* !EXEC_BACKEND */
5416 4422 : pid = fork_process();
5417 :
5418 7504 : if (pid == 0) /* child */
5419 : {
5420 3082 : InitPostmasterChild();
5421 :
5422 : /* Close the postmaster's sockets */
5423 3082 : ClosePostmasterPorts(false);
5424 :
5425 : /* Release postmaster's working memory context */
5426 3082 : MemoryContextSwitchTo(TopMemoryContext);
5427 3082 : MemoryContextDelete(PostmasterContext);
5428 3082 : PostmasterContext = NULL;
5429 :
5430 3082 : AuxiliaryProcessMain(type); /* does not return */
5431 : }
5432 : #endif /* EXEC_BACKEND */
5433 :
5434 4422 : if (pid < 0)
5435 : {
5436 : /* in parent, fork failed */
5437 0 : int save_errno = errno;
5438 :
5439 0 : errno = save_errno;
5440 0 : switch (type)
5441 : {
5442 0 : case StartupProcess:
5443 0 : ereport(LOG,
5444 : (errmsg("could not fork startup process: %m")));
5445 0 : break;
5446 0 : case ArchiverProcess:
5447 0 : ereport(LOG,
5448 : (errmsg("could not fork archiver process: %m")));
5449 0 : break;
5450 0 : case BgWriterProcess:
5451 0 : ereport(LOG,
5452 : (errmsg("could not fork background writer process: %m")));
5453 0 : break;
5454 0 : case CheckpointerProcess:
5455 0 : ereport(LOG,
5456 : (errmsg("could not fork checkpointer process: %m")));
5457 0 : break;
5458 0 : case WalWriterProcess:
5459 0 : ereport(LOG,
5460 : (errmsg("could not fork WAL writer process: %m")));
5461 0 : break;
5462 0 : case WalReceiverProcess:
5463 0 : ereport(LOG,
5464 : (errmsg("could not fork WAL receiver process: %m")));
5465 0 : break;
5466 0 : default:
5467 0 : ereport(LOG,
5468 : (errmsg("could not fork process: %m")));
5469 0 : break;
5470 : }
5471 :
5472 : /*
5473 : * fork failure is fatal during startup, but there's no need to choke
5474 : * immediately if starting other child types fails.
5475 : */
5476 0 : if (type == StartupProcess)
5477 0 : ExitPostmaster(1);
5478 0 : return 0;
5479 : }
5480 :
5481 : /*
5482 : * in parent, successful fork
5483 : */
5484 4422 : return pid;
5485 : }
5486 :
5487 : /*
5488 : * StartAutovacuumWorker
5489 : * Start an autovac worker process.
5490 : *
5491 : * This function is here because it enters the resulting PID into the
5492 : * postmaster's private backends list.
5493 : *
5494 : * NB -- this code very roughly matches BackendStartup.
5495 : */
5496 : static void
5497 12 : StartAutovacuumWorker(void)
5498 : {
5499 : Backend *bn;
5500 :
5501 : /*
5502 : * If not in condition to run a process, don't try, but handle it like a
5503 : * fork failure. This does not normally happen, since the signal is only
5504 : * supposed to be sent by autovacuum launcher when it's OK to do it, but
5505 : * we have to check to avoid race-condition problems during DB state
5506 : * changes.
5507 : */
5508 12 : if (canAcceptConnections(BACKEND_TYPE_AUTOVAC) == CAC_OK)
5509 : {
5510 : /*
5511 : * Compute the cancel key that will be assigned to this session. We
5512 : * probably don't need cancel keys for autovac workers, but we'd
5513 : * better have something random in the field to prevent unfriendly
5514 : * people from sending cancels to them.
5515 : */
5516 12 : if (!RandomCancelKey(&MyCancelKey))
5517 : {
5518 0 : ereport(LOG,
5519 : (errcode(ERRCODE_INTERNAL_ERROR),
5520 : errmsg("could not generate random cancel key")));
5521 0 : return;
5522 : }
5523 :
5524 12 : bn = (Backend *) malloc(sizeof(Backend));
5525 12 : if (bn)
5526 : {
5527 12 : bn->cancel_key = MyCancelKey;
5528 :
5529 : /* Autovac workers are not dead_end and need a child slot */
5530 12 : bn->dead_end = false;
5531 12 : bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
5532 12 : bn->bgworker_notify = false;
5533 :
5534 12 : bn->pid = StartAutoVacWorker();
5535 12 : if (bn->pid > 0)
5536 : {
5537 12 : bn->bkend_type = BACKEND_TYPE_AUTOVAC;
5538 12 : dlist_push_head(&BackendList, &bn->elem);
5539 : #ifdef EXEC_BACKEND
5540 : ShmemBackendArrayAdd(bn);
5541 : #endif
5542 : /* all OK */
5543 12 : return;
5544 : }
5545 :
5546 : /*
5547 : * fork failed, fall through to report -- actual error message was
5548 : * logged by StartAutoVacWorker
5549 : */
5550 0 : (void) ReleasePostmasterChildSlot(bn->child_slot);
5551 0 : free(bn);
5552 : }
5553 : else
5554 0 : ereport(LOG,
5555 : (errcode(ERRCODE_OUT_OF_MEMORY),
5556 : errmsg("out of memory")));
5557 : }
5558 :
5559 : /*
5560 : * Report the failure to the launcher, if it's running. (If it's not, we
5561 : * might not even be connected to shared memory, so don't try to call
5562 : * AutoVacWorkerFailed.) Note that we also need to signal it so that it
5563 : * responds to the condition, but we don't do that here, instead waiting
5564 : * for ServerLoop to do it. This way we avoid a ping-pong signaling in
5565 : * quick succession between the autovac launcher and postmaster in case
5566 : * things get ugly.
5567 : */
5568 0 : if (AutoVacPID != 0)
5569 : {
5570 0 : AutoVacWorkerFailed();
5571 0 : avlauncher_needs_signal = true;
5572 : }
5573 : }
5574 :
5575 : /*
5576 : * MaybeStartWalReceiver
5577 : * Start the WAL receiver process, if not running and our state allows.
5578 : *
5579 : * Note: if WalReceiverPID is already nonzero, it might seem that we should
5580 : * clear WalReceiverRequested. However, there's a race condition if the
5581 : * walreceiver terminates and the startup process immediately requests a new
5582 : * one: it's quite possible to get the signal for the request before reaping
5583 : * the dead walreceiver process. Better to risk launching an extra
5584 : * walreceiver than to miss launching one we need. (The walreceiver code
5585 : * has logic to recognize that it should go away if not needed.)
5586 : */
5587 : static void
5588 508 : MaybeStartWalReceiver(void)
5589 : {
5590 508 : if (WalReceiverPID == 0 &&
5591 306 : (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
5592 304 : pmState == PM_HOT_STANDBY) &&
5593 298 : Shutdown <= SmartShutdown)
5594 : {
5595 298 : WalReceiverPID = StartWalReceiver();
5596 298 : if (WalReceiverPID != 0)
5597 298 : WalReceiverRequested = false;
5598 : /* else leave the flag set, so we'll try again later */
5599 : }
5600 508 : }
5601 :
5602 :
5603 : /*
5604 : * Create the opts file
5605 : */
5606 : static bool
5607 1044 : CreateOptsFile(int argc, char *argv[], char *fullprogname)
5608 : {
5609 : FILE *fp;
5610 : int i;
5611 :
5612 : #define OPTS_FILE "postmaster.opts"
5613 :
5614 1044 : if ((fp = fopen(OPTS_FILE, "w")) == NULL)
5615 : {
5616 0 : ereport(LOG,
5617 : (errcode_for_file_access(),
5618 : errmsg("could not create file \"%s\": %m", OPTS_FILE)));
5619 0 : return false;
5620 : }
5621 :
5622 1044 : fprintf(fp, "%s", fullprogname);
5623 4872 : for (i = 1; i < argc; i++)
5624 3828 : fprintf(fp, " \"%s\"", argv[i]);
5625 1044 : fputs("\n", fp);
5626 :
5627 1044 : if (fclose(fp))
5628 : {
5629 0 : ereport(LOG,
5630 : (errcode_for_file_access(),
5631 : errmsg("could not write file \"%s\": %m", OPTS_FILE)));
5632 0 : return false;
5633 : }
5634 :
5635 1044 : return true;
5636 : }
5637 :
5638 :
5639 : /*
5640 : * MaxLivePostmasterChildren
5641 : *
5642 : * This reports the number of entries needed in per-child-process arrays
5643 : * (the PMChildFlags array, and if EXEC_BACKEND the ShmemBackendArray).
5644 : * These arrays include regular backends, autovac workers, walsenders
5645 : * and background workers, but not special children nor dead_end children.
5646 : * This allows the arrays to have a fixed maximum size, to wit the same
5647 : * too-many-children limit enforced by canAcceptConnections(). The exact value
5648 : * isn't too critical as long as it's more than MaxBackends.
5649 : */
5650 : int
5651 33448 : MaxLivePostmasterChildren(void)
5652 : {
5653 66896 : return 2 * (MaxConnections + autovacuum_max_workers + 1 +
5654 33448 : max_wal_senders + max_worker_processes);
5655 : }
5656 :
5657 : /*
5658 : * Connect background worker to a database.
5659 : */
5660 : void
5661 574 : BackgroundWorkerInitializeConnection(const char *dbname, const char *username, uint32 flags)
5662 : {
5663 574 : BackgroundWorker *worker = MyBgworkerEntry;
5664 :
5665 : /* XXX is this the right errcode? */
5666 574 : if (!(worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION))
5667 0 : ereport(FATAL,
5668 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5669 : errmsg("database connection requirement not indicated during registration")));
5670 :
5671 574 : InitPostgres(dbname, InvalidOid, username, InvalidOid, NULL, (flags & BGWORKER_BYPASS_ALLOWCONN) != 0);
5672 :
5673 : /* it had better not gotten out of "init" mode yet */
5674 570 : if (!IsInitProcessingMode())
5675 0 : ereport(ERROR,
5676 : (errmsg("invalid processing mode in background worker")));
5677 570 : SetProcessingMode(NormalProcessing);
5678 570 : }
5679 :
5680 : /*
5681 : * Connect background worker to a database using OIDs.
5682 : */
5683 : void
5684 3050 : BackgroundWorkerInitializeConnectionByOid(Oid dboid, Oid useroid, uint32 flags)
5685 : {
5686 3050 : BackgroundWorker *worker = MyBgworkerEntry;
5687 :
5688 : /* XXX is this the right errcode? */
5689 3050 : if (!(worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION))
5690 0 : ereport(FATAL,
5691 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5692 : errmsg("database connection requirement not indicated during registration")));
5693 :
5694 3050 : InitPostgres(NULL, dboid, NULL, useroid, NULL, (flags & BGWORKER_BYPASS_ALLOWCONN) != 0);
5695 :
5696 : /* it had better not gotten out of "init" mode yet */
5697 3044 : if (!IsInitProcessingMode())
5698 0 : ereport(ERROR,
5699 : (errmsg("invalid processing mode in background worker")));
5700 3044 : SetProcessingMode(NormalProcessing);
5701 3044 : }
5702 :
5703 : /*
5704 : * Block/unblock signals in a background worker
5705 : */
5706 : void
5707 0 : BackgroundWorkerBlockSignals(void)
5708 : {
5709 0 : PG_SETMASK(&BlockSig);
5710 0 : }
5711 :
5712 : void
5713 3720 : BackgroundWorkerUnblockSignals(void)
5714 : {
5715 3720 : PG_SETMASK(&UnBlockSig);
5716 3720 : }
5717 :
5718 : #ifdef EXEC_BACKEND
5719 : static pid_t
5720 : bgworker_forkexec(int shmem_slot)
5721 : {
5722 : char *av[10];
5723 : int ac = 0;
5724 : char forkav[MAXPGPATH];
5725 :
5726 : snprintf(forkav, MAXPGPATH, "--forkbgworker=%d", shmem_slot);
5727 :
5728 : av[ac++] = "postgres";
5729 : av[ac++] = forkav;
5730 : av[ac++] = NULL; /* filled in by postmaster_forkexec */
5731 : av[ac] = NULL;
5732 :
5733 : Assert(ac < lengthof(av));
5734 :
5735 : return postmaster_forkexec(ac, av);
5736 : }
5737 : #endif
5738 :
5739 : /*
5740 : * Start a new bgworker.
5741 : * Starting time conditions must have been checked already.
5742 : *
5743 : * Returns true on success, false on failure.
5744 : * In either case, update the RegisteredBgWorker's state appropriately.
5745 : *
5746 : * This code is heavily based on autovacuum.c, q.v.
5747 : */
5748 : static bool
5749 3996 : do_start_bgworker(RegisteredBgWorker *rw)
5750 : {
5751 : pid_t worker_pid;
5752 :
5753 : Assert(rw->rw_pid == 0);
5754 :
5755 : /*
5756 : * Allocate and assign the Backend element. Note we must do this before
5757 : * forking, so that we can handle failures (out of memory or child-process
5758 : * slots) cleanly.
5759 : *
5760 : * Treat failure as though the worker had crashed. That way, the
5761 : * postmaster will wait a bit before attempting to start it again; if we
5762 : * tried again right away, most likely we'd find ourselves hitting the
5763 : * same resource-exhaustion condition.
5764 : */
5765 3996 : if (!assign_backendlist_entry(rw))
5766 : {
5767 0 : rw->rw_crashed_at = GetCurrentTimestamp();
5768 0 : return false;
5769 : }
5770 :
5771 3996 : ereport(DEBUG1,
5772 : (errmsg_internal("starting background worker process \"%s\"",
5773 : rw->rw_worker.bgw_name)));
5774 :
5775 : #ifdef EXEC_BACKEND
5776 : switch ((worker_pid = bgworker_forkexec(rw->rw_shmem_slot)))
5777 : #else
5778 3996 : switch ((worker_pid = fork_process()))
5779 : #endif
5780 : {
5781 0 : case -1:
5782 : /* in postmaster, fork failed ... */
5783 0 : ereport(LOG,
5784 : (errmsg("could not fork worker process: %m")));
5785 : /* undo what assign_backendlist_entry did */
5786 0 : ReleasePostmasterChildSlot(rw->rw_child_slot);
5787 0 : rw->rw_child_slot = 0;
5788 0 : free(rw->rw_backend);
5789 0 : rw->rw_backend = NULL;
5790 : /* mark entry as crashed, so we'll try again later */
5791 0 : rw->rw_crashed_at = GetCurrentTimestamp();
5792 0 : break;
5793 :
5794 : #ifndef EXEC_BACKEND
5795 3638 : case 0:
5796 : /* in postmaster child ... */
5797 3638 : InitPostmasterChild();
5798 :
5799 : /* Close the postmaster's sockets */
5800 3638 : ClosePostmasterPorts(false);
5801 :
5802 : /*
5803 : * Before blowing away PostmasterContext, save this bgworker's
5804 : * data where it can find it.
5805 : */
5806 3638 : MyBgworkerEntry = (BackgroundWorker *)
5807 3638 : MemoryContextAlloc(TopMemoryContext, sizeof(BackgroundWorker));
5808 3638 : memcpy(MyBgworkerEntry, &rw->rw_worker, sizeof(BackgroundWorker));
5809 :
5810 : /* Release postmaster's working memory context */
5811 3638 : MemoryContextSwitchTo(TopMemoryContext);
5812 3638 : MemoryContextDelete(PostmasterContext);
5813 3638 : PostmasterContext = NULL;
5814 :
5815 3638 : StartBackgroundWorker();
5816 :
5817 : exit(1); /* should not get here */
5818 : break;
5819 : #endif
5820 3992 : default:
5821 : /* in postmaster, fork successful ... */
5822 3992 : rw->rw_pid = worker_pid;
5823 3992 : rw->rw_backend->pid = rw->rw_pid;
5824 3992 : ReportBackgroundWorkerPID(rw);
5825 : /* add new worker to lists of backends */
5826 3992 : dlist_push_head(&BackendList, &rw->rw_backend->elem);
5827 : #ifdef EXEC_BACKEND
5828 : ShmemBackendArrayAdd(rw->rw_backend);
5829 : #endif
5830 3992 : return true;
5831 : }
5832 :
5833 0 : return false;
5834 : }
5835 :
5836 : /*
5837 : * Does the current postmaster state require starting a worker with the
5838 : * specified start_time?
5839 : */
5840 : static bool
5841 5252 : bgworker_should_start_now(BgWorkerStartTime start_time)
5842 : {
5843 5252 : switch (pmState)
5844 : {
5845 0 : case PM_NO_CHILDREN:
5846 : case PM_WAIT_DEAD_END:
5847 : case PM_SHUTDOWN_2:
5848 : case PM_SHUTDOWN:
5849 : case PM_WAIT_BACKENDS:
5850 : case PM_STOP_BACKENDS:
5851 0 : break;
5852 :
5853 3996 : case PM_RUN:
5854 3996 : if (start_time == BgWorkerStart_RecoveryFinished)
5855 1416 : return true;
5856 : /* fall through */
5857 :
5858 : case PM_HOT_STANDBY:
5859 2788 : if (start_time == BgWorkerStart_ConsistentState)
5860 2580 : return true;
5861 : /* fall through */
5862 :
5863 : case PM_RECOVERY:
5864 : case PM_STARTUP:
5865 : case PM_INIT:
5866 1256 : if (start_time == BgWorkerStart_PostmasterStart)
5867 0 : return true;
5868 : /* fall through */
5869 : }
5870 :
5871 1256 : return false;
5872 : }
5873 :
5874 : /*
5875 : * Allocate the Backend struct for a connected background worker, but don't
5876 : * add it to the list of backends just yet.
5877 : *
5878 : * On failure, return false without changing any worker state.
5879 : *
5880 : * Some info from the Backend is copied into the passed rw.
5881 : */
5882 : static bool
5883 3996 : assign_backendlist_entry(RegisteredBgWorker *rw)
5884 : {
5885 : Backend *bn;
5886 :
5887 : /*
5888 : * Check that database state allows another connection. Currently the
5889 : * only possible failure is CAC_TOOMANY, so we just log an error message
5890 : * based on that rather than checking the error code precisely.
5891 : */
5892 3996 : if (canAcceptConnections(BACKEND_TYPE_BGWORKER) != CAC_OK)
5893 : {
5894 0 : ereport(LOG,
5895 : (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
5896 : errmsg("no slot available for new worker process")));
5897 0 : return false;
5898 : }
5899 :
5900 : /*
5901 : * Compute the cancel key that will be assigned to this session. We
5902 : * probably don't need cancel keys for background workers, but we'd better
5903 : * have something random in the field to prevent unfriendly people from
5904 : * sending cancels to them.
5905 : */
5906 3996 : if (!RandomCancelKey(&MyCancelKey))
5907 : {
5908 0 : ereport(LOG,
5909 : (errcode(ERRCODE_INTERNAL_ERROR),
5910 : errmsg("could not generate random cancel key")));
5911 0 : return false;
5912 : }
5913 :
5914 3996 : bn = malloc(sizeof(Backend));
5915 3996 : if (bn == NULL)
5916 : {
5917 0 : ereport(LOG,
5918 : (errcode(ERRCODE_OUT_OF_MEMORY),
5919 : errmsg("out of memory")));
5920 0 : return false;
5921 : }
5922 :
5923 3996 : bn->cancel_key = MyCancelKey;
5924 3996 : bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
5925 3996 : bn->bkend_type = BACKEND_TYPE_BGWORKER;
5926 3996 : bn->dead_end = false;
5927 3996 : bn->bgworker_notify = false;
5928 :
5929 3996 : rw->rw_backend = bn;
5930 3996 : rw->rw_child_slot = bn->child_slot;
5931 :
5932 3996 : return true;
5933 : }
5934 :
5935 : /*
5936 : * If the time is right, start background worker(s).
5937 : *
5938 : * As a side effect, the bgworker control variables are set or reset
5939 : * depending on whether more workers may need to be started.
5940 : *
5941 : * We limit the number of workers started per call, to avoid consuming the
5942 : * postmaster's attention for too long when many such requests are pending.
5943 : * As long as StartWorkerNeeded is true, ServerLoop will not block and will
5944 : * call this function again after dealing with any other issues.
5945 : */
5946 : static void
5947 8814 : maybe_start_bgworkers(void)
5948 : {
5949 : #define MAX_BGWORKERS_TO_LAUNCH 100
5950 8814 : int num_launched = 0;
5951 8814 : TimestampTz now = 0;
5952 : slist_mutable_iter iter;
5953 :
5954 : /*
5955 : * During crash recovery, we have no need to be called until the state
5956 : * transition out of recovery.
5957 : */
5958 8814 : if (FatalError)
5959 : {
5960 8 : StartWorkerNeeded = false;
5961 8 : HaveCrashedWorker = false;
5962 8 : return;
5963 : }
5964 :
5965 : /* Don't need to be called again unless we find a reason for it below */
5966 8806 : StartWorkerNeeded = false;
5967 8806 : HaveCrashedWorker = false;
5968 :
5969 24880 : slist_foreach_modify(iter, &BackgroundWorkerList)
5970 : {
5971 : RegisteredBgWorker *rw;
5972 :
5973 16078 : rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
5974 :
5975 : /* ignore if already running */
5976 16078 : if (rw->rw_pid != 0)
5977 8708 : continue;
5978 :
5979 : /* if marked for death, clean up and remove from list */
5980 7370 : if (rw->rw_terminate)
5981 : {
5982 0 : ForgetBackgroundWorker(&iter);
5983 0 : continue;
5984 : }
5985 :
5986 : /*
5987 : * If this worker has crashed previously, maybe it needs to be
5988 : * restarted (unless on registration it specified it doesn't want to
5989 : * be restarted at all). Check how long ago did a crash last happen.
5990 : * If the last crash is too recent, don't start it right away; let it
5991 : * be restarted once enough time has passed.
5992 : */
5993 7370 : if (rw->rw_crashed_at != 0)
5994 : {
5995 2118 : if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART)
5996 : {
5997 : int notify_pid;
5998 :
5999 6 : notify_pid = rw->rw_worker.bgw_notify_pid;
6000 :
6001 6 : ForgetBackgroundWorker(&iter);
6002 :
6003 : /* Report worker is gone now. */
6004 6 : if (notify_pid != 0)
6005 6 : kill(notify_pid, SIGUSR1);
6006 :
6007 6 : continue;
6008 : }
6009 :
6010 : /* read system time only when needed */
6011 2112 : if (now == 0)
6012 2112 : now = GetCurrentTimestamp();
6013 :
6014 2112 : if (!TimestampDifferenceExceeds(rw->rw_crashed_at, now,
6015 2112 : rw->rw_worker.bgw_restart_time * 1000))
6016 : {
6017 : /* Set flag to remember that we have workers to start later */
6018 2112 : HaveCrashedWorker = true;
6019 2112 : continue;
6020 : }
6021 : }
6022 :
6023 5252 : if (bgworker_should_start_now(rw->rw_worker.bgw_start_time))
6024 : {
6025 : /* reset crash time before trying to start worker */
6026 3996 : rw->rw_crashed_at = 0;
6027 :
6028 : /*
6029 : * Try to start the worker.
6030 : *
6031 : * On failure, give up processing workers for now, but set
6032 : * StartWorkerNeeded so we'll come back here on the next iteration
6033 : * of ServerLoop to try again. (We don't want to wait, because
6034 : * there might be additional ready-to-run workers.) We could set
6035 : * HaveCrashedWorker as well, since this worker is now marked
6036 : * crashed, but there's no need because the next run of this
6037 : * function will do that.
6038 : */
6039 3996 : if (!do_start_bgworker(rw))
6040 : {
6041 0 : StartWorkerNeeded = true;
6042 0 : return;
6043 : }
6044 :
6045 : /*
6046 : * If we've launched as many workers as allowed, quit, but have
6047 : * ServerLoop call us again to look for additional ready-to-run
6048 : * workers. There might not be any, but we'll find out the next
6049 : * time we run.
6050 : */
6051 3992 : if (++num_launched >= MAX_BGWORKERS_TO_LAUNCH)
6052 : {
6053 0 : StartWorkerNeeded = true;
6054 0 : return;
6055 : }
6056 : }
6057 : }
6058 : }
6059 :
6060 : /*
6061 : * When a backend asks to be notified about worker state changes, we
6062 : * set a flag in its backend entry. The background worker machinery needs
6063 : * to know when such backends exit.
6064 : */
6065 : bool
6066 3076 : PostmasterMarkPIDForWorkerNotify(int pid)
6067 : {
6068 : dlist_iter iter;
6069 : Backend *bp;
6070 :
6071 6034 : dlist_foreach(iter, &BackendList)
6072 : {
6073 6034 : bp = dlist_container(Backend, elem, iter.cur);
6074 6034 : if (bp->pid == pid)
6075 : {
6076 3076 : bp->bgworker_notify = true;
6077 3076 : return true;
6078 : }
6079 : }
6080 0 : return false;
6081 : }
6082 :
6083 : #ifdef EXEC_BACKEND
6084 :
6085 : /*
6086 : * The following need to be available to the save/restore_backend_variables
6087 : * functions. They are marked NON_EXEC_STATIC in their home modules.
6088 : */
6089 : extern slock_t *ShmemLock;
6090 : extern slock_t *ProcStructLock;
6091 : extern PGPROC *AuxiliaryProcs;
6092 : extern PMSignalData *PMSignalState;
6093 : extern pg_time_t first_syslogger_file_time;
6094 :
6095 : #ifndef WIN32
6096 : #define write_inheritable_socket(dest, src, childpid) ((*(dest) = (src)), true)
6097 : #define read_inheritable_socket(dest, src) (*(dest) = *(src))
6098 : #else
6099 : static bool write_duplicated_handle(HANDLE *dest, HANDLE src, HANDLE child);
6100 : static bool write_inheritable_socket(InheritableSocket *dest, SOCKET src,
6101 : pid_t childPid);
6102 : static void read_inheritable_socket(SOCKET *dest, InheritableSocket *src);
6103 : #endif
6104 :
6105 :
6106 : /* Save critical backend variables into the BackendParameters struct */
6107 : #ifndef WIN32
6108 : static bool
6109 : save_backend_variables(BackendParameters *param, Port *port)
6110 : #else
6111 : static bool
6112 : save_backend_variables(BackendParameters *param, Port *port,
6113 : HANDLE childProcess, pid_t childPid)
6114 : #endif
6115 : {
6116 : memcpy(¶m->port, port, sizeof(Port));
6117 : if (!write_inheritable_socket(¶m->portsocket, port->sock, childPid))
6118 : return false;
6119 :
6120 : strlcpy(param->DataDir, DataDir, MAXPGPATH);
6121 :
6122 : memcpy(¶m->ListenSocket, &ListenSocket, sizeof(ListenSocket));
6123 :
6124 : param->MyCancelKey = MyCancelKey;
6125 : param->MyPMChildSlot = MyPMChildSlot;
6126 :
6127 : #ifdef WIN32
6128 : param->ShmemProtectiveRegion = ShmemProtectiveRegion;
6129 : #endif
6130 : param->UsedShmemSegID = UsedShmemSegID;
6131 : param->UsedShmemSegAddr = UsedShmemSegAddr;
6132 :
6133 : param->ShmemLock = ShmemLock;
6134 : param->ShmemVariableCache = ShmemVariableCache;
6135 : param->ShmemBackendArray = ShmemBackendArray;
6136 :
6137 : #ifndef HAVE_SPINLOCKS
6138 : param->SpinlockSemaArray = SpinlockSemaArray;
6139 : #endif
6140 : param->NamedLWLockTrancheRequests = NamedLWLockTrancheRequests;
6141 : param->NamedLWLockTrancheArray = NamedLWLockTrancheArray;
6142 : param->MainLWLockArray = MainLWLockArray;
6143 : param->ProcStructLock = ProcStructLock;
6144 : param->ProcGlobal = ProcGlobal;
6145 : param->AuxiliaryProcs = AuxiliaryProcs;
6146 : param->PreparedXactProcs = PreparedXactProcs;
6147 : param->PMSignalState = PMSignalState;
6148 :
6149 : param->PostmasterPid = PostmasterPid;
6150 : param->PgStartTime = PgStartTime;
6151 : param->PgReloadTime = PgReloadTime;
6152 : param->first_syslogger_file_time = first_syslogger_file_time;
6153 :
6154 : param->redirection_done = redirection_done;
6155 : param->IsBinaryUpgrade = IsBinaryUpgrade;
6156 : param->query_id_enabled = query_id_enabled;
6157 : param->max_safe_fds = max_safe_fds;
6158 :
6159 : param->MaxBackends = MaxBackends;
6160 :
6161 : #ifdef WIN32
6162 : param->PostmasterHandle = PostmasterHandle;
6163 : if (!write_duplicated_handle(¶m->initial_signal_pipe,
6164 : pgwin32_create_signal_listener(childPid),
6165 : childProcess))
6166 : return false;
6167 : #else
6168 : memcpy(¶m->postmaster_alive_fds, &postmaster_alive_fds,
6169 : sizeof(postmaster_alive_fds));
6170 : #endif
6171 :
6172 : memcpy(¶m->syslogPipe, &syslogPipe, sizeof(syslogPipe));
6173 :
6174 : strlcpy(param->my_exec_path, my_exec_path, MAXPGPATH);
6175 :
6176 : strlcpy(param->pkglib_path, pkglib_path, MAXPGPATH);
6177 :
6178 : return true;
6179 : }
6180 :
6181 :
6182 : #ifdef WIN32
6183 : /*
6184 : * Duplicate a handle for usage in a child process, and write the child
6185 : * process instance of the handle to the parameter file.
6186 : */
6187 : static bool
6188 : write_duplicated_handle(HANDLE *dest, HANDLE src, HANDLE childProcess)
6189 : {
6190 : HANDLE hChild = INVALID_HANDLE_VALUE;
6191 :
6192 : if (!DuplicateHandle(GetCurrentProcess(),
6193 : src,
6194 : childProcess,
6195 : &hChild,
6196 : 0,
6197 : TRUE,
6198 : DUPLICATE_CLOSE_SOURCE | DUPLICATE_SAME_ACCESS))
6199 : {
6200 : ereport(LOG,
6201 : (errmsg_internal("could not duplicate handle to be written to backend parameter file: error code %lu",
6202 : GetLastError())));
6203 : return false;
6204 : }
6205 :
6206 : *dest = hChild;
6207 : return true;
6208 : }
6209 :
6210 : /*
6211 : * Duplicate a socket for usage in a child process, and write the resulting
6212 : * structure to the parameter file.
6213 : * This is required because a number of LSPs (Layered Service Providers) very
6214 : * common on Windows (antivirus, firewalls, download managers etc) break
6215 : * straight socket inheritance.
6216 : */
6217 : static bool
6218 : write_inheritable_socket(InheritableSocket *dest, SOCKET src, pid_t childpid)
6219 : {
6220 : dest->origsocket = src;
6221 : if (src != 0 && src != PGINVALID_SOCKET)
6222 : {
6223 : /* Actual socket */
6224 : if (WSADuplicateSocket(src, childpid, &dest->wsainfo) != 0)
6225 : {
6226 : ereport(LOG,
6227 : (errmsg("could not duplicate socket %d for use in backend: error code %d",
6228 : (int) src, WSAGetLastError())));
6229 : return false;
6230 : }
6231 : }
6232 : return true;
6233 : }
6234 :
6235 : /*
6236 : * Read a duplicate socket structure back, and get the socket descriptor.
6237 : */
6238 : static void
6239 : read_inheritable_socket(SOCKET *dest, InheritableSocket *src)
6240 : {
6241 : SOCKET s;
6242 :
6243 : if (src->origsocket == PGINVALID_SOCKET || src->origsocket == 0)
6244 : {
6245 : /* Not a real socket! */
6246 : *dest = src->origsocket;
6247 : }
6248 : else
6249 : {
6250 : /* Actual socket, so create from structure */
6251 : s = WSASocket(FROM_PROTOCOL_INFO,
6252 : FROM_PROTOCOL_INFO,
6253 : FROM_PROTOCOL_INFO,
6254 : &src->wsainfo,
6255 : 0,
6256 : 0);
6257 : if (s == INVALID_SOCKET)
6258 : {
6259 : write_stderr("could not create inherited socket: error code %d\n",
6260 : WSAGetLastError());
6261 : exit(1);
6262 : }
6263 : *dest = s;
6264 :
6265 : /*
6266 : * To make sure we don't get two references to the same socket, close
6267 : * the original one. (This would happen when inheritance actually
6268 : * works..
6269 : */
6270 : closesocket(src->origsocket);
6271 : }
6272 : }
6273 : #endif
6274 :
6275 : static void
6276 : read_backend_variables(char *id, Port *port)
6277 : {
6278 : BackendParameters param;
6279 :
6280 : #ifndef WIN32
6281 : /* Non-win32 implementation reads from file */
6282 : FILE *fp;
6283 :
6284 : /* Open file */
6285 : fp = AllocateFile(id, PG_BINARY_R);
6286 : if (!fp)
6287 : {
6288 : write_stderr("could not open backend variables file \"%s\": %s\n",
6289 : id, strerror(errno));
6290 : exit(1);
6291 : }
6292 :
6293 : if (fread(¶m, sizeof(param), 1, fp) != 1)
6294 : {
6295 : write_stderr("could not read from backend variables file \"%s\": %s\n",
6296 : id, strerror(errno));
6297 : exit(1);
6298 : }
6299 :
6300 : /* Release file */
6301 : FreeFile(fp);
6302 : if (unlink(id) != 0)
6303 : {
6304 : write_stderr("could not remove file \"%s\": %s\n",
6305 : id, strerror(errno));
6306 : exit(1);
6307 : }
6308 : #else
6309 : /* Win32 version uses mapped file */
6310 : HANDLE paramHandle;
6311 : BackendParameters *paramp;
6312 :
6313 : #ifdef _WIN64
6314 : paramHandle = (HANDLE) _atoi64(id);
6315 : #else
6316 : paramHandle = (HANDLE) atol(id);
6317 : #endif
6318 : paramp = MapViewOfFile(paramHandle, FILE_MAP_READ, 0, 0, 0);
6319 : if (!paramp)
6320 : {
6321 : write_stderr("could not map view of backend variables: error code %lu\n",
6322 : GetLastError());
6323 : exit(1);
6324 : }
6325 :
6326 : memcpy(¶m, paramp, sizeof(BackendParameters));
6327 :
6328 : if (!UnmapViewOfFile(paramp))
6329 : {
6330 : write_stderr("could not unmap view of backend variables: error code %lu\n",
6331 : GetLastError());
6332 : exit(1);
6333 : }
6334 :
6335 : if (!CloseHandle(paramHandle))
6336 : {
6337 : write_stderr("could not close handle to backend parameter variables: error code %lu\n",
6338 : GetLastError());
6339 : exit(1);
6340 : }
6341 : #endif
6342 :
6343 : restore_backend_variables(¶m, port);
6344 : }
6345 :
6346 : /* Restore critical backend variables from the BackendParameters struct */
6347 : static void
6348 : restore_backend_variables(BackendParameters *param, Port *port)
6349 : {
6350 : memcpy(port, ¶m->port, sizeof(Port));
6351 : read_inheritable_socket(&port->sock, ¶m->portsocket);
6352 :
6353 : SetDataDir(param->DataDir);
6354 :
6355 : memcpy(&ListenSocket, ¶m->ListenSocket, sizeof(ListenSocket));
6356 :
6357 : MyCancelKey = param->MyCancelKey;
6358 : MyPMChildSlot = param->MyPMChildSlot;
6359 :
6360 : #ifdef WIN32
6361 : ShmemProtectiveRegion = param->ShmemProtectiveRegion;
6362 : #endif
6363 : UsedShmemSegID = param->UsedShmemSegID;
6364 : UsedShmemSegAddr = param->UsedShmemSegAddr;
6365 :
6366 : ShmemLock = param->ShmemLock;
6367 : ShmemVariableCache = param->ShmemVariableCache;
6368 : ShmemBackendArray = param->ShmemBackendArray;
6369 :
6370 : #ifndef HAVE_SPINLOCKS
6371 : SpinlockSemaArray = param->SpinlockSemaArray;
6372 : #endif
6373 : NamedLWLockTrancheRequests = param->NamedLWLockTrancheRequests;
6374 : NamedLWLockTrancheArray = param->NamedLWLockTrancheArray;
6375 : MainLWLockArray = param->MainLWLockArray;
6376 : ProcStructLock = param->ProcStructLock;
6377 : ProcGlobal = param->ProcGlobal;
6378 : AuxiliaryProcs = param->AuxiliaryProcs;
6379 : PreparedXactProcs = param->PreparedXactProcs;
6380 : PMSignalState = param->PMSignalState;
6381 :
6382 : PostmasterPid = param->PostmasterPid;
6383 : PgStartTime = param->PgStartTime;
6384 : PgReloadTime = param->PgReloadTime;
6385 : first_syslogger_file_time = param->first_syslogger_file_time;
6386 :
6387 : redirection_done = param->redirection_done;
6388 : IsBinaryUpgrade = param->IsBinaryUpgrade;
6389 : query_id_enabled = param->query_id_enabled;
6390 : max_safe_fds = param->max_safe_fds;
6391 :
6392 : MaxBackends = param->MaxBackends;
6393 :
6394 : #ifdef WIN32
6395 : PostmasterHandle = param->PostmasterHandle;
6396 : pgwin32_initial_signal_pipe = param->initial_signal_pipe;
6397 : #else
6398 : memcpy(&postmaster_alive_fds, ¶m->postmaster_alive_fds,
6399 : sizeof(postmaster_alive_fds));
6400 : #endif
6401 :
6402 : memcpy(&syslogPipe, ¶m->syslogPipe, sizeof(syslogPipe));
6403 :
6404 : strlcpy(my_exec_path, param->my_exec_path, MAXPGPATH);
6405 :
6406 : strlcpy(pkglib_path, param->pkglib_path, MAXPGPATH);
6407 :
6408 : /*
6409 : * We need to restore fd.c's counts of externally-opened FDs; to avoid
6410 : * confusion, be sure to do this after restoring max_safe_fds. (Note:
6411 : * BackendInitialize will handle this for port->sock.)
6412 : */
6413 : #ifndef WIN32
6414 : if (postmaster_alive_fds[0] >= 0)
6415 : ReserveExternalFD();
6416 : if (postmaster_alive_fds[1] >= 0)
6417 : ReserveExternalFD();
6418 : #endif
6419 : }
6420 :
6421 :
6422 : Size
6423 : ShmemBackendArraySize(void)
6424 : {
6425 : return mul_size(MaxLivePostmasterChildren(), sizeof(Backend));
6426 : }
6427 :
6428 : void
6429 : ShmemBackendArrayAllocation(void)
6430 : {
6431 : Size size = ShmemBackendArraySize();
6432 :
6433 : ShmemBackendArray = (Backend *) ShmemAlloc(size);
6434 : /* Mark all slots as empty */
6435 : memset(ShmemBackendArray, 0, size);
6436 : }
6437 :
6438 : static void
6439 : ShmemBackendArrayAdd(Backend *bn)
6440 : {
6441 : /* The array slot corresponding to my PMChildSlot should be free */
6442 : int i = bn->child_slot - 1;
6443 :
6444 : Assert(ShmemBackendArray[i].pid == 0);
6445 : ShmemBackendArray[i] = *bn;
6446 : }
6447 :
6448 : static void
6449 : ShmemBackendArrayRemove(Backend *bn)
6450 : {
6451 : int i = bn->child_slot - 1;
6452 :
6453 : Assert(ShmemBackendArray[i].pid == bn->pid);
6454 : /* Mark the slot as empty */
6455 : ShmemBackendArray[i].pid = 0;
6456 : }
6457 : #endif /* EXEC_BACKEND */
6458 :
6459 :
6460 : #ifdef WIN32
6461 :
6462 : /*
6463 : * Subset implementation of waitpid() for Windows. We assume pid is -1
6464 : * (that is, check all child processes) and options is WNOHANG (don't wait).
6465 : */
6466 : static pid_t
6467 : waitpid(pid_t pid, int *exitstatus, int options)
6468 : {
6469 : DWORD dwd;
6470 : ULONG_PTR key;
6471 : OVERLAPPED *ovl;
6472 :
6473 : /*
6474 : * Check if there are any dead children. If there are, return the pid of
6475 : * the first one that died.
6476 : */
6477 : if (GetQueuedCompletionStatus(win32ChildQueue, &dwd, &key, &ovl, 0))
6478 : {
6479 : *exitstatus = (int) key;
6480 : return dwd;
6481 : }
6482 :
6483 : return -1;
6484 : }
6485 :
6486 : /*
6487 : * Note! Code below executes on a thread pool! All operations must
6488 : * be thread safe! Note that elog() and friends must *not* be used.
6489 : */
6490 : static void WINAPI
6491 : pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired)
6492 : {
6493 : win32_deadchild_waitinfo *childinfo = (win32_deadchild_waitinfo *) lpParameter;
6494 : DWORD exitcode;
6495 :
6496 : if (TimerOrWaitFired)
6497 : return; /* timeout. Should never happen, since we use
6498 : * INFINITE as timeout value. */
6499 :
6500 : /*
6501 : * Remove handle from wait - required even though it's set to wait only
6502 : * once
6503 : */
6504 : UnregisterWaitEx(childinfo->waitHandle, NULL);
6505 :
6506 : if (!GetExitCodeProcess(childinfo->procHandle, &exitcode))
6507 : {
6508 : /*
6509 : * Should never happen. Inform user and set a fixed exitcode.
6510 : */
6511 : write_stderr("could not read exit code for process\n");
6512 : exitcode = 255;
6513 : }
6514 :
6515 : if (!PostQueuedCompletionStatus(win32ChildQueue, childinfo->procId, (ULONG_PTR) exitcode, NULL))
6516 : write_stderr("could not post child completion status\n");
6517 :
6518 : /*
6519 : * Handle is per-process, so we close it here instead of in the
6520 : * originating thread
6521 : */
6522 : CloseHandle(childinfo->procHandle);
6523 :
6524 : /*
6525 : * Free struct that was allocated before the call to
6526 : * RegisterWaitForSingleObject()
6527 : */
6528 : free(childinfo);
6529 :
6530 : /* Queue SIGCHLD signal */
6531 : pg_queue_signal(SIGCHLD);
6532 : }
6533 : #endif /* WIN32 */
6534 :
6535 : /*
6536 : * Initialize one and only handle for monitoring postmaster death.
6537 : *
6538 : * Called once in the postmaster, so that child processes can subsequently
6539 : * monitor if their parent is dead.
6540 : */
6541 : static void
6542 1044 : InitPostmasterDeathWatchHandle(void)
6543 : {
6544 : #ifndef WIN32
6545 :
6546 : /*
6547 : * Create a pipe. Postmaster holds the write end of the pipe open
6548 : * (POSTMASTER_FD_OWN), and children hold the read end. Children can pass
6549 : * the read file descriptor to select() to wake up in case postmaster
6550 : * dies, or check for postmaster death with a (read() == 0). Children must
6551 : * close the write end as soon as possible after forking, because EOF
6552 : * won't be signaled in the read end until all processes have closed the
6553 : * write fd. That is taken care of in ClosePostmasterPorts().
6554 : */
6555 : Assert(MyProcPid == PostmasterPid);
6556 1044 : if (pipe(postmaster_alive_fds) < 0)
6557 0 : ereport(FATAL,
6558 : (errcode_for_file_access(),
6559 : errmsg_internal("could not create pipe to monitor postmaster death: %m")));
6560 :
6561 : /* Notify fd.c that we've eaten two FDs for the pipe. */
6562 1044 : ReserveExternalFD();
6563 1044 : ReserveExternalFD();
6564 :
6565 : /*
6566 : * Set O_NONBLOCK to allow testing for the fd's presence with a read()
6567 : * call.
6568 : */
6569 1044 : if (fcntl(postmaster_alive_fds[POSTMASTER_FD_WATCH], F_SETFL, O_NONBLOCK) == -1)
6570 0 : ereport(FATAL,
6571 : (errcode_for_socket_access(),
6572 : errmsg_internal("could not set postmaster death monitoring pipe to nonblocking mode: %m")));
6573 : #else
6574 :
6575 : /*
6576 : * On Windows, we use a process handle for the same purpose.
6577 : */
6578 : if (DuplicateHandle(GetCurrentProcess(),
6579 : GetCurrentProcess(),
6580 : GetCurrentProcess(),
6581 : &PostmasterHandle,
6582 : 0,
6583 : TRUE,
6584 : DUPLICATE_SAME_ACCESS) == 0)
6585 : ereport(FATAL,
6586 : (errmsg_internal("could not duplicate postmaster handle: error code %lu",
6587 : GetLastError())));
6588 : #endif /* WIN32 */
6589 1044 : }
|