Line data Source code
1 : /*--------------------------------------------------------------------
2 : * bgworker.c
3 : * POSTGRES pluggable background workers implementation
4 : *
5 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
6 : *
7 : * IDENTIFICATION
8 : * src/backend/postmaster/bgworker.c
9 : *
10 : *-------------------------------------------------------------------------
11 : */
12 :
13 : #include "postgres.h"
14 :
15 : #include "access/parallel.h"
16 : #include "libpq/pqsignal.h"
17 : #include "miscadmin.h"
18 : #include "pgstat.h"
19 : #include "port/atomics.h"
20 : #include "postmaster/bgworker_internals.h"
21 : #include "postmaster/postmaster.h"
22 : #include "replication/logicallauncher.h"
23 : #include "replication/logicalworker.h"
24 : #include "storage/ipc.h"
25 : #include "storage/latch.h"
26 : #include "storage/lwlock.h"
27 : #include "storage/pmsignal.h"
28 : #include "storage/proc.h"
29 : #include "storage/procarray.h"
30 : #include "storage/procsignal.h"
31 : #include "storage/shmem.h"
32 : #include "tcop/tcopprot.h"
33 : #include "utils/ascii.h"
34 : #include "utils/memutils.h"
35 : #include "utils/ps_status.h"
36 : #include "utils/timeout.h"
37 :
38 : /*
39 : * The postmaster's list of registered background workers, in private memory.
40 : */
41 : dlist_head BackgroundWorkerList = DLIST_STATIC_INIT(BackgroundWorkerList);
42 :
43 : /*
44 : * BackgroundWorkerSlots exist in shared memory and can be accessed (via
45 : * the BackgroundWorkerArray) by both the postmaster and by regular backends.
46 : * However, the postmaster cannot take locks, even spinlocks, because this
47 : * might allow it to crash or become wedged if shared memory gets corrupted.
48 : * Such an outcome is intolerable. Therefore, we need a lockless protocol
49 : * for coordinating access to this data.
50 : *
51 : * The 'in_use' flag is used to hand off responsibility for the slot between
52 : * the postmaster and the rest of the system. When 'in_use' is false,
53 : * the postmaster will ignore the slot entirely, except for the 'in_use' flag
54 : * itself, which it may read. In this state, regular backends may modify the
55 : * slot. Once a backend sets 'in_use' to true, the slot becomes the
56 : * responsibility of the postmaster. Regular backends may no longer modify it,
57 : * but the postmaster may examine it. Thus, a backend initializing a slot
58 : * must fully initialize the slot - and insert a write memory barrier - before
59 : * marking it as in use.
60 : *
61 : * As an exception, however, even when the slot is in use, regular backends
62 : * may set the 'terminate' flag for a slot, telling the postmaster not
63 : * to restart it. Once the background worker is no longer running, the slot
64 : * will be released for reuse.
65 : *
66 : * In addition to coordinating with the postmaster, backends modifying this
67 : * data structure must coordinate with each other. Since they can take locks,
68 : * this is straightforward: any backend wishing to manipulate a slot must
69 : * take BackgroundWorkerLock in exclusive mode. Backends wishing to read
70 : * data that might get concurrently modified by other backends should take
71 : * this lock in shared mode. No matter what, backends reading this data
72 : * structure must be able to tolerate concurrent modifications by the
73 : * postmaster.
74 : */
75 : typedef struct BackgroundWorkerSlot
76 : {
77 : bool in_use;
78 : bool terminate;
79 : pid_t pid; /* InvalidPid = not started yet; 0 = dead */
80 : uint64 generation; /* incremented when slot is recycled */
81 : BackgroundWorker worker;
82 : } BackgroundWorkerSlot;
83 :
84 : /*
85 : * In order to limit the total number of parallel workers (according to
86 : * max_parallel_workers GUC), we maintain the number of active parallel
87 : * workers. Since the postmaster cannot take locks, two variables are used for
88 : * this purpose: the number of registered parallel workers (modified by the
89 : * backends, protected by BackgroundWorkerLock) and the number of terminated
90 : * parallel workers (modified only by the postmaster, lockless). The active
91 : * number of parallel workers is the number of registered workers minus the
92 : * terminated ones. These counters can of course overflow, but it's not
93 : * important here since the subtraction will still give the right number.
94 : */
95 : typedef struct BackgroundWorkerArray
96 : {
97 : int total_slots;
98 : uint32 parallel_register_count;
99 : uint32 parallel_terminate_count;
100 : BackgroundWorkerSlot slot[FLEXIBLE_ARRAY_MEMBER];
101 : } BackgroundWorkerArray;
102 :
103 : struct BackgroundWorkerHandle
104 : {
105 : int slot;
106 : uint64 generation;
107 : };
108 :
109 : static BackgroundWorkerArray *BackgroundWorkerData;
110 :
111 : /*
112 : * List of internal background worker entry points. We need this for
113 : * reasons explained in LookupBackgroundWorkerFunction(), below.
114 : */
115 : static const struct
116 : {
117 : const char *fn_name;
118 : bgworker_main_type fn_addr;
119 : } InternalBGWorkers[] =
120 :
121 : {
122 : {
123 : .fn_name = "ParallelWorkerMain",
124 : .fn_addr = ParallelWorkerMain
125 : },
126 : {
127 : .fn_name = "ApplyLauncherMain",
128 : .fn_addr = ApplyLauncherMain
129 : },
130 : {
131 : .fn_name = "ApplyWorkerMain",
132 : .fn_addr = ApplyWorkerMain
133 : },
134 : {
135 : .fn_name = "ParallelApplyWorkerMain",
136 : .fn_addr = ParallelApplyWorkerMain
137 : },
138 : {
139 : .fn_name = "TableSyncWorkerMain",
140 : .fn_addr = TableSyncWorkerMain
141 : },
142 : {
143 : .fn_name = "SequenceSyncWorkerMain",
144 : .fn_addr = SequenceSyncWorkerMain
145 : }
146 : };
147 :
148 : /* Private functions. */
149 : static bgworker_main_type LookupBackgroundWorkerFunction(const char *libraryname, const char *funcname);
150 :
151 :
152 : /*
153 : * Calculate shared memory needed.
154 : */
155 : Size
156 6534 : BackgroundWorkerShmemSize(void)
157 : {
158 : Size size;
159 :
160 : /* Array of workers is variably sized. */
161 6534 : size = offsetof(BackgroundWorkerArray, slot);
162 6534 : size = add_size(size, mul_size(max_worker_processes,
163 : sizeof(BackgroundWorkerSlot)));
164 :
165 6534 : return size;
166 : }
167 :
168 : /*
169 : * Initialize shared memory.
170 : */
171 : void
172 2280 : BackgroundWorkerShmemInit(void)
173 : {
174 : bool found;
175 :
176 2280 : BackgroundWorkerData = ShmemInitStruct("Background Worker Data",
177 : BackgroundWorkerShmemSize(),
178 : &found);
179 2280 : if (!IsUnderPostmaster)
180 : {
181 : dlist_iter iter;
182 2280 : int slotno = 0;
183 :
184 2280 : BackgroundWorkerData->total_slots = max_worker_processes;
185 2280 : BackgroundWorkerData->parallel_register_count = 0;
186 2280 : BackgroundWorkerData->parallel_terminate_count = 0;
187 :
188 : /*
189 : * Copy contents of worker list into shared memory. Record the shared
190 : * memory slot assigned to each worker. This ensures a 1-to-1
191 : * correspondence between the postmaster's private list and the array
192 : * in shared memory.
193 : */
194 4010 : dlist_foreach(iter, &BackgroundWorkerList)
195 : {
196 1730 : BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno];
197 : RegisteredBgWorker *rw;
198 :
199 1730 : rw = dlist_container(RegisteredBgWorker, rw_lnode, iter.cur);
200 : Assert(slotno < max_worker_processes);
201 1730 : slot->in_use = true;
202 1730 : slot->terminate = false;
203 1730 : slot->pid = InvalidPid;
204 1730 : slot->generation = 0;
205 1730 : rw->rw_shmem_slot = slotno;
206 1730 : rw->rw_worker.bgw_notify_pid = 0; /* might be reinit after crash */
207 1730 : memcpy(&slot->worker, &rw->rw_worker, sizeof(BackgroundWorker));
208 1730 : ++slotno;
209 : }
210 :
211 : /*
212 : * Mark any remaining slots as not in use.
213 : */
214 18772 : while (slotno < max_worker_processes)
215 : {
216 16492 : BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno];
217 :
218 16492 : slot->in_use = false;
219 16492 : ++slotno;
220 : }
221 : }
222 : else
223 : Assert(found);
224 2280 : }
225 :
226 : /*
227 : * Search the postmaster's backend-private list of RegisteredBgWorker objects
228 : * for the one that maps to the given slot number.
229 : */
230 : static RegisteredBgWorker *
231 9468 : FindRegisteredWorkerBySlotNumber(int slotno)
232 : {
233 : dlist_iter iter;
234 :
235 23550 : dlist_foreach(iter, &BackgroundWorkerList)
236 : {
237 : RegisteredBgWorker *rw;
238 :
239 19424 : rw = dlist_container(RegisteredBgWorker, rw_lnode, iter.cur);
240 19424 : if (rw->rw_shmem_slot == slotno)
241 5342 : return rw;
242 : }
243 :
244 4126 : return NULL;
245 : }
246 :
247 : /*
248 : * Notice changes to shared memory made by other backends.
249 : * Accept new worker requests only if allow_new_workers is true.
250 : *
251 : * This code runs in the postmaster, so we must be very careful not to assume
252 : * that shared memory contents are sane. Otherwise, a rogue backend could
253 : * take out the postmaster.
254 : */
255 : void
256 2786 : BackgroundWorkerStateChange(bool allow_new_workers)
257 : {
258 : int slotno;
259 :
260 : /*
261 : * The total number of slots stored in shared memory should match our
262 : * notion of max_worker_processes. If it does not, something is very
263 : * wrong. Further down, we always refer to this value as
264 : * max_worker_processes, in case shared memory gets corrupted while we're
265 : * looping.
266 : */
267 2786 : if (max_worker_processes != BackgroundWorkerData->total_slots)
268 : {
269 0 : ereport(LOG,
270 : (errmsg("inconsistent background worker state (\"max_worker_processes\"=%d, total slots=%d)",
271 : max_worker_processes,
272 : BackgroundWorkerData->total_slots)));
273 0 : return;
274 : }
275 :
276 : /*
277 : * Iterate through slots, looking for newly-registered workers or workers
278 : * who must die.
279 : */
280 25362 : for (slotno = 0; slotno < max_worker_processes; ++slotno)
281 : {
282 22576 : BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno];
283 : RegisteredBgWorker *rw;
284 :
285 22576 : if (!slot->in_use)
286 13108 : continue;
287 :
288 : /*
289 : * Make sure we don't see the in_use flag before the updated slot
290 : * contents.
291 : */
292 9468 : pg_read_barrier();
293 :
294 : /* See whether we already know about this worker. */
295 9468 : rw = FindRegisteredWorkerBySlotNumber(slotno);
296 9468 : if (rw != NULL)
297 : {
298 : /*
299 : * In general, the worker data can't change after it's initially
300 : * registered. However, someone can set the terminate flag.
301 : */
302 5342 : if (slot->terminate && !rw->rw_terminate)
303 : {
304 20 : rw->rw_terminate = true;
305 20 : if (rw->rw_pid != 0)
306 20 : kill(rw->rw_pid, SIGTERM);
307 : else
308 : {
309 : /* Report never-started, now-terminated worker as dead. */
310 0 : ReportBackgroundWorkerPID(rw);
311 : }
312 : }
313 5342 : continue;
314 : }
315 :
316 : /*
317 : * If we aren't allowing new workers, then immediately mark it for
318 : * termination; the next stanza will take care of cleaning it up.
319 : * Doing this ensures that any process waiting for the worker will get
320 : * awoken, even though the worker will never be allowed to run.
321 : */
322 4126 : if (!allow_new_workers)
323 0 : slot->terminate = true;
324 :
325 : /*
326 : * If the worker is marked for termination, we don't need to add it to
327 : * the registered workers list; we can just free the slot. However, if
328 : * bgw_notify_pid is set, the process that registered the worker may
329 : * need to know that we've processed the terminate request, so be sure
330 : * to signal it.
331 : */
332 4126 : if (slot->terminate)
333 0 : {
334 : int notify_pid;
335 :
336 : /*
337 : * We need a memory barrier here to make sure that the load of
338 : * bgw_notify_pid and the update of parallel_terminate_count
339 : * complete before the store to in_use.
340 : */
341 0 : notify_pid = slot->worker.bgw_notify_pid;
342 0 : if ((slot->worker.bgw_flags & BGWORKER_CLASS_PARALLEL) != 0)
343 0 : BackgroundWorkerData->parallel_terminate_count++;
344 0 : slot->pid = 0;
345 :
346 0 : pg_memory_barrier();
347 0 : slot->in_use = false;
348 :
349 0 : if (notify_pid != 0)
350 0 : kill(notify_pid, SIGUSR1);
351 :
352 0 : continue;
353 : }
354 :
355 : /*
356 : * Copy the registration data into the registered workers list.
357 : */
358 4126 : rw = MemoryContextAllocExtended(PostmasterContext,
359 : sizeof(RegisteredBgWorker),
360 : MCXT_ALLOC_NO_OOM | MCXT_ALLOC_ZERO);
361 4126 : if (rw == NULL)
362 : {
363 0 : ereport(LOG,
364 : (errcode(ERRCODE_OUT_OF_MEMORY),
365 : errmsg("out of memory")));
366 0 : return;
367 : }
368 :
369 : /*
370 : * Copy strings in a paranoid way. If shared memory is corrupted, the
371 : * source data might not even be NUL-terminated.
372 : */
373 4126 : ascii_safe_strlcpy(rw->rw_worker.bgw_name,
374 4126 : slot->worker.bgw_name, BGW_MAXLEN);
375 4126 : ascii_safe_strlcpy(rw->rw_worker.bgw_type,
376 4126 : slot->worker.bgw_type, BGW_MAXLEN);
377 4126 : ascii_safe_strlcpy(rw->rw_worker.bgw_library_name,
378 4126 : slot->worker.bgw_library_name, MAXPGPATH);
379 4126 : ascii_safe_strlcpy(rw->rw_worker.bgw_function_name,
380 4126 : slot->worker.bgw_function_name, BGW_MAXLEN);
381 :
382 : /*
383 : * Copy various fixed-size fields.
384 : *
385 : * flags, start_time, and restart_time are examined by the postmaster,
386 : * but nothing too bad will happen if they are corrupted. The
387 : * remaining fields will only be examined by the child process. It
388 : * might crash, but we won't.
389 : */
390 4126 : rw->rw_worker.bgw_flags = slot->worker.bgw_flags;
391 4126 : rw->rw_worker.bgw_start_time = slot->worker.bgw_start_time;
392 4126 : rw->rw_worker.bgw_restart_time = slot->worker.bgw_restart_time;
393 4126 : rw->rw_worker.bgw_main_arg = slot->worker.bgw_main_arg;
394 4126 : memcpy(rw->rw_worker.bgw_extra, slot->worker.bgw_extra, BGW_EXTRALEN);
395 :
396 : /*
397 : * Copy the PID to be notified about state changes, but only if the
398 : * postmaster knows about a backend with that PID. It isn't an error
399 : * if the postmaster doesn't know about the PID, because the backend
400 : * that requested the worker could have died (or been killed) just
401 : * after doing so. Nonetheless, at least until we get some experience
402 : * with how this plays out in the wild, log a message at a relative
403 : * high debug level.
404 : */
405 4126 : rw->rw_worker.bgw_notify_pid = slot->worker.bgw_notify_pid;
406 4126 : if (!PostmasterMarkPIDForWorkerNotify(rw->rw_worker.bgw_notify_pid))
407 : {
408 0 : elog(DEBUG1, "worker notification PID %d is not valid",
409 : (int) rw->rw_worker.bgw_notify_pid);
410 0 : rw->rw_worker.bgw_notify_pid = 0;
411 : }
412 :
413 : /* Initialize postmaster bookkeeping. */
414 4126 : rw->rw_pid = 0;
415 4126 : rw->rw_crashed_at = 0;
416 4126 : rw->rw_shmem_slot = slotno;
417 4126 : rw->rw_terminate = false;
418 :
419 : /* Log it! */
420 4126 : ereport(DEBUG1,
421 : (errmsg_internal("registering background worker \"%s\"",
422 : rw->rw_worker.bgw_name)));
423 :
424 4126 : dlist_push_head(&BackgroundWorkerList, &rw->rw_lnode);
425 : }
426 : }
427 :
428 : /*
429 : * Forget about a background worker that's no longer needed.
430 : *
431 : * NOTE: The entry is unlinked from BackgroundWorkerList. If the caller is
432 : * iterating through it, better use a mutable iterator!
433 : *
434 : * Caller is responsible for notifying bgw_notify_pid, if appropriate.
435 : *
436 : * This function must be invoked only in the postmaster.
437 : */
438 : void
439 4070 : ForgetBackgroundWorker(RegisteredBgWorker *rw)
440 : {
441 : BackgroundWorkerSlot *slot;
442 :
443 : Assert(rw->rw_shmem_slot < max_worker_processes);
444 4070 : slot = &BackgroundWorkerData->slot[rw->rw_shmem_slot];
445 : Assert(slot->in_use);
446 :
447 : /*
448 : * We need a memory barrier here to make sure that the update of
449 : * parallel_terminate_count completes before the store to in_use.
450 : */
451 4070 : if ((rw->rw_worker.bgw_flags & BGWORKER_CLASS_PARALLEL) != 0)
452 2964 : BackgroundWorkerData->parallel_terminate_count++;
453 :
454 4070 : pg_memory_barrier();
455 4070 : slot->in_use = false;
456 :
457 4070 : ereport(DEBUG1,
458 : (errmsg_internal("unregistering background worker \"%s\"",
459 : rw->rw_worker.bgw_name)));
460 :
461 4070 : dlist_delete(&rw->rw_lnode);
462 4070 : pfree(rw);
463 4070 : }
464 :
465 : /*
466 : * Report the PID of a newly-launched background worker in shared memory.
467 : *
468 : * This function should only be called from the postmaster.
469 : */
470 : void
471 5630 : ReportBackgroundWorkerPID(RegisteredBgWorker *rw)
472 : {
473 : BackgroundWorkerSlot *slot;
474 :
475 : Assert(rw->rw_shmem_slot < max_worker_processes);
476 5630 : slot = &BackgroundWorkerData->slot[rw->rw_shmem_slot];
477 5630 : slot->pid = rw->rw_pid;
478 :
479 5630 : if (rw->rw_worker.bgw_notify_pid != 0)
480 4126 : kill(rw->rw_worker.bgw_notify_pid, SIGUSR1);
481 5630 : }
482 :
483 : /*
484 : * Report that the PID of a background worker is now zero because a
485 : * previously-running background worker has exited.
486 : *
487 : * NOTE: The entry may be unlinked from BackgroundWorkerList. If the caller
488 : * is iterating through it, better use a mutable iterator!
489 : *
490 : * This function should only be called from the postmaster.
491 : */
492 : void
493 4966 : ReportBackgroundWorkerExit(RegisteredBgWorker *rw)
494 : {
495 : BackgroundWorkerSlot *slot;
496 : int notify_pid;
497 :
498 : Assert(rw->rw_shmem_slot < max_worker_processes);
499 4966 : slot = &BackgroundWorkerData->slot[rw->rw_shmem_slot];
500 4966 : slot->pid = rw->rw_pid;
501 4966 : notify_pid = rw->rw_worker.bgw_notify_pid;
502 :
503 : /*
504 : * If this worker is slated for deregistration, do that before notifying
505 : * the process which started it. Otherwise, if that process tries to
506 : * reuse the slot immediately, it might not be available yet. In theory
507 : * that could happen anyway if the process checks slot->pid at just the
508 : * wrong moment, but this makes the window narrower.
509 : */
510 4966 : if (rw->rw_terminate ||
511 1372 : rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART)
512 4070 : ForgetBackgroundWorker(rw);
513 :
514 4966 : if (notify_pid != 0)
515 3996 : kill(notify_pid, SIGUSR1);
516 4966 : }
517 :
518 : /*
519 : * Cancel SIGUSR1 notifications for a PID belonging to an exiting backend.
520 : *
521 : * This function should only be called from the postmaster.
522 : */
523 : void
524 532 : BackgroundWorkerStopNotifications(pid_t pid)
525 : {
526 : dlist_iter iter;
527 :
528 1714 : dlist_foreach(iter, &BackgroundWorkerList)
529 : {
530 : RegisteredBgWorker *rw;
531 :
532 1182 : rw = dlist_container(RegisteredBgWorker, rw_lnode, iter.cur);
533 1182 : if (rw->rw_worker.bgw_notify_pid == pid)
534 78 : rw->rw_worker.bgw_notify_pid = 0;
535 : }
536 532 : }
537 :
538 : /*
539 : * Cancel any not-yet-started worker requests that have waiting processes.
540 : *
541 : * This is called during a normal ("smart" or "fast") database shutdown.
542 : * After this point, no new background workers will be started, so anything
543 : * that might be waiting for them needs to be kicked off its wait. We do
544 : * that by canceling the bgworker registration entirely, which is perhaps
545 : * overkill, but since we're shutting down it does not matter whether the
546 : * registration record sticks around.
547 : *
548 : * This function should only be called from the postmaster.
549 : */
550 : void
551 1130 : ForgetUnstartedBackgroundWorkers(void)
552 : {
553 : dlist_mutable_iter iter;
554 :
555 2250 : dlist_foreach_modify(iter, &BackgroundWorkerList)
556 : {
557 : RegisteredBgWorker *rw;
558 : BackgroundWorkerSlot *slot;
559 :
560 1120 : rw = dlist_container(RegisteredBgWorker, rw_lnode, iter.cur);
561 : Assert(rw->rw_shmem_slot < max_worker_processes);
562 1120 : slot = &BackgroundWorkerData->slot[rw->rw_shmem_slot];
563 :
564 : /* If it's not yet started, and there's someone waiting ... */
565 1120 : if (slot->pid == InvalidPid &&
566 120 : rw->rw_worker.bgw_notify_pid != 0)
567 : {
568 : /* ... then zap it, and notify the waiter */
569 0 : int notify_pid = rw->rw_worker.bgw_notify_pid;
570 :
571 0 : ForgetBackgroundWorker(rw);
572 0 : if (notify_pid != 0)
573 0 : kill(notify_pid, SIGUSR1);
574 : }
575 : }
576 1130 : }
577 :
578 : /*
579 : * Reset background worker crash state.
580 : *
581 : * We assume that, after a crash-and-restart cycle, background workers without
582 : * the never-restart flag should be restarted immediately, instead of waiting
583 : * for bgw_restart_time to elapse. On the other hand, workers with that flag
584 : * should be forgotten immediately, since we won't ever restart them.
585 : *
586 : * This function should only be called from the postmaster.
587 : */
588 : void
589 10 : ResetBackgroundWorkerCrashTimes(void)
590 : {
591 : dlist_mutable_iter iter;
592 :
593 20 : dlist_foreach_modify(iter, &BackgroundWorkerList)
594 : {
595 : RegisteredBgWorker *rw;
596 :
597 10 : rw = dlist_container(RegisteredBgWorker, rw_lnode, iter.cur);
598 :
599 10 : if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART)
600 : {
601 : /*
602 : * Workers marked BGW_NEVER_RESTART shouldn't get relaunched after
603 : * the crash, so forget about them. (If we wait until after the
604 : * crash to forget about them, and they are parallel workers,
605 : * parallel_terminate_count will get incremented after we've
606 : * already zeroed parallel_register_count, which would be bad.)
607 : */
608 0 : ForgetBackgroundWorker(rw);
609 : }
610 : else
611 : {
612 : /*
613 : * The accounting which we do via parallel_register_count and
614 : * parallel_terminate_count would get messed up if a worker marked
615 : * parallel could survive a crash and restart cycle. All such
616 : * workers should be marked BGW_NEVER_RESTART, and thus control
617 : * should never reach this branch.
618 : */
619 : Assert((rw->rw_worker.bgw_flags & BGWORKER_CLASS_PARALLEL) == 0);
620 :
621 : /*
622 : * Allow this worker to be restarted immediately after we finish
623 : * resetting.
624 : */
625 10 : rw->rw_crashed_at = 0;
626 10 : rw->rw_pid = 0;
627 :
628 : /*
629 : * If there was anyone waiting for it, they're history.
630 : */
631 10 : rw->rw_worker.bgw_notify_pid = 0;
632 : }
633 : }
634 10 : }
635 :
636 : /*
637 : * Complain about the BackgroundWorker definition using error level elevel.
638 : * Return true if it looks ok, false if not (unless elevel >= ERROR, in
639 : * which case we won't return at all in the not-OK case).
640 : */
641 : static bool
642 5616 : SanityCheckBackgroundWorker(BackgroundWorker *worker, int elevel)
643 : {
644 : /* sanity check for flags */
645 :
646 : /*
647 : * We used to support workers not connected to shared memory, but don't
648 : * anymore. Thus this is a required flag now. We're not removing the flag
649 : * for compatibility reasons and because the flag still provides some
650 : * signal when reading code.
651 : */
652 5616 : if (!(worker->bgw_flags & BGWORKER_SHMEM_ACCESS))
653 : {
654 0 : ereport(elevel,
655 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
656 : errmsg("background worker \"%s\": background workers without shared memory access are not supported",
657 : worker->bgw_name)));
658 0 : return false;
659 : }
660 :
661 5616 : if (worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION)
662 : {
663 5598 : if (worker->bgw_start_time == BgWorkerStart_PostmasterStart)
664 : {
665 0 : ereport(elevel,
666 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
667 : errmsg("background worker \"%s\": cannot request database access if starting at postmaster start",
668 : worker->bgw_name)));
669 0 : return false;
670 : }
671 :
672 : /* XXX other checks? */
673 : }
674 :
675 : /* Interruptible workers require a database connection */
676 5616 : if ((worker->bgw_flags & BGWORKER_INTERRUPTIBLE) &&
677 8 : !(worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION))
678 : {
679 0 : ereport(elevel,
680 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
681 : errmsg("background worker \"%s\": cannot make background workers interruptible without database access",
682 : worker->bgw_name)));
683 0 : return false;
684 : }
685 :
686 5616 : if ((worker->bgw_restart_time < 0 &&
687 3888 : worker->bgw_restart_time != BGW_NEVER_RESTART) ||
688 5616 : (worker->bgw_restart_time > USECS_PER_DAY / 1000))
689 : {
690 0 : ereport(elevel,
691 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
692 : errmsg("background worker \"%s\": invalid restart interval",
693 : worker->bgw_name)));
694 0 : return false;
695 : }
696 :
697 : /*
698 : * Parallel workers may not be configured for restart, because the
699 : * parallel_register_count/parallel_terminate_count accounting can't
700 : * handle parallel workers lasting through a crash-and-restart cycle.
701 : */
702 5616 : if (worker->bgw_restart_time != BGW_NEVER_RESTART &&
703 1728 : (worker->bgw_flags & BGWORKER_CLASS_PARALLEL) != 0)
704 : {
705 0 : ereport(elevel,
706 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
707 : errmsg("background worker \"%s\": parallel workers may not be configured for restart",
708 : worker->bgw_name)));
709 0 : return false;
710 : }
711 :
712 : /*
713 : * If bgw_type is not filled in, use bgw_name.
714 : */
715 5616 : if (strcmp(worker->bgw_type, "") == 0)
716 0 : strcpy(worker->bgw_type, worker->bgw_name);
717 :
718 5616 : return true;
719 : }
720 :
721 : /*
722 : * Standard SIGTERM handler for background workers
723 : */
724 : static void
725 0 : bgworker_die(SIGNAL_ARGS)
726 : {
727 0 : sigprocmask(SIG_SETMASK, &BlockSig, NULL);
728 :
729 0 : ereport(FATAL,
730 : (errcode(ERRCODE_ADMIN_SHUTDOWN),
731 : errmsg("terminating background worker \"%s\" due to administrator command",
732 : MyBgworkerEntry->bgw_type)));
733 : }
734 :
735 : /*
736 : * Main entry point for background worker processes.
737 : */
738 : void
739 4992 : BackgroundWorkerMain(const void *startup_data, size_t startup_data_len)
740 : {
741 : sigjmp_buf local_sigjmp_buf;
742 : BackgroundWorker *worker;
743 : bgworker_main_type entrypt;
744 :
745 4992 : if (startup_data == NULL)
746 0 : elog(FATAL, "unable to find bgworker entry");
747 : Assert(startup_data_len == sizeof(BackgroundWorker));
748 4992 : worker = MemoryContextAlloc(TopMemoryContext, sizeof(BackgroundWorker));
749 4992 : memcpy(worker, startup_data, sizeof(BackgroundWorker));
750 :
751 : /*
752 : * Now that we're done reading the startup data, release postmaster's
753 : * working memory context.
754 : */
755 4992 : if (PostmasterContext)
756 : {
757 4992 : MemoryContextDelete(PostmasterContext);
758 4992 : PostmasterContext = NULL;
759 : }
760 :
761 4992 : MyBgworkerEntry = worker;
762 4992 : init_ps_display(worker->bgw_name);
763 :
764 : Assert(GetProcessingMode() == InitProcessing);
765 :
766 : /* Apply PostAuthDelay */
767 4992 : if (PostAuthDelay > 0)
768 0 : pg_usleep(PostAuthDelay * 1000000L);
769 :
770 : /*
771 : * Set up signal handlers.
772 : */
773 4992 : if (worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION)
774 : {
775 : /*
776 : * SIGINT is used to signal canceling the current action
777 : */
778 4974 : pqsignal(SIGINT, StatementCancelHandler);
779 4974 : pqsignal(SIGUSR1, procsignal_sigusr1_handler);
780 4974 : pqsignal(SIGFPE, FloatExceptionHandler);
781 :
782 : /* XXX Any other handlers needed here? */
783 : }
784 : else
785 : {
786 18 : pqsignal(SIGINT, SIG_IGN);
787 18 : pqsignal(SIGUSR1, SIG_IGN);
788 18 : pqsignal(SIGFPE, SIG_IGN);
789 : }
790 4992 : pqsignal(SIGTERM, bgworker_die);
791 : /* SIGQUIT handler was already set up by InitPostmasterChild */
792 4992 : pqsignal(SIGHUP, SIG_IGN);
793 :
794 4992 : InitializeTimeouts(); /* establishes SIGALRM handler */
795 :
796 4992 : pqsignal(SIGPIPE, SIG_IGN);
797 4992 : pqsignal(SIGUSR2, SIG_IGN);
798 4992 : pqsignal(SIGCHLD, SIG_DFL);
799 :
800 : /*
801 : * If an exception is encountered, processing resumes here.
802 : *
803 : * We just need to clean up, report the error, and go away.
804 : */
805 4992 : if (sigsetjmp(local_sigjmp_buf, 1) != 0)
806 : {
807 : /* Since not using PG_TRY, must reset error stack by hand */
808 266 : error_context_stack = NULL;
809 :
810 : /* Prevent interrupts while cleaning up */
811 266 : HOLD_INTERRUPTS();
812 :
813 : /*
814 : * sigsetjmp will have blocked all signals, but we may need to accept
815 : * signals while communicating with our parallel leader. Once we've
816 : * done HOLD_INTERRUPTS() it should be safe to unblock signals.
817 : */
818 266 : BackgroundWorkerUnblockSignals();
819 :
820 : /* Report the error to the parallel leader and the server log */
821 266 : EmitErrorReport();
822 :
823 : /*
824 : * Do we need more cleanup here? For shmem-connected bgworkers, we
825 : * will call InitProcess below, which will install ProcKill as exit
826 : * callback. That will take care of releasing locks, etc.
827 : */
828 :
829 : /* and go away */
830 266 : proc_exit(1);
831 : }
832 :
833 : /* We can now handle ereport(ERROR) */
834 4992 : PG_exception_stack = &local_sigjmp_buf;
835 :
836 : /*
837 : * Create a per-backend PGPROC struct in shared memory. We must do this
838 : * before we can use LWLocks or access any shared memory.
839 : */
840 4992 : InitProcess();
841 :
842 : /*
843 : * Early initialization.
844 : */
845 4992 : BaseInit();
846 :
847 : /*
848 : * Look up the entry point function, loading its library if necessary.
849 : */
850 9984 : entrypt = LookupBackgroundWorkerFunction(worker->bgw_library_name,
851 4992 : worker->bgw_function_name);
852 :
853 : /*
854 : * Note that in normal processes, we would call InitPostgres here. For a
855 : * worker, however, we don't know what database to connect to, yet; so we
856 : * need to wait until the user code does it via
857 : * BackgroundWorkerInitializeConnection().
858 : */
859 :
860 : /*
861 : * Now invoke the user-defined worker code
862 : */
863 4992 : entrypt(worker->bgw_main_arg);
864 :
865 : /* ... and if it returns, we're done */
866 2958 : proc_exit(0);
867 : }
868 :
869 : /*
870 : * Connect background worker to a database.
871 : */
872 : void
873 902 : BackgroundWorkerInitializeConnection(const char *dbname, const char *username, uint32 flags)
874 : {
875 902 : BackgroundWorker *worker = MyBgworkerEntry;
876 902 : bits32 init_flags = 0; /* never honor session_preload_libraries */
877 :
878 : /* ignore datallowconn and ACL_CONNECT? */
879 902 : if (flags & BGWORKER_BYPASS_ALLOWCONN)
880 0 : init_flags |= INIT_PG_OVERRIDE_ALLOW_CONNS;
881 : /* ignore rolcanlogin? */
882 902 : if (flags & BGWORKER_BYPASS_ROLELOGINCHECK)
883 0 : init_flags |= INIT_PG_OVERRIDE_ROLE_LOGIN;
884 :
885 : /* XXX is this the right errcode? */
886 902 : if (!(worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION))
887 0 : ereport(FATAL,
888 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
889 : errmsg("database connection requirement not indicated during registration")));
890 :
891 902 : InitPostgres(dbname, InvalidOid, /* database to connect to */
892 : username, InvalidOid, /* role to connect as */
893 : init_flags,
894 : NULL); /* no out_dbname */
895 :
896 : /* it had better not gotten out of "init" mode yet */
897 902 : if (!IsInitProcessingMode())
898 0 : ereport(ERROR,
899 : (errmsg("invalid processing mode in background worker")));
900 902 : SetProcessingMode(NormalProcessing);
901 902 : }
902 :
903 : /*
904 : * Connect background worker to a database using OIDs.
905 : */
906 : void
907 4072 : BackgroundWorkerInitializeConnectionByOid(Oid dboid, Oid useroid, uint32 flags)
908 : {
909 4072 : BackgroundWorker *worker = MyBgworkerEntry;
910 4072 : bits32 init_flags = 0; /* never honor session_preload_libraries */
911 :
912 : /* ignore datallowconn and ACL_CONNECT? */
913 4072 : if (flags & BGWORKER_BYPASS_ALLOWCONN)
914 2964 : init_flags |= INIT_PG_OVERRIDE_ALLOW_CONNS;
915 : /* ignore rolcanlogin? */
916 4072 : if (flags & BGWORKER_BYPASS_ROLELOGINCHECK)
917 2964 : init_flags |= INIT_PG_OVERRIDE_ROLE_LOGIN;
918 :
919 : /* XXX is this the right errcode? */
920 4072 : if (!(worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION))
921 0 : ereport(FATAL,
922 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
923 : errmsg("database connection requirement not indicated during registration")));
924 :
925 4072 : InitPostgres(NULL, dboid, /* database to connect to */
926 : NULL, useroid, /* role to connect as */
927 : init_flags,
928 : NULL); /* no out_dbname */
929 :
930 : /* it had better not gotten out of "init" mode yet */
931 4064 : if (!IsInitProcessingMode())
932 0 : ereport(ERROR,
933 : (errmsg("invalid processing mode in background worker")));
934 4064 : SetProcessingMode(NormalProcessing);
935 4064 : }
936 :
937 : /*
938 : * Block/unblock signals in a background worker
939 : */
940 : void
941 0 : BackgroundWorkerBlockSignals(void)
942 : {
943 0 : sigprocmask(SIG_SETMASK, &BlockSig, NULL);
944 0 : }
945 :
946 : void
947 5258 : BackgroundWorkerUnblockSignals(void)
948 : {
949 5258 : sigprocmask(SIG_SETMASK, &UnBlockSig, NULL);
950 5258 : }
951 :
952 : /*
953 : * Register a new static background worker.
954 : *
955 : * This can only be called directly from postmaster or in the _PG_init
956 : * function of a module library that's loaded by shared_preload_libraries;
957 : * otherwise it will have no effect.
958 : */
959 : void
960 1734 : RegisterBackgroundWorker(BackgroundWorker *worker)
961 : {
962 : RegisteredBgWorker *rw;
963 : static int numworkers = 0;
964 :
965 : /*
966 : * Static background workers can only be registered in the postmaster
967 : * process.
968 : */
969 1734 : if (IsUnderPostmaster || !IsPostmasterEnvironment)
970 : {
971 : /*
972 : * In EXEC_BACKEND or single-user mode, we process
973 : * shared_preload_libraries in backend processes too. We cannot
974 : * register static background workers at that stage, but many
975 : * libraries' _PG_init() functions don't distinguish whether they're
976 : * being loaded in the postmaster or in a backend, they just check
977 : * process_shared_preload_libraries_in_progress. It's a bit sloppy,
978 : * but for historical reasons we tolerate it. In EXEC_BACKEND mode,
979 : * the background workers should already have been registered when the
980 : * library was loaded in postmaster.
981 : */
982 0 : if (process_shared_preload_libraries_in_progress)
983 0 : return;
984 0 : ereport(LOG,
985 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
986 : errmsg("background worker \"%s\": must be registered in \"shared_preload_libraries\"",
987 : worker->bgw_name)));
988 0 : return;
989 : }
990 :
991 : /*
992 : * Cannot register static background workers after calling
993 : * BackgroundWorkerShmemInit().
994 : */
995 1734 : if (BackgroundWorkerData != NULL)
996 0 : elog(ERROR, "cannot register background worker \"%s\" after shmem init",
997 : worker->bgw_name);
998 :
999 1734 : ereport(DEBUG1,
1000 : (errmsg_internal("registering background worker \"%s\"", worker->bgw_name)));
1001 :
1002 1734 : if (!SanityCheckBackgroundWorker(worker, LOG))
1003 0 : return;
1004 :
1005 1734 : if (worker->bgw_notify_pid != 0)
1006 : {
1007 0 : ereport(LOG,
1008 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1009 : errmsg("background worker \"%s\": only dynamic background workers can request notification",
1010 : worker->bgw_name)));
1011 0 : return;
1012 : }
1013 :
1014 : /*
1015 : * Enforce maximum number of workers. Note this is overly restrictive: we
1016 : * could allow more non-shmem-connected workers, because these don't count
1017 : * towards the MAX_BACKENDS limit elsewhere. For now, it doesn't seem
1018 : * important to relax this restriction.
1019 : */
1020 1734 : if (++numworkers > max_worker_processes)
1021 : {
1022 0 : ereport(LOG,
1023 : (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
1024 : errmsg("too many background workers"),
1025 : errdetail_plural("Up to %d background worker can be registered with the current settings.",
1026 : "Up to %d background workers can be registered with the current settings.",
1027 : max_worker_processes,
1028 : max_worker_processes),
1029 : errhint("Consider increasing the configuration parameter \"%s\".", "max_worker_processes")));
1030 0 : return;
1031 : }
1032 :
1033 : /*
1034 : * Copy the registration data into the registered workers list.
1035 : */
1036 1734 : rw = MemoryContextAllocExtended(PostmasterContext,
1037 : sizeof(RegisteredBgWorker),
1038 : MCXT_ALLOC_NO_OOM);
1039 1734 : if (rw == NULL)
1040 : {
1041 0 : ereport(LOG,
1042 : (errcode(ERRCODE_OUT_OF_MEMORY),
1043 : errmsg("out of memory")));
1044 0 : return;
1045 : }
1046 :
1047 1734 : rw->rw_worker = *worker;
1048 1734 : rw->rw_pid = 0;
1049 1734 : rw->rw_crashed_at = 0;
1050 1734 : rw->rw_terminate = false;
1051 :
1052 1734 : dlist_push_head(&BackgroundWorkerList, &rw->rw_lnode);
1053 : }
1054 :
1055 : /*
1056 : * Register a new background worker from a regular backend.
1057 : *
1058 : * Returns true on success and false on failure. Failure typically indicates
1059 : * that no background worker slots are currently available.
1060 : *
1061 : * If handle != NULL, we'll set *handle to a pointer that can subsequently
1062 : * be used as an argument to GetBackgroundWorkerPid(). The caller can
1063 : * free this pointer using pfree(), if desired.
1064 : */
1065 : bool
1066 3882 : RegisterDynamicBackgroundWorker(BackgroundWorker *worker,
1067 : BackgroundWorkerHandle **handle)
1068 : {
1069 : int slotno;
1070 3882 : bool success = false;
1071 : bool parallel;
1072 3882 : uint64 generation = 0;
1073 :
1074 : /*
1075 : * We can't register dynamic background workers from the postmaster. If
1076 : * this is a standalone backend, we're the only process and can't start
1077 : * any more. In a multi-process environment, it might be theoretically
1078 : * possible, but we don't currently support it due to locking
1079 : * considerations; see comments on the BackgroundWorkerSlot data
1080 : * structure.
1081 : */
1082 3882 : if (!IsUnderPostmaster)
1083 0 : return false;
1084 :
1085 3882 : if (!SanityCheckBackgroundWorker(worker, ERROR))
1086 0 : return false;
1087 :
1088 3882 : parallel = (worker->bgw_flags & BGWORKER_CLASS_PARALLEL) != 0;
1089 :
1090 3882 : LWLockAcquire(BackgroundWorkerLock, LW_EXCLUSIVE);
1091 :
1092 : /*
1093 : * If this is a parallel worker, check whether there are already too many
1094 : * parallel workers; if so, don't register another one. Our view of
1095 : * parallel_terminate_count may be slightly stale, but that doesn't really
1096 : * matter: we would have gotten the same result if we'd arrived here
1097 : * slightly earlier anyway. There's no help for it, either, since the
1098 : * postmaster must not take locks; a memory barrier wouldn't guarantee
1099 : * anything useful.
1100 : */
1101 3882 : if (parallel && (BackgroundWorkerData->parallel_register_count -
1102 3000 : BackgroundWorkerData->parallel_terminate_count) >=
1103 : max_parallel_workers)
1104 : {
1105 : Assert(BackgroundWorkerData->parallel_register_count -
1106 : BackgroundWorkerData->parallel_terminate_count <=
1107 : MAX_PARALLEL_WORKER_LIMIT);
1108 20 : LWLockRelease(BackgroundWorkerLock);
1109 20 : return false;
1110 : }
1111 :
1112 : /*
1113 : * Look for an unused slot. If we find one, grab it.
1114 : */
1115 12394 : for (slotno = 0; slotno < BackgroundWorkerData->total_slots; ++slotno)
1116 : {
1117 12372 : BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno];
1118 :
1119 12372 : if (!slot->in_use)
1120 : {
1121 3840 : memcpy(&slot->worker, worker, sizeof(BackgroundWorker));
1122 3840 : slot->pid = InvalidPid; /* indicates not started yet */
1123 3840 : slot->generation++;
1124 3840 : slot->terminate = false;
1125 3840 : generation = slot->generation;
1126 3840 : if (parallel)
1127 2964 : BackgroundWorkerData->parallel_register_count++;
1128 :
1129 : /*
1130 : * Make sure postmaster doesn't see the slot as in use before it
1131 : * sees the new contents.
1132 : */
1133 3840 : pg_write_barrier();
1134 :
1135 3840 : slot->in_use = true;
1136 3840 : success = true;
1137 3840 : break;
1138 : }
1139 : }
1140 :
1141 3862 : LWLockRelease(BackgroundWorkerLock);
1142 :
1143 : /* If we found a slot, tell the postmaster to notice the change. */
1144 3862 : if (success)
1145 3840 : SendPostmasterSignal(PMSIGNAL_BACKGROUND_WORKER_CHANGE);
1146 :
1147 : /*
1148 : * If we found a slot and the user has provided a handle, initialize it.
1149 : */
1150 3862 : if (success && handle)
1151 : {
1152 3840 : *handle = palloc_object(BackgroundWorkerHandle);
1153 3840 : (*handle)->slot = slotno;
1154 3840 : (*handle)->generation = generation;
1155 : }
1156 :
1157 3862 : return success;
1158 : }
1159 :
1160 : /*
1161 : * Get the PID of a dynamically-registered background worker.
1162 : *
1163 : * If the worker is determined to be running, the return value will be
1164 : * BGWH_STARTED and *pidp will get the PID of the worker process. If the
1165 : * postmaster has not yet attempted to start the worker, the return value will
1166 : * be BGWH_NOT_YET_STARTED. Otherwise, the return value is BGWH_STOPPED.
1167 : *
1168 : * BGWH_STOPPED can indicate either that the worker is temporarily stopped
1169 : * (because it is configured for automatic restart and exited non-zero),
1170 : * or that the worker is permanently stopped (because it exited with exit
1171 : * code 0, or was not configured for automatic restart), or even that the
1172 : * worker was unregistered without ever starting (either because startup
1173 : * failed and the worker is not configured for automatic restart, or because
1174 : * TerminateBackgroundWorker was used before the worker was successfully
1175 : * started).
1176 : */
1177 : BgwHandleStatus
1178 21506708 : GetBackgroundWorkerPid(BackgroundWorkerHandle *handle, pid_t *pidp)
1179 : {
1180 : BackgroundWorkerSlot *slot;
1181 : pid_t pid;
1182 :
1183 : Assert(handle->slot < max_worker_processes);
1184 21506708 : slot = &BackgroundWorkerData->slot[handle->slot];
1185 :
1186 : /*
1187 : * We could probably arrange to synchronize access to data using memory
1188 : * barriers only, but for now, let's just keep it simple and grab the
1189 : * lock. It seems unlikely that there will be enough traffic here to
1190 : * result in meaningful contention.
1191 : */
1192 21506708 : LWLockAcquire(BackgroundWorkerLock, LW_SHARED);
1193 :
1194 : /*
1195 : * The generation number can't be concurrently changed while we hold the
1196 : * lock. The pid, which is updated by the postmaster, can change at any
1197 : * time, but we assume such changes are atomic. So the value we read
1198 : * won't be garbage, but it might be out of date by the time the caller
1199 : * examines it (but that's unavoidable anyway).
1200 : *
1201 : * The in_use flag could be in the process of changing from true to false,
1202 : * but if it is already false then it can't change further.
1203 : */
1204 21506708 : if (handle->generation != slot->generation || !slot->in_use)
1205 2966 : pid = 0;
1206 : else
1207 21503742 : pid = slot->pid;
1208 :
1209 : /* All done. */
1210 21506708 : LWLockRelease(BackgroundWorkerLock);
1211 :
1212 21506708 : if (pid == 0)
1213 2966 : return BGWH_STOPPED;
1214 21503742 : else if (pid == InvalidPid)
1215 796788 : return BGWH_NOT_YET_STARTED;
1216 20706954 : *pidp = pid;
1217 20706954 : return BGWH_STARTED;
1218 : }
1219 :
1220 : /*
1221 : * Wait for a background worker to start up.
1222 : *
1223 : * This is like GetBackgroundWorkerPid(), except that if the worker has not
1224 : * yet started, we wait for it to do so; thus, BGWH_NOT_YET_STARTED is never
1225 : * returned. However, if the postmaster has died, we give up and return
1226 : * BGWH_POSTMASTER_DIED, since it that case we know that startup will not
1227 : * take place.
1228 : *
1229 : * The caller *must* have set our PID as the worker's bgw_notify_pid,
1230 : * else we will not be awoken promptly when the worker's state changes.
1231 : */
1232 : BgwHandleStatus
1233 24 : WaitForBackgroundWorkerStartup(BackgroundWorkerHandle *handle, pid_t *pidp)
1234 : {
1235 : BgwHandleStatus status;
1236 : int rc;
1237 :
1238 : for (;;)
1239 8 : {
1240 : pid_t pid;
1241 :
1242 32 : CHECK_FOR_INTERRUPTS();
1243 :
1244 32 : status = GetBackgroundWorkerPid(handle, &pid);
1245 32 : if (status == BGWH_STARTED)
1246 24 : *pidp = pid;
1247 32 : if (status != BGWH_NOT_YET_STARTED)
1248 24 : break;
1249 :
1250 8 : rc = WaitLatch(MyLatch,
1251 : WL_LATCH_SET | WL_POSTMASTER_DEATH, 0,
1252 : WAIT_EVENT_BGWORKER_STARTUP);
1253 :
1254 8 : if (rc & WL_POSTMASTER_DEATH)
1255 : {
1256 0 : status = BGWH_POSTMASTER_DIED;
1257 0 : break;
1258 : }
1259 :
1260 8 : ResetLatch(MyLatch);
1261 : }
1262 :
1263 24 : return status;
1264 : }
1265 :
1266 : /*
1267 : * Wait for a background worker to stop.
1268 : *
1269 : * If the worker hasn't yet started, or is running, we wait for it to stop
1270 : * and then return BGWH_STOPPED. However, if the postmaster has died, we give
1271 : * up and return BGWH_POSTMASTER_DIED, because it's the postmaster that
1272 : * notifies us when a worker's state changes.
1273 : *
1274 : * The caller *must* have set our PID as the worker's bgw_notify_pid,
1275 : * else we will not be awoken promptly when the worker's state changes.
1276 : */
1277 : BgwHandleStatus
1278 2966 : WaitForBackgroundWorkerShutdown(BackgroundWorkerHandle *handle)
1279 : {
1280 : BgwHandleStatus status;
1281 : int rc;
1282 :
1283 : for (;;)
1284 4232 : {
1285 : pid_t pid;
1286 :
1287 7198 : CHECK_FOR_INTERRUPTS();
1288 :
1289 7198 : status = GetBackgroundWorkerPid(handle, &pid);
1290 7198 : if (status == BGWH_STOPPED)
1291 2966 : break;
1292 :
1293 4232 : rc = WaitLatch(MyLatch,
1294 : WL_LATCH_SET | WL_POSTMASTER_DEATH, 0,
1295 : WAIT_EVENT_BGWORKER_SHUTDOWN);
1296 :
1297 4232 : if (rc & WL_POSTMASTER_DEATH)
1298 : {
1299 0 : status = BGWH_POSTMASTER_DIED;
1300 0 : break;
1301 : }
1302 :
1303 4232 : ResetLatch(MyLatch);
1304 : }
1305 :
1306 2966 : return status;
1307 : }
1308 :
1309 : /*
1310 : * Instruct the postmaster to terminate a background worker.
1311 : *
1312 : * Note that it's safe to do this without regard to whether the worker is
1313 : * still running, or even if the worker may already have exited and been
1314 : * unregistered.
1315 : */
1316 : void
1317 12 : TerminateBackgroundWorker(BackgroundWorkerHandle *handle)
1318 : {
1319 : BackgroundWorkerSlot *slot;
1320 12 : bool signal_postmaster = false;
1321 :
1322 : Assert(handle->slot < max_worker_processes);
1323 12 : slot = &BackgroundWorkerData->slot[handle->slot];
1324 :
1325 : /* Set terminate flag in shared memory, unless slot has been reused. */
1326 12 : LWLockAcquire(BackgroundWorkerLock, LW_EXCLUSIVE);
1327 12 : if (handle->generation == slot->generation)
1328 : {
1329 12 : slot->terminate = true;
1330 12 : signal_postmaster = true;
1331 : }
1332 12 : LWLockRelease(BackgroundWorkerLock);
1333 :
1334 : /* Make sure the postmaster notices the change to shared memory. */
1335 12 : if (signal_postmaster)
1336 12 : SendPostmasterSignal(PMSIGNAL_BACKGROUND_WORKER_CHANGE);
1337 12 : }
1338 :
1339 : /*
1340 : * Look up (and possibly load) a bgworker entry point function.
1341 : *
1342 : * For functions contained in the core code, we use library name "postgres"
1343 : * and consult the InternalBGWorkers array. External functions are
1344 : * looked up, and loaded if necessary, using load_external_function().
1345 : *
1346 : * The point of this is to pass function names as strings across process
1347 : * boundaries. We can't pass actual function addresses because of the
1348 : * possibility that the function has been loaded at a different address
1349 : * in a different process. This is obviously a hazard for functions in
1350 : * loadable libraries, but it can happen even for functions in the core code
1351 : * on platforms using EXEC_BACKEND (e.g., Windows).
1352 : *
1353 : * At some point it might be worthwhile to get rid of InternalBGWorkers[]
1354 : * in favor of applying load_external_function() for core functions too;
1355 : * but that raises portability issues that are not worth addressing now.
1356 : */
1357 : static bgworker_main_type
1358 4992 : LookupBackgroundWorkerFunction(const char *libraryname, const char *funcname)
1359 : {
1360 : /*
1361 : * If the function is to be loaded from postgres itself, search the
1362 : * InternalBGWorkers array.
1363 : */
1364 4992 : if (strcmp(libraryname, "postgres") == 0)
1365 : {
1366 : int i;
1367 :
1368 8916 : for (i = 0; i < lengthof(InternalBGWorkers); i++)
1369 : {
1370 8916 : if (strcmp(InternalBGWorkers[i].fn_name, funcname) == 0)
1371 4956 : return InternalBGWorkers[i].fn_addr;
1372 : }
1373 :
1374 : /* We can only reach this by programming error. */
1375 0 : elog(ERROR, "internal function \"%s\" not found", funcname);
1376 : }
1377 :
1378 : /* Otherwise load from external library. */
1379 36 : return (bgworker_main_type)
1380 36 : load_external_function(libraryname, funcname, true, NULL);
1381 : }
1382 :
1383 : /*
1384 : * Given a PID, get the bgw_type of the background worker. Returns NULL if
1385 : * not a valid background worker.
1386 : *
1387 : * The return value is in static memory belonging to this function, so it has
1388 : * to be used before calling this function again. This is so that the caller
1389 : * doesn't have to worry about the background worker locking protocol.
1390 : */
1391 : const char *
1392 2002 : GetBackgroundWorkerTypeByPid(pid_t pid)
1393 : {
1394 : int slotno;
1395 2002 : bool found = false;
1396 : static char result[BGW_MAXLEN];
1397 :
1398 2002 : LWLockAcquire(BackgroundWorkerLock, LW_SHARED);
1399 :
1400 2330 : for (slotno = 0; slotno < BackgroundWorkerData->total_slots; slotno++)
1401 : {
1402 2330 : BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno];
1403 :
1404 2330 : if (slot->pid > 0 && slot->pid == pid)
1405 : {
1406 2002 : strcpy(result, slot->worker.bgw_type);
1407 2002 : found = true;
1408 2002 : break;
1409 : }
1410 : }
1411 :
1412 2002 : LWLockRelease(BackgroundWorkerLock);
1413 :
1414 2002 : if (!found)
1415 0 : return NULL;
1416 :
1417 2002 : return result;
1418 : }
1419 :
1420 : /*
1421 : * Terminate all background workers connected to the given database, if the
1422 : * workers can be interrupted.
1423 : */
1424 : void
1425 14 : TerminateBackgroundWorkersForDatabase(Oid databaseId)
1426 : {
1427 14 : bool signal_postmaster = false;
1428 :
1429 14 : LWLockAcquire(BackgroundWorkerLock, LW_EXCLUSIVE);
1430 :
1431 : /*
1432 : * Iterate through slots, looking for workers connected to the given
1433 : * database.
1434 : */
1435 126 : for (int slotno = 0; slotno < BackgroundWorkerData->total_slots; slotno++)
1436 : {
1437 112 : BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno];
1438 :
1439 112 : if (slot->in_use &&
1440 28 : (slot->worker.bgw_flags & BGWORKER_INTERRUPTIBLE))
1441 : {
1442 8 : PGPROC *proc = BackendPidGetProc(slot->pid);
1443 :
1444 8 : if (proc && proc->databaseId == databaseId)
1445 : {
1446 8 : slot->terminate = true;
1447 8 : signal_postmaster = true;
1448 : }
1449 : }
1450 : }
1451 :
1452 14 : LWLockRelease(BackgroundWorkerLock);
1453 :
1454 : /* Make sure the postmaster notices the change to shared memory. */
1455 14 : if (signal_postmaster)
1456 8 : SendPostmasterSignal(PMSIGNAL_BACKGROUND_WORKER_CHANGE);
1457 14 : }
|