Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * condition_variable.c
4 : * Implementation of condition variables. Condition variables provide
5 : * a way for one process to wait until a specific condition occurs,
6 : * without needing to know the specific identity of the process for
7 : * which they are waiting. Waits for condition variables can be
8 : * interrupted, unlike LWLock waits. Condition variables are safe
9 : * to use within dynamic shared memory segments.
10 : *
11 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
12 : * Portions Copyright (c) 1994, Regents of the University of California
13 : *
14 : * src/backend/storage/lmgr/condition_variable.c
15 : *
16 : *-------------------------------------------------------------------------
17 : */
18 :
19 : #include "postgres.h"
20 :
21 : #include <limits.h>
22 :
23 : #include "miscadmin.h"
24 : #include "portability/instr_time.h"
25 : #include "storage/condition_variable.h"
26 : #include "storage/proc.h"
27 : #include "storage/proclist.h"
28 : #include "storage/spin.h"
29 :
30 : /* Initially, we are not prepared to sleep on any condition variable. */
31 : static ConditionVariable *cv_sleep_target = NULL;
32 :
33 : /*
34 : * Initialize a condition variable.
35 : */
36 : void
37 34317016 : ConditionVariableInit(ConditionVariable *cv)
38 : {
39 34317016 : SpinLockInit(&cv->mutex);
40 34317016 : proclist_init(&cv->wakeup);
41 34317016 : }
42 :
43 : /*
44 : * Prepare to wait on a given condition variable.
45 : *
46 : * This can optionally be called before entering a test/sleep loop.
47 : * Doing so is more efficient if we'll need to sleep at least once.
48 : * However, if the first test of the exit condition is likely to succeed,
49 : * it's more efficient to omit the ConditionVariablePrepareToSleep call.
50 : * See comments in ConditionVariableSleep for more detail.
51 : *
52 : * Caution: "before entering the loop" means you *must* test the exit
53 : * condition between calling ConditionVariablePrepareToSleep and calling
54 : * ConditionVariableSleep. If that is inconvenient, omit calling
55 : * ConditionVariablePrepareToSleep.
56 : */
57 : void
58 510544 : ConditionVariablePrepareToSleep(ConditionVariable *cv)
59 : {
60 510544 : int pgprocno = MyProcNumber;
61 :
62 : /*
63 : * If some other sleep is already prepared, cancel it; this is necessary
64 : * because we have just one static variable tracking the prepared sleep,
65 : * and also only one cvWaitLink in our PGPROC. It's okay to do this
66 : * because whenever control does return to the other test-and-sleep loop,
67 : * its ConditionVariableSleep call will just re-establish that sleep as
68 : * the prepared one.
69 : */
70 510544 : if (cv_sleep_target != NULL)
71 3264 : ConditionVariableCancelSleep();
72 :
73 : /* Record the condition variable on which we will sleep. */
74 510544 : cv_sleep_target = cv;
75 :
76 : /* Add myself to the wait queue. */
77 510544 : SpinLockAcquire(&cv->mutex);
78 510544 : proclist_push_tail(&cv->wakeup, pgprocno, cvWaitLink);
79 510544 : SpinLockRelease(&cv->mutex);
80 510544 : }
81 :
82 : /*
83 : * Wait for the given condition variable to be signaled.
84 : *
85 : * This should be called in a predicate loop that tests for a specific exit
86 : * condition and otherwise sleeps, like so:
87 : *
88 : * ConditionVariablePrepareToSleep(cv); // optional
89 : * while (condition for which we are waiting is not true)
90 : * ConditionVariableSleep(cv, wait_event_info);
91 : * ConditionVariableCancelSleep();
92 : *
93 : * wait_event_info should be a value from one of the WaitEventXXX enums
94 : * defined in pgstat.h. This controls the contents of pg_stat_activity's
95 : * wait_event_type and wait_event columns while waiting.
96 : */
97 : void
98 266280 : ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
99 : {
100 266280 : (void) ConditionVariableTimedSleep(cv, -1 /* no timeout */ ,
101 : wait_event_info);
102 266278 : }
103 :
104 : /*
105 : * Wait for a condition variable to be signaled or a timeout to be reached.
106 : *
107 : * The "timeout" is given in milliseconds.
108 : *
109 : * Returns true when timeout expires, otherwise returns false.
110 : *
111 : * See ConditionVariableSleep() for general usage.
112 : */
113 : bool
114 267054 : ConditionVariableTimedSleep(ConditionVariable *cv, long timeout,
115 : uint32 wait_event_info)
116 : {
117 267054 : long cur_timeout = -1;
118 : instr_time start_time;
119 : instr_time cur_time;
120 : int wait_events;
121 :
122 : /*
123 : * If the caller didn't prepare to sleep explicitly, then do so now and
124 : * return immediately. The caller's predicate loop should immediately
125 : * call again if its exit condition is not yet met. This will result in
126 : * the exit condition being tested twice before we first sleep. The extra
127 : * test can be prevented by calling ConditionVariablePrepareToSleep(cv)
128 : * first. Whether it's worth doing that depends on whether you expect the
129 : * exit condition to be met initially, in which case skipping the prepare
130 : * is recommended because it avoids manipulations of the wait list, or not
131 : * met initially, in which case preparing first is better because it
132 : * avoids one extra test of the exit condition.
133 : *
134 : * If we are currently prepared to sleep on some other CV, we just cancel
135 : * that and prepare this one; see ConditionVariablePrepareToSleep.
136 : */
137 267054 : if (cv_sleep_target != cv)
138 : {
139 506 : ConditionVariablePrepareToSleep(cv);
140 506 : return false;
141 : }
142 :
143 : /*
144 : * Record the current time so that we can calculate the remaining timeout
145 : * if we are woken up spuriously.
146 : */
147 266548 : if (timeout >= 0)
148 : {
149 388 : INSTR_TIME_SET_CURRENT(start_time);
150 : Assert(timeout >= 0 && timeout <= INT_MAX);
151 388 : cur_timeout = timeout;
152 388 : wait_events = WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH;
153 : }
154 : else
155 266160 : wait_events = WL_LATCH_SET | WL_EXIT_ON_PM_DEATH;
156 :
157 : while (true)
158 4902 : {
159 271450 : bool done = false;
160 :
161 : /*
162 : * Wait for latch to be set. (If we're awakened for some other
163 : * reason, the code below will cope anyway.)
164 : */
165 271450 : (void) WaitLatch(MyLatch, wait_events, cur_timeout, wait_event_info);
166 :
167 : /* Reset latch before examining the state of the wait list. */
168 271450 : ResetLatch(MyLatch);
169 :
170 : /*
171 : * If this process has been taken out of the wait list, then we know
172 : * that it has been signaled by ConditionVariableSignal (or
173 : * ConditionVariableBroadcast), so we should return to the caller. But
174 : * that doesn't guarantee that the exit condition is met, only that we
175 : * ought to check it. So we must put the process back into the wait
176 : * list, to ensure we don't miss any additional wakeup occurring while
177 : * the caller checks its exit condition. We can take ourselves out of
178 : * the wait list only when the caller calls
179 : * ConditionVariableCancelSleep.
180 : *
181 : * If we're still in the wait list, then the latch must have been set
182 : * by something other than ConditionVariableSignal; though we don't
183 : * guarantee not to return spuriously, we'll avoid this obvious case.
184 : */
185 271450 : SpinLockAcquire(&cv->mutex);
186 271450 : if (!proclist_contains(&cv->wakeup, MyProcNumber, cvWaitLink))
187 : {
188 266382 : done = true;
189 266382 : proclist_push_tail(&cv->wakeup, MyProcNumber, cvWaitLink);
190 : }
191 271450 : SpinLockRelease(&cv->mutex);
192 :
193 : /*
194 : * Check for interrupts, and return spuriously if that caused the
195 : * current sleep target to change (meaning that interrupt handler code
196 : * waited for a different condition variable).
197 : */
198 271450 : CHECK_FOR_INTERRUPTS();
199 271448 : if (cv != cv_sleep_target)
200 164 : done = true;
201 :
202 : /* We were signaled, so return */
203 271448 : if (done)
204 266540 : return false;
205 :
206 : /* If we're not done, update cur_timeout for next iteration */
207 4908 : if (timeout >= 0)
208 : {
209 52 : INSTR_TIME_SET_CURRENT(cur_time);
210 52 : INSTR_TIME_SUBTRACT(cur_time, start_time);
211 52 : cur_timeout = timeout - (long) INSTR_TIME_GET_MILLISEC(cur_time);
212 :
213 : /* Have we crossed the timeout threshold? */
214 52 : if (cur_timeout <= 0)
215 6 : return true;
216 : }
217 : }
218 : }
219 :
220 : /*
221 : * Cancel any pending sleep operation.
222 : *
223 : * We just need to remove ourselves from the wait queue of any condition
224 : * variable for which we have previously prepared a sleep.
225 : *
226 : * Do nothing if nothing is pending; this allows this function to be called
227 : * during transaction abort to clean up any unfinished CV sleep.
228 : *
229 : * Return true if we've been signaled.
230 : */
231 : bool
232 646936 : ConditionVariableCancelSleep(void)
233 : {
234 646936 : ConditionVariable *cv = cv_sleep_target;
235 646936 : bool signaled = false;
236 :
237 646936 : if (cv == NULL)
238 136392 : return false;
239 :
240 510544 : SpinLockAcquire(&cv->mutex);
241 510544 : if (proclist_contains(&cv->wakeup, MyProcNumber, cvWaitLink))
242 464806 : proclist_delete(&cv->wakeup, MyProcNumber, cvWaitLink);
243 : else
244 45738 : signaled = true;
245 510544 : SpinLockRelease(&cv->mutex);
246 :
247 510544 : cv_sleep_target = NULL;
248 :
249 510544 : return signaled;
250 : }
251 :
252 : /*
253 : * Wake up the oldest process sleeping on the CV, if there is any.
254 : *
255 : * Note: it's difficult to tell whether this has any real effect: we know
256 : * whether we took an entry off the list, but the entry might only be a
257 : * sentinel. Hence, think twice before proposing that this should return
258 : * a flag telling whether it woke somebody.
259 : */
260 : void
261 1652 : ConditionVariableSignal(ConditionVariable *cv)
262 : {
263 1652 : PGPROC *proc = NULL;
264 :
265 : /* Remove the first process from the wakeup queue (if any). */
266 1652 : SpinLockAcquire(&cv->mutex);
267 1652 : if (!proclist_is_empty(&cv->wakeup))
268 118 : proc = proclist_pop_head_node(&cv->wakeup, cvWaitLink);
269 1652 : SpinLockRelease(&cv->mutex);
270 :
271 : /* If we found someone sleeping, set their latch to wake them up. */
272 1652 : if (proc != NULL)
273 118 : SetLatch(&proc->procLatch);
274 1652 : }
275 :
276 : /*
277 : * Wake up all processes sleeping on the given CV.
278 : *
279 : * This guarantees to wake all processes that were sleeping on the CV
280 : * at time of call, but processes that add themselves to the list mid-call
281 : * will typically not get awakened.
282 : */
283 : void
284 12642452 : ConditionVariableBroadcast(ConditionVariable *cv)
285 : {
286 12642452 : int pgprocno = MyProcNumber;
287 12642452 : PGPROC *proc = NULL;
288 12642452 : bool have_sentinel = false;
289 :
290 : /*
291 : * In some use-cases, it is common for awakened processes to immediately
292 : * re-queue themselves. If we just naively try to reduce the wakeup list
293 : * to empty, we'll get into a potentially-indefinite loop against such a
294 : * process. The semantics we really want are just to be sure that we have
295 : * wakened all processes that were in the list at entry. We can use our
296 : * own cvWaitLink as a sentinel to detect when we've finished.
297 : *
298 : * A seeming flaw in this approach is that someone else might signal the
299 : * CV and in doing so remove our sentinel entry. But that's fine: since
300 : * CV waiters are always added and removed in order, that must mean that
301 : * every previous waiter has been wakened, so we're done. We'll get an
302 : * extra "set" on our latch from the someone else's signal, which is
303 : * slightly inefficient but harmless.
304 : *
305 : * We can't insert our cvWaitLink as a sentinel if it's already in use in
306 : * some other proclist. While that's not expected to be true for typical
307 : * uses of this function, we can deal with it by simply canceling any
308 : * prepared CV sleep. The next call to ConditionVariableSleep will take
309 : * care of re-establishing the lost state.
310 : */
311 12642452 : if (cv_sleep_target != NULL)
312 184 : ConditionVariableCancelSleep();
313 :
314 : /*
315 : * Inspect the state of the queue. If it's empty, we have nothing to do.
316 : * If there's exactly one entry, we need only remove and signal that
317 : * entry. Otherwise, remove the first entry and insert our sentinel.
318 : */
319 12642452 : SpinLockAcquire(&cv->mutex);
320 : /* While we're here, let's assert we're not in the list. */
321 : Assert(!proclist_contains(&cv->wakeup, pgprocno, cvWaitLink));
322 :
323 12642452 : if (!proclist_is_empty(&cv->wakeup))
324 : {
325 292618 : proc = proclist_pop_head_node(&cv->wakeup, cvWaitLink);
326 292618 : if (!proclist_is_empty(&cv->wakeup))
327 : {
328 1984 : proclist_push_tail(&cv->wakeup, pgprocno, cvWaitLink);
329 1984 : have_sentinel = true;
330 : }
331 : }
332 12642452 : SpinLockRelease(&cv->mutex);
333 :
334 : /* Awaken first waiter, if there was one. */
335 12642452 : if (proc != NULL)
336 292618 : SetLatch(&proc->procLatch);
337 :
338 12646520 : while (have_sentinel)
339 : {
340 : /*
341 : * Each time through the loop, remove the first wakeup list entry, and
342 : * signal it unless it's our sentinel. Repeat as long as the sentinel
343 : * remains in the list.
344 : *
345 : * Notice that if someone else removes our sentinel, we will waken one
346 : * additional process before exiting. That's intentional, because if
347 : * someone else signals the CV, they may be intending to waken some
348 : * third process that added itself to the list after we added the
349 : * sentinel. Better to give a spurious wakeup (which should be
350 : * harmless beyond wasting some cycles) than to lose a wakeup.
351 : */
352 4068 : proc = NULL;
353 4068 : SpinLockAcquire(&cv->mutex);
354 4068 : if (!proclist_is_empty(&cv->wakeup))
355 4068 : proc = proclist_pop_head_node(&cv->wakeup, cvWaitLink);
356 4068 : have_sentinel = proclist_contains(&cv->wakeup, pgprocno, cvWaitLink);
357 4068 : SpinLockRelease(&cv->mutex);
358 :
359 4068 : if (proc != NULL && proc != MyProc)
360 2084 : SetLatch(&proc->procLatch);
361 : }
362 12642452 : }
|