/*-------------------------------------------------------------------------
 *
 * heapam.h
 *	  POSTGRES heap access method definitions.
 *
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/access/heapam.h
 *
 *-------------------------------------------------------------------------
 */
#ifndef HEAPAM_H
#define HEAPAM_H

#include "access/heapam_xlog.h"
#include "access/relation.h"	/* for backward compatibility */
#include "access/relscan.h"
#include "access/sdir.h"
#include "access/skey.h"
#include "access/table.h"		/* for backward compatibility */
#include "access/tableam.h"
#include "nodes/lockoptions.h"
#include "nodes/primnodes.h"
#include "storage/bufpage.h"
#include "storage/dsm.h"
#include "storage/lockdefs.h"
#include "storage/read_stream.h"
#include "storage/shm_toc.h"
#include "utils/relcache.h"
#include "utils/snapshot.h"


/* "options" flag bits for heap_insert */
#define HEAP_INSERT_SKIP_FSM	TABLE_INSERT_SKIP_FSM
#define HEAP_INSERT_FROZEN		TABLE_INSERT_FROZEN
#define HEAP_INSERT_NO_LOGICAL	TABLE_INSERT_NO_LOGICAL
#define HEAP_INSERT_SPECULATIVE 0x0010

/* "options" flag bits for heap_page_prune_and_freeze */
#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW (1 << 0)
#define HEAP_PAGE_PRUNE_FREEZE			(1 << 1)

typedef struct BulkInsertStateData *BulkInsertState;
struct TupleTableSlot;
struct VacuumCutoffs;

#define MaxLockTupleMode	LockTupleExclusive

/*
 * Descriptor for heap table scans.
 */
typedef struct HeapScanDescData
{
	TableScanDescData rs_base;	/* AM independent part of the descriptor */

	/* state set up at initscan time */
	BlockNumber rs_nblocks;		/* total number of blocks in rel */
	BlockNumber rs_startblock;	/* block # to start at */
	BlockNumber rs_numblocks;	/* max number of blocks to scan */
	/* rs_numblocks is usually InvalidBlockNumber, meaning "scan whole rel" */

	/* scan current state */
	bool		rs_inited;		/* false = scan not init'd yet */
	OffsetNumber rs_coffset;	/* current offset # in non-page-at-a-time mode */
	BlockNumber rs_cblock;		/* current block # in scan, if any */
	Buffer		rs_cbuf;		/* current buffer in scan, if any */
	/* NB: if rs_cbuf is not InvalidBuffer, we hold a pin on that buffer */

	BufferAccessStrategy rs_strategy;	/* access strategy for reads */

	HeapTupleData rs_ctup;		/* current tuple in scan, if any */

	/* For scans that stream reads */
	ReadStream *rs_read_stream;

	/*
	 * For sequential scans and TID range scans to stream reads.  The read
	 * stream is allocated at the beginning of the scan and reset on rescan
	 * or when the scan direction changes.  The scan direction is saved each
	 * time a new page is requested.  If the scan direction changes from one
	 * page to the next, the read stream releases all previously pinned
	 * buffers and resets the prefetch block.
	 */
	ScanDirection rs_dir;
	BlockNumber rs_prefetch_block;

	/*
	 * For parallel scans to store page allocation data.  NULL when not
	 * performing a parallel scan.
	 */
	ParallelBlockTableScanWorkerData *rs_parallelworkerdata;

	/* these fields only used in page-at-a-time mode and for bitmap scans */
	uint32		rs_cindex;		/* current tuple's index in vistuples */
	uint32		rs_ntuples;		/* number of visible tuples on page */
	OffsetNumber rs_vistuples[MaxHeapTuplesPerPage];	/* their offsets */
} HeapScanDescData;
typedef struct HeapScanDescData *HeapScanDesc;

typedef struct BitmapHeapScanDescData
{
	HeapScanDescData rs_heap_base;

	/* Holds no data */
} BitmapHeapScanDescData;
typedef struct BitmapHeapScanDescData *BitmapHeapScanDesc;

/*
 * Descriptor for fetches from heap via an index.
 */
typedef struct IndexFetchHeapData
{
	IndexFetchTableData xs_base;	/* AM independent part of the descriptor */

	Buffer		xs_cbuf;		/* current heap buffer in scan, if any */
	/* NB: if xs_cbuf is not InvalidBuffer, we hold a pin on that buffer */
} IndexFetchHeapData;

/* Result codes for HeapTupleSatisfiesVacuum */
typedef enum
{
	HEAPTUPLE_DEAD,				/* tuple is dead and deletable */
	HEAPTUPLE_LIVE,				/* tuple is live (committed, no deleter) */
	HEAPTUPLE_RECENTLY_DEAD,	/* tuple is dead, but not deletable yet */
	HEAPTUPLE_INSERT_IN_PROGRESS,	/* inserting xact is still in progress */
	HEAPTUPLE_DELETE_IN_PROGRESS,	/* deleting xact is still in progress */
} HTSV_Result;

/*
 * heap_prepare_freeze_tuple may request that heap_freeze_execute_prepared
 * check any tuple's to-be-frozen xmin and/or xmax status using pg_xact
 */
#define HEAP_FREEZE_CHECK_XMIN_COMMITTED	0x01
#define HEAP_FREEZE_CHECK_XMAX_ABORTED		0x02

/* heap_prepare_freeze_tuple state describing how to freeze a tuple */
typedef struct HeapTupleFreeze
{
	/* Fields describing how to process tuple */
	TransactionId xmax;
	uint16		t_infomask2;
	uint16		t_infomask;
	uint8		frzflags;

	/* xmin/xmax check flags */
	uint8		checkflags;
	/* Page offset number for tuple */
	OffsetNumber offset;
} HeapTupleFreeze;

/*
 * State used by VACUUM to track the details of freezing all eligible tuples
 * on a given heap page.
 *
 * VACUUM prepares freeze plans for each page via heap_prepare_freeze_tuple
 * calls (every tuple with storage gets its own call).  This page-level
 * freeze state is updated across each call, which ultimately determines
 * whether or not freezing the page is required.
 *
 * Aside from the basic question of whether or not freezing will go ahead,
 * the state also tracks the oldest extant XID/MXID in the table as a whole,
 * for the purposes of advancing relfrozenxid/relminmxid values in pg_class
 * later on.  Each heap_prepare_freeze_tuple call pushes NewRelfrozenXid
 * and/or NewRelminMxid back as required to avoid unsafe final pg_class
 * values.  Any and all unfrozen XIDs or MXIDs that remain after VACUUM
 * finishes _must_ have values >= the final relfrozenxid/relminmxid values
 * in pg_class.  This includes XIDs that remain as MultiXact members from
 * any tuple's xmax.
 *
 * When the 'freeze_required' flag isn't set after all tuples are examined,
 * the final choice on freezing is made by vacuumlazy.c.  It can decide to
 * trigger freezing based on whatever criteria it deems appropriate.
 * However, it is recommended that vacuumlazy.c avoid early freezing when
 * freezing does not enable setting the target page all-frozen in the
 * visibility map afterwards.
 */
typedef struct HeapPageFreeze
{
	/* Is heap_prepare_freeze_tuple caller required to freeze page? */
	bool		freeze_required;

	/*
	 * "Freeze" NewRelfrozenXid/NewRelminMxid trackers.
	 *
	 * Trackers used when heap_freeze_execute_prepared freezes, or when
	 * there are zero freeze plans for a page.  It is always valid for
	 * vacuumlazy.c to freeze any page, by definition.  This even includes
	 * pages that have no tuples with storage to consider in the first
	 * place.  That way the 'totally_frozen' results from
	 * heap_prepare_freeze_tuple can always be used in the same way, even
	 * when no freeze plans need to be executed to "freeze the page".  Only
	 * the "freeze" path needs to consider the need to set pages all-frozen
	 * in the visibility map under this scheme.
	 *
	 * When we freeze a page, we generally freeze all XIDs < OldestXmin,
	 * only leaving behind XIDs that are ineligible for freezing, if any.
	 * And so you might wonder why these trackers are necessary at all; why
	 * should _any_ page that VACUUM freezes _ever_ be left with XIDs/MXIDs
	 * that ratchet back the top-level NewRelfrozenXid/NewRelminMxid
	 * trackers?
	 *
	 * It is useful to use a definition of "freeze the page" that does not
	 * overspecify how MultiXacts are affected.  heap_prepare_freeze_tuple
	 * generally prefers to remove Multis eagerly, but lazy processing is
	 * used in cases where laziness allows VACUUM to avoid allocating a new
	 * Multi.  The "freeze the page" trackers enable this flexibility.
	 */
	TransactionId FreezePageRelfrozenXid;
	MultiXactId FreezePageRelminMxid;

	/*
	 * "No freeze" NewRelfrozenXid/NewRelminMxid trackers.
	 *
	 * These trackers are maintained in the same way as the trackers used
	 * when VACUUM scans a page that isn't cleanup locked.  Both code paths
	 * are based on the same general idea (do less work for this page during
	 * the ongoing VACUUM, at the cost of having to accept older final
	 * values).
	 */
	TransactionId NoFreezePageRelfrozenXid;
	MultiXactId NoFreezePageRelminMxid;

} HeapPageFreeze;

/*
 * Per-page state returned by heap_page_prune_and_freeze()
 */
typedef struct PruneFreezeResult
{
	int			ndeleted;		/* Number of tuples deleted from the page */
	int			nnewlpdead;		/* Number of newly LP_DEAD items */
	int			nfrozen;		/* Number of tuples we froze */

	/* Number of live and recently dead tuples on the page, after pruning */
	int			live_tuples;
	int			recently_dead_tuples;

	/*
	 * all_visible and all_frozen indicate if the all-visible and all-frozen
	 * bits in the visibility map can be set for this page, after pruning.
	 *
	 * vm_conflict_horizon is the newest xmin of live tuples on the page.
	 * The caller can use it as the conflict horizon when setting the VM
	 * bits.  It is only valid if we froze some tuples (nfrozen > 0), and
	 * all_frozen is true.
	 *
	 * These are only set if the HEAP_PAGE_PRUNE_FREEZE option is set.
	 */
	bool		all_visible;
	bool		all_frozen;
	TransactionId vm_conflict_horizon;

	/*
	 * Whether or not the page makes rel truncation unsafe.  This is set to
	 * 'true', even if the page contains LP_DEAD items.  VACUUM will remove
	 * them before attempting to truncate.
	 */
	bool		hastup;

	/*
	 * LP_DEAD items on the page after pruning.  Includes existing LP_DEAD
	 * items.
	 */
	int			lpdead_items;
	OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
} PruneFreezeResult;

/* 'reason' codes for heap_page_prune_and_freeze() */
typedef enum
{
	PRUNE_ON_ACCESS,			/* on-access pruning */
	PRUNE_VACUUM_SCAN,			/* VACUUM 1st heap pass */
	PRUNE_VACUUM_CLEANUP,		/* VACUUM 2nd heap pass */
} PruneReason;

/* ----------------
 *		function prototypes for heap access method
 *
 * heap_create, heap_create_with_catalog, and heap_drop_with_catalog
 * are declared in catalog/heap.h
 * ----------------
 */


/*
 * HeapScanIsValid
 *		True iff the heap scan is valid.
 */
#define HeapScanIsValid(scan) PointerIsValid(scan)

extern TableScanDesc heap_beginscan(Relation relation, Snapshot snapshot,
									int nkeys, ScanKey key,
									ParallelTableScanDesc parallel_scan,
									uint32 flags);
extern void heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk,
							   BlockNumber numBlks);
extern void heap_prepare_pagescan(TableScanDesc sscan);
extern void heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params,
						bool allow_strat, bool allow_sync, bool allow_pagemode);
extern void heap_endscan(TableScanDesc sscan);
extern HeapTuple heap_getnext(TableScanDesc sscan, ScanDirection direction);
extern bool heap_getnextslot(TableScanDesc sscan,
							 ScanDirection direction, struct TupleTableSlot *slot);
extern void heap_set_tidrange(TableScanDesc sscan, ItemPointer mintid,
							  ItemPointer maxtid);
extern bool heap_getnextslot_tidrange(TableScanDesc sscan,
									  ScanDirection direction,
									  TupleTableSlot *slot);
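
/*
 * Usage sketch (illustrative only, not part of this header's API): count
 * the tuples visible to a snapshot using a plain sequential scan.  It
 * assumes the caller is inside a transaction and already holds a suitable
 * lock on "rel"; "snapshot" would typically come from GetActiveSnapshot()
 * (utils/snapmgr.h).  Code outside the heap AM itself should normally use
 * the table_* wrappers in access/tableam.h rather than calling these
 * functions directly.
 */
static inline uint64
heapam_example_count_tuples(Relation rel, Snapshot snapshot)
{
	TableScanDesc scan;
	uint64		ntuples = 0;

	/* no scan keys; request an ordinary synchronized, page-mode seqscan */
	scan = heap_beginscan(rel, snapshot, 0, NULL, NULL,
						  SO_TYPE_SEQSCAN | SO_ALLOW_STRAT |
						  SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE);

	while (heap_getnext(scan, ForwardScanDirection) != NULL)
		ntuples++;

	heap_endscan(scan);
	return ntuples;
}
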
extern bool heap_fetch(Relation relation, Snapshot snapshot,
					   HeapTuple tuple, Buffer *userbuf, bool keep_buf);
extern bool heap_hot_search_buffer(ItemPointer tid, Relation relation,
								   Buffer buffer, Snapshot snapshot, HeapTuple heapTuple,
								   bool *all_dead, bool first_call);

extern void heap_get_latest_tid(TableScanDesc sscan, ItemPointer tid);

extern BulkInsertState GetBulkInsertState(void);
extern void FreeBulkInsertState(BulkInsertState);
extern void ReleaseBulkInsertStatePin(BulkInsertState bistate);

extern void heap_insert(Relation relation, HeapTuple tup, CommandId cid,
						int options, BulkInsertState bistate);
extern void heap_multi_insert(Relation relation, struct TupleTableSlot **slots,
							  int ntuples, CommandId cid, int options,
							  BulkInsertState bistate);
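
/*
 * Usage sketch (illustrative only): inserting a batch of pre-formed heap
 * tuples with a BulkInsertState, which keeps the current target buffer
 * pinned across calls and uses a ring-buffer strategy.  It assumes the
 * caller holds RowExclusiveLock on "rel" and obtained "cid" from
 * GetCurrentCommandId(true).  Note that heap_insert does not maintain
 * indexes; that is always the caller's responsibility.
 */
static inline void
heapam_example_bulk_insert(Relation rel, HeapTuple *tuples, int ntuples,
						   CommandId cid)
{
	BulkInsertState bistate = GetBulkInsertState();

	for (int i = 0; i < ntuples; i++)
		heap_insert(rel, tuples[i], cid, HEAP_INSERT_SKIP_FSM, bistate);

	FreeBulkInsertState(bistate);
}
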
extern TM_Result heap_delete(Relation relation, ItemPointer tid,
							 CommandId cid, Snapshot crosscheck, bool wait,
							 struct TM_FailureData *tmfd, bool changingPart);
extern void heap_finish_speculative(Relation relation, ItemPointer tid);
extern void heap_abort_speculative(Relation relation, ItemPointer tid);
extern TM_Result heap_update(Relation relation, ItemPointer otid,
							 HeapTuple newtup,
							 CommandId cid, Snapshot crosscheck, bool wait,
							 struct TM_FailureData *tmfd, LockTupleMode *lockmode,
							 TU_UpdateIndexes *update_indexes);
extern TM_Result heap_lock_tuple(Relation relation, HeapTuple tuple,
								 CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy,
								 bool follow_updates,
								 Buffer *buffer, struct TM_FailureData *tmfd);
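
/*
 * Usage sketch (illustrative only): the TM_Result protocol shared by
 * heap_delete, heap_update, and heap_lock_tuple.  A real caller (see
 * ExecDelete in the executor) would also handle TM_Updated by following
 * the update chain via tmfd.ctid; this sketch simply errors out.
 */
static inline void
heapam_example_delete_checked(Relation rel, ItemPointer tid, CommandId cid)
{
	TM_FailureData tmfd;
	TM_Result	result;

	result = heap_delete(rel, tid, cid, InvalidSnapshot,
						 true /* wait */ , &tmfd, false /* changingPart */ );

	switch (result)
	{
		case TM_Ok:
		case TM_SelfModified:	/* already deleted by the current command */
			break;
		default:
			elog(ERROR, "tuple concurrently updated or deleted");
	}
}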

extern bool heap_inplace_lock(Relation relation,
							  HeapTuple oldtup_ptr, Buffer buffer,
							  void (*release_callback) (void *), void *arg);
extern void heap_inplace_update_and_unlock(Relation relation,
										   HeapTuple oldtup, HeapTuple tuple,
										   Buffer buffer);
extern void heap_inplace_unlock(Relation relation,
								HeapTuple oldtup, Buffer buffer);
extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple,
									  const struct VacuumCutoffs *cutoffs,
									  HeapPageFreeze *pagefrz,
									  HeapTupleFreeze *frz, bool *totally_frozen);

extern void heap_pre_freeze_checks(Buffer buffer,
								   HeapTupleFreeze *tuples, int ntuples);
extern void heap_freeze_prepared_tuples(Buffer buffer,
										HeapTupleFreeze *tuples, int ntuples);
extern bool heap_freeze_tuple(HeapTupleHeader tuple,
							  TransactionId relfrozenxid, TransactionId relminmxid,
							  TransactionId FreezeLimit, TransactionId MultiXactCutoff);
extern bool heap_tuple_should_freeze(HeapTupleHeader tuple,
									 const struct VacuumCutoffs *cutoffs,
									 TransactionId *NoFreezePageRelfrozenXid,
									 MultiXactId *NoFreezePageRelminMxid);
extern bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple);

extern void simple_heap_insert(Relation relation, HeapTuple tup);
extern void simple_heap_delete(Relation relation, ItemPointer tid);
extern void simple_heap_update(Relation relation, ItemPointer otid,
							   HeapTuple tup, TU_UpdateIndexes *update_indexes);
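
/*
 * Usage sketch (illustrative only): the simple_heap_* variants supply the
 * current command ID, always wait, and elog(ERROR) on concurrent-update
 * failures, making them suitable for catalog-style updates where no
 * concurrent updaters are expected.  Index maintenance is still up to the
 * caller; the catalog wrappers in catalog/indexing.c handle that part.
 */
static inline void
heapam_example_simple_update(Relation rel, ItemPointer otid, HeapTuple newtup)
{
	TU_UpdateIndexes update_indexes;

	simple_heap_update(rel, otid, newtup, &update_indexes);

	/*
	 * A real caller would now insert index entries whenever update_indexes
	 * is not TU_None (cf. CatalogTupleUpdate).
	 */
}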

extern TransactionId heap_index_delete_tuples(Relation rel,
											  TM_IndexDeleteOp *delstate);

/* in heap/pruneheap.c */
struct GlobalVisState;
extern void heap_page_prune_opt(Relation relation, Buffer buffer);
extern void heap_page_prune_and_freeze(Relation relation, Buffer buffer,
									   struct GlobalVisState *vistest,
									   int options,
									   struct VacuumCutoffs *cutoffs,
									   PruneFreezeResult *presult,
									   PruneReason reason,
									   OffsetNumber *off_loc,
									   TransactionId *new_relfrozen_xid,
									   MultiXactId *new_relmin_mxid);
extern void heap_page_prune_execute(Buffer buffer, bool lp_truncate_only,
									OffsetNumber *redirected, int nredirected,
									OffsetNumber *nowdead, int ndead,
									OffsetNumber *nowunused, int nunused);
extern void heap_get_root_tuples(Page page, OffsetNumber *root_offsets);
extern void log_heap_prune_and_freeze(Relation relation, Buffer buffer,
									  TransactionId conflict_xid,
									  bool cleanup_lock,
									  PruneReason reason,
									  HeapTupleFreeze *frozen, int nfrozen,
									  OffsetNumber *redirected, int nredirected,
									  OffsetNumber *dead, int ndead,
									  OffsetNumber *unused, int nunused);
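
/*
 * Usage sketch (illustrative only): opportunistic pruning as performed
 * during on-access scans.  heap_page_prune_opt expects the caller to hold
 * a pin, but no content lock, on "buffer"; it decides internally whether
 * pruning looks worthwhile and whether a cleanup lock can be taken without
 * waiting, and silently does nothing otherwise.
 */
static inline void
heapam_example_prune_on_access(Relation rel, Buffer buffer)
{
	heap_page_prune_opt(rel, buffer);
}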

/* in heap/vacuumlazy.c */
struct VacuumParams;
extern void heap_vacuum_rel(Relation rel,
							struct VacuumParams *params, BufferAccessStrategy bstrategy);

/* in heap/heapam_visibility.c */
extern bool HeapTupleSatisfiesVisibility(HeapTuple htup, Snapshot snapshot,
										 Buffer buffer);
extern TM_Result HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
										  Buffer buffer);
extern HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin,
											Buffer buffer);
extern HTSV_Result HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer,
												   TransactionId *dead_after);
extern void HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer,
								 uint16 infomask, TransactionId xid);
extern bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple);
extern bool HeapTupleIsSurelyDead(HeapTuple htup,
								  struct GlobalVisState *vistest);
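
/*
 * Usage sketch (illustrative only): classifying a tuple the way VACUUM
 * does.  The caller must hold a pin and a content lock on "buffer", and
 * htup->t_data must point into that buffer.  "OldestXmin" would typically
 * come from GetOldestNonRemovableTransactionId() (storage/procarray.h).
 */
static inline bool
heapam_example_tuple_is_removable(HeapTuple htup, TransactionId OldestXmin,
								  Buffer buffer)
{
	return HeapTupleSatisfiesVacuum(htup, OldestXmin, buffer) == HEAPTUPLE_DEAD;
}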

/*
 * To avoid leaking too much knowledge about reorderbuffer implementation
 * details, this is implemented in reorderbuffer.c, not heapam_visibility.c.
 */
struct HTAB;
extern bool ResolveCminCmaxDuringDecoding(struct HTAB *tuplecid_data,
										  Snapshot snapshot,
										  HeapTuple htup,
										  Buffer buffer,
										  CommandId *cmin, CommandId *cmax);
extern void HeapCheckForSerializableConflictOut(bool visible, Relation relation, HeapTuple tuple,
												Buffer buffer, Snapshot snapshot);

/*
 * heap_execute_freeze_tuple
 *		Execute the prepared freezing of a tuple with caller's freeze plan.
 *
 * Caller is responsible for ensuring that no other backend can access the
 * storage underlying this tuple, either by holding an exclusive lock on the
 * buffer containing it (which is what lazy VACUUM does), or by having it be
 * in private storage (which is what CLUSTER and friends do).
 */
static inline void
heap_execute_freeze_tuple(HeapTupleHeader tuple, HeapTupleFreeze *frz)
{
	HeapTupleHeaderSetXmax(tuple, frz->xmax);

	if (frz->frzflags & XLH_FREEZE_XVAC)
		HeapTupleHeaderSetXvac(tuple, FrozenTransactionId);

	if (frz->frzflags & XLH_INVALID_XVAC)
		HeapTupleHeaderSetXvac(tuple, InvalidTransactionId);

	tuple->t_infomask = frz->t_infomask;
	tuple->t_infomask2 = frz->t_infomask2;
}

#endif							/* HEAPAM_H */