/*-------------------------------------------------------------------------
 *
 * heapam.h
 *	  POSTGRES heap access method definitions.
 *
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/access/heapam.h
 *
 *-------------------------------------------------------------------------
 */
#ifndef HEAPAM_H
#define HEAPAM_H

#include "access/heapam_xlog.h"
#include "access/relation.h"	/* for backward compatibility */
#include "access/relscan.h"
#include "access/sdir.h"
#include "access/skey.h"
#include "access/table.h"		/* for backward compatibility */
#include "access/tableam.h"
#include "nodes/lockoptions.h"
#include "nodes/primnodes.h"
#include "storage/bufpage.h"
#include "storage/dsm.h"
#include "storage/lockdefs.h"
#include "storage/read_stream.h"
#include "storage/shm_toc.h"
#include "utils/relcache.h"
#include "utils/snapshot.h"


/* "options" flag bits for heap_insert */
#define HEAP_INSERT_SKIP_FSM	TABLE_INSERT_SKIP_FSM
#define HEAP_INSERT_FROZEN		TABLE_INSERT_FROZEN
#define HEAP_INSERT_NO_LOGICAL	TABLE_INSERT_NO_LOGICAL
#define HEAP_INSERT_SPECULATIVE 0x0010

/* "options" flag bits for heap_page_prune_and_freeze */
#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW (1 << 0)
#define HEAP_PAGE_PRUNE_FREEZE			(1 << 1)

typedef struct BulkInsertStateData *BulkInsertState;
struct TupleTableSlot;
struct VacuumCutoffs;

#define MaxLockTupleMode	LockTupleExclusive

/*
 * Descriptor for heap table scans.
 */
typedef struct HeapScanDescData
{
	TableScanDescData rs_base;	/* AM independent part of the descriptor */

	/* state set up at initscan time */
	BlockNumber rs_nblocks;		/* total number of blocks in rel */
	BlockNumber rs_startblock;	/* block # to start at */
	BlockNumber rs_numblocks;	/* max number of blocks to scan */
	/* rs_numblocks is usually InvalidBlockNumber, meaning "scan whole rel" */

	/* scan current state */
	bool		rs_inited;		/* false = scan not init'd yet */
	OffsetNumber rs_coffset;	/* current offset # in non-page-at-a-time mode */
	BlockNumber rs_cblock;		/* current block # in scan, if any */
	Buffer		rs_cbuf;		/* current buffer in scan, if any */
	/* NB: if rs_cbuf is not InvalidBuffer, we hold a pin on that buffer */

	BufferAccessStrategy rs_strategy;	/* access strategy for reads */

	HeapTupleData rs_ctup;		/* current tuple in scan, if any */

	/* For scans that stream reads */
	ReadStream *rs_read_stream;

	/*
	 * For sequential scans and TID range scans to stream reads.  The read
	 * stream is allocated at the beginning of the scan and reset on rescan
	 * or when the scan direction changes.  The scan direction is saved each
	 * time a new page is requested.  If the scan direction changes from one
	 * page to the next, the read stream releases all previously pinned
	 * buffers and resets the prefetch block.
	 */
	ScanDirection rs_dir;
	BlockNumber rs_prefetch_block;

	/*
	 * For parallel scans to store page allocation data.  NULL when not
	 * performing a parallel scan.
	 */
	ParallelBlockTableScanWorkerData *rs_parallelworkerdata;

	/* these fields only used in page-at-a-time mode and for bitmap scans */
	uint32		rs_cindex;		/* current tuple's index in vistuples */
	uint32		rs_ntuples;		/* number of visible tuples on page */
	OffsetNumber rs_vistuples[MaxHeapTuplesPerPage];	/* their offsets */
} HeapScanDescData;
typedef struct HeapScanDescData *HeapScanDesc;

typedef struct BitmapHeapScanDescData
{
	HeapScanDescData rs_heap_base;

	/*
	 * These fields are only used for bitmap scans for the "skip fetch"
	 * optimization.  Bitmap scans needing no fields from the heap may skip
	 * fetching an all-visible block, instead using the number of tuples per
	 * block reported by the bitmap to determine how many NULL-filled tuples
	 * to return.  They are common to parallel and serial BitmapHeapScans.
	 */

	/* page of VM containing info for current block */
	Buffer		rs_vmbuffer;
	int			rs_empty_tuples_pending;
} BitmapHeapScanDescData;
typedef struct BitmapHeapScanDescData *BitmapHeapScanDesc;

/*
 * Descriptor for fetches from heap via an index.
 */
typedef struct IndexFetchHeapData
{
	IndexFetchTableData xs_base;	/* AM independent part of the descriptor */

	Buffer		xs_cbuf;		/* current heap buffer in scan, if any */
	/* NB: if xs_cbuf is not InvalidBuffer, we hold a pin on that buffer */
} IndexFetchHeapData;

/* Result codes for HeapTupleSatisfiesVacuum */
typedef enum
{
	HEAPTUPLE_DEAD,				/* tuple is dead and deletable */
	HEAPTUPLE_LIVE,				/* tuple is live (committed, no deleter) */
	HEAPTUPLE_RECENTLY_DEAD,	/* tuple is dead, but not deletable yet */
	HEAPTUPLE_INSERT_IN_PROGRESS,	/* inserting xact is still in progress */
	HEAPTUPLE_DELETE_IN_PROGRESS,	/* deleting xact is still in progress */
} HTSV_Result;
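
/*
 * Illustrative sketch (editor's example, not part of the PostgreSQL API):
 * how a caller might act on HeapTupleSatisfiesVacuum()'s verdict (declared
 * later in this file).  Assumes the caller holds a pin and content lock on
 * "buf", that "tuple" points into that buffer, and that "oldest_xmin" came
 * from the caller's own cutoff computation.
 *
 *	switch (HeapTupleSatisfiesVacuum(tuple, oldest_xmin, buf))
 *	{
 *		case HEAPTUPLE_DEAD:
 *			... tuple may be removed ...
 *			break;
 *		case HEAPTUPLE_RECENTLY_DEAD:
 *			... dead, but some snapshot may still see it; keep it ...
 *			break;
 *		default:
 *			... live or still in progress; must be kept ...
 *			break;
 *	}
 */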

/*
 * heap_prepare_freeze_tuple may request that heap_freeze_execute_prepared
 * check any tuple's to-be-frozen xmin and/or xmax status using pg_xact
 */
#define		HEAP_FREEZE_CHECK_XMIN_COMMITTED	0x01
#define		HEAP_FREEZE_CHECK_XMAX_ABORTED		0x02

/* heap_prepare_freeze_tuple state describing how to freeze a tuple */
typedef struct HeapTupleFreeze
{
	/* Fields describing how to process tuple */
	TransactionId xmax;
	uint16		t_infomask2;
	uint16		t_infomask;
	uint8		frzflags;

	/* xmin/xmax check flags */
	uint8		checkflags;
	/* Page offset number for tuple */
	OffsetNumber offset;
} HeapTupleFreeze;

/*
 * State used by VACUUM to track the details of freezing all eligible tuples
 * on a given heap page.
 *
 * VACUUM prepares freeze plans for each page via heap_prepare_freeze_tuple
 * calls (every tuple with storage gets its own call).  This page-level
 * freeze state is updated across each call, which ultimately determines
 * whether or not freezing the page is required.
 *
 * Aside from the basic question of whether or not freezing will go ahead,
 * the state also tracks the oldest extant XID/MXID in the table as a whole,
 * for the purposes of advancing relfrozenxid/relminmxid values in pg_class
 * later on.  Each heap_prepare_freeze_tuple call pushes NewRelfrozenXid
 * and/or NewRelminMxid back as required to avoid unsafe final pg_class
 * values.  Any and all unfrozen XIDs or MXIDs that remain after VACUUM
 * finishes _must_ have values >= the final relfrozenxid/relminmxid values
 * in pg_class.  This includes XIDs that remain as MultiXact members from
 * any tuple's xmax.
 *
 * When the 'freeze_required' flag isn't set after all tuples are examined,
 * the final choice on freezing is made by vacuumlazy.c, which may trigger
 * freezing based on whatever criteria it deems appropriate.  However, it is
 * recommended that vacuumlazy.c avoid early freezing when freezing does not
 * enable setting the target page all-frozen in the visibility map afterwards.
 */
typedef struct HeapPageFreeze
{
	/* Is heap_prepare_freeze_tuple caller required to freeze page? */
	bool		freeze_required;

	/*
	 * "Freeze" NewRelfrozenXid/NewRelminMxid trackers.
	 *
	 * Trackers used when heap_freeze_execute_prepared freezes, or when there
	 * are zero freeze plans for a page.  It is always valid for vacuumlazy.c
	 * to freeze any page, by definition.  This even includes pages that have
	 * no tuples with storage to consider in the first place.  That way the
	 * 'totally_frozen' results from heap_prepare_freeze_tuple can always be
	 * used in the same way, even when no freeze plans need to be executed to
	 * "freeze the page".  Only the "freeze" path needs to consider the need
	 * to set pages all-frozen in the visibility map under this scheme.
	 *
	 * When we freeze a page, we generally freeze all XIDs < OldestXmin, only
	 * leaving behind XIDs that are ineligible for freezing, if any.  And so
	 * you might wonder why these trackers are necessary at all; why should
	 * _any_ page that VACUUM freezes _ever_ be left with XIDs/MXIDs that
	 * ratchet back the top-level NewRelfrozenXid/NewRelminMxid trackers?
	 *
	 * It is useful to use a definition of "freeze the page" that does not
	 * overspecify how MultiXacts are affected.  heap_prepare_freeze_tuple
	 * generally prefers to remove Multis eagerly, but lazy processing is
	 * used in cases where laziness allows VACUUM to avoid allocating a new
	 * Multi.  The "freeze the page" trackers enable this flexibility.
	 */
	TransactionId FreezePageRelfrozenXid;
	MultiXactId FreezePageRelminMxid;

	/*
	 * "No freeze" NewRelfrozenXid/NewRelminMxid trackers.
	 *
	 * These trackers are maintained in the same way as the trackers used
	 * when VACUUM scans a page that isn't cleanup locked.  Both code paths
	 * are based on the same general idea (do less work for this page during
	 * the ongoing VACUUM, at the cost of having to accept older final
	 * values).
	 */
	TransactionId NoFreezePageRelfrozenXid;
	MultiXactId NoFreezePageRelminMxid;

} HeapPageFreeze;
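
/*
 * Illustrative sketch (editor's example, not part of the PostgreSQL API):
 * the prepare-then-execute freeze cycle described above, roughly as a
 * VACUUM-like caller drives it.  "cutoffs", "buf", "offnum", "htup" and the
 * caller's NewRelfrozenXid/NewRelminMxid trackers are all assumed to be
 * supplied by the surrounding code.
 *
 *	HeapPageFreeze pagefrz;
 *	HeapTupleFreeze frozen[MaxHeapTuplesPerPage];
 *	int			nfrozen = 0;
 *
 *	pagefrz.freeze_required = false;
 *	pagefrz.FreezePageRelfrozenXid = NewRelfrozenXid;
 *	pagefrz.FreezePageRelminMxid = NewRelminMxid;
 *	pagefrz.NoFreezePageRelfrozenXid = NewRelfrozenXid;
 *	pagefrz.NoFreezePageRelminMxid = NewRelminMxid;
 *
 *	... for each tuple with storage on the page ...
 *	{
 *		bool		totally_frozen;
 *
 *		if (heap_prepare_freeze_tuple(htup, cutoffs, &pagefrz,
 *									  &frozen[nfrozen], &totally_frozen))
 *			frozen[nfrozen++].offset = offnum;
 *	}
 *
 *	... if pagefrz.freeze_required, or freezing is otherwise chosen ...
 *	{
 *		heap_pre_freeze_checks(buf, frozen, nfrozen);
 *		heap_freeze_prepared_tuples(buf, frozen, nfrozen);
 *	}
 */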

/*
 * Per-page state returned by heap_page_prune_and_freeze()
 */
typedef struct PruneFreezeResult
{
	int			ndeleted;		/* Number of tuples deleted from the page */
	int			nnewlpdead;		/* Number of newly LP_DEAD items */
	int			nfrozen;		/* Number of tuples we froze */

	/* Number of live and recently dead tuples on the page, after pruning */
	int			live_tuples;
	int			recently_dead_tuples;

	/*
	 * all_visible and all_frozen indicate if the all-visible and all-frozen
	 * bits in the visibility map can be set for this page, after pruning.
	 *
	 * vm_conflict_horizon is the newest xmin of live tuples on the page.
	 * The caller can use it as the conflict horizon when setting the VM
	 * bits.  It is only valid if we froze some tuples (nfrozen > 0), and
	 * all_frozen is true.
	 *
	 * These are only set if the HEAP_PAGE_PRUNE_FREEZE option is set.
	 */
	bool		all_visible;
	bool		all_frozen;
	TransactionId vm_conflict_horizon;

	/*
	 * Whether or not the page makes rel truncation unsafe.  LP_DEAD items
	 * by themselves do not make it 'true', since VACUUM will remove them
	 * before attempting to truncate.
	 */
	bool		hastup;

	/*
	 * LP_DEAD items on the page after pruning.  Includes existing LP_DEAD
	 * items.
	 */
	int			lpdead_items;
	OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
} PruneFreezeResult;

/* 'reason' codes for heap_page_prune_and_freeze() */
typedef enum
{
	PRUNE_ON_ACCESS,			/* on-access pruning */
	PRUNE_VACUUM_SCAN,			/* VACUUM 1st heap pass */
	PRUNE_VACUUM_CLEANUP,		/* VACUUM 2nd heap pass */
} PruneReason;
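
/*
 * Illustrative sketch (editor's example, not part of the PostgreSQL API):
 * a VACUUM-style call to heap_page_prune_and_freeze(), declared below.
 * Assumes the caller holds a cleanup lock on "buf", obtained "vistest" via
 * GlobalVisTestFor(), and supplies its own cutoffs and the
 * relfrozenxid/relminmxid trackers.
 *
 *	PruneFreezeResult presult;
 *
 *	heap_page_prune_and_freeze(rel, buf, vistest,
 *							   HEAP_PAGE_PRUNE_FREEZE,
 *							   cutoffs, &presult, PRUNE_VACUUM_SCAN,
 *							   &off_loc, &new_relfrozen_xid,
 *							   &new_relmin_mxid);
 *
 *	if (presult.lpdead_items > 0)
 *		... remember presult.deadoffsets[] for the index-vacuuming pass ...
 */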

/* ----------------
 *		function prototypes for heap access method
 *
 * heap_create, heap_create_with_catalog, and heap_drop_with_catalog
 * are declared in catalog/heap.h
 * ----------------
 */


/*
 * HeapScanIsValid
 *		True iff the heap scan is valid.
 */
#define HeapScanIsValid(scan) PointerIsValid(scan)

extern TableScanDesc heap_beginscan(Relation relation, Snapshot snapshot,
									int nkeys, ScanKey key,
									ParallelTableScanDesc parallel_scan,
									uint32 flags);
extern void heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk,
							   BlockNumber numBlks);
extern void heap_prepare_pagescan(TableScanDesc sscan);
extern void heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params,
						bool allow_strat, bool allow_sync, bool allow_pagemode);
extern void heap_endscan(TableScanDesc sscan);
extern HeapTuple heap_getnext(TableScanDesc sscan, ScanDirection direction);
extern bool heap_getnextslot(TableScanDesc sscan,
							 ScanDirection direction, struct TupleTableSlot *slot);
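
/*
 * Illustrative sketch (editor's example, not part of the PostgreSQL API):
 * a minimal forward sequential scan.  Assumes "rel" is already opened and
 * locked and "snapshot" is a registered snapshot; the flags mirror what
 * table_beginscan() passes for an ordinary seqscan.
 *
 *	TableScanDesc scan = heap_beginscan(rel, snapshot, 0, NULL, NULL,
 *										SO_TYPE_SEQSCAN | SO_ALLOW_STRAT |
 *										SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE);
 *	HeapTuple	tuple;
 *
 *	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
 *	{
 *		... process tuple; it is only valid until the next call ...
 *	}
 *	heap_endscan(scan);
 */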
extern void heap_set_tidrange(TableScanDesc sscan, ItemPointer mintid,
							  ItemPointer maxtid);
extern bool heap_getnextslot_tidrange(TableScanDesc sscan,
									  ScanDirection direction,
									  TupleTableSlot *slot);
extern bool heap_fetch(Relation relation, Snapshot snapshot,
					   HeapTuple tuple, Buffer *userbuf, bool keep_buf);
extern bool heap_hot_search_buffer(ItemPointer tid, Relation relation,
								   Buffer buffer, Snapshot snapshot,
								   HeapTuple heapTuple,
								   bool *all_dead, bool first_call);

extern void heap_get_latest_tid(TableScanDesc sscan, ItemPointer tid);

extern BulkInsertState GetBulkInsertState(void);
extern void FreeBulkInsertState(BulkInsertState);
extern void ReleaseBulkInsertStatePin(BulkInsertState bistate);

extern void heap_insert(Relation relation, HeapTuple tup, CommandId cid,
						int options, BulkInsertState bistate);
extern void heap_multi_insert(Relation relation, struct TupleTableSlot **slots,
							  int ntuples, CommandId cid, int options,
							  BulkInsertState bistate);
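
/*
 * Illustrative sketch (editor's example, not part of the PostgreSQL API):
 * a bulk load using the insert options defined at the top of this file.
 * Assumes "rel" is open and locked and each "tup" was built by the caller
 * (e.g. with heap_form_tuple()); index updates remain the caller's
 * responsibility.
 *
 *	BulkInsertState bistate = GetBulkInsertState();
 *
 *	... for each tuple to load ...
 *		heap_insert(rel, tup, GetCurrentCommandId(true),
 *					HEAP_INSERT_SKIP_FSM, bistate);
 *
 *	FreeBulkInsertState(bistate);
 */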
extern TM_Result heap_delete(Relation relation, ItemPointer tid,
							 CommandId cid, Snapshot crosscheck, bool wait,
							 struct TM_FailureData *tmfd, bool changingPart);
extern void heap_finish_speculative(Relation relation, ItemPointer tid);
extern void heap_abort_speculative(Relation relation, ItemPointer tid);
extern TM_Result heap_update(Relation relation, ItemPointer otid,
							 HeapTuple newtup,
							 CommandId cid, Snapshot crosscheck, bool wait,
							 struct TM_FailureData *tmfd, LockTupleMode *lockmode,
							 TU_UpdateIndexes *update_indexes);
extern TM_Result heap_lock_tuple(Relation relation, HeapTuple tuple,
								 CommandId cid, LockTupleMode mode,
								 LockWaitPolicy wait_policy,
								 bool follow_updates,
								 Buffer *buffer, struct TM_FailureData *tmfd);

extern bool heap_inplace_lock(Relation relation,
							  HeapTuple oldtup_ptr, Buffer buffer,
							  void (*release_callback) (void *), void *arg);
extern void heap_inplace_update_and_unlock(Relation relation,
										   HeapTuple oldtup, HeapTuple tuple,
										   Buffer buffer);
extern void heap_inplace_unlock(Relation relation,
								HeapTuple oldtup, Buffer buffer);
extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple,
									  const struct VacuumCutoffs *cutoffs,
									  HeapPageFreeze *pagefrz,
									  HeapTupleFreeze *frz, bool *totally_frozen);

extern void heap_pre_freeze_checks(Buffer buffer,
								   HeapTupleFreeze *tuples, int ntuples);
extern void heap_freeze_prepared_tuples(Buffer buffer,
										HeapTupleFreeze *tuples, int ntuples);
extern bool heap_freeze_tuple(HeapTupleHeader tuple,
							  TransactionId relfrozenxid, TransactionId relminmxid,
							  TransactionId FreezeLimit, TransactionId MultiXactCutoff);
extern bool heap_tuple_should_freeze(HeapTupleHeader tuple,
									 const struct VacuumCutoffs *cutoffs,
									 TransactionId *NoFreezePageRelfrozenXid,
									 MultiXactId *NoFreezePageRelminMxid);
extern bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple);

extern void simple_heap_insert(Relation relation, HeapTuple tup);
extern void simple_heap_delete(Relation relation, ItemPointer tid);
extern void simple_heap_update(Relation relation, ItemPointer otid,
							   HeapTuple tup, TU_UpdateIndexes *update_indexes);
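
/*
 * Illustrative sketch (editor's example, not part of the PostgreSQL API):
 * the simple_heap_* variants above perform a plain MVCC operation and, for
 * update/delete, raise an error if the tuple was concurrently updated,
 * which suits catalog-style callers.  Assumes "rel" and "tup" come from the
 * caller; keeping indexes up to date remains the caller's job.
 *
 *	TU_UpdateIndexes update_indexes;
 *
 *	simple_heap_insert(rel, tup);
 *	...
 *	... modify the caller's copy of the tuple, then: ...
 *	simple_heap_update(rel, &tup->t_self, tup, &update_indexes);
 *	...
 *	simple_heap_delete(rel, &tup->t_self);
 */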

extern TransactionId heap_index_delete_tuples(Relation rel,
											  TM_IndexDeleteOp *delstate);

/* in heap/pruneheap.c */
struct GlobalVisState;
extern void heap_page_prune_opt(Relation relation, Buffer buffer);
extern void heap_page_prune_and_freeze(Relation relation, Buffer buffer,
									   struct GlobalVisState *vistest,
									   int options,
									   struct VacuumCutoffs *cutoffs,
									   PruneFreezeResult *presult,
									   PruneReason reason,
									   OffsetNumber *off_loc,
									   TransactionId *new_relfrozen_xid,
									   MultiXactId *new_relmin_mxid);
extern void heap_page_prune_execute(Buffer buffer, bool lp_truncate_only,
									OffsetNumber *redirected, int nredirected,
									OffsetNumber *nowdead, int ndead,
									OffsetNumber *nowunused, int nunused);
extern void heap_get_root_tuples(Page page, OffsetNumber *root_offsets);
extern void log_heap_prune_and_freeze(Relation relation, Buffer buffer,
									  TransactionId conflict_xid,
									  bool cleanup_lock,
									  PruneReason reason,
									  HeapTupleFreeze *frozen, int nfrozen,
									  OffsetNumber *redirected, int nredirected,
									  OffsetNumber *dead, int ndead,
									  OffsetNumber *unused, int nunused);

/* in heap/vacuumlazy.c */
struct VacuumParams;
extern void heap_vacuum_rel(Relation rel,
							struct VacuumParams *params,
							BufferAccessStrategy bstrategy);

/* in heap/heapam_visibility.c */
extern bool HeapTupleSatisfiesVisibility(HeapTuple htup, Snapshot snapshot,
										 Buffer buffer);
extern TM_Result HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
										  Buffer buffer);
extern HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin,
											Buffer buffer);
extern HTSV_Result HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer,
												   TransactionId *dead_after);
extern void HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer,
								 uint16 infomask, TransactionId xid);
extern bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple);
extern bool HeapTupleIsSurelyDead(HeapTuple htup,
								  struct GlobalVisState *vistest);

/*
 * To avoid leaking too much knowledge about reorderbuffer implementation
 * details, this is implemented in reorderbuffer.c, not heapam_visibility.c.
 */
struct HTAB;
extern bool ResolveCminCmaxDuringDecoding(struct HTAB *tuplecid_data,
										  Snapshot snapshot,
										  HeapTuple htup,
										  Buffer buffer,
										  CommandId *cmin, CommandId *cmax);
extern void HeapCheckForSerializableConflictOut(bool visible, Relation relation,
												HeapTuple tuple,
												Buffer buffer, Snapshot snapshot);

/*
 * heap_execute_freeze_tuple
 *		Execute the prepared freezing of a tuple with caller's freeze plan.
 *
 * Caller is responsible for ensuring that no other backend can access the
 * storage underlying this tuple, either by holding an exclusive lock on the
 * buffer containing it (which is what lazy VACUUM does), or by having it be
 * in private storage (which is what CLUSTER and friends do).
 */
static inline void
heap_execute_freeze_tuple(HeapTupleHeader tuple, HeapTupleFreeze *frz)
{
	HeapTupleHeaderSetXmax(tuple, frz->xmax);

	if (frz->frzflags & XLH_FREEZE_XVAC)
		HeapTupleHeaderSetXvac(tuple, FrozenTransactionId);

	if (frz->frzflags & XLH_INVALID_XVAC)
		HeapTupleHeaderSetXvac(tuple, InvalidTransactionId);

	tuple->t_infomask = frz->t_infomask;
	tuple->t_infomask2 = frz->t_infomask2;
}

#endif							/* HEAPAM_H */