Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * xlogreader.h
4 : * Definitions for the generic XLog reading facility
5 : *
6 : * Portions Copyright (c) 2013-2025, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/include/access/xlogreader.h
10 : *
11 : * NOTES
12 : * See the definition of the XLogReaderState struct for instructions on
13 : * how to use the XLogReader infrastructure.
14 : *
15 : * The basic idea is to allocate an XLogReaderState via
16 : * XLogReaderAllocate(), position the reader to the first record with
17 : * XLogBeginRead() or XLogFindNextRecord(), and call XLogReadRecord()
18 : * until it returns NULL.
19 : *
20 : * Callers supply a page_read callback if they want to call
21 : * XLogReadRecord or XLogFindNextRecord; it can be passed in as NULL
22 : * otherwise. The WALRead function can be used as a helper to write
23 : * page_read callbacks, but it is not mandatory; callers that use it
24 : * must supply segment_open callbacks. The segment_close callback
25 : * must always be supplied.
26 : *
27 : * After reading a record with XLogReadRecord(), it's decomposed into
28 : * the per-block and main data parts, and the parts can be accessed
29 : * with the XLogRec* macros and functions. You can also decode a
30 : * record that's already constructed in memory, without reading from
31 : * disk, by calling the DecodeXLogRecord() function.
32 : *-------------------------------------------------------------------------
33 : */
34 : #ifndef XLOGREADER_H
35 : #define XLOGREADER_H
36 :
37 : #ifndef FRONTEND
38 : #include "access/transam.h"
39 : #endif
40 :
41 : #include "access/xlogrecord.h"
42 : #include "storage/buf.h"
43 :
44 : /* WALOpenSegment represents a WAL segment being read. */
45 : typedef struct WALOpenSegment
46 : {
47 : int ws_file; /* segment file descriptor; set by the segment_open callback, set negative by segment_close */
48 : XLogSegNo ws_segno; /* segment number */
49 : TimeLineID ws_tli; /* timeline ID of the currently open file; page_read sets it to the TLI of the file it read from */
50 : } WALOpenSegment;
51 :
52 : /* WALSegmentContext carries context information about WAL segments to read */
53 : typedef struct WALSegmentContext
54 : {
55 : char ws_dir[MAXPGPATH]; /* presumably the directory holding the WAL segments (cf. waldir of XLogReaderAllocate) -- TODO confirm */
56 : int ws_segsize; /* presumably the WAL segment size (cf. wal_segment_size of XLogReaderAllocate) -- TODO confirm */
57 : } WALSegmentContext;
58 :
59 : typedef struct XLogReaderState XLogReaderState; /* forward declaration; the struct is defined later in this header */
60 :
61 : /* Function type definitions for various xlogreader interactions; the contract of each callback is described on the matching XLogReaderRoutine member. */
62 : typedef int (*XLogPageReadCB) (XLogReaderState *xlogreader,
63 : XLogRecPtr targetPagePtr,
64 : int reqLen,
65 : XLogRecPtr targetRecPtr,
66 : char *readBuf);
67 : typedef void (*WALSegmentOpenCB) (XLogReaderState *xlogreader,
68 : XLogSegNo nextSegNo,
69 : TimeLineID *tli_p);
70 : typedef void (*WALSegmentCloseCB) (XLogReaderState *xlogreader);
71 :
72 : typedef struct XLogReaderRoutine /* set of callbacks stored in XLogReaderState.routine */
73 : {
74 : /*
75 : * Data input callback
76 : *
77 : * This callback shall read at least reqLen valid bytes of the xlog page
78 : * starting at targetPagePtr, and store them in readBuf. The callback
79 : * shall return the number of bytes read (never more than XLOG_BLCKSZ), or
80 : * -1 on failure. The callback shall sleep, if necessary, to wait for the
81 : * requested bytes to become available. The callback will not be invoked
82 : * again for the same page unless more than the returned number of bytes
83 : * are needed.
84 : *
85 : * targetRecPtr is the position of the WAL record we're reading. Usually
86 : * it is equal to targetPagePtr + reqLen, but sometimes xlogreader needs
87 : * to read and verify the page or segment header, before it reads the
88 : * actual WAL record it's interested in. In that case, targetRecPtr can
89 : * be used to determine which timeline to read the page from.
90 : *
91 : * The callback shall set ->seg.ws_tli to the TLI of the file the page was
92 : * read from.
93 : * May be NULL if the caller never calls XLogReadRecord() or XLogFindNextRecord(). */
94 : XLogPageReadCB page_read;
95 :
96 : /*
97 : * Callback to open the specified WAL segment for reading. ->seg.ws_file
98 : * shall be set to the file descriptor of the opened segment. In case of
99 : * failure, an error shall be raised by the callback and it shall not
100 : * return. Required by callers that use WALRead().
101 : *
102 : * "nextSegNo" is the number of the segment to be opened.
103 : *
104 : * "tli_p" is an input/output argument. WALRead() uses it to pass the
105 : * timeline in which the new segment should be found, but the callback can
106 : * use it to return the TLI that it actually opened.
107 : */
108 : WALSegmentOpenCB segment_open;
109 :
110 : /*
111 : * WAL segment close callback. ->seg.ws_file shall be set to a negative
112 : * number. This callback must always be supplied.
113 : */
114 : WALSegmentCloseCB segment_close;
115 : } XLogReaderRoutine;
116 :
117 : #define XL_ROUTINE(...) &(XLogReaderRoutine){__VA_ARGS__} /* yields the address of an XLogReaderRoutine compound literal initialized from the arguments; the expansion is not wrapped in parentheses */
118 :
119 : typedef struct /* decoded form of one block reference; element type of DecodedXLogRecord.blocks[] */
120 : {
121 : /* Is this block ref in use? (tested by XLogRecHasBlockRef()) */
122 : bool in_use;
123 :
124 : /* Identify the block this refers to */
125 : RelFileLocator rlocator;
126 : ForkNumber forknum;
127 : BlockNumber blkno;
128 :
129 : /* Prefetching workspace. */
130 : Buffer prefetch_buffer;
131 :
132 : /* copy of the fork_flags field from the XLogRecordBlockHeader */
133 : uint8 flags;
134 :
135 : /* Information on full-page image, if any */
136 : bool has_image; /* has image, even for consistency checking; read by XLogRecHasBlockImage() */
137 : bool apply_image; /* has image that should be restored; read by XLogRecBlockImageApply() */
138 : char *bkp_image;
139 : uint16 hole_offset;
140 : uint16 hole_length;
141 : uint16 bimg_len;
142 : uint8 bimg_info;
143 :
144 : /* Buffer holding the rmgr-specific data associated with this block */
145 : bool has_data; /* read by XLogRecHasBlockData() */
146 : char *data;
147 : uint16 data_len;
148 : } DecodedBkpBlock;
149 :
150 : /*
151 : * The decoded contents of a record. This occupies a contiguous region of
152 : * memory, with main_data and blocks[n].data pointing to memory after the
153 : * members declared here.
154 : */
155 : typedef struct DecodedXLogRecord
156 : {
157 : /* Private members used for resource management. */
158 : size_t size; /* total size of decoded record */
159 : bool oversized; /* outside the regular decode buffer? */
160 : struct DecodedXLogRecord *next; /* decoded record queue link */
161 :
162 : /* Public members. */
163 : XLogRecPtr lsn; /* location */
164 : XLogRecPtr next_lsn; /* location of next record */
165 : XLogRecord header; /* header */
166 : RepOriginId record_origin; /* read via XLogRecGetOrigin() */
167 : TransactionId toplevel_xid; /* XID of top-level transaction */
168 : char *main_data; /* record's main data portion */
169 : uint32 main_data_len; /* main data portion's length */
170 : int max_block_id; /* highest block_id in use (-1 if none) */
171 : DecodedBkpBlock blocks[FLEXIBLE_ARRAY_MEMBER]; /* per-block state, indexed by block_id */
172 : } DecodedXLogRecord;
173 :
174 : struct XLogReaderState /* state of one WAL reader; obtained from XLogReaderAllocate(), released with XLogReaderFree() */
175 : {
176 : /*
177 : * Operational callbacks
178 : */
179 : XLogReaderRoutine routine;
180 :
181 : /* ----------------------------------------
182 : * Public parameters
183 : * ----------------------------------------
184 : */
185 :
186 : /*
187 : * System identifier of the xlog files we're about to read. Set to zero
188 : * (the default value) if unknown or unimportant.
189 : */
190 : uint64 system_identifier;
191 :
192 : /*
193 : * Opaque data for callbacks to use. Not used by XLogReader.
194 : */
195 : void *private_data;
196 :
197 : /*
198 : * Start and end point of last record read. EndRecPtr is also used as the
199 : * position to read next. Calling XLogBeginRead() sets EndRecPtr to the
200 : * starting position and ReadRecPtr to invalid.
201 : *
202 : * Start and end point of last record returned by XLogReadRecord(). These
203 : * are also available as record->lsn and record->next_lsn.
204 : */
205 : XLogRecPtr ReadRecPtr; /* start of last record read */
206 : XLogRecPtr EndRecPtr; /* end+1 of last record read */
207 :
208 : /*
209 : * Set at the end of recovery: the start point of a partial record at the
210 : * end of WAL (InvalidXLogRecPtr if there wasn't one), and the start
211 : * location of its first contrecord that went missing.
212 : */
213 : XLogRecPtr abortedRecPtr;
214 : XLogRecPtr missingContrecPtr;
215 : /* Set when XLP_FIRST_IS_OVERWRITE_CONTRECORD is found */
216 : XLogRecPtr overwrittenRecPtr;
217 :
218 :
219 : /* ----------------------------------------
220 : * Decoded representation of current record
221 : *
222 : * Use XLogRecGet* functions to investigate the record; these fields
223 : * should not be accessed directly.
224 : * ----------------------------------------
225 : * Start and end point of the last record read and decoded by
226 : * XLogReadRecord(). NextRecPtr is also used as the position to decode
227 : * next. Calling XLogBeginRead() sets NextRecPtr and EndRecPtr to the
228 : * requested starting position.
229 : */
230 : XLogRecPtr DecodeRecPtr; /* start of last record decoded */
231 : XLogRecPtr NextRecPtr; /* end+1 of last record decoded */
232 : XLogRecPtr PrevRecPtr; /* start of previous record decoded */
233 :
234 : /* Last record returned by XLogReadRecord(). */
235 : DecodedXLogRecord *record;
236 :
237 : /* ----------------------------------------
238 : * private/internal state
239 : * ----------------------------------------
240 : */
241 :
242 : /*
243 : * Buffer for decoded records. This is a circular buffer, though
244 : * individual records can't be split in the middle, so some space is often
245 : * wasted at the end. Oversized records that don't fit in this space are
246 : * allocated separately.
247 : */
248 : char *decode_buffer;
249 : size_t decode_buffer_size;
250 : bool free_decode_buffer; /* need to free? */
251 : char *decode_buffer_head; /* data is read from the head */
252 : char *decode_buffer_tail; /* new data is written at the tail */
253 :
254 : /*
255 : * Queue of records that have been decoded. This is a linked list that
256 : * usually consists of consecutive records in decode_buffer, but may also
257 : * contain oversized records allocated with palloc().
258 : */
259 : DecodedXLogRecord *decode_queue_head; /* oldest decoded record */
260 : DecodedXLogRecord *decode_queue_tail; /* newest decoded record */
261 :
262 : /*
263 : * Buffer for currently read page (XLOG_BLCKSZ bytes, valid up to at least
264 : * readLen bytes)
265 : */
266 : char *readBuf;
267 : uint32 readLen;
268 :
269 : /* last read XLOG position for data currently in readBuf */
270 : WALSegmentContext segcxt;
271 : WALOpenSegment seg;
272 : uint32 segoff; /* presumably the offset of that data within the segment -- TODO confirm */
273 :
274 : /*
275 : * beginning of prior page read, and its TLI. Doesn't necessarily
276 : * correspond to what's in readBuf; used for timeline sanity checks.
277 : */
278 : XLogRecPtr latestPagePtr;
279 : TimeLineID latestPageTLI;
280 :
281 : /* beginning of the WAL record being read. */
282 : XLogRecPtr currRecPtr;
283 : /* timeline to read it from, 0 if a lookup is required */
284 : TimeLineID currTLI;
285 :
286 : /*
287 : * Safe point to read to in currTLI if current TLI is historical
288 : * (tliSwitchPoint) or InvalidXLogRecPtr if on current timeline.
289 : *
290 : * Actually set to the start of the segment containing the timeline switch
291 : * that ends currTLI's validity, not the LSN of the switch itself, since
292 : * we can't assume the old segment will be present.
293 : */
294 : XLogRecPtr currTLIValidUntil;
295 :
296 : /*
297 : * If currTLI is not the most recent known timeline, the next timeline to
298 : * read from when currTLIValidUntil is reached.
299 : */
300 : TimeLineID nextTLI;
301 :
302 : /*
303 : * Buffer for current ReadRecord result (expandable), used when a record
304 : * crosses a page boundary.
305 : */
306 : char *readRecordBuf;
307 : uint32 readRecordBufSize;
308 :
309 : /* Buffer to hold error message */
310 : char *errormsg_buf;
311 : bool errormsg_deferred; /* tested by XLogReaderHasQueuedRecordOrError() */
312 :
313 : /*
314 : * Flag to indicate to XLogPageReadCB that it should not block waiting for
315 : * data.
316 : */
317 : bool nonblocking;
318 : };
319 :
320 : /*
321 : * Check if XLogNextRecord() has any more queued records or an error to return.
322 : * Returns true when decode_queue_head is non-NULL or errormsg_deferred is set; only reads *state. */
323 : static inline bool
324 18077450 : XLogReaderHasQueuedRecordOrError(XLogReaderState *state)
325 : {
326 18077450 : return (state->decode_queue_head != NULL) || state->errormsg_deferred;
327 : }
328 :
329 : /* Get a new XLogReader; "routine" supplies the callbacks and "private_data" is opaque data for them (not used by XLogReader itself) */
330 : extern XLogReaderState *XLogReaderAllocate(int wal_segment_size,
331 : const char *waldir,
332 : XLogReaderRoutine *routine,
333 : void *private_data);
334 :
335 : /* Free an XLogReader */
336 : extern void XLogReaderFree(XLogReaderState *state);
337 :
338 : /* Optionally provide a circular decoding buffer to allow readahead. */
339 : extern void XLogReaderSetDecodeBuffer(XLogReaderState *state,
340 : void *buffer,
341 : size_t size);
342 :
343 : /* Position the XLogReader at the given record; XLogBeginRead() sets EndRecPtr and NextRecPtr to RecPtr and ReadRecPtr to invalid */
344 : extern void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr);
345 : extern XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr);
345 : extern XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr);
346 :
347 : /* Return values from XLogPageReadCB. NOTE(review): the page_read callback is documented in XLogReaderRoutine as returning a byte count or -1 -- confirm which functions actually return these codes. */
348 : typedef enum XLogPageReadResult
349 : {
350 : XLREAD_SUCCESS = 0, /* record is successfully read */
351 : XLREAD_FAIL = -1, /* failed during reading a record */
352 : XLREAD_WOULDBLOCK = -2, /* nonblocking mode only, no data */
353 : } XLogPageReadResult;
354 :
355 : /* Read the next XLog record. Returns NULL on end-of-WAL or failure; callers typically loop until NULL is returned. */
356 : extern struct XLogRecord *XLogReadRecord(XLogReaderState *state,
357 : char **errormsg);
358 :
359 : /* Consume the next record or error; XLogReaderHasQueuedRecordOrError() reports whether one is queued. */
360 : extern DecodedXLogRecord *XLogNextRecord(XLogReaderState *state,
361 : char **errormsg);
362 :
363 : /* Release the previously returned record, if necessary. */
364 : extern XLogRecPtr XLogReleasePreviousRecord(XLogReaderState *state);
365 :
366 : /* Try to read ahead, if there is data and space. */
367 : extern DecodedXLogRecord *XLogReadAhead(XLogReaderState *state,
368 : bool nonblocking);
369 :
370 : /* Validate a page header */
371 : extern bool XLogReaderValidatePageHeader(XLogReaderState *state,
372 : XLogRecPtr recptr, char *phdr);
373 :
374 : /* Forget error produced by XLogReaderValidatePageHeader(). */
375 : extern void XLogReaderResetError(XLogReaderState *state);
376 :
377 : /*
378 : * Error information from WALRead that both backend and frontend caller can
379 : * process. Currently only errors from pg_pread can be reported.
380 : */
381 : typedef struct WALReadError
382 : {
383 : int wre_errno; /* errno set by the last pg_pread() */
384 : int wre_off; /* Offset we tried to read from. */
385 : int wre_req; /* Bytes requested to be read. */
386 : int wre_read; /* Bytes read by the last pg_pread(). */
387 : WALOpenSegment wre_seg; /* Segment we tried to read from. */
388 : } WALReadError;
389 :
390 : extern bool WALRead(XLogReaderState *state, /* helper for writing page_read callbacks; callers using it must supply segment_open */
391 : char *buf, XLogRecPtr startptr, Size count,
392 : TimeLineID tli, WALReadError *errinfo); /* presumably *errinfo is filled in when false is returned -- TODO confirm */
393 :
394 : /* Functions for decoding an XLogRecord; DecodeXLogRecord() can decode a record that is already constructed in memory, without reading from disk. */
395 :
396 : extern size_t DecodeXLogRecordRequiredSpace(size_t xl_tot_len);
397 : extern bool DecodeXLogRecord(XLogReaderState *state,
398 : DecodedXLogRecord *decoded,
399 : XLogRecord *record,
400 : XLogRecPtr lsn,
401 : char **errormsg);
402 :
403 : /*
404 : * Macros that provide access to parts of the record most recently returned by
405 : * XLogReadRecord() or XLogNextRecord(). "decoder" is an XLogReaderState pointer whose ->record is dereferenced unconditionally.
406 : * Only XLogRecHasBlockRef() compares block_id against max_block_id; it also evaluates its arguments more than once. */
407 : #define XLogRecGetTotalLen(decoder) ((decoder)->record->header.xl_tot_len)
408 : #define XLogRecGetPrev(decoder) ((decoder)->record->header.xl_prev)
409 : #define XLogRecGetInfo(decoder) ((decoder)->record->header.xl_info)
410 : #define XLogRecGetRmid(decoder) ((decoder)->record->header.xl_rmid)
411 : #define XLogRecGetXid(decoder) ((decoder)->record->header.xl_xid)
412 : #define XLogRecGetOrigin(decoder) ((decoder)->record->record_origin)
413 : #define XLogRecGetTopXid(decoder) ((decoder)->record->toplevel_xid)
414 : #define XLogRecGetData(decoder) ((decoder)->record->main_data)
415 : #define XLogRecGetDataLen(decoder) ((decoder)->record->main_data_len)
416 : #define XLogRecHasAnyBlockRefs(decoder) ((decoder)->record->max_block_id >= 0)
417 : #define XLogRecMaxBlockId(decoder) ((decoder)->record->max_block_id)
418 : #define XLogRecGetBlock(decoder, i) (&(decoder)->record->blocks[(i)])
419 : #define XLogRecHasBlockRef(decoder, block_id) \
420 : (((decoder)->record->max_block_id >= (block_id)) && \
421 : ((decoder)->record->blocks[block_id].in_use))
422 : #define XLogRecHasBlockImage(decoder, block_id) \
423 : ((decoder)->record->blocks[block_id].has_image)
424 : #define XLogRecBlockImageApply(decoder, block_id) \
425 : ((decoder)->record->blocks[block_id].apply_image)
426 : #define XLogRecHasBlockData(decoder, block_id) \
427 : ((decoder)->record->blocks[block_id].has_data)
428 :
429 : #ifndef FRONTEND
430 : extern FullTransactionId XLogRecGetFullXid(XLogReaderState *record); /* declared for non-FRONTEND builds only */
431 : #endif
432 :
433 : extern bool RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page); /* presumably writes the block's full-page image into "page" -- TODO confirm */
434 : extern char *XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len); /* presumably returns the block's data and stores its length in *len -- TODO confirm */
435 : extern void XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id,
436 : RelFileLocator *rlocator, ForkNumber *forknum,
437 : BlockNumber *blknum);
438 : extern bool XLogRecGetBlockTagExtended(XLogReaderState *record, uint8 block_id, /* same outputs as XLogRecGetBlockTag() plus prefetch_buffer, with a bool result */
439 : RelFileLocator *rlocator, ForkNumber *forknum,
440 : BlockNumber *blknum,
441 : Buffer *prefetch_buffer);
442 :
443 : #endif /* XLOGREADER_H */
|