Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * xlogreader.h
4 : * Definitions for the generic XLog reading facility
5 : *
6 : * Portions Copyright (c) 2013-2025, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/include/access/xlogreader.h
10 : *
11 : * NOTES
12 : * See the definition of the XLogReaderState struct for instructions on
13 : * how to use the XLogReader infrastructure.
14 : *
15 : * The basic idea is to allocate an XLogReaderState via
16 : * XLogReaderAllocate(), position the reader to the first record with
17 : * XLogBeginRead() or XLogFindNextRecord(), and call XLogReadRecord()
18 : * until it returns NULL.
19 : *
20 : * Callers supply a page_read callback if they want to call
21 : * XLogReadRecord or XLogFindNextRecord; it can be passed in as NULL
22 : * otherwise. The WALRead function can be used as a helper to write
23 : * page_read callbacks, but it is not mandatory; callers that use it
24 : * must supply a segment_open callback. The segment_close callback
25 : * must always be supplied.
26 : *
27 : * After reading a record with XLogReadRecord(), it's decomposed into
28 : * the per-block and main data parts, and the parts can be accessed
29 : * with the XLogRec* macros and functions. You can also decode a
30 : * record that's already constructed in memory, without reading from
31 : * disk, by calling the DecodeXLogRecord() function.
32 : *-------------------------------------------------------------------------
33 : */
34 : #ifndef XLOGREADER_H
35 : #define XLOGREADER_H
36 :
37 : #ifndef FRONTEND
38 : #include "access/transam.h"
39 : #endif
40 :
41 : #include "access/xlogrecord.h"
42 : #include "storage/buf.h"
43 :
44 : /* WALOpenSegment represents a WAL segment being read. */
45 : typedef struct WALOpenSegment
46 : {
47 : int ws_file; /* segment file descriptor */
48 : XLogSegNo ws_segno; /* segment number */
49 : TimeLineID ws_tli; /* timeline ID of the currently open file */
50 : } WALOpenSegment;
51 :
52 : /* WALSegmentContext carries context information about WAL segments to read */
53 : typedef struct WALSegmentContext
54 : {
55 : char ws_dir[MAXPGPATH];
56 : int ws_segsize;
57 : } WALSegmentContext;
58 :
59 : typedef struct XLogReaderState XLogReaderState;
60 :
61 : /* Function type definitions for various xlogreader interactions */
62 : typedef int (*XLogPageReadCB) (XLogReaderState *xlogreader,
63 : XLogRecPtr targetPagePtr,
64 : int reqLen,
65 : XLogRecPtr targetRecPtr,
66 : char *readBuf);
67 : typedef void (*WALSegmentOpenCB) (XLogReaderState *xlogreader,
68 : XLogSegNo nextSegNo,
69 : TimeLineID *tli_p);
70 : typedef void (*WALSegmentCloseCB) (XLogReaderState *xlogreader);
71 :
72 : typedef struct XLogReaderRoutine
73 : {
74 : /*
75 : * Data input callback
76 : *
77 : * This callback shall read at least reqLen valid bytes of the xlog page
78 : * starting at targetPagePtr, and store them in readBuf. The callback
79 : * shall return the number of bytes read (never more than XLOG_BLCKSZ), or
80 : * -1 on failure. The callback shall sleep, if necessary, to wait for the
81 : * requested bytes to become available. The callback will not be invoked
82 : * again for the same page unless more than the returned number of bytes
83 : * are needed.
84 : *
85 : * targetRecPtr is the position of the WAL record we're reading. Usually
86 : * it is equal to targetPagePtr + reqLen, but sometimes xlogreader needs
87 : * to read and verify the page or segment header, before it reads the
88 : * actual WAL record it's interested in. In that case, targetRecPtr can
89 : * be used to determine which timeline to read the page from.
90 : *
91 : * The callback shall set ->seg.ws_tli to the TLI of the file the page was
92 : * read from.
93 : */
94 : XLogPageReadCB page_read;
95 :
96 : /*
97 : * Callback to open the specified WAL segment for reading. ->seg.ws_file
98 : * shall be set to the file descriptor of the opened segment. In case of
99 : * failure, an error shall be raised by the callback and it shall not
100 : * return.
101 : *
102 : * "nextSegNo" is the number of the segment to be opened.
103 : *
104 : * "tli_p" is an input/output argument. WALRead() uses it to pass the
105 : * timeline in which the new segment should be found, but the callback can
106 : * use it to return the TLI that it actually opened.
107 : */
108 : WALSegmentOpenCB segment_open;
109 :
110 : /*
111 : * WAL segment close callback. ->seg.ws_file shall be set to a negative
112 : * number.
113 : */
114 : WALSegmentCloseCB segment_close;
115 : } XLogReaderRoutine;
116 :
117 : #define XL_ROUTINE(...) &(XLogReaderRoutine){__VA_ARGS__}
118 :
119 : typedef struct
120 : {
121 : /* Is this block ref in use? */
122 : bool in_use;
123 :
124 : /* Identify the block this refers to */
125 : RelFileLocator rlocator;
126 : ForkNumber forknum;
127 : BlockNumber blkno;
128 :
129 : /* Prefetching workspace. */
130 : Buffer prefetch_buffer;
131 :
132 : /* copy of the fork_flags field from the XLogRecordBlockHeader */
133 : uint8 flags;
134 :
135 : /* Information on full-page image, if any */
136 : bool has_image; /* has image, even for consistency checking */
137 : bool apply_image; /* has image that should be restored */
138 : char *bkp_image;
139 : uint16 hole_offset;
140 : uint16 hole_length;
141 : uint16 bimg_len;
142 : uint8 bimg_info;
143 :
144 : /* Buffer holding the rmgr-specific data associated with this block */
145 : bool has_data;
146 : char *data;
147 : uint16 data_len;
148 : uint16 data_bufsz;
149 : } DecodedBkpBlock;
150 :
151 : /*
152 : * The decoded contents of a record. This occupies a contiguous region of
153 : * memory, with main_data and blocks[n].data pointing to memory after the
154 : * members declared here.
155 : */
156 : typedef struct DecodedXLogRecord
157 : {
158 : /* Private member used for resource management. */
159 : size_t size; /* total size of decoded record */
160 : bool oversized; /* outside the regular decode buffer? */
161 : struct DecodedXLogRecord *next; /* decoded record queue link */
162 :
163 : /* Public members. */
164 : XLogRecPtr lsn; /* location */
165 : XLogRecPtr next_lsn; /* location of next record */
166 : XLogRecord header; /* header */
167 : RepOriginId record_origin;
168 : TransactionId toplevel_xid; /* XID of top-level transaction */
169 : char *main_data; /* record's main data portion */
170 : uint32 main_data_len; /* main data portion's length */
171 : int max_block_id; /* highest block_id in use (-1 if none) */
172 : DecodedBkpBlock blocks[FLEXIBLE_ARRAY_MEMBER];
173 : } DecodedXLogRecord;
174 :
175 : struct XLogReaderState
176 : {
177 : /*
178 : * Operational callbacks
179 : */
180 : XLogReaderRoutine routine;
181 :
182 : /* ----------------------------------------
183 : * Public parameters
184 : * ----------------------------------------
185 : */
186 :
187 : /*
188 : * System identifier of the xlog files we're about to read. Set to zero
189 : * (the default value) if unknown or unimportant.
190 : */
191 : uint64 system_identifier;
192 :
193 : /*
194 : * Opaque data for callbacks to use. Not used by XLogReader.
195 : */
196 : void *private_data;
197 :
198 : /*
198 : * Start and end point of the last record returned by XLogReadRecord().
199 : * These are also available as record->lsn and record->next_lsn.
200 : *
201 : * EndRecPtr is also used as the position to read next. Calling
202 : * XLogBeginRead() sets EndRecPtr to the starting position and ReadRecPtr
203 : * to invalid.
205 : */
206 : XLogRecPtr ReadRecPtr; /* start of last record read */
207 : XLogRecPtr EndRecPtr; /* end+1 of last record read */
208 :
209 : /*
210 : * Set at the end of recovery: the start point of a partial record at the
211 : * end of WAL (InvalidXLogRecPtr if there wasn't one), and the start
212 : * location of its first contrecord that went missing.
213 : */
214 : XLogRecPtr abortedRecPtr;
215 : XLogRecPtr missingContrecPtr;
216 : /* Set when XLP_FIRST_IS_OVERWRITE_CONTRECORD is found */
217 : XLogRecPtr overwrittenRecPtr;
218 :
219 :
220 : /* ----------------------------------------
221 : * Decoded representation of current record
222 : *
223 : * Use XLogRecGet* functions to investigate the record; these fields
224 : * should not be accessed directly.
225 : * ----------------------------------------
226 : * Start and end point of the last record read and decoded by
227 : * XLogReadRecord(). NextRecPtr is also used as the position to decode
228 : * next. Calling XLogBeginRead() sets NextRecPtr and EndRecPtr to the
229 : * requested starting position.
230 : */
231 : XLogRecPtr DecodeRecPtr; /* start of last record decoded */
232 : XLogRecPtr NextRecPtr; /* end+1 of last record decoded */
233 : XLogRecPtr PrevRecPtr; /* start of previous record decoded */
234 :
235 : /* Last record returned by XLogReadRecord(). */
236 : DecodedXLogRecord *record;
237 :
238 : /* ----------------------------------------
239 : * private/internal state
240 : * ----------------------------------------
241 : */
242 :
243 : /*
244 : * Buffer for decoded records. This is a circular buffer, though
245 : * individual records can't be split in the middle, so some space is often
246 : * wasted at the end. Oversized records that don't fit in this space are
247 : * allocated separately.
248 : */
249 : char *decode_buffer;
250 : size_t decode_buffer_size;
251 : bool free_decode_buffer; /* need to free? */
252 : char *decode_buffer_head; /* data is read from the head */
253 : char *decode_buffer_tail; /* new data is written at the tail */
254 :
255 : /*
256 : * Queue of records that have been decoded. This is a linked list that
257 : * usually consists of consecutive records in decode_buffer, but may also
258 : * contain oversized records allocated with palloc().
259 : */
260 : DecodedXLogRecord *decode_queue_head; /* oldest decoded record */
261 : DecodedXLogRecord *decode_queue_tail; /* newest decoded record */
262 :
263 : /*
264 : * Buffer for currently read page (XLOG_BLCKSZ bytes, valid up to at least
265 : * readLen bytes)
266 : */
267 : char *readBuf;
268 : uint32 readLen;
269 :
270 : /* last read XLOG position for data currently in readBuf */
271 : WALSegmentContext segcxt;
272 : WALOpenSegment seg;
273 : uint32 segoff;
274 :
275 : /*
276 : * beginning of prior page read, and its TLI. Doesn't necessarily
277 : * correspond to what's in readBuf; used for timeline sanity checks.
278 : */
279 : XLogRecPtr latestPagePtr;
280 : TimeLineID latestPageTLI;
281 :
282 : /* beginning of the WAL record being read. */
283 : XLogRecPtr currRecPtr;
284 : /* timeline to read it from, 0 if a lookup is required */
285 : TimeLineID currTLI;
286 :
287 : /*
288 : * Safe point to read to in currTLI if current TLI is historical
289 : * (tliSwitchPoint) or InvalidXLogRecPtr if on current timeline.
290 : *
291 : * Actually set to the start of the segment containing the timeline switch
291 : * that ends currTLI's validity, not the LSN of the switch itself, since
293 : * we can't assume the old segment will be present.
294 : */
295 : XLogRecPtr currTLIValidUntil;
296 :
297 : /*
298 : * If currTLI is not the most recent known timeline, the next timeline to
299 : * read from when currTLIValidUntil is reached.
300 : */
301 : TimeLineID nextTLI;
302 :
303 : /*
304 : * Buffer for current ReadRecord result (expandable), used when a record
305 : * crosses a page boundary.
306 : */
307 : char *readRecordBuf;
308 : uint32 readRecordBufSize;
309 :
310 : /* Buffer to hold error message */
311 : char *errormsg_buf;
312 : bool errormsg_deferred;
313 :
314 : /*
315 : * Flag to indicate to XLogPageReadCB that it should not block waiting for
316 : * data.
317 : */
318 : bool nonblocking;
319 : };
320 :
321 : /*
322 : * True if XLogNextRecord() has a queued decoded record or a deferred error to return.
323 : */
324 : static inline bool
325 17192044 : XLogReaderHasQueuedRecordOrError(XLogReaderState *state)
326 : {
327 17192044 : return (state->decode_queue_head != NULL) || state->errormsg_deferred;
328 : }
329 :
330 : /* Get a new XLogReader */
331 : extern XLogReaderState *XLogReaderAllocate(int wal_segment_size,
332 : const char *waldir,
333 : XLogReaderRoutine *routine,
334 : void *private_data);
335 :
336 : /* Free an XLogReader */
337 : extern void XLogReaderFree(XLogReaderState *state);
338 :
339 : /* Optionally provide a circular decoding buffer to allow readahead. */
340 : extern void XLogReaderSetDecodeBuffer(XLogReaderState *state,
341 : void *buffer,
342 : size_t size);
343 :
344 : /* Position the XLogReader to given record */
345 : extern void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr);
346 : extern XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr);
347 :
348 : /* Return values from XLogPageReadCB. */
349 : typedef enum XLogPageReadResult
350 : {
351 : XLREAD_SUCCESS = 0, /* record is successfully read */
352 : XLREAD_FAIL = -1, /* failed during reading a record */
353 : XLREAD_WOULDBLOCK = -2, /* nonblocking mode only, no data */
354 : } XLogPageReadResult;
355 :
356 : /* Read the next XLog record. Returns NULL on end-of-WAL or failure */
357 : extern struct XLogRecord *XLogReadRecord(XLogReaderState *state,
358 : char **errormsg);
359 :
360 : /* Consume the next record or error. */
361 : extern DecodedXLogRecord *XLogNextRecord(XLogReaderState *state,
362 : char **errormsg);
363 :
364 : /* Release the previously returned record, if necessary. */
365 : extern XLogRecPtr XLogReleasePreviousRecord(XLogReaderState *state);
366 :
367 : /* Try to read ahead, if there is data and space. */
368 : extern DecodedXLogRecord *XLogReadAhead(XLogReaderState *state,
369 : bool nonblocking);
370 :
371 : /* Validate a page */
372 : extern bool XLogReaderValidatePageHeader(XLogReaderState *state,
373 : XLogRecPtr recptr, char *phdr);
374 :
375 : /* Forget error produced by XLogReaderValidatePageHeader(). */
376 : extern void XLogReaderResetError(XLogReaderState *state);
377 :
378 : /*
379 : * Error information from WALRead that both backend and frontend caller can
380 : * process. Currently only errors from pg_pread can be reported.
381 : */
382 : typedef struct WALReadError
383 : {
384 : int wre_errno; /* errno set by the last pg_pread() */
385 : int wre_off; /* Offset we tried to read from. */
386 : int wre_req; /* Bytes requested to be read. */
387 : int wre_read; /* Bytes read by the last pg_pread(). */
388 : WALOpenSegment wre_seg; /* Segment we tried to read from. */
389 : } WALReadError;
390 :
391 : extern bool WALRead(XLogReaderState *state,
392 : char *buf, XLogRecPtr startptr, Size count,
393 : TimeLineID tli, WALReadError *errinfo);
394 :
395 : /* Functions for decoding an XLogRecord */
396 :
397 : extern size_t DecodeXLogRecordRequiredSpace(size_t xl_tot_len);
398 : extern bool DecodeXLogRecord(XLogReaderState *state,
399 : DecodedXLogRecord *decoded,
400 : XLogRecord *record,
401 : XLogRecPtr lsn,
402 : char **errormsg);
403 :
404 : /*
405 : * Macros that provide access to parts of the record most recently returned by
406 : * XLogReadRecord() or XLogNextRecord().
407 : */
408 : #define XLogRecGetTotalLen(decoder) ((decoder)->record->header.xl_tot_len)
409 : #define XLogRecGetPrev(decoder) ((decoder)->record->header.xl_prev)
410 : #define XLogRecGetInfo(decoder) ((decoder)->record->header.xl_info)
411 : #define XLogRecGetRmid(decoder) ((decoder)->record->header.xl_rmid)
412 : #define XLogRecGetXid(decoder) ((decoder)->record->header.xl_xid)
413 : #define XLogRecGetOrigin(decoder) ((decoder)->record->record_origin)
414 : #define XLogRecGetTopXid(decoder) ((decoder)->record->toplevel_xid)
415 : #define XLogRecGetData(decoder) ((decoder)->record->main_data)
416 : #define XLogRecGetDataLen(decoder) ((decoder)->record->main_data_len)
417 : #define XLogRecHasAnyBlockRefs(decoder) ((decoder)->record->max_block_id >= 0)
418 : #define XLogRecMaxBlockId(decoder) ((decoder)->record->max_block_id)
419 : #define XLogRecGetBlock(decoder, i) (&(decoder)->record->blocks[(i)])
420 : #define XLogRecHasBlockRef(decoder, block_id) \
421 : (((decoder)->record->max_block_id >= (block_id)) && \
422 : ((decoder)->record->blocks[block_id].in_use))
423 : #define XLogRecHasBlockImage(decoder, block_id) \
424 : ((decoder)->record->blocks[block_id].has_image)
425 : #define XLogRecBlockImageApply(decoder, block_id) \
426 : ((decoder)->record->blocks[block_id].apply_image)
427 : #define XLogRecHasBlockData(decoder, block_id) \
428 : ((decoder)->record->blocks[block_id].has_data)
429 :
430 : #ifndef FRONTEND
431 : extern FullTransactionId XLogRecGetFullXid(XLogReaderState *record);
432 : #endif
433 :
434 : extern bool RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page);
435 : extern char *XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len);
436 : extern void XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id,
437 : RelFileLocator *rlocator, ForkNumber *forknum,
438 : BlockNumber *blknum);
439 : extern bool XLogRecGetBlockTagExtended(XLogReaderState *record, uint8 block_id,
440 : RelFileLocator *rlocator, ForkNumber *forknum,
441 : BlockNumber *blknum,
442 : Buffer *prefetch_buffer);
443 :
444 : #endif /* XLOGREADER_H */
|