Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * timeline.c
4 : * Functions for reading and writing timeline history files.
5 : *
6 : * A timeline history file lists the timeline changes of the timeline, in
7 : * a simple text format. They are archived along with the WAL segments.
8 : *
9 : * The files are named like "<tli>.history". For example, if the database
10 : * starts up and switches to timeline 5, the timeline history file would be
11 : * called "00000005.history".
12 : *
13 : * Each line in the file represents a timeline switch:
14 : *
15 : * <parentTLI> <switchpoint> <reason>
16 : *
17 : * parentTLI ID of the parent timeline
18 : * switchpoint XLogRecPtr of the WAL location where the switch happened
19 : * reason human-readable explanation of why the timeline was changed
20 : *
21 : * The fields are separated by tabs. Lines beginning with # are comments, and
22 : * are ignored. Empty lines are also ignored.
23 : *
24 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
25 : * Portions Copyright (c) 1994, Regents of the University of California
26 : *
27 : * src/backend/access/transam/timeline.c
28 : *
29 : *-------------------------------------------------------------------------
30 : */
31 :
32 : #include "postgres.h"
33 :
34 : #include <sys/stat.h>
35 : #include <unistd.h>
36 :
37 : #include "access/timeline.h"
38 : #include "access/xlog.h"
39 : #include "access/xlog_internal.h"
40 : #include "access/xlogarchive.h"
41 : #include "access/xlogdefs.h"
42 : #include "pgstat.h"
43 : #include "storage/fd.h"
44 :
45 : /*
46 : * Copies all timeline history files with id's between 'begin' and 'end'
47 : * from archive to pg_wal.
48 : */
49 : void
50 1662 : restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end)
51 : {
52 : char path[MAXPGPATH];
53 : char histfname[MAXFNAMELEN];
54 : TimeLineID tli;
55 :
56 1678 : for (tli = begin; tli < end; tli++)
57 : {
58 16 : if (tli == 1)
59 10 : continue;
60 :
61 6 : TLHistoryFileName(histfname, tli);
62 6 : if (RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false))
63 2 : KeepFileRestoredFromArchive(path, histfname);
64 : }
65 1662 : }
66 :
67 : /*
68 : * Try to read a timeline's history file.
69 : *
70 : * If successful, return the list of component TLIs (the given TLI followed by
71 : * its ancestor TLIs). If we can't find the history file, assume that the
72 : * timeline has no parents, and return a list of just the specified timeline
73 : * ID.
74 : */
75 : List *
76 4942 : readTimeLineHistory(TimeLineID targetTLI)
77 : {
78 : List *result;
79 : char path[MAXPGPATH];
80 : char histfname[MAXFNAMELEN];
81 : FILE *fd;
82 : TimeLineHistoryEntry *entry;
83 4942 : TimeLineID lasttli = 0;
84 : XLogRecPtr prevend;
85 4942 : bool fromArchive = false;
86 :
87 : /* Timeline 1 does not have a history file, so no need to check */
88 4942 : if (targetTLI == 1)
89 : {
90 4746 : entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
91 4746 : entry->tli = targetTLI;
92 4746 : entry->begin = entry->end = InvalidXLogRecPtr;
93 4746 : return list_make1(entry);
94 : }
95 :
96 196 : if (ArchiveRecoveryRequested)
97 : {
98 76 : TLHistoryFileName(histfname, targetTLI);
99 : fromArchive =
100 76 : RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
101 : }
102 : else
103 120 : TLHistoryFilePath(path, targetTLI);
104 :
105 196 : fd = AllocateFile(path, "r");
106 196 : if (fd == NULL)
107 : {
108 0 : if (errno != ENOENT)
109 0 : ereport(FATAL,
110 : (errcode_for_file_access(),
111 : errmsg("could not open file \"%s\": %m", path)));
112 : /* Not there, so assume no parents */
113 0 : entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
114 0 : entry->tli = targetTLI;
115 0 : entry->begin = entry->end = InvalidXLogRecPtr;
116 0 : return list_make1(entry);
117 : }
118 :
119 196 : result = NIL;
120 :
121 : /*
122 : * Parse the file...
123 : */
124 196 : prevend = InvalidXLogRecPtr;
125 : for (;;)
126 524 : {
127 : char fline[MAXPGPATH];
128 : char *res;
129 : char *ptr;
130 : TimeLineID tli;
131 : uint32 switchpoint_hi;
132 : uint32 switchpoint_lo;
133 : int nfields;
134 :
135 720 : pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_READ);
136 720 : res = fgets(fline, sizeof(fline), fd);
137 720 : pgstat_report_wait_end();
138 720 : if (res == NULL)
139 : {
140 196 : if (ferror(fd))
141 0 : ereport(ERROR,
142 : (errcode_for_file_access(),
143 : errmsg("could not read file \"%s\": %m", path)));
144 :
145 196 : break;
146 : }
147 :
148 : /* skip leading whitespace and check for # comment */
149 692 : for (ptr = fline; *ptr; ptr++)
150 : {
151 524 : if (!isspace((unsigned char) *ptr))
152 356 : break;
153 : }
154 524 : if (*ptr == '\0' || *ptr == '#')
155 168 : continue;
156 :
157 356 : nfields = sscanf(fline, "%u\t%X/%X", &tli, &switchpoint_hi, &switchpoint_lo);
158 :
159 356 : if (nfields < 1)
160 : {
161 : /* expect a numeric timeline ID as first field of line */
162 0 : ereport(FATAL,
163 : (errmsg("syntax error in history file: %s", fline),
164 : errhint("Expected a numeric timeline ID.")));
165 : }
166 356 : if (nfields != 3)
167 0 : ereport(FATAL,
168 : (errmsg("syntax error in history file: %s", fline),
169 : errhint("Expected a write-ahead log switchpoint location.")));
170 :
171 356 : if (result && tli <= lasttli)
172 0 : ereport(FATAL,
173 : (errmsg("invalid data in history file: %s", fline),
174 : errhint("Timeline IDs must be in increasing sequence.")));
175 :
176 356 : lasttli = tli;
177 :
178 356 : entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
179 356 : entry->tli = tli;
180 356 : entry->begin = prevend;
181 356 : entry->end = ((uint64) (switchpoint_hi)) << 32 | (uint64) switchpoint_lo;
182 356 : prevend = entry->end;
183 :
184 : /* Build list with newest item first */
185 356 : result = lcons(entry, result);
186 :
187 : /* we ignore the remainder of each line */
188 : }
189 :
190 196 : FreeFile(fd);
191 :
192 196 : if (result && targetTLI <= lasttli)
193 0 : ereport(FATAL,
194 : (errmsg("invalid data in history file \"%s\"", path),
195 : errhint("Timeline IDs must be less than child timeline's ID.")));
196 :
197 : /*
198 : * Create one more entry for the "tip" of the timeline, which has no entry
199 : * in the history file.
200 : */
201 196 : entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
202 196 : entry->tli = targetTLI;
203 196 : entry->begin = prevend;
204 196 : entry->end = InvalidXLogRecPtr;
205 :
206 196 : result = lcons(entry, result);
207 :
208 : /*
209 : * If the history file was fetched from archive, save it in pg_wal for
210 : * future reference.
211 : */
212 196 : if (fromArchive)
213 6 : KeepFileRestoredFromArchive(path, histfname);
214 :
215 196 : return result;
216 : }
217 :
218 : /*
219 : * Probe whether a timeline history file exists for the given timeline ID
220 : */
221 : bool
222 666 : existsTimeLineHistory(TimeLineID probeTLI)
223 : {
224 : char path[MAXPGPATH];
225 : char histfname[MAXFNAMELEN];
226 : FILE *fd;
227 :
228 : /* Timeline 1 does not have a history file, so no need to check */
229 666 : if (probeTLI == 1)
230 0 : return false;
231 :
232 666 : if (ArchiveRecoveryRequested)
233 : {
234 576 : TLHistoryFileName(histfname, probeTLI);
235 576 : RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
236 : }
237 : else
238 90 : TLHistoryFilePath(path, probeTLI);
239 :
240 666 : fd = AllocateFile(path, "r");
241 666 : if (fd != NULL)
242 : {
243 90 : FreeFile(fd);
244 90 : return true;
245 : }
246 : else
247 : {
248 576 : if (errno != ENOENT)
249 0 : ereport(FATAL,
250 : (errcode_for_file_access(),
251 : errmsg("could not open file \"%s\": %m", path)));
252 576 : return false;
253 : }
254 : }
255 :
256 : /*
257 : * Find the newest existing timeline, assuming that startTLI exists.
258 : *
259 : * Note: while this is somewhat heuristic, it does positively guarantee
260 : * that (result + 1) is not a known timeline, and therefore it should
261 : * be safe to assign that ID to a new timeline.
262 : */
263 : TimeLineID
264 554 : findNewestTimeLine(TimeLineID startTLI)
265 : {
266 : TimeLineID newestTLI;
267 : TimeLineID probeTLI;
268 :
269 : /*
270 : * The algorithm is just to probe for the existence of timeline history
271 : * files. XXX is it useful to allow gaps in the sequence?
272 : */
273 554 : newestTLI = startTLI;
274 :
275 576 : for (probeTLI = startTLI + 1;; probeTLI++)
276 : {
277 576 : if (existsTimeLineHistory(probeTLI))
278 : {
279 22 : newestTLI = probeTLI; /* probeTLI exists */
280 : }
281 : else
282 : {
283 : /* doesn't exist, assume we're done */
284 554 : break;
285 : }
286 : }
287 :
288 554 : return newestTLI;
289 : }
290 :
291 : /*
292 : * Create a new timeline history file.
293 : *
294 : * newTLI: ID of the new timeline
295 : * parentTLI: ID of its immediate parent
296 : * switchpoint: WAL location where the system switched to the new timeline
297 : * reason: human-readable explanation of why the timeline was switched
298 : *
299 : * Currently this is only used at the end recovery, and so there are no locking
300 : * considerations. But we should be just as tense as XLogFileInit to avoid
301 : * emplacing a bogus file.
302 : */
303 : void
304 96 : writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
305 : XLogRecPtr switchpoint, char *reason)
306 : {
307 : char path[MAXPGPATH];
308 : char tmppath[MAXPGPATH];
309 : char histfname[MAXFNAMELEN];
310 : char buffer[BLCKSZ];
311 : int srcfd;
312 : int fd;
313 : int nbytes;
314 :
315 : Assert(newTLI > parentTLI); /* else bad selection of newTLI */
316 :
317 : /*
318 : * Write into a temp file name.
319 : */
320 96 : snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
321 :
322 96 : unlink(tmppath);
323 :
324 : /* do not use get_sync_bit() here --- want to fsync only at end of fill */
325 96 : fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL);
326 96 : if (fd < 0)
327 0 : ereport(ERROR,
328 : (errcode_for_file_access(),
329 : errmsg("could not create file \"%s\": %m", tmppath)));
330 :
331 : /*
332 : * If a history file exists for the parent, copy it verbatim
333 : */
334 96 : if (ArchiveRecoveryRequested)
335 : {
336 96 : TLHistoryFileName(histfname, parentTLI);
337 96 : RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
338 : }
339 : else
340 0 : TLHistoryFilePath(path, parentTLI);
341 :
342 96 : srcfd = OpenTransientFile(path, O_RDONLY);
343 96 : if (srcfd < 0)
344 : {
345 80 : if (errno != ENOENT)
346 0 : ereport(ERROR,
347 : (errcode_for_file_access(),
348 : errmsg("could not open file \"%s\": %m", path)));
349 : /* Not there, so assume parent has no parents */
350 : }
351 : else
352 : {
353 : for (;;)
354 : {
355 16 : errno = 0;
356 32 : pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_READ);
357 32 : nbytes = (int) read(srcfd, buffer, sizeof(buffer));
358 32 : pgstat_report_wait_end();
359 32 : if (nbytes < 0 || errno != 0)
360 0 : ereport(ERROR,
361 : (errcode_for_file_access(),
362 : errmsg("could not read file \"%s\": %m", path)));
363 32 : if (nbytes == 0)
364 16 : break;
365 16 : errno = 0;
366 16 : pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_WRITE);
367 16 : if ((int) write(fd, buffer, nbytes) != nbytes)
368 : {
369 0 : int save_errno = errno;
370 :
371 : /*
372 : * If we fail to make the file, delete it to release disk
373 : * space
374 : */
375 0 : unlink(tmppath);
376 :
377 : /*
378 : * if write didn't set errno, assume problem is no disk space
379 : */
380 0 : errno = save_errno ? save_errno : ENOSPC;
381 :
382 0 : ereport(ERROR,
383 : (errcode_for_file_access(),
384 : errmsg("could not write to file \"%s\": %m", tmppath)));
385 : }
386 16 : pgstat_report_wait_end();
387 : }
388 :
389 16 : if (CloseTransientFile(srcfd) != 0)
390 0 : ereport(ERROR,
391 : (errcode_for_file_access(),
392 : errmsg("could not close file \"%s\": %m", path)));
393 : }
394 :
395 : /*
396 : * Append one line with the details of this timeline split.
397 : *
398 : * If we did have a parent file, insert an extra newline just in case the
399 : * parent file failed to end with one.
400 : */
401 96 : snprintf(buffer, sizeof(buffer),
402 : "%s%u\t%X/%X\t%s\n",
403 : (srcfd < 0) ? "" : "\n",
404 : parentTLI,
405 96 : LSN_FORMAT_ARGS(switchpoint),
406 : reason);
407 :
408 96 : nbytes = strlen(buffer);
409 96 : errno = 0;
410 96 : pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_WRITE);
411 96 : if ((int) write(fd, buffer, nbytes) != nbytes)
412 : {
413 0 : int save_errno = errno;
414 :
415 : /*
416 : * If we fail to make the file, delete it to release disk space
417 : */
418 0 : unlink(tmppath);
419 : /* if write didn't set errno, assume problem is no disk space */
420 0 : errno = save_errno ? save_errno : ENOSPC;
421 :
422 0 : ereport(ERROR,
423 : (errcode_for_file_access(),
424 : errmsg("could not write to file \"%s\": %m", tmppath)));
425 : }
426 96 : pgstat_report_wait_end();
427 :
428 96 : pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_SYNC);
429 96 : if (pg_fsync(fd) != 0)
430 0 : ereport(data_sync_elevel(ERROR),
431 : (errcode_for_file_access(),
432 : errmsg("could not fsync file \"%s\": %m", tmppath)));
433 96 : pgstat_report_wait_end();
434 :
435 96 : if (CloseTransientFile(fd) != 0)
436 0 : ereport(ERROR,
437 : (errcode_for_file_access(),
438 : errmsg("could not close file \"%s\": %m", tmppath)));
439 :
440 : /*
441 : * Now move the completed history file into place with its final name.
442 : */
443 96 : TLHistoryFilePath(path, newTLI);
444 : Assert(access(path, F_OK) != 0 && errno == ENOENT);
445 96 : durable_rename(tmppath, path, ERROR);
446 :
447 : /* The history file can be archived immediately. */
448 96 : if (XLogArchivingActive())
449 : {
450 26 : TLHistoryFileName(histfname, newTLI);
451 26 : XLogArchiveNotify(histfname);
452 : }
453 96 : }
454 :
455 : /*
456 : * Writes a history file for given timeline and contents.
457 : *
458 : * Currently this is only used in the walreceiver process, and so there are
459 : * no locking considerations. But we should be just as tense as XLogFileInit
460 : * to avoid emplacing a bogus file.
461 : */
462 : void
463 22 : writeTimeLineHistoryFile(TimeLineID tli, char *content, int size)
464 : {
465 : char path[MAXPGPATH];
466 : char tmppath[MAXPGPATH];
467 : int fd;
468 :
469 : /*
470 : * Write into a temp file name.
471 : */
472 22 : snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
473 :
474 22 : unlink(tmppath);
475 :
476 : /* do not use get_sync_bit() here --- want to fsync only at end of fill */
477 22 : fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL);
478 22 : if (fd < 0)
479 0 : ereport(ERROR,
480 : (errcode_for_file_access(),
481 : errmsg("could not create file \"%s\": %m", tmppath)));
482 :
483 22 : errno = 0;
484 22 : pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_FILE_WRITE);
485 22 : if ((int) write(fd, content, size) != size)
486 : {
487 0 : int save_errno = errno;
488 :
489 : /*
490 : * If we fail to make the file, delete it to release disk space
491 : */
492 0 : unlink(tmppath);
493 : /* if write didn't set errno, assume problem is no disk space */
494 0 : errno = save_errno ? save_errno : ENOSPC;
495 :
496 0 : ereport(ERROR,
497 : (errcode_for_file_access(),
498 : errmsg("could not write to file \"%s\": %m", tmppath)));
499 : }
500 22 : pgstat_report_wait_end();
501 :
502 22 : pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_FILE_SYNC);
503 22 : if (pg_fsync(fd) != 0)
504 0 : ereport(data_sync_elevel(ERROR),
505 : (errcode_for_file_access(),
506 : errmsg("could not fsync file \"%s\": %m", tmppath)));
507 22 : pgstat_report_wait_end();
508 :
509 22 : if (CloseTransientFile(fd) != 0)
510 0 : ereport(ERROR,
511 : (errcode_for_file_access(),
512 : errmsg("could not close file \"%s\": %m", tmppath)));
513 :
514 : /*
515 : * Now move the completed history file into place with its final name,
516 : * replacing any existing file with the same name.
517 : */
518 22 : TLHistoryFilePath(path, tli);
519 22 : durable_rename(tmppath, path, ERROR);
520 22 : }
521 :
522 : /*
523 : * Returns true if 'expectedTLEs' contains a timeline with id 'tli'
524 : */
525 : bool
526 5327434 : tliInHistory(TimeLineID tli, List *expectedTLEs)
527 : {
528 : ListCell *cell;
529 :
530 5350778 : foreach(cell, expectedTLEs)
531 : {
532 5350778 : if (((TimeLineHistoryEntry *) lfirst(cell))->tli == tli)
533 5327434 : return true;
534 : }
535 :
536 0 : return false;
537 : }
538 :
539 : /*
540 : * Returns the ID of the timeline in use at a particular point in time, in
541 : * the given timeline history.
542 : */
543 : TimeLineID
544 4954 : tliOfPointInHistory(XLogRecPtr ptr, List *history)
545 : {
546 : ListCell *cell;
547 :
548 4982 : foreach(cell, history)
549 : {
550 4982 : TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(cell);
551 :
552 4982 : if ((XLogRecPtrIsInvalid(tle->begin) || tle->begin <= ptr) &&
553 4954 : (XLogRecPtrIsInvalid(tle->end) || ptr < tle->end))
554 : {
555 : /* found it */
556 4954 : return tle->tli;
557 : }
558 : }
559 :
560 : /* shouldn't happen. */
561 0 : elog(ERROR, "timeline history was not contiguous");
562 : return 0; /* keep compiler quiet */
563 : }
564 :
565 : /*
566 : * Returns the point in history where we branched off the given timeline,
567 : * and the timeline we branched to (*nextTLI). Returns InvalidXLogRecPtr if
568 : * the timeline is current, ie. we have not branched off from it, and throws
569 : * an error if the timeline is not part of this server's history.
570 : */
571 : XLogRecPtr
572 2958 : tliSwitchPoint(TimeLineID tli, List *history, TimeLineID *nextTLI)
573 : {
574 : ListCell *cell;
575 :
576 2958 : if (nextTLI)
577 2958 : *nextTLI = 0;
578 2992 : foreach(cell, history)
579 : {
580 2992 : TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(cell);
581 :
582 2992 : if (tle->tli == tli)
583 2958 : return tle->end;
584 34 : if (nextTLI)
585 34 : *nextTLI = tle->tli;
586 : }
587 :
588 0 : ereport(ERROR,
589 : (errmsg("requested timeline %u is not in this server's history",
590 : tli)));
591 : return InvalidXLogRecPtr; /* keep compiler quiet */
592 : }
|