Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * reinit.c
4 : * Reinitialization of unlogged relations
5 : *
6 : * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : * IDENTIFICATION
10 : * src/backend/storage/file/reinit.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 :
17 : #include <unistd.h>
18 :
19 : #include "common/relpath.h"
20 : #include "storage/copydir.h"
21 : #include "storage/fd.h"
22 : #include "storage/reinit.h"
23 : #include "utils/hsearch.h"
24 : #include "utils/memutils.h"
25 :
/* Per-directory workers; both are defined further down in this file. */
static void ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op);
static void ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op);
30 :
31 : typedef struct
32 : {
33 : Oid reloid; /* hash key */
34 : } unlogged_relation_entry;
35 :
36 : /*
37 : * Reset unlogged relations from before the last restart.
38 : *
39 : * If op includes UNLOGGED_RELATION_CLEANUP, we remove all forks of any
40 : * relation with an "init" fork, except for the "init" fork itself.
41 : *
42 : * If op includes UNLOGGED_RELATION_INIT, we copy the "init" fork to the main
43 : * fork.
44 : */
45 : void
46 384 : ResetUnloggedRelations(int op)
47 : {
48 : char temp_path[MAXPGPATH + 10 + sizeof(TABLESPACE_VERSION_DIRECTORY)];
49 : DIR *spc_dir;
50 : struct dirent *spc_de;
51 : MemoryContext tmpctx,
52 : oldctx;
53 :
54 : /* Log it. */
55 384 : elog(DEBUG1, "resetting unlogged relations: cleanup %d init %d",
56 : (op & UNLOGGED_RELATION_CLEANUP) != 0,
57 : (op & UNLOGGED_RELATION_INIT) != 0);
58 :
59 : /*
60 : * Just to be sure we don't leak any memory, let's create a temporary
61 : * memory context for this operation.
62 : */
63 384 : tmpctx = AllocSetContextCreate(CurrentMemoryContext,
64 : "ResetUnloggedRelations",
65 : ALLOCSET_DEFAULT_SIZES);
66 384 : oldctx = MemoryContextSwitchTo(tmpctx);
67 :
68 : /*
69 : * First process unlogged files in pg_default ($PGDATA/base)
70 : */
71 384 : ResetUnloggedRelationsInTablespaceDir("base", op);
72 :
73 : /*
74 : * Cycle through directories for all non-default tablespaces.
75 : */
76 384 : spc_dir = AllocateDir("pg_tblspc");
77 :
78 1284 : while ((spc_de = ReadDir(spc_dir, "pg_tblspc")) != NULL)
79 : {
80 900 : if (strcmp(spc_de->d_name, ".") == 0 ||
81 516 : strcmp(spc_de->d_name, "..") == 0)
82 768 : continue;
83 :
84 132 : snprintf(temp_path, sizeof(temp_path), "pg_tblspc/%s/%s",
85 132 : spc_de->d_name, TABLESPACE_VERSION_DIRECTORY);
86 132 : ResetUnloggedRelationsInTablespaceDir(temp_path, op);
87 : }
88 :
89 384 : FreeDir(spc_dir);
90 :
91 : /*
92 : * Restore memory context.
93 : */
94 384 : MemoryContextSwitchTo(oldctx);
95 384 : MemoryContextDelete(tmpctx);
96 384 : }
97 :
98 : /*
99 : * Process one tablespace directory for ResetUnloggedRelations
100 : */
101 : static void
102 516 : ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
103 : {
104 : DIR *ts_dir;
105 : struct dirent *de;
106 : char dbspace_path[MAXPGPATH * 2];
107 :
108 516 : ts_dir = AllocateDir(tsdirname);
109 :
110 : /*
111 : * If we get ENOENT on a tablespace directory, log it and return. This
112 : * can happen if a previous DROP TABLESPACE crashed between removing the
113 : * tablespace directory and removing the symlink in pg_tblspc. We don't
114 : * really want to prevent database startup in that scenario, so let it
115 : * pass instead. Any other type of error will be reported by ReadDir
116 : * (causing a startup failure).
117 : */
118 516 : if (ts_dir == NULL && errno == ENOENT)
119 : {
120 0 : ereport(LOG,
121 : (errcode_for_file_access(),
122 : errmsg("could not open directory \"%s\": %m",
123 : tsdirname)));
124 0 : return;
125 : }
126 :
127 2890 : while ((de = ReadDir(ts_dir, tsdirname)) != NULL)
128 : {
129 : /*
130 : * We're only interested in the per-database directories, which have
131 : * numeric names. Note that this code will also (properly) ignore "."
132 : * and "..".
133 : */
134 2374 : if (strspn(de->d_name, "0123456789") != strlen(de->d_name))
135 1100 : continue;
136 :
137 1274 : snprintf(dbspace_path, sizeof(dbspace_path), "%s/%s",
138 1274 : tsdirname, de->d_name);
139 1274 : ResetUnloggedRelationsInDbspaceDir(dbspace_path, op);
140 : }
141 :
142 516 : FreeDir(ts_dir);
143 : }
144 :
145 : /*
146 : * Process one per-dbspace directory for ResetUnloggedRelations
147 : */
148 : static void
149 1274 : ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
150 : {
151 : DIR *dbspace_dir;
152 : struct dirent *de;
153 : char rm_path[MAXPGPATH * 2];
154 :
155 : /* Caller must specify at least one operation. */
156 : Assert((op & (UNLOGGED_RELATION_CLEANUP | UNLOGGED_RELATION_INIT)) != 0);
157 :
158 : /*
159 : * Cleanup is a two-pass operation. First, we go through and identify all
160 : * the files with init forks. Then, we go through again and nuke
161 : * everything with the same OID except the init fork.
162 : */
163 1274 : if ((op & UNLOGGED_RELATION_CLEANUP) != 0)
164 : {
165 : HTAB *hash;
166 : HASHCTL ctl;
167 :
168 : /*
169 : * It's possible that someone could create a ton of unlogged relations
170 : * in the same database & tablespace, so we'd better use a hash table
171 : * rather than an array or linked list to keep track of which files
172 : * need to be reset. Otherwise, this cleanup operation would be
173 : * O(n^2).
174 : */
175 696 : ctl.keysize = sizeof(Oid);
176 696 : ctl.entrysize = sizeof(unlogged_relation_entry);
177 696 : ctl.hcxt = CurrentMemoryContext;
178 696 : hash = hash_create("unlogged relation OIDs", 32, &ctl,
179 : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
180 :
181 : /* Scan the directory. */
182 696 : dbspace_dir = AllocateDir(dbspacedirname);
183 194006 : while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
184 : {
185 : ForkNumber forkNum;
186 : int oidchars;
187 : unlogged_relation_entry ent;
188 :
189 : /* Skip anything that doesn't look like a relation data file. */
190 193310 : if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars,
191 : &forkNum))
192 193306 : continue;
193 :
194 : /* Also skip it unless this is the init fork. */
195 190610 : if (forkNum != INIT_FORKNUM)
196 190606 : continue;
197 :
198 : /*
199 : * Put the OID portion of the name into the hash table, if it
200 : * isn't already.
201 : */
202 4 : ent.reloid = atooid(de->d_name);
203 4 : (void) hash_search(hash, &ent, HASH_ENTER, NULL);
204 : }
205 :
206 : /* Done with the first pass. */
207 696 : FreeDir(dbspace_dir);
208 :
209 : /*
210 : * If we didn't find any init forks, there's no point in continuing;
211 : * we can bail out now.
212 : */
213 696 : if (hash_get_num_entries(hash) == 0)
214 : {
215 692 : hash_destroy(hash);
216 692 : return;
217 : }
218 :
219 : /*
220 : * Now, make a second pass and remove anything that matches.
221 : */
222 4 : dbspace_dir = AllocateDir(dbspacedirname);
223 604 : while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
224 : {
225 : ForkNumber forkNum;
226 : int oidchars;
227 : unlogged_relation_entry ent;
228 :
229 : /* Skip anything that doesn't look like a relation data file. */
230 600 : if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars,
231 : &forkNum))
232 16 : continue;
233 :
234 : /* We never remove the init fork. */
235 588 : if (forkNum == INIT_FORKNUM)
236 4 : continue;
237 :
238 : /*
239 : * See whether the OID portion of the name shows up in the hash
240 : * table. If so, nuke it!
241 : */
242 584 : ent.reloid = atooid(de->d_name);
243 584 : if (hash_search(hash, &ent, HASH_FIND, NULL))
244 : {
245 8 : snprintf(rm_path, sizeof(rm_path), "%s/%s",
246 8 : dbspacedirname, de->d_name);
247 8 : if (unlink(rm_path) < 0)
248 0 : ereport(ERROR,
249 : (errcode_for_file_access(),
250 : errmsg("could not remove file \"%s\": %m",
251 : rm_path)));
252 : else
253 8 : elog(DEBUG2, "unlinked file \"%s\"", rm_path);
254 : }
255 : }
256 :
257 : /* Cleanup is complete. */
258 4 : FreeDir(dbspace_dir);
259 4 : hash_destroy(hash);
260 : }
261 :
262 : /*
263 : * Initialization happens after cleanup is complete: we copy each init
264 : * fork file to the corresponding main fork file. Note that if we are
265 : * asked to do both cleanup and init, we may never get here: if the
266 : * cleanup code determines that there are no init forks in this dbspace,
267 : * it will return before we get to this point.
268 : */
269 582 : if ((op & UNLOGGED_RELATION_INIT) != 0)
270 : {
271 : /* Scan the directory. */
272 578 : dbspace_dir = AllocateDir(dbspacedirname);
273 159428 : while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
274 : {
275 : ForkNumber forkNum;
276 : int oidchars;
277 : char oidbuf[OIDCHARS + 1];
278 : char srcpath[MAXPGPATH * 2];
279 : char dstpath[MAXPGPATH];
280 :
281 : /* Skip anything that doesn't look like a relation data file. */
282 158850 : if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars,
283 : &forkNum))
284 158846 : continue;
285 :
286 : /* Also skip it unless this is the init fork. */
287 156598 : if (forkNum != INIT_FORKNUM)
288 156594 : continue;
289 :
290 : /* Construct source pathname. */
291 4 : snprintf(srcpath, sizeof(srcpath), "%s/%s",
292 4 : dbspacedirname, de->d_name);
293 :
294 : /* Construct destination pathname. */
295 4 : memcpy(oidbuf, de->d_name, oidchars);
296 4 : oidbuf[oidchars] = '\0';
297 4 : snprintf(dstpath, sizeof(dstpath), "%s/%s%s",
298 8 : dbspacedirname, oidbuf, de->d_name + oidchars + 1 +
299 4 : strlen(forkNames[INIT_FORKNUM]));
300 :
301 : /* OK, we're ready to perform the actual copy. */
302 4 : elog(DEBUG2, "copying %s to %s", srcpath, dstpath);
303 4 : copy_file(srcpath, dstpath);
304 : }
305 :
306 578 : FreeDir(dbspace_dir);
307 :
308 : /*
309 : * copy_file() above has already called pg_flush_data() on the files
310 : * it created. Now we need to fsync those files, because a checkpoint
311 : * won't do it for us while we're in recovery. We do this in a
312 : * separate pass to allow the kernel to perform all the flushes
313 : * (especially the metadata ones) at once.
314 : */
315 578 : dbspace_dir = AllocateDir(dbspacedirname);
316 159432 : while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
317 : {
318 : ForkNumber forkNum;
319 : int oidchars;
320 : char oidbuf[OIDCHARS + 1];
321 : char mainpath[MAXPGPATH];
322 :
323 : /* Skip anything that doesn't look like a relation data file. */
324 158854 : if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars,
325 : &forkNum))
326 158850 : continue;
327 :
328 : /* Also skip it unless this is the init fork. */
329 156602 : if (forkNum != INIT_FORKNUM)
330 156598 : continue;
331 :
332 : /* Construct main fork pathname. */
333 4 : memcpy(oidbuf, de->d_name, oidchars);
334 4 : oidbuf[oidchars] = '\0';
335 4 : snprintf(mainpath, sizeof(mainpath), "%s/%s%s",
336 8 : dbspacedirname, oidbuf, de->d_name + oidchars + 1 +
337 4 : strlen(forkNames[INIT_FORKNUM]));
338 :
339 4 : fsync_fname(mainpath, false);
340 : }
341 :
342 578 : FreeDir(dbspace_dir);
343 :
344 : /*
345 : * Lastly, fsync the database directory itself, ensuring the
346 : * filesystem remembers the file creations and deletions we've done.
347 : * We don't bother with this during a call that does only
348 : * UNLOGGED_RELATION_CLEANUP, because if recovery crashes before we
349 : * get to doing UNLOGGED_RELATION_INIT, we'll redo the cleanup step
350 : * too at the next startup attempt.
351 : */
352 578 : fsync_fname(dbspacedirname, true);
353 : }
354 : }
355 :
356 : /*
357 : * Basic parsing of putative relation filenames.
358 : *
359 : * This function returns true if the file appears to be in the correct format
360 : * for a non-temporary relation and false otherwise.
361 : *
362 : * NB: If this function returns true, the caller is entitled to assume that
363 : * *oidchars has been set to the a value no more than OIDCHARS, and thus
364 : * that a buffer of OIDCHARS+1 characters is sufficient to hold the OID
365 : * portion of the filename. This is critical to protect against a possible
366 : * buffer overrun.
367 : */
368 : bool
369 777318 : parse_filename_for_nontemp_relation(const char *name, int *oidchars,
370 : ForkNumber *fork)
371 : {
372 : int pos;
373 :
374 : /* Look for a non-empty string of digits (that isn't too long). */
375 3887170 : for (pos = 0; isdigit((unsigned char) name[pos]); ++pos)
376 : ;
377 777318 : if (pos == 0 || pos > OIDCHARS)
378 9116 : return false;
379 768202 : *oidchars = pos;
380 :
381 : /* Check for a fork name. */
382 768202 : if (name[pos] != '_')
383 576930 : *fork = MAIN_FORKNUM;
384 : else
385 : {
386 : int forkchar;
387 :
388 191272 : forkchar = forkname_chars(&name[pos + 1], fork);
389 191272 : if (forkchar <= 0)
390 0 : return false;
391 191272 : pos += forkchar + 1;
392 : }
393 :
394 : /* Check for a segment number. */
395 768202 : if (name[pos] == '.')
396 : {
397 : int segchar;
398 :
399 0 : for (segchar = 1; isdigit((unsigned char) name[pos + segchar]); ++segchar)
400 : ;
401 0 : if (segchar <= 1)
402 0 : return false;
403 0 : pos += segchar;
404 : }
405 :
406 : /* Now we should be at the end. */
407 768202 : if (name[pos] != '\0')
408 0 : return false;
409 768202 : return true;
410 : }
|