Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * tzparser.c
4 : * Functions for parsing timezone offset files
5 : *
6 : * Note: this code is invoked from the check_hook for the GUC variable
7 : * timezone_abbreviations. Therefore, it should report problems using
8 : * GUC_check_errmsg() and related functions, and try to avoid throwing
9 : * elog(ERROR). This is not completely bulletproof at present --- in
10 : * particular out-of-memory will throw an error. Could probably fix with
11 : * PG_TRY if necessary.
12 : *
13 : *
14 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
15 : * Portions Copyright (c) 1994, Regents of the University of California
16 : *
17 : * IDENTIFICATION
18 : * src/backend/utils/misc/tzparser.c
19 : *
20 : *-------------------------------------------------------------------------
21 : */
22 :
23 : #include "postgres.h"
24 :
25 : #include <ctype.h>
26 :
27 : #include "miscadmin.h"
28 : #include "storage/fd.h"
29 : #include "utils/datetime.h"
30 : #include "utils/guc.h"
31 : #include "utils/memutils.h"
32 : #include "utils/tzparser.h"
33 :
34 :
/* Delimiter set handed to strtok_r() when splitting a line into tokens */
35 : #define WHITESPACE " \t\n\r"
36 :
/* Forward declarations of the file-local helpers defined below */
37 : static bool validateTzEntry(tzEntry *tzentry);
38 : static bool splitTzLine(const char *filename, int lineno,
39 : char *line, tzEntry *tzentry);
40 : static int addToArray(tzEntry **base, int *arraysize, int n,
41 : tzEntry *entry, bool override);
42 : static int ParseTzFile(const char *filename, int depth,
43 : tzEntry **base, int *arraysize, int n);
44 :
45 :
46 : /*
47 : * Apply additional validation checks to a tzEntry
48 : *
49 : * Returns true if OK, else false
50 : */
51 : static bool
52 2394642 : validateTzEntry(tzEntry *tzentry)
53 : {
54 : unsigned char *p;
55 :
56 : /*
57 : * Check restrictions imposed by datetktbl storage format (see datetime.c)
58 : */
59 2394642 : if (strlen(tzentry->abbrev) > TOKMAXLEN)
60 : {
61 0 : GUC_check_errmsg("time zone abbreviation \"%s\" is too long (maximum %d characters) in time zone file \"%s\", line %d",
62 : tzentry->abbrev, TOKMAXLEN,
63 : tzentry->filename, tzentry->lineno);
64 0 : return false;
65 : }
66 :
67 : /*
68 : * Sanity-check the offset: shouldn't exceed 14 hours
69 : */
70 2394642 : if (tzentry->offset > 14 * SECS_PER_HOUR ||
71 2394642 : tzentry->offset < -14 * SECS_PER_HOUR)
72 : {
73 0 : GUC_check_errmsg("time zone offset %d is out of range in time zone file \"%s\", line %d",
74 : tzentry->offset,
75 : tzentry->filename, tzentry->lineno);
76 0 : return false;
77 : }
78 :
79 : /*
80 : * Convert abbrev to lowercase (must match datetime.c's conversion)
81 : */
82 11101300 : for (p = (unsigned char *) tzentry->abbrev; *p; p++)
83 8706658 : *p = pg_tolower(*p);
84 :
85 2394642 : return true;
86 : }
87 :
88 : /*
89 : * Attempt to parse the line as a timezone abbrev spec
90 : *
91 : * Valid formats are:
92 : * name zone
93 : * name offset dst
94 : *
95 : * Returns true if OK, else false; data is stored in *tzentry
96 : */
97 : static bool
98 2394642 : splitTzLine(const char *filename, int lineno, char *line, tzEntry *tzentry)
99 : {
100 : char *brkl;
101 : char *abbrev;
102 : char *offset;
103 : char *offset_endptr;
104 : char *remain;
105 : char *is_dst;
106 :
107 2394642 : tzentry->lineno = lineno;
108 2394642 : tzentry->filename = filename;
109 :
110 2394642 : abbrev = strtok_r(line, WHITESPACE, &brkl);
111 2394642 : if (!abbrev)
112 : {
113 0 : GUC_check_errmsg("missing time zone abbreviation in time zone file \"%s\", line %d",
114 : filename, lineno);
115 0 : return false;
116 : }
117 2394642 : tzentry->abbrev = pstrdup(abbrev);
118 :
119 2394642 : offset = strtok_r(NULL, WHITESPACE, &brkl);
120 2394642 : if (!offset)
121 : {
122 0 : GUC_check_errmsg("missing time zone offset in time zone file \"%s\", line %d",
123 : filename, lineno);
124 0 : return false;
125 : }
126 :
127 : /* We assume zone names don't begin with a digit or sign */
128 2394642 : if (isdigit((unsigned char) *offset) || *offset == '+' || *offset == '-')
129 : {
130 1780642 : tzentry->zone = NULL;
131 1780642 : tzentry->offset = strtol(offset, &offset_endptr, 10);
132 1780642 : if (offset_endptr == offset || *offset_endptr != '\0')
133 : {
134 0 : GUC_check_errmsg("invalid number for time zone offset in time zone file \"%s\", line %d",
135 : filename, lineno);
136 0 : return false;
137 : }
138 :
139 1780642 : is_dst = strtok_r(NULL, WHITESPACE, &brkl);
140 1780642 : if (is_dst && pg_strcasecmp(is_dst, "D") == 0)
141 : {
142 589440 : tzentry->is_dst = true;
143 589440 : remain = strtok_r(NULL, WHITESPACE, &brkl);
144 : }
145 : else
146 : {
147 : /* there was no 'D' dst specifier */
148 1191202 : tzentry->is_dst = false;
149 1191202 : remain = is_dst;
150 : }
151 : }
152 : else
153 : {
154 : /*
155 : * Assume entry is a zone name. We do not try to validate it by
156 : * looking up the zone, because that would force loading of a lot of
157 : * zones that probably will never be used in the current session.
158 : */
159 614000 : tzentry->zone = pstrdup(offset);
160 614000 : tzentry->offset = 0 * SECS_PER_HOUR;
161 614000 : tzentry->is_dst = false;
162 614000 : remain = strtok_r(NULL, WHITESPACE, &brkl);
163 : }
164 :
165 2394642 : if (!remain) /* no more non-whitespace chars */
166 0 : return true;
167 :
168 2394642 : if (remain[0] != '#') /* must be a comment */
169 : {
170 0 : GUC_check_errmsg("invalid syntax in time zone file \"%s\", line %d",
171 : filename, lineno);
172 0 : return false;
173 : }
174 2394642 : return true;
175 : }
176 :
177 : /*
178 : * Insert entry into sorted array
179 : *
180 : * *base: base address of array (changeable if must enlarge array)
181 : * *arraysize: allocated length of array (changeable if must enlarge array)
182 : * n: current number of valid elements in array
183 : * entry: new data to insert
184 : * override: true if OK to override
185 : *
186 : * Returns the new array length (new value for n), or -1 if error
187 : */
188 : static int
189 2394642 : addToArray(tzEntry **base, int *arraysize, int n,
190 : tzEntry *entry, bool override)
191 : {
192 : tzEntry *arrayptr;
193 : int low;
194 : int high;
195 :
196 : /*
197 : * Search the array for a duplicate; as a useful side effect, the array is
198 : * maintained in sorted order. We use strcmp() to ensure we match the
199 : * sort order datetime.c expects.
200 : */
201 2394642 : arrayptr = *base;
202 2394642 : low = 0;
203 2394642 : high = n - 1;
204 17523860 : while (low <= high)
205 : {
206 15129248 : int mid = (low + high) >> 1;
207 15129248 : tzEntry *midptr = arrayptr + mid;
208 : int cmp;
209 :
210 15129248 : cmp = strcmp(entry->abbrev, midptr->abbrev);
211 15129248 : if (cmp < 0)
212 6017326 : high = mid - 1;
213 9111922 : else if (cmp > 0)
214 9111892 : low = mid + 1;
215 : else
216 : {
217 : /*
218 : * Found a duplicate entry; complain unless it's the same.
219 : */
220 30 : if ((midptr->zone == NULL && entry->zone == NULL &&
221 24 : midptr->offset == entry->offset &&
222 0 : midptr->is_dst == entry->is_dst) ||
223 30 : (midptr->zone != NULL && entry->zone != NULL &&
224 0 : strcmp(midptr->zone, entry->zone) == 0))
225 : {
226 : /* return unchanged array */
227 0 : return n;
228 : }
229 30 : if (override)
230 : {
231 : /* same abbrev but something is different, override */
232 30 : midptr->zone = entry->zone;
233 30 : midptr->offset = entry->offset;
234 30 : midptr->is_dst = entry->is_dst;
235 30 : return n;
236 : }
237 : /* same abbrev but something is different, complain */
238 0 : GUC_check_errmsg("time zone abbreviation \"%s\" is multiply defined",
239 : entry->abbrev);
240 0 : GUC_check_errdetail("Entry in time zone file \"%s\", line %d, conflicts with entry in file \"%s\", line %d.",
241 : midptr->filename, midptr->lineno,
242 : entry->filename, entry->lineno);
243 0 : return -1;
244 : }
245 : }
246 :
247 : /*
248 : * No match, insert at position "low".
249 : */
250 2394612 : if (n >= *arraysize)
251 : {
252 12280 : *arraysize *= 2;
253 12280 : *base = (tzEntry *) repalloc(*base, *arraysize * sizeof(tzEntry));
254 : }
255 :
256 2394612 : arrayptr = *base + low;
257 :
258 2394612 : memmove(arrayptr + 1, arrayptr, (n - low) * sizeof(tzEntry));
259 :
260 2394612 : memcpy(arrayptr, entry, sizeof(tzEntry));
261 :
262 2394612 : return n + 1;
263 : }
264 :
265 : /*
266 : * Parse a single timezone abbrev file --- can recurse to handle @INCLUDE
267 : *
268 : * filename: user-specified file name (does not include path)
269 : * depth: current recursion depth
270 : * *base: array for results (changeable if must enlarge array)
271 : * *arraysize: allocated length of array (changeable if must enlarge array)
272 : * n: current number of valid elements in array
273 : *
274 : * Returns the new array length (new value for n), or -1 if error
275 : */
276 : static int
277 12292 : ParseTzFile(const char *filename, int depth,
278 : tzEntry **base, int *arraysize, int n)
279 : {
280 : char share_path[MAXPGPATH];
281 : char file_path[MAXPGPATH];
282 : FILE *tzFile;
283 : char tzbuf[1024];
284 : char *line;
285 : tzEntry tzentry;
286 12292 : int lineno = 0;
287 12292 : bool override = false;
288 : const char *p;
289 :
290 : /*
291 : * We enforce that the filename is all alpha characters. This may be
292 : * overly restrictive, but we don't want to allow access to anything
293 : * outside the timezonesets directory, so for instance '/' *must* be
294 : * rejected.
295 : */
296 98336 : for (p = filename; *p; p++)
297 : {
298 86044 : if (!isalpha((unsigned char) *p))
299 : {
300 : /* at level 0, just use guc.c's regular "invalid value" message */
301 0 : if (depth > 0)
302 0 : GUC_check_errmsg("invalid time zone file name \"%s\"",
303 : filename);
304 0 : return -1;
305 : }
306 : }
307 :
308 : /*
309 : * The maximal recursion depth is a pretty arbitrary setting. It is hard
310 : * to imagine that someone needs more than 3 levels so stick with this
311 : * conservative setting until someone complains.
312 : */
313 12292 : if (depth > 3)
314 : {
315 0 : GUC_check_errmsg("time zone file recursion limit exceeded in file \"%s\"",
316 : filename);
317 0 : return -1;
318 : }
319 :
320 12292 : get_share_path(my_exec_path, share_path);
321 12292 : snprintf(file_path, sizeof(file_path), "%s/timezonesets/%s",
322 : share_path, filename);
323 12292 : tzFile = AllocateFile(file_path, "r");
324 12292 : if (!tzFile)
325 : {
326 : /*
327 : * Check to see if the problem is not the filename but the directory.
328 : * This is worth troubling over because if the installation share/
329 : * directory is missing or unreadable, this is likely to be the first
330 : * place we notice a problem during postmaster startup.
331 : */
332 0 : int save_errno = errno;
333 : DIR *tzdir;
334 :
335 0 : snprintf(file_path, sizeof(file_path), "%s/timezonesets",
336 : share_path);
337 0 : tzdir = AllocateDir(file_path);
338 0 : if (tzdir == NULL)
339 : {
340 0 : GUC_check_errmsg("could not open directory \"%s\": %m",
341 : file_path);
342 0 : GUC_check_errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
343 : my_exec_path);
344 0 : return -1;
345 : }
346 0 : FreeDir(tzdir);
347 0 : errno = save_errno;
348 :
349 : /*
350 : * otherwise, if file doesn't exist and it's level 0, guc.c's
351 : * complaint is enough
352 : */
353 0 : if (errno != ENOENT || depth > 0)
354 0 : GUC_check_errmsg("could not read time zone file \"%s\": %m",
355 : filename);
356 :
357 0 : return -1;
358 : }
359 :
360 7736688 : while (!feof(tzFile))
361 : {
362 7736688 : lineno++;
363 7736688 : if (fgets(tzbuf, sizeof(tzbuf), tzFile) == NULL)
364 : {
365 12292 : if (ferror(tzFile))
366 : {
367 0 : GUC_check_errmsg("could not read time zone file \"%s\": %m",
368 : filename);
369 0 : n = -1;
370 0 : break;
371 : }
372 : /* else we're at EOF after all */
373 12292 : break;
374 : }
375 7724396 : if (strlen(tzbuf) == sizeof(tzbuf) - 1)
376 : {
377 : /* the line is too long for tzbuf */
378 0 : GUC_check_errmsg("line is too long in time zone file \"%s\", line %d",
379 : filename, lineno);
380 0 : n = -1;
381 0 : break;
382 : }
383 :
384 : /* skip over whitespace */
385 7724396 : line = tzbuf;
386 81895764 : while (*line && isspace((unsigned char) *line))
387 74171368 : line++;
388 :
389 7724396 : if (*line == '\0') /* empty line */
390 270226 : continue;
391 7454170 : if (*line == '#') /* comment line */
392 5059504 : continue;
393 :
394 2394666 : if (pg_strncasecmp(line, "@INCLUDE", strlen("@INCLUDE")) == 0)
395 : {
396 : /* pstrdup so we can use filename in result data structure */
397 12 : char *includeFile = pstrdup(line + strlen("@INCLUDE"));
398 : char *brki;
399 :
400 12 : includeFile = strtok_r(includeFile, WHITESPACE, &brki);
401 12 : if (!includeFile || !*includeFile)
402 : {
403 0 : GUC_check_errmsg("@INCLUDE without file name in time zone file \"%s\", line %d",
404 : filename, lineno);
405 0 : n = -1;
406 0 : break;
407 : }
408 12 : n = ParseTzFile(includeFile, depth + 1,
409 : base, arraysize, n);
410 12 : if (n < 0)
411 0 : break;
412 12 : continue;
413 : }
414 :
415 2394654 : if (pg_strncasecmp(line, "@OVERRIDE", strlen("@OVERRIDE")) == 0)
416 : {
417 12 : override = true;
418 12 : continue;
419 : }
420 :
421 2394642 : if (!splitTzLine(filename, lineno, line, &tzentry))
422 : {
423 0 : n = -1;
424 0 : break;
425 : }
426 2394642 : if (!validateTzEntry(&tzentry))
427 : {
428 0 : n = -1;
429 0 : break;
430 : }
431 2394642 : n = addToArray(base, arraysize, n, &tzentry, override);
432 2394642 : if (n < 0)
433 0 : break;
434 : }
435 :
436 12292 : FreeFile(tzFile);
437 :
438 12292 : return n;
439 : }
440 :
441 : /*
442 : * load_tzoffsets --- read and parse the specified timezone offset file
443 : *
444 : * On success, return a filled-in TimeZoneAbbrevTable, which must have been
445 : * guc_malloc'd not palloc'd. On failure, return NULL, using GUC_check_errmsg
446 : * and friends to give details of the problem.
447 : */
448 : TimeZoneAbbrevTable *
449 12280 : load_tzoffsets(const char *filename)
450 : {
451 12280 : TimeZoneAbbrevTable *result = NULL;
452 : MemoryContext tmpContext;
453 : MemoryContext oldContext;
454 : tzEntry *array;
455 : int arraysize;
456 : int n;
457 :
458 : /*
459 : * Create a temp memory context to work in. This makes it easy to clean
460 : * up afterwards.
461 : */
462 12280 : tmpContext = AllocSetContextCreate(CurrentMemoryContext,
463 : "TZParserMemory",
464 : ALLOCSET_SMALL_SIZES);
465 12280 : oldContext = MemoryContextSwitchTo(tmpContext);
466 :
467 : /* Initialize array at a reasonable size */
468 12280 : arraysize = 128;
469 12280 : array = (tzEntry *) palloc(arraysize * sizeof(tzEntry));
470 :
471 : /* Parse the file(s) */
472 12280 : n = ParseTzFile(filename, 0, &array, &arraysize, 0);
473 :
474 : /* If no errors so far, let datetime.c allocate memory & convert format */
475 12280 : if (n >= 0)
476 : {
477 12280 : result = ConvertTimeZoneAbbrevs(array, n);
478 12280 : if (!result)
479 0 : GUC_check_errmsg("out of memory");
480 : }
481 :
482 : /* Clean up */
483 12280 : MemoryContextSwitchTo(oldContext);
484 12280 : MemoryContextDelete(tmpContext);
485 :
486 12280 : return result;
487 : }
|