Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * tzparser.c
4 : * Functions for parsing timezone offset files
5 : *
6 : * Note: this code is invoked from the check_hook for the GUC variable
7 : * timezone_abbreviations. Therefore, it should report problems using
8 : * GUC_check_errmsg() and related functions, and try to avoid throwing
9 : * elog(ERROR). This is not completely bulletproof at present --- in
10 : * particular out-of-memory will throw an error. Could probably fix with
11 : * PG_TRY if necessary.
12 : *
13 : *
14 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
15 : * Portions Copyright (c) 1994, Regents of the University of California
16 : *
17 : * IDENTIFICATION
18 : * src/backend/utils/misc/tzparser.c
19 : *
20 : *-------------------------------------------------------------------------
21 : */
22 :
23 : #include "postgres.h"
24 :
25 : #include <ctype.h>
26 :
27 : #include "miscadmin.h"
28 : #include "storage/fd.h"
29 : #include "utils/datetime.h"
30 : #include "utils/guc.h"
31 : #include "utils/memutils.h"
32 : #include "utils/tzparser.h"
33 :
34 :
/* Delimiter set passed to strtok() when splitting a line into fields. */
35 : #define WHITESPACE " \t\n\r"
36 :
/*
 * Forward declarations for the file-local helpers defined below.
 */
37 : static bool validateTzEntry(tzEntry *tzentry);
38 : static bool splitTzLine(const char *filename, int lineno,
39 : char *line, tzEntry *tzentry);
40 : static int addToArray(tzEntry **base, int *arraysize, int n,
41 : tzEntry *entry, bool override);
42 : static int ParseTzFile(const char *filename, int depth,
43 : tzEntry **base, int *arraysize, int n);
44 :
45 :
46 : /*
47 : * Apply additional validation checks to a tzEntry
48 : *
49 : * Returns true if OK, else false
50 : */
51 : static bool
52 2315082 : validateTzEntry(tzEntry *tzentry)
53 : {
54 : unsigned char *p;
55 :
56 : /*
57 : * Check restrictions imposed by datetktbl storage format (see datetime.c)
58 : */
59 2315082 : if (strlen(tzentry->abbrev) > TOKMAXLEN)
60 : {
61 0 : GUC_check_errmsg("time zone abbreviation \"%s\" is too long (maximum %d characters) in time zone file \"%s\", line %d",
62 : tzentry->abbrev, TOKMAXLEN,
63 : tzentry->filename, tzentry->lineno);
64 0 : return false;
65 : }
66 :
67 : /*
68 : * Sanity-check the offset: shouldn't exceed 14 hours
69 : */
70 2315082 : if (tzentry->offset > 14 * SECS_PER_HOUR ||
71 2315082 : tzentry->offset < -14 * SECS_PER_HOUR)
72 : {
73 0 : GUC_check_errmsg("time zone offset %d is out of range in time zone file \"%s\", line %d",
74 : tzentry->offset,
75 : tzentry->filename, tzentry->lineno);
76 0 : return false;
77 : }
78 :
79 : /*
80 : * Convert abbrev to lowercase (must match datetime.c's conversion)
81 : */
82 10732468 : for (p = (unsigned char *) tzentry->abbrev; *p; p++)
83 8417386 : *p = pg_tolower(*p);
84 :
85 2315082 : return true;
86 : }
87 :
88 : /*
89 : * Attempt to parse the line as a timezone abbrev spec
90 : *
91 : * Valid formats are:
92 : * name zone
93 : * name offset dst
94 : *
95 : * Returns true if OK, else false; data is stored in *tzentry
96 : */
97 : static bool
98 2315082 : splitTzLine(const char *filename, int lineno, char *line, tzEntry *tzentry)
99 : {
100 : char *abbrev;
101 : char *offset;
102 : char *offset_endptr;
103 : char *remain;
104 : char *is_dst;
105 :
106 2315082 : tzentry->lineno = lineno;
107 2315082 : tzentry->filename = filename;
108 :
109 2315082 : abbrev = strtok(line, WHITESPACE);
110 2315082 : if (!abbrev)
111 : {
112 0 : GUC_check_errmsg("missing time zone abbreviation in time zone file \"%s\", line %d",
113 : filename, lineno);
114 0 : return false;
115 : }
116 2315082 : tzentry->abbrev = pstrdup(abbrev);
117 :
118 2315082 : offset = strtok(NULL, WHITESPACE);
119 2315082 : if (!offset)
120 : {
121 0 : GUC_check_errmsg("missing time zone offset in time zone file \"%s\", line %d",
122 : filename, lineno);
123 0 : return false;
124 : }
125 :
126 : /* We assume zone names don't begin with a digit or sign */
127 2315082 : if (isdigit((unsigned char) *offset) || *offset == '+' || *offset == '-')
128 : {
129 1721482 : tzentry->zone = NULL;
130 1721482 : tzentry->offset = strtol(offset, &offset_endptr, 10);
131 1721482 : if (offset_endptr == offset || *offset_endptr != '\0')
132 : {
133 0 : GUC_check_errmsg("invalid number for time zone offset in time zone file \"%s\", line %d",
134 : filename, lineno);
135 0 : return false;
136 : }
137 :
138 1721482 : is_dst = strtok(NULL, WHITESPACE);
139 1721482 : if (is_dst && pg_strcasecmp(is_dst, "D") == 0)
140 : {
141 569856 : tzentry->is_dst = true;
142 569856 : remain = strtok(NULL, WHITESPACE);
143 : }
144 : else
145 : {
146 : /* there was no 'D' dst specifier */
147 1151626 : tzentry->is_dst = false;
148 1151626 : remain = is_dst;
149 : }
150 : }
151 : else
152 : {
153 : /*
154 : * Assume entry is a zone name. We do not try to validate it by
155 : * looking up the zone, because that would force loading of a lot of
156 : * zones that probably will never be used in the current session.
157 : */
158 593600 : tzentry->zone = pstrdup(offset);
159 593600 : tzentry->offset = 0 * SECS_PER_HOUR;
160 593600 : tzentry->is_dst = false;
161 593600 : remain = strtok(NULL, WHITESPACE);
162 : }
163 :
164 2315082 : if (!remain) /* no more non-whitespace chars */
165 0 : return true;
166 :
167 2315082 : if (remain[0] != '#') /* must be a comment */
168 : {
169 0 : GUC_check_errmsg("invalid syntax in time zone file \"%s\", line %d",
170 : filename, lineno);
171 0 : return false;
172 : }
173 2315082 : return true;
174 : }
175 :
176 : /*
177 : * Insert entry into sorted array
178 : *
179 : * *base: base address of array (changeable if must enlarge array)
180 : * *arraysize: allocated length of array (changeable if must enlarge array)
181 : * n: current number of valid elements in array
182 : * entry: new data to insert
183 : * override: true if OK to override
184 : *
185 : * Returns the new array length (new value for n), or -1 if error
186 : */
187 : static int
188 2315082 : addToArray(tzEntry **base, int *arraysize, int n,
189 : tzEntry *entry, bool override)
190 : {
191 : tzEntry *arrayptr;
192 : int low;
193 : int high;
194 :
195 : /*
196 : * Search the array for a duplicate; as a useful side effect, the array is
197 : * maintained in sorted order. We use strcmp() to ensure we match the
198 : * sort order datetime.c expects.
199 : */
200 2315082 : arrayptr = *base;
201 2315082 : low = 0;
202 2315082 : high = n - 1;
203 16941644 : while (low <= high)
204 : {
205 14626592 : int mid = (low + high) >> 1;
206 14626592 : tzEntry *midptr = arrayptr + mid;
207 : int cmp;
208 :
209 14626592 : cmp = strcmp(entry->abbrev, midptr->abbrev);
210 14626592 : if (cmp < 0)
211 5817406 : high = mid - 1;
212 8809186 : else if (cmp > 0)
213 8809156 : low = mid + 1;
214 : else
215 : {
216 : /*
217 : * Found a duplicate entry; complain unless it's the same.
218 : */
219 30 : if ((midptr->zone == NULL && entry->zone == NULL &&
220 24 : midptr->offset == entry->offset &&
221 0 : midptr->is_dst == entry->is_dst) ||
222 30 : (midptr->zone != NULL && entry->zone != NULL &&
223 0 : strcmp(midptr->zone, entry->zone) == 0))
224 : {
225 : /* return unchanged array */
226 0 : return n;
227 : }
228 30 : if (override)
229 : {
230 : /* same abbrev but something is different, override */
231 30 : midptr->zone = entry->zone;
232 30 : midptr->offset = entry->offset;
233 30 : midptr->is_dst = entry->is_dst;
234 30 : return n;
235 : }
236 : /* same abbrev but something is different, complain */
237 0 : GUC_check_errmsg("time zone abbreviation \"%s\" is multiply defined",
238 : entry->abbrev);
239 0 : GUC_check_errdetail("Entry in time zone file \"%s\", line %d, conflicts with entry in file \"%s\", line %d.",
240 : midptr->filename, midptr->lineno,
241 : entry->filename, entry->lineno);
242 0 : return -1;
243 : }
244 : }
245 :
246 : /*
247 : * No match, insert at position "low".
248 : */
249 2315052 : if (n >= *arraysize)
250 : {
251 11872 : *arraysize *= 2;
252 11872 : *base = (tzEntry *) repalloc(*base, *arraysize * sizeof(tzEntry));
253 : }
254 :
255 2315052 : arrayptr = *base + low;
256 :
257 2315052 : memmove(arrayptr + 1, arrayptr, (n - low) * sizeof(tzEntry));
258 :
259 2315052 : memcpy(arrayptr, entry, sizeof(tzEntry));
260 :
261 2315052 : return n + 1;
262 : }
263 :
264 : /*
265 : * Parse a single timezone abbrev file --- can recurse to handle @INCLUDE
266 : *
267 : * filename: user-specified file name (does not include path)
268 : * depth: current recursion depth
269 : * *base: array for results (changeable if must enlarge array)
270 : * *arraysize: allocated length of array (changeable if must enlarge array)
271 : * n: current number of valid elements in array
272 : *
273 : * Returns the new array length (new value for n), or -1 if error
274 : */
275 : static int
276 11884 : ParseTzFile(const char *filename, int depth,
277 : tzEntry **base, int *arraysize, int n)
278 : {
279 : char share_path[MAXPGPATH];
280 : char file_path[MAXPGPATH];
281 : FILE *tzFile;
282 : char tzbuf[1024];
283 : char *line;
284 : tzEntry tzentry;
285 11884 : int lineno = 0;
286 11884 : bool override = false;
287 : const char *p;
288 :
289 : /*
290 : * We enforce that the filename is all alpha characters. This may be
291 : * overly restrictive, but we don't want to allow access to anything
292 : * outside the timezonesets directory, so for instance '/' *must* be
293 : * rejected.
294 : */
295 95072 : for (p = filename; *p; p++)
296 : {
297 83188 : if (!isalpha((unsigned char) *p))
298 : {
299 : /* at level 0, just use guc.c's regular "invalid value" message */
300 0 : if (depth > 0)
301 0 : GUC_check_errmsg("invalid time zone file name \"%s\"",
302 : filename);
303 0 : return -1;
304 : }
305 : }
306 :
307 : /*
308 : * The maximal recursion depth is a pretty arbitrary setting. It is hard
309 : * to imagine that someone needs more than 3 levels so stick with this
310 : * conservative setting until someone complains.
311 : */
312 11884 : if (depth > 3)
313 : {
314 0 : GUC_check_errmsg("time zone file recursion limit exceeded in file \"%s\"",
315 : filename);
316 0 : return -1;
317 : }
318 :
319 11884 : get_share_path(my_exec_path, share_path);
320 11884 : snprintf(file_path, sizeof(file_path), "%s/timezonesets/%s",
321 : share_path, filename);
322 11884 : tzFile = AllocateFile(file_path, "r");
323 11884 : if (!tzFile)
324 : {
325 : /*
326 : * Check to see if the problem is not the filename but the directory.
327 : * This is worth troubling over because if the installation share/
328 : * directory is missing or unreadable, this is likely to be the first
329 : * place we notice a problem during postmaster startup.
330 : */
331 0 : int save_errno = errno;
332 : DIR *tzdir;
333 :
334 0 : snprintf(file_path, sizeof(file_path), "%s/timezonesets",
335 : share_path);
336 0 : tzdir = AllocateDir(file_path);
337 0 : if (tzdir == NULL)
338 : {
339 0 : GUC_check_errmsg("could not open directory \"%s\": %m",
340 : file_path);
341 0 : GUC_check_errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
342 : my_exec_path);
343 0 : return -1;
344 : }
345 0 : FreeDir(tzdir);
346 0 : errno = save_errno;
347 :
348 : /*
349 : * otherwise, if file doesn't exist and it's level 0, guc.c's
350 : * complaint is enough
351 : */
352 0 : if (errno != ENOENT || depth > 0)
353 0 : GUC_check_errmsg("could not read time zone file \"%s\": %m",
354 : filename);
355 :
356 0 : return -1;
357 : }
358 :
359 7503392 : while (!feof(tzFile))
360 : {
361 7503392 : lineno++;
362 7503392 : if (fgets(tzbuf, sizeof(tzbuf), tzFile) == NULL)
363 : {
364 11884 : if (ferror(tzFile))
365 : {
366 0 : GUC_check_errmsg("could not read time zone file \"%s\": %m",
367 : filename);
368 0 : n = -1;
369 0 : break;
370 : }
371 : /* else we're at EOF after all */
372 11884 : break;
373 : }
374 7491508 : if (strlen(tzbuf) == sizeof(tzbuf) - 1)
375 : {
376 : /* the line is too long for tzbuf */
377 0 : GUC_check_errmsg("line is too long in time zone file \"%s\", line %d",
378 : filename, lineno);
379 0 : n = -1;
380 0 : break;
381 : }
382 :
383 : /* skip over whitespace */
384 7491508 : line = tzbuf;
385 79602204 : while (*line && isspace((unsigned char) *line))
386 72110696 : line++;
387 :
388 7491508 : if (*line == '\0') /* empty line */
389 261250 : continue;
390 7230258 : if (*line == '#') /* comment line */
391 4915152 : continue;
392 :
393 2315106 : if (pg_strncasecmp(line, "@INCLUDE", strlen("@INCLUDE")) == 0)
394 : {
395 : /* pstrdup so we can use filename in result data structure */
396 12 : char *includeFile = pstrdup(line + strlen("@INCLUDE"));
397 :
398 12 : includeFile = strtok(includeFile, WHITESPACE);
399 12 : if (!includeFile || !*includeFile)
400 : {
401 0 : GUC_check_errmsg("@INCLUDE without file name in time zone file \"%s\", line %d",
402 : filename, lineno);
403 0 : n = -1;
404 0 : break;
405 : }
406 12 : n = ParseTzFile(includeFile, depth + 1,
407 : base, arraysize, n);
408 12 : if (n < 0)
409 0 : break;
410 12 : continue;
411 : }
412 :
413 2315094 : if (pg_strncasecmp(line, "@OVERRIDE", strlen("@OVERRIDE")) == 0)
414 : {
415 12 : override = true;
416 12 : continue;
417 : }
418 :
419 2315082 : if (!splitTzLine(filename, lineno, line, &tzentry))
420 : {
421 0 : n = -1;
422 0 : break;
423 : }
424 2315082 : if (!validateTzEntry(&tzentry))
425 : {
426 0 : n = -1;
427 0 : break;
428 : }
429 2315082 : n = addToArray(base, arraysize, n, &tzentry, override);
430 2315082 : if (n < 0)
431 0 : break;
432 : }
433 :
434 11884 : FreeFile(tzFile);
435 :
436 11884 : return n;
437 : }
438 :
439 : /*
440 : * load_tzoffsets --- read and parse the specified timezone offset file
441 : *
442 : * On success, return a filled-in TimeZoneAbbrevTable, which must have been
443 : * guc_malloc'd not palloc'd. On failure, return NULL, using GUC_check_errmsg
444 : * and friends to give details of the problem.
445 : */
446 : TimeZoneAbbrevTable *
447 11872 : load_tzoffsets(const char *filename)
448 : {
449 11872 : TimeZoneAbbrevTable *result = NULL;
450 : MemoryContext tmpContext;
451 : MemoryContext oldContext;
452 : tzEntry *array;
453 : int arraysize;
454 : int n;
455 :
456 : /*
457 : * Create a temp memory context to work in. This makes it easy to clean
458 : * up afterwards.
459 : */
460 11872 : tmpContext = AllocSetContextCreate(CurrentMemoryContext,
461 : "TZParserMemory",
462 : ALLOCSET_SMALL_SIZES);
463 11872 : oldContext = MemoryContextSwitchTo(tmpContext);
464 :
465 : /* Initialize array at a reasonable size */
466 11872 : arraysize = 128;
467 11872 : array = (tzEntry *) palloc(arraysize * sizeof(tzEntry));
468 :
469 : /* Parse the file(s) */
470 11872 : n = ParseTzFile(filename, 0, &array, &arraysize, 0);
471 :
472 : /* If no errors so far, let datetime.c allocate memory & convert format */
473 11872 : if (n >= 0)
474 : {
475 11872 : result = ConvertTimeZoneAbbrevs(array, n);
476 11872 : if (!result)
477 0 : GUC_check_errmsg("out of memory");
478 : }
479 :
480 : /* Clean up */
481 11872 : MemoryContextSwitchTo(oldContext);
482 11872 : MemoryContextDelete(tmpContext);
483 :
484 11872 : return result;
485 : }
|