Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * tzparser.c
4 : * Functions for parsing timezone offset files
5 : *
6 : * Note: this code is invoked from the check_hook for the GUC variable
7 : * timezone_abbreviations. Therefore, it should report problems using
8 : * GUC_check_errmsg() and related functions, and try to avoid throwing
9 : * elog(ERROR). This is not completely bulletproof at present --- in
10 : * particular out-of-memory will throw an error. Could probably fix with
11 : * PG_TRY if necessary.
12 : *
13 : *
14 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
15 : * Portions Copyright (c) 1994, Regents of the University of California
16 : *
17 : * IDENTIFICATION
18 : * src/backend/utils/misc/tzparser.c
19 : *
20 : *-------------------------------------------------------------------------
21 : */
22 :
#include "postgres.h"

#include <ctype.h>
#include <limits.h>

#include "miscadmin.h"
#include "storage/fd.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/tzparser.h"
32 :
33 :
34 : #define WHITESPACE " \t\n\r"
35 :
36 : static bool validateTzEntry(tzEntry *tzentry);
37 : static bool splitTzLine(const char *filename, int lineno,
38 : char *line, tzEntry *tzentry);
39 : static int addToArray(tzEntry **base, int *arraysize, int n,
40 : tzEntry *entry, bool override);
41 : static int ParseTzFile(const char *filename, int depth,
42 : tzEntry **base, int *arraysize, int n);
43 :
44 :
45 : /*
46 : * Apply additional validation checks to a tzEntry
47 : *
48 : * Returns true if OK, else false
49 : */
50 : static bool
51 2620562 : validateTzEntry(tzEntry *tzentry)
52 : {
53 : unsigned char *p;
54 :
55 : /*
56 : * Check restrictions imposed by datetktbl storage format (see datetime.c)
57 : */
58 2620562 : if (strlen(tzentry->abbrev) > TOKMAXLEN)
59 : {
60 0 : GUC_check_errmsg("time zone abbreviation \"%s\" is too long (maximum %d characters) in time zone file \"%s\", line %d",
61 : tzentry->abbrev, TOKMAXLEN,
62 : tzentry->filename, tzentry->lineno);
63 0 : return false;
64 : }
65 :
66 : /*
67 : * Sanity-check the offset: shouldn't exceed 14 hours
68 : */
69 2620562 : if (tzentry->offset > 14 * 60 * 60 ||
70 2620562 : tzentry->offset < -14 * 60 * 60)
71 : {
72 0 : GUC_check_errmsg("time zone offset %d is out of range in time zone file \"%s\", line %d",
73 : tzentry->offset,
74 : tzentry->filename, tzentry->lineno);
75 0 : return false;
76 : }
77 :
78 : /*
79 : * Convert abbrev to lowercase (must match datetime.c's conversion)
80 : */
81 12153510 : for (p = (unsigned char *) tzentry->abbrev; *p; p++)
82 9532948 : *p = pg_tolower(*p);
83 :
84 2620562 : return true;
85 : }
86 :
87 : /*
88 : * Attempt to parse the line as a timezone abbrev spec
89 : *
90 : * Valid formats are:
91 : * name zone
92 : * name offset dst
93 : *
94 : * Returns true if OK, else false; data is stored in *tzentry
95 : */
96 : static bool
97 2620562 : splitTzLine(const char *filename, int lineno, char *line, tzEntry *tzentry)
98 : {
99 : char *abbrev;
100 : char *offset;
101 : char *offset_endptr;
102 : char *remain;
103 : char *is_dst;
104 :
105 2620562 : tzentry->lineno = lineno;
106 2620562 : tzentry->filename = filename;
107 :
108 2620562 : abbrev = strtok(line, WHITESPACE);
109 2620562 : if (!abbrev)
110 : {
111 0 : GUC_check_errmsg("missing time zone abbreviation in time zone file \"%s\", line %d",
112 : filename, lineno);
113 0 : return false;
114 : }
115 2620562 : tzentry->abbrev = pstrdup(abbrev);
116 :
117 2620562 : offset = strtok(NULL, WHITESPACE);
118 2620562 : if (!offset)
119 : {
120 0 : GUC_check_errmsg("missing time zone offset in time zone file \"%s\", line %d",
121 : filename, lineno);
122 0 : return false;
123 : }
124 :
125 : /* We assume zone names don't begin with a digit or sign */
126 2620562 : if (isdigit((unsigned char) *offset) || *offset == '+' || *offset == '-')
127 : {
128 1938692 : tzentry->zone = NULL;
129 1938692 : tzentry->offset = strtol(offset, &offset_endptr, 10);
130 1938692 : if (offset_endptr == offset || *offset_endptr != '\0')
131 : {
132 0 : GUC_check_errmsg("invalid number for time zone offset in time zone file \"%s\", line %d",
133 : filename, lineno);
134 0 : return false;
135 : }
136 :
137 1938692 : is_dst = strtok(NULL, WHITESPACE);
138 1938692 : if (is_dst && pg_strcasecmp(is_dst, "D") == 0)
139 : {
140 641760 : tzentry->is_dst = true;
141 641760 : remain = strtok(NULL, WHITESPACE);
142 : }
143 : else
144 : {
145 : /* there was no 'D' dst specifier */
146 1296932 : tzentry->is_dst = false;
147 1296932 : remain = is_dst;
148 : }
149 : }
150 : else
151 : {
152 : /*
153 : * Assume entry is a zone name. We do not try to validate it by
154 : * looking up the zone, because that would force loading of a lot of
155 : * zones that probably will never be used in the current session.
156 : */
157 681870 : tzentry->zone = pstrdup(offset);
158 681870 : tzentry->offset = 0;
159 681870 : tzentry->is_dst = false;
160 681870 : remain = strtok(NULL, WHITESPACE);
161 : }
162 :
163 2620562 : if (!remain) /* no more non-whitespace chars */
164 0 : return true;
165 :
166 2620562 : if (remain[0] != '#') /* must be a comment */
167 : {
168 0 : GUC_check_errmsg("invalid syntax in time zone file \"%s\", line %d",
169 : filename, lineno);
170 0 : return false;
171 : }
172 2620562 : return true;
173 : }
174 :
175 : /*
176 : * Insert entry into sorted array
177 : *
178 : * *base: base address of array (changeable if must enlarge array)
179 : * *arraysize: allocated length of array (changeable if must enlarge array)
180 : * n: current number of valid elements in array
181 : * entry: new data to insert
182 : * override: true if OK to override
183 : *
184 : * Returns the new array length (new value for n), or -1 if error
185 : */
186 : static int
187 2620562 : addToArray(tzEntry **base, int *arraysize, int n,
188 : tzEntry *entry, bool override)
189 : {
190 : tzEntry *arrayptr;
191 : int low;
192 : int high;
193 :
194 : /*
195 : * Search the array for a duplicate; as a useful side effect, the array is
196 : * maintained in sorted order. We use strcmp() to ensure we match the
197 : * sort order datetime.c expects.
198 : */
199 2620562 : arrayptr = *base;
200 2620562 : low = 0;
201 2620562 : high = n - 1;
202 19172898 : while (low <= high)
203 : {
204 16552366 : int mid = (low + high) >> 1;
205 16552366 : tzEntry *midptr = arrayptr + mid;
206 : int cmp;
207 :
208 16552366 : cmp = strcmp(entry->abbrev, midptr->abbrev);
209 16552366 : if (cmp < 0)
210 6604912 : high = mid - 1;
211 9947454 : else if (cmp > 0)
212 9947424 : low = mid + 1;
213 : else
214 : {
215 : /*
216 : * Found a duplicate entry; complain unless it's the same.
217 : */
218 30 : if ((midptr->zone == NULL && entry->zone == NULL &&
219 24 : midptr->offset == entry->offset &&
220 0 : midptr->is_dst == entry->is_dst) ||
221 30 : (midptr->zone != NULL && entry->zone != NULL &&
222 0 : strcmp(midptr->zone, entry->zone) == 0))
223 : {
224 : /* return unchanged array */
225 0 : return n;
226 : }
227 30 : if (override)
228 : {
229 : /* same abbrev but something is different, override */
230 30 : midptr->zone = entry->zone;
231 30 : midptr->offset = entry->offset;
232 30 : midptr->is_dst = entry->is_dst;
233 30 : return n;
234 : }
235 : /* same abbrev but something is different, complain */
236 0 : GUC_check_errmsg("time zone abbreviation \"%s\" is multiply defined",
237 : entry->abbrev);
238 0 : GUC_check_errdetail("Entry in time zone file \"%s\", line %d, conflicts with entry in file \"%s\", line %d.",
239 : midptr->filename, midptr->lineno,
240 : entry->filename, entry->lineno);
241 0 : return -1;
242 : }
243 : }
244 :
245 : /*
246 : * No match, insert at position "low".
247 : */
248 2620532 : if (n >= *arraysize)
249 : {
250 13370 : *arraysize *= 2;
251 13370 : *base = (tzEntry *) repalloc(*base, *arraysize * sizeof(tzEntry));
252 : }
253 :
254 2620532 : arrayptr = *base + low;
255 :
256 2620532 : memmove(arrayptr + 1, arrayptr, (n - low) * sizeof(tzEntry));
257 :
258 2620532 : memcpy(arrayptr, entry, sizeof(tzEntry));
259 :
260 2620532 : return n + 1;
261 : }
262 :
263 : /*
264 : * Parse a single timezone abbrev file --- can recurse to handle @INCLUDE
265 : *
266 : * filename: user-specified file name (does not include path)
267 : * depth: current recursion depth
268 : * *base: array for results (changeable if must enlarge array)
269 : * *arraysize: allocated length of array (changeable if must enlarge array)
270 : * n: current number of valid elements in array
271 : *
272 : * Returns the new array length (new value for n), or -1 if error
273 : */
274 : static int
275 13382 : ParseTzFile(const char *filename, int depth,
276 : tzEntry **base, int *arraysize, int n)
277 : {
278 : char share_path[MAXPGPATH];
279 : char file_path[MAXPGPATH];
280 : FILE *tzFile;
281 : char tzbuf[1024];
282 : char *line;
283 : tzEntry tzentry;
284 13382 : int lineno = 0;
285 13382 : bool override = false;
286 : const char *p;
287 :
288 : /*
289 : * We enforce that the filename is all alpha characters. This may be
290 : * overly restrictive, but we don't want to allow access to anything
291 : * outside the timezonesets directory, so for instance '/' *must* be
292 : * rejected.
293 : */
294 107056 : for (p = filename; *p; p++)
295 : {
296 93674 : if (!isalpha((unsigned char) *p))
297 : {
298 : /* at level 0, just use guc.c's regular "invalid value" message */
299 0 : if (depth > 0)
300 0 : GUC_check_errmsg("invalid time zone file name \"%s\"",
301 : filename);
302 0 : return -1;
303 : }
304 : }
305 :
306 : /*
307 : * The maximal recursion depth is a pretty arbitrary setting. It is hard
308 : * to imagine that someone needs more than 3 levels so stick with this
309 : * conservative setting until someone complains.
310 : */
311 13382 : if (depth > 3)
312 : {
313 0 : GUC_check_errmsg("time zone file recursion limit exceeded in file \"%s\"",
314 : filename);
315 0 : return -1;
316 : }
317 :
318 13382 : get_share_path(my_exec_path, share_path);
319 13382 : snprintf(file_path, sizeof(file_path), "%s/timezonesets/%s",
320 : share_path, filename);
321 13382 : tzFile = AllocateFile(file_path, "r");
322 13382 : if (!tzFile)
323 : {
324 : /*
325 : * Check to see if the problem is not the filename but the directory.
326 : * This is worth troubling over because if the installation share/
327 : * directory is missing or unreadable, this is likely to be the first
328 : * place we notice a problem during postmaster startup.
329 : */
330 0 : int save_errno = errno;
331 : DIR *tzdir;
332 :
333 0 : snprintf(file_path, sizeof(file_path), "%s/timezonesets",
334 : share_path);
335 0 : tzdir = AllocateDir(file_path);
336 0 : if (tzdir == NULL)
337 : {
338 0 : GUC_check_errmsg("could not open directory \"%s\": %m",
339 : file_path);
340 0 : GUC_check_errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
341 : my_exec_path);
342 0 : return -1;
343 : }
344 0 : FreeDir(tzdir);
345 0 : errno = save_errno;
346 :
347 : /*
348 : * otherwise, if file doesn't exist and it's level 0, guc.c's
349 : * complaint is enough
350 : */
351 0 : if (errno != ENOENT || depth > 0)
352 0 : GUC_check_errmsg("could not read time zone file \"%s\": %m",
353 : filename);
354 :
355 0 : return -1;
356 : }
357 :
358 8463498 : while (!feof(tzFile))
359 : {
360 8463498 : lineno++;
361 8463498 : if (fgets(tzbuf, sizeof(tzbuf), tzFile) == NULL)
362 : {
363 13382 : if (ferror(tzFile))
364 : {
365 0 : GUC_check_errmsg("could not read time zone file \"%s\": %m",
366 : filename);
367 0 : n = -1;
368 0 : break;
369 : }
370 : /* else we're at EOF after all */
371 13382 : break;
372 : }
373 8450116 : if (strlen(tzbuf) == sizeof(tzbuf) - 1)
374 : {
375 : /* the line is too long for tzbuf */
376 0 : GUC_check_errmsg("line is too long in time zone file \"%s\", line %d",
377 : filename, lineno);
378 0 : n = -1;
379 0 : break;
380 : }
381 :
382 : /* skip over whitespace */
383 8450116 : line = tzbuf;
384 89659664 : while (*line && isspace((unsigned char) *line))
385 81209548 : line++;
386 :
387 8450116 : if (*line == '\0') /* empty line */
388 294206 : continue;
389 8155910 : if (*line == '#') /* comment line */
390 5535324 : continue;
391 :
392 2620586 : if (pg_strncasecmp(line, "@INCLUDE", strlen("@INCLUDE")) == 0)
393 : {
394 : /* pstrdup so we can use filename in result data structure */
395 12 : char *includeFile = pstrdup(line + strlen("@INCLUDE"));
396 :
397 12 : includeFile = strtok(includeFile, WHITESPACE);
398 12 : if (!includeFile || !*includeFile)
399 : {
400 0 : GUC_check_errmsg("@INCLUDE without file name in time zone file \"%s\", line %d",
401 : filename, lineno);
402 0 : n = -1;
403 0 : break;
404 : }
405 12 : n = ParseTzFile(includeFile, depth + 1,
406 : base, arraysize, n);
407 12 : if (n < 0)
408 0 : break;
409 12 : continue;
410 : }
411 :
412 2620574 : if (pg_strncasecmp(line, "@OVERRIDE", strlen("@OVERRIDE")) == 0)
413 : {
414 12 : override = true;
415 12 : continue;
416 : }
417 :
418 2620562 : if (!splitTzLine(filename, lineno, line, &tzentry))
419 : {
420 0 : n = -1;
421 0 : break;
422 : }
423 2620562 : if (!validateTzEntry(&tzentry))
424 : {
425 0 : n = -1;
426 0 : break;
427 : }
428 2620562 : n = addToArray(base, arraysize, n, &tzentry, override);
429 2620562 : if (n < 0)
430 0 : break;
431 : }
432 :
433 13382 : FreeFile(tzFile);
434 :
435 13382 : return n;
436 : }
437 :
438 : /*
439 : * load_tzoffsets --- read and parse the specified timezone offset file
440 : *
441 : * On success, return a filled-in TimeZoneAbbrevTable, which must have been
442 : * guc_malloc'd not palloc'd. On failure, return NULL, using GUC_check_errmsg
443 : * and friends to give details of the problem.
444 : */
445 : TimeZoneAbbrevTable *
446 13370 : load_tzoffsets(const char *filename)
447 : {
448 13370 : TimeZoneAbbrevTable *result = NULL;
449 : MemoryContext tmpContext;
450 : MemoryContext oldContext;
451 : tzEntry *array;
452 : int arraysize;
453 : int n;
454 :
455 : /*
456 : * Create a temp memory context to work in. This makes it easy to clean
457 : * up afterwards.
458 : */
459 13370 : tmpContext = AllocSetContextCreate(CurrentMemoryContext,
460 : "TZParserMemory",
461 : ALLOCSET_SMALL_SIZES);
462 13370 : oldContext = MemoryContextSwitchTo(tmpContext);
463 :
464 : /* Initialize array at a reasonable size */
465 13370 : arraysize = 128;
466 13370 : array = (tzEntry *) palloc(arraysize * sizeof(tzEntry));
467 :
468 : /* Parse the file(s) */
469 13370 : n = ParseTzFile(filename, 0, &array, &arraysize, 0);
470 :
471 : /* If no errors so far, let datetime.c allocate memory & convert format */
472 13370 : if (n >= 0)
473 : {
474 13370 : result = ConvertTimeZoneAbbrevs(array, n);
475 13370 : if (!result)
476 0 : GUC_check_errmsg("out of memory");
477 : }
478 :
479 : /* Clean up */
480 13370 : MemoryContextSwitchTo(oldContext);
481 13370 : MemoryContextDelete(tmpContext);
482 :
483 13370 : return result;
484 : }
|