Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * String-processing utility routines for frontend code
4 : *
5 : * Assorted utility functions that are useful in constructing SQL queries
6 : * and interpreting backend output.
7 : *
8 : *
9 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
10 : * Portions Copyright (c) 1994, Regents of the University of California
11 : *
12 : * src/fe_utils/string_utils.c
13 : *
14 : *-------------------------------------------------------------------------
15 : */
16 : #include "postgres_fe.h"
17 :
18 : #include <ctype.h>
19 :
20 : #include "common/keywords.h"
21 : #include "fe_utils/string_utils.h"
22 :
/* Forward declaration; used just below as the initial hook value. */
static PQExpBuffer defaultGetLocalPQExpBuffer(void);

/* Globals exported by this file */
/* When nonzero, fmtId() double-quotes every identifier it formats. */
int quote_all_identifiers = 0;
/*
 * Hook that supplies the scratch buffer used by fmtId() and
 * fmtQualifiedId(); starts out pointing at defaultGetLocalPQExpBuffer().
 */
PQExpBuffer (*getLocalPQExpBuffer) (void) = defaultGetLocalPQExpBuffer;
28 :
29 :
30 : /*
31 : * Returns a temporary PQExpBuffer, valid until the next call to the function.
32 : * This is used by fmtId and fmtQualifiedId.
33 : *
34 : * Non-reentrant and non-thread-safe but reduces memory leakage. You can
35 : * replace this with a custom version by setting the getLocalPQExpBuffer
36 : * function pointer.
37 : */
38 : static PQExpBuffer
39 635170 : defaultGetLocalPQExpBuffer(void)
40 : {
41 : static PQExpBuffer id_return = NULL;
42 :
43 635170 : if (id_return) /* first time through? */
44 : {
45 : /* same buffer, just wipe contents */
46 634424 : resetPQExpBuffer(id_return);
47 : }
48 : else
49 : {
50 : /* new buffer */
51 746 : id_return = createPQExpBuffer();
52 : }
53 :
54 635170 : return id_return;
55 : }
56 :
57 : /*
58 : * Quotes input string if it's not a legitimate SQL identifier as-is.
59 : *
60 : * Note that the returned string must be used before calling fmtId again,
61 : * since we re-use the same return buffer each time.
62 : */
63 : const char *
64 538554 : fmtId(const char *rawid)
65 : {
66 538554 : PQExpBuffer id_return = getLocalPQExpBuffer();
67 :
68 : const char *cp;
69 538554 : bool need_quotes = false;
70 :
71 : /*
72 : * These checks need to match the identifier production in scan.l. Don't
73 : * use islower() etc.
74 : */
75 538554 : if (quote_all_identifiers)
76 31434 : need_quotes = true;
77 : /* slightly different rules for first character */
78 507120 : else if (!((rawid[0] >= 'a' && rawid[0] <= 'z') || rawid[0] == '_'))
79 1144 : need_quotes = true;
80 : else
81 : {
82 : /* otherwise check the entire string */
83 5463644 : for (cp = rawid; *cp; cp++)
84 : {
85 4977082 : if (!((*cp >= 'a' && *cp <= 'z')
86 666258 : || (*cp >= '0' && *cp <= '9')
87 454162 : || (*cp == '_')))
88 : {
89 19414 : need_quotes = true;
90 19414 : break;
91 : }
92 : }
93 : }
94 :
95 538554 : if (!need_quotes)
96 : {
97 : /*
98 : * Check for keyword. We quote keywords except for unreserved ones.
99 : * (In some cases we could avoid quoting a col_name or type_func_name
100 : * keyword, but it seems much harder than it's worth to tell that.)
101 : *
102 : * Note: ScanKeywordLookup() does case-insensitive comparison, but
103 : * that's fine, since we already know we have all-lower-case.
104 : */
105 486562 : int kwnum = ScanKeywordLookup(rawid, &ScanKeywords);
106 :
107 486562 : if (kwnum >= 0 && ScanKeywordCategories[kwnum] != UNRESERVED_KEYWORD)
108 1408 : need_quotes = true;
109 : }
110 :
111 538554 : if (!need_quotes)
112 : {
113 : /* no quoting needed */
114 485154 : appendPQExpBufferStr(id_return, rawid);
115 : }
116 : else
117 : {
118 53400 : appendPQExpBufferChar(id_return, '"');
119 589992 : for (cp = rawid; *cp; cp++)
120 : {
121 : /*
122 : * Did we find a double-quote in the string? Then make this a
123 : * double double-quote per SQL99. Before, we put in a
124 : * backslash/double-quote pair. - thomas 2000-08-05
125 : */
126 536592 : if (*cp == '"')
127 388 : appendPQExpBufferChar(id_return, '"');
128 536592 : appendPQExpBufferChar(id_return, *cp);
129 : }
130 53400 : appendPQExpBufferChar(id_return, '"');
131 : }
132 :
133 538554 : return id_return->data;
134 : }
135 :
136 : /*
137 : * fmtQualifiedId - construct a schema-qualified name, with quoting as needed.
138 : *
139 : * Like fmtId, use the result before calling again.
140 : *
141 : * Since we call fmtId and it also uses getLocalPQExpBuffer() we cannot
142 : * use that buffer until we're finished with calling fmtId().
143 : */
144 : const char *
145 96616 : fmtQualifiedId(const char *schema, const char *id)
146 : {
147 : PQExpBuffer id_return;
148 96616 : PQExpBuffer lcl_pqexp = createPQExpBuffer();
149 :
150 : /* Some callers might fail to provide a schema name */
151 96616 : if (schema && *schema)
152 : {
153 96616 : appendPQExpBuffer(lcl_pqexp, "%s.", fmtId(schema));
154 : }
155 96616 : appendPQExpBufferStr(lcl_pqexp, fmtId(id));
156 :
157 96616 : id_return = getLocalPQExpBuffer();
158 :
159 96616 : appendPQExpBufferStr(id_return, lcl_pqexp->data);
160 96616 : destroyPQExpBuffer(lcl_pqexp);
161 :
162 96616 : return id_return->data;
163 : }
164 :
165 :
/*
 * Render a Postgres version number, given in the PG_VERSION_NUM integer
 * format returned by PQserverVersion(), as text.  The point of this function
 * is to keep the two-part vs. three-part numbering knowledge in one place.
 *
 * version_number: the integer version.
 * include_minor: whether the trailing minor component is printed as well.
 * buf, buflen: caller-supplied output area (which keeps this reentrant);
 *		32 bytes is the recommended size.
 *
 * Returns 'buf', purely as a notational convenience.
 */
char *
formatPGVersionNumber(int version_number, bool include_minor,
					  char *buf, size_t buflen)
{
	const int	major = version_number / 10000;

	if (version_number >= 100000)
	{
		/* Two-part style: MAJOR[.MINOR] */
		if (include_minor)
			snprintf(buf, buflen, "%d.%d", major, version_number % 10000);
		else
			snprintf(buf, buflen, "%d", major);
	}
	else
	{
		/* Three-part style: MAJOR.MIDDLE[.MINOR] */
		const int	middle = (version_number / 100) % 100;

		if (include_minor)
			snprintf(buf, buflen, "%d.%d.%d", major, middle,
					 version_number % 100);
		else
			snprintf(buf, buflen, "%d.%d", major, middle);
	}

	return buf;
}
202 :
203 :
204 : /*
205 : * Convert a string value to an SQL string literal and append it to
206 : * the given buffer. We assume the specified client_encoding and
207 : * standard_conforming_strings settings.
208 : *
209 : * This is essentially equivalent to libpq's PQescapeStringInternal,
210 : * except for the output buffer structure. We need it in situations
211 : * where we do not have a PGconn available. Where we do,
212 : * appendStringLiteralConn is a better choice.
213 : */
214 : void
215 32890 : appendStringLiteral(PQExpBuffer buf, const char *str,
216 : int encoding, bool std_strings)
217 : {
218 32890 : size_t length = strlen(str);
219 32890 : const char *source = str;
220 : char *target;
221 :
222 32890 : if (!enlargePQExpBuffer(buf, 2 * length + 2))
223 0 : return;
224 :
225 32890 : target = buf->data + buf->len;
226 32890 : *target++ = '\'';
227 :
228 513842 : while (*source != '\0')
229 : {
230 480952 : char c = *source;
231 : int len;
232 : int i;
233 :
234 : /* Fast path for plain ASCII */
235 480952 : if (!IS_HIGHBIT_SET(c))
236 : {
237 : /* Apply quoting if needed */
238 480644 : if (SQL_STR_DOUBLE(c, !std_strings))
239 4102 : *target++ = c;
240 : /* Copy the character */
241 480644 : *target++ = c;
242 480644 : source++;
243 480644 : continue;
244 : }
245 :
246 : /* Slow path for possible multibyte characters */
247 308 : len = PQmblen(source, encoding);
248 :
249 : /* Copy the character */
250 616 : for (i = 0; i < len; i++)
251 : {
252 308 : if (*source == '\0')
253 0 : break;
254 308 : *target++ = *source++;
255 : }
256 :
257 : /*
258 : * If we hit premature end of string (ie, incomplete multibyte
259 : * character), try to pad out to the correct length with spaces. We
260 : * may not be able to pad completely, but we will always be able to
261 : * insert at least one pad space (since we'd not have quoted a
262 : * multibyte character). This should be enough to make a string that
263 : * the server will error out on.
264 : */
265 308 : if (i < len)
266 : {
267 0 : char *stop = buf->data + buf->maxlen - 2;
268 :
269 0 : for (; i < len; i++)
270 : {
271 0 : if (target >= stop)
272 0 : break;
273 0 : *target++ = ' ';
274 : }
275 0 : break;
276 : }
277 : }
278 :
279 : /* Write the terminating quote and NUL character. */
280 32890 : *target++ = '\'';
281 32890 : *target = '\0';
282 :
283 32890 : buf->len = target - buf->data;
284 : }
285 :
286 :
287 : /*
288 : * Convert a string value to an SQL string literal and append it to
289 : * the given buffer. Encoding and string syntax rules are as indicated
290 : * by current settings of the PGconn.
291 : */
292 : void
293 8946 : appendStringLiteralConn(PQExpBuffer buf, const char *str, PGconn *conn)
294 : {
295 8946 : size_t length = strlen(str);
296 :
297 : /*
298 : * XXX This is a kluge to silence escape_string_warning in our utility
299 : * programs. It should go away someday.
300 : */
301 8946 : if (strchr(str, '\\') != NULL && PQserverVersion(conn) >= 80100)
302 : {
303 : /* ensure we are not adjacent to an identifier */
304 1432 : if (buf->len > 0 && buf->data[buf->len - 1] != ' ')
305 0 : appendPQExpBufferChar(buf, ' ');
306 1432 : appendPQExpBufferChar(buf, ESCAPE_STRING_SYNTAX);
307 1432 : appendStringLiteral(buf, str, PQclientEncoding(conn), false);
308 1432 : return;
309 : }
310 : /* XXX end kluge */
311 :
312 7514 : if (!enlargePQExpBuffer(buf, 2 * length + 2))
313 0 : return;
314 7514 : appendPQExpBufferChar(buf, '\'');
315 7514 : buf->len += PQescapeStringConn(conn, buf->data + buf->len,
316 : str, length, NULL);
317 7514 : appendPQExpBufferChar(buf, '\'');
318 : }
319 :
320 :
321 : /*
322 : * Convert a string value to a dollar quoted literal and append it to
323 : * the given buffer. If the dqprefix parameter is not NULL then the
324 : * dollar quote delimiter will begin with that (after the opening $).
325 : *
326 : * No escaping is done at all on str, in compliance with the rules
327 : * for parsing dollar quoted strings. Also, we need not worry about
328 : * encoding issues.
329 : */
330 : void
331 3108 : appendStringLiteralDQ(PQExpBuffer buf, const char *str, const char *dqprefix)
332 : {
333 : static const char suffixes[] = "_XXXXXXX";
334 3108 : int nextchar = 0;
335 3108 : PQExpBuffer delimBuf = createPQExpBuffer();
336 :
337 : /* start with $ + dqprefix if not NULL */
338 3108 : appendPQExpBufferChar(delimBuf, '$');
339 3108 : if (dqprefix)
340 0 : appendPQExpBufferStr(delimBuf, dqprefix);
341 :
342 : /*
343 : * Make sure we choose a delimiter which (without the trailing $) is not
344 : * present in the string being quoted. We don't check with the trailing $
345 : * because a string ending in $foo must not be quoted with $foo$.
346 : */
347 4130 : while (strstr(str, delimBuf->data) != NULL)
348 : {
349 1022 : appendPQExpBufferChar(delimBuf, suffixes[nextchar++]);
350 1022 : nextchar %= sizeof(suffixes) - 1;
351 : }
352 :
353 : /* add trailing $ */
354 3108 : appendPQExpBufferChar(delimBuf, '$');
355 :
356 : /* quote it and we are all done */
357 3108 : appendPQExpBufferStr(buf, delimBuf->data);
358 3108 : appendPQExpBufferStr(buf, str);
359 3108 : appendPQExpBufferStr(buf, delimBuf->data);
360 :
361 3108 : destroyPQExpBuffer(delimBuf);
362 3108 : }
363 :
364 :
365 : /*
366 : * Convert a bytea value (presented as raw bytes) to an SQL string literal
367 : * and append it to the given buffer. We assume the specified
368 : * standard_conforming_strings setting.
369 : *
370 : * This is needed in situations where we do not have a PGconn available.
371 : * Where we do, PQescapeByteaConn is a better choice.
372 : */
373 : void
374 78 : appendByteaLiteral(PQExpBuffer buf, const unsigned char *str, size_t length,
375 : bool std_strings)
376 : {
377 78 : const unsigned char *source = str;
378 : char *target;
379 :
380 : static const char hextbl[] = "0123456789abcdef";
381 :
382 : /*
383 : * This implementation is hard-wired to produce hex-format output. We do
384 : * not know the server version the output will be loaded into, so making
385 : * an intelligent format choice is impossible. It might be better to
386 : * always use the old escaped format.
387 : */
388 78 : if (!enlargePQExpBuffer(buf, 2 * length + 5))
389 0 : return;
390 :
391 78 : target = buf->data + buf->len;
392 78 : *target++ = '\'';
393 78 : if (!std_strings)
394 0 : *target++ = '\\';
395 78 : *target++ = '\\';
396 78 : *target++ = 'x';
397 :
398 8010 : while (length-- > 0)
399 : {
400 7932 : unsigned char c = *source++;
401 :
402 7932 : *target++ = hextbl[(c >> 4) & 0xF];
403 7932 : *target++ = hextbl[c & 0xF];
404 : }
405 :
406 : /* Write the terminating quote and NUL character. */
407 78 : *target++ = '\'';
408 78 : *target = '\0';
409 :
410 78 : buf->len = target - buf->data;
411 : }
412 :
413 :
414 : /*
415 : * Append the given string to the shell command being built in the buffer,
416 : * with shell-style quoting as needed to create exactly one argument.
417 : *
418 : * Forbid LF or CR characters, which have scant practical use beyond designing
419 : * security breaches. The Windows command shell is unusable as a conduit for
420 : * arguments containing LF or CR characters. A future major release should
421 : * reject those characters in CREATE ROLE and CREATE DATABASE, because use
422 : * there eventually leads to errors here.
423 : *
424 : * appendShellString() simply prints an error and dies if LF or CR appears.
425 : * appendShellStringNoError() omits those characters from the result, and
426 : * returns false if there were any.
427 : */
428 : void
429 648 : appendShellString(PQExpBuffer buf, const char *str)
430 : {
431 648 : if (!appendShellStringNoError(buf, str))
432 : {
433 2 : fprintf(stderr,
434 2 : _("shell command argument contains a newline or carriage return: \"%s\"\n"),
435 : str);
436 2 : exit(EXIT_FAILURE);
437 : }
438 646 : }
439 :
440 : bool
441 648 : appendShellStringNoError(PQExpBuffer buf, const char *str)
442 : {
443 : #ifdef WIN32
444 : int backslash_run_length = 0;
445 : #endif
446 648 : bool ok = true;
447 : const char *p;
448 :
449 : /*
450 : * Don't bother with adding quotes if the string is nonempty and clearly
451 : * contains only safe characters.
452 : */
453 648 : if (*str != '\0' &&
454 648 : strspn(str, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_./:") == strlen(str))
455 : {
456 524 : appendPQExpBufferStr(buf, str);
457 524 : return ok;
458 : }
459 :
460 : #ifndef WIN32
461 124 : appendPQExpBufferChar(buf, '\'');
462 6088 : for (p = str; *p; p++)
463 : {
464 5964 : if (*p == '\n' || *p == '\r')
465 : {
466 4 : ok = false;
467 4 : continue;
468 : }
469 :
470 5960 : if (*p == '\'')
471 172 : appendPQExpBufferStr(buf, "'\"'\"'");
472 : else
473 5788 : appendPQExpBufferChar(buf, *p);
474 : }
475 124 : appendPQExpBufferChar(buf, '\'');
476 : #else /* WIN32 */
477 :
478 : /*
479 : * A Windows system() argument experiences two layers of interpretation.
480 : * First, cmd.exe interprets the string. Its behavior is undocumented,
481 : * but a caret escapes any byte except LF or CR that would otherwise have
482 : * special meaning. Handling of a caret before LF or CR differs between
483 : * "cmd.exe /c" and other modes, and it is unusable here.
484 : *
485 : * Second, the new process parses its command line to construct argv (see
486 : * https://msdn.microsoft.com/en-us/library/17w5ykft.aspx). This treats
487 : * backslash-double quote sequences specially.
488 : */
489 : appendPQExpBufferStr(buf, "^\"");
490 : for (p = str; *p; p++)
491 : {
492 : if (*p == '\n' || *p == '\r')
493 : {
494 : ok = false;
495 : continue;
496 : }
497 :
498 : /* Change N backslashes before a double quote to 2N+1 backslashes. */
499 : if (*p == '"')
500 : {
501 : while (backslash_run_length)
502 : {
503 : appendPQExpBufferStr(buf, "^\\");
504 : backslash_run_length--;
505 : }
506 : appendPQExpBufferStr(buf, "^\\");
507 : }
508 : else if (*p == '\\')
509 : backslash_run_length++;
510 : else
511 : backslash_run_length = 0;
512 :
513 : /*
514 : * Decline to caret-escape the most mundane characters, to ease
515 : * debugging and lest we approach the command length limit.
516 : */
517 : if (!((*p >= 'a' && *p <= 'z') ||
518 : (*p >= 'A' && *p <= 'Z') ||
519 : (*p >= '0' && *p <= '9')))
520 : appendPQExpBufferChar(buf, '^');
521 : appendPQExpBufferChar(buf, *p);
522 : }
523 :
524 : /*
525 : * Change N backslashes at end of argument to 2N backslashes, because they
526 : * precede the double quote that terminates the argument.
527 : */
528 : while (backslash_run_length)
529 : {
530 : appendPQExpBufferStr(buf, "^\\");
531 : backslash_run_length--;
532 : }
533 : appendPQExpBufferStr(buf, "^\"");
534 : #endif /* WIN32 */
535 :
536 124 : return ok;
537 : }
538 :
539 :
540 : /*
541 : * Append the given string to the buffer, with suitable quoting for passing
542 : * the string as a value in a keyword/value pair in a libpq connection string.
543 : */
544 : void
545 2386 : appendConnStrVal(PQExpBuffer buf, const char *str)
546 : {
547 : const char *s;
548 : bool needquotes;
549 :
550 : /*
551 : * If the string is one or more plain ASCII characters, no need to quote
552 : * it. This is quite conservative, but better safe than sorry.
553 : */
554 2386 : needquotes = true;
555 16822 : for (s = str; *s; s++)
556 : {
557 15266 : if (!((*s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') ||
558 2168 : (*s >= '0' && *s <= '9') || *s == '_' || *s == '.'))
559 : {
560 830 : needquotes = true;
561 830 : break;
562 : }
563 14436 : needquotes = false;
564 : }
565 :
566 2386 : if (needquotes)
567 : {
568 830 : appendPQExpBufferChar(buf, '\'');
569 21388 : while (*str)
570 : {
571 : /* ' and \ must be escaped by to \' and \\ */
572 20558 : if (*str == '\'' || *str == '\\')
573 572 : appendPQExpBufferChar(buf, '\\');
574 :
575 20558 : appendPQExpBufferChar(buf, *str);
576 20558 : str++;
577 : }
578 830 : appendPQExpBufferChar(buf, '\'');
579 : }
580 : else
581 1556 : appendPQExpBufferStr(buf, str);
582 2386 : }
583 :
584 :
585 : /*
586 : * Append a psql meta-command that connects to the given database with the
587 : * then-current connection's user, host and port.
588 : */
589 : void
590 70 : appendPsqlMetaConnect(PQExpBuffer buf, const char *dbname)
591 : {
592 : const char *s;
593 : bool complex;
594 :
595 : /*
596 : * If the name is plain ASCII characters, emit a trivial "\connect "foo"".
597 : * For other names, even many not technically requiring it, skip to the
598 : * general case. No database has a zero-length name.
599 : */
600 70 : complex = false;
601 :
602 1822 : for (s = dbname; *s; s++)
603 : {
604 1752 : if (*s == '\n' || *s == '\r')
605 : {
606 0 : fprintf(stderr,
607 0 : _("database name contains a newline or carriage return: \"%s\"\n"),
608 : dbname);
609 0 : exit(EXIT_FAILURE);
610 : }
611 :
612 1752 : if (!((*s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') ||
613 770 : (*s >= '0' && *s <= '9') || *s == '_' || *s == '.'))
614 : {
615 654 : complex = true;
616 : }
617 : }
618 :
619 70 : appendPQExpBufferStr(buf, "\\connect ");
620 70 : if (complex)
621 : {
622 : PQExpBufferData connstr;
623 :
624 20 : initPQExpBuffer(&connstr);
625 20 : appendPQExpBufferStr(&connstr, "dbname=");
626 20 : appendConnStrVal(&connstr, dbname);
627 :
628 20 : appendPQExpBufferStr(buf, "-reuse-previous=on ");
629 :
630 : /*
631 : * As long as the name does not contain a newline, SQL identifier
632 : * quoting satisfies the psql meta-command parser. Prefer not to
633 : * involve psql-interpreted single quotes, which behaved differently
634 : * before PostgreSQL 9.2.
635 : */
636 20 : appendPQExpBufferStr(buf, fmtId(connstr.data));
637 :
638 20 : termPQExpBuffer(&connstr);
639 : }
640 : else
641 50 : appendPQExpBufferStr(buf, fmtId(dbname));
642 70 : appendPQExpBufferChar(buf, '\n');
643 70 : }
644 :
645 :
/*
 * Split the text representation of a 1-dimensional Postgres array into its
 * individual items.
 *
 * On success, returns true, with *itemarray pointing to an array of *nitems
 * strings.  On parse failure, returns false; in that case *itemarray may be
 * either NULL or an allocated block.
 *
 * NOTE: free'ing itemarray is sufficient to deallocate the working storage.
 */
bool
parsePGArray(const char *atext, char ***itemarray, int *nitems)
{
	int			textlen;
	char	  **ptrs;
	char	   *pool;
	const char *in;
	int			count = 0;

	/*
	 * Expected input is "{item,item,item}", where each item is either raw
	 * data or enclosed in double quotes (inside which embedded characters,
	 * including backslashes and quotes, are backslashed).
	 *
	 * The pointer array and the string data that follows it share a single
	 * malloc block, for convenience of deallocation.  In the worst case
	 * (consider "{,,,,,,,,,,}") that takes no more than one pointer and one
	 * character per input character.
	 */
	*itemarray = NULL;
	*nitems = 0;

	textlen = strlen(atext);
	if (textlen < 2 || atext[0] != '{' || atext[textlen - 1] != '}')
		return false;			/* bad input */

	ptrs = (char **) malloc(textlen * (sizeof(char *) + sizeof(char)));
	if (ptrs == NULL)
		return false;			/* out of memory */
	*itemarray = ptrs;
	pool = (char *) (ptrs + textlen);

	in = atext + 1;				/* step over the opening '{' */
	while (*in != '}')
	{
		if (*in == '\0')
			return false;		/* premature end of string */

		ptrs[count] = pool;

		/* Collect one item, up to the next unquoted ',' or '}' */
		for (;;)
		{
			char		c = *in;

			if (c == '}' || c == ',')
				break;
			if (c == '\0')
				return false;	/* premature end of string */

			if (c != '"')
			{
				/* unquoted data is copied verbatim */
				*pool++ = c;
				in++;
				continue;
			}

			/* quoted substring: copy until the closing quote */
			in++;
			while (*in != '"')
			{
				if (*in == '\0')
					return false;	/* premature end of string */
				if (*in == '\\')
				{
					/* backslash: take the following character literally */
					in++;
					if (*in == '\0')
						return false;	/* premature end of string */
				}
				*pool++ = *in++;
			}
			in++;				/* step over the closing quote */
		}

		*pool++ = '\0';
		if (*in == ',')
			in++;
		count++;
	}

	if (in[1] != '\0')
		return false;			/* bogus syntax (embedded '}') */

	*nitems = count;
	return true;
}
727 :
728 :
729 : /*
730 : * Append one element to the text representation of a 1-dimensional Postgres
731 : * array.
732 : *
733 : * The caller must provide the initial '{' and closing '}' of the array.
734 : * This function handles all else, including insertion of commas and
735 : * quoting of values.
736 : *
737 : * We assume that typdelim is ','.
738 : */
739 : void
740 604 : appendPGArray(PQExpBuffer buffer, const char *value)
741 : {
742 : bool needquote;
743 : const char *tmp;
744 :
745 604 : if (buffer->data[buffer->len - 1] != '{')
746 302 : appendPQExpBufferChar(buffer, ',');
747 :
748 : /* Decide if we need quotes; this should match array_out()'s choices. */
749 604 : if (value[0] == '\0')
750 0 : needquote = true; /* force quotes for empty string */
751 604 : else if (pg_strcasecmp(value, "NULL") == 0)
752 0 : needquote = true; /* force quotes for literal NULL */
753 : else
754 604 : needquote = false;
755 :
756 604 : if (!needquote)
757 : {
758 12840 : for (tmp = value; *tmp; tmp++)
759 : {
760 12428 : char ch = *tmp;
761 :
762 12428 : if (ch == '"' || ch == '\\' ||
763 12236 : ch == '{' || ch == '}' || ch == ',' ||
764 : /* these match scanner_isspace(): */
765 12236 : ch == ' ' || ch == '\t' || ch == '\n' ||
766 12236 : ch == '\r' || ch == '\v' || ch == '\f')
767 : {
768 192 : needquote = true;
769 192 : break;
770 : }
771 : }
772 : }
773 :
774 604 : if (needquote)
775 : {
776 192 : appendPQExpBufferChar(buffer, '"');
777 8352 : for (tmp = value; *tmp; tmp++)
778 : {
779 8160 : char ch = *tmp;
780 :
781 8160 : if (ch == '"' || ch == '\\')
782 1440 : appendPQExpBufferChar(buffer, '\\');
783 8160 : appendPQExpBufferChar(buffer, ch);
784 : }
785 192 : appendPQExpBufferChar(buffer, '"');
786 : }
787 : else
788 412 : appendPQExpBufferStr(buffer, value);
789 604 : }
790 :
791 :
792 : /*
793 : * Format a reloptions array and append it to the given buffer.
794 : *
795 : * "prefix" is prepended to the option names; typically it's "" or "toast.".
796 : *
797 : * Returns false if the reloptions array could not be parsed (in which case
798 : * nothing will have been appended to the buffer), or true on success.
799 : *
800 : * Note: this logic should generally match the backend's flatten_reloptions()
801 : * (in adt/ruleutils.c).
802 : */
803 : bool
804 420 : appendReloptionsArray(PQExpBuffer buffer, const char *reloptions,
805 : const char *prefix, int encoding, bool std_strings)
806 : {
807 : char **options;
808 : int noptions;
809 : int i;
810 :
811 420 : if (!parsePGArray(reloptions, &options, &noptions))
812 : {
813 0 : free(options);
814 0 : return false;
815 : }
816 :
817 956 : for (i = 0; i < noptions; i++)
818 : {
819 536 : char *option = options[i];
820 : char *name;
821 : char *separator;
822 : char *value;
823 :
824 : /*
825 : * Each array element should have the form name=value. If the "=" is
826 : * missing for some reason, treat it like an empty value.
827 : */
828 536 : name = option;
829 536 : separator = strchr(option, '=');
830 536 : if (separator)
831 : {
832 536 : *separator = '\0';
833 536 : value = separator + 1;
834 : }
835 : else
836 0 : value = "";
837 :
838 536 : if (i > 0)
839 116 : appendPQExpBufferStr(buffer, ", ");
840 536 : appendPQExpBuffer(buffer, "%s%s=", prefix, fmtId(name));
841 :
842 : /*
843 : * In general we need to quote the value; but to avoid unnecessary
844 : * clutter, do not quote if it is an identifier that would not need
845 : * quoting. (We could also allow numbers, but that is a bit trickier
846 : * than it looks --- for example, are leading zeroes significant? We
847 : * don't want to assume very much here about what custom reloptions
848 : * might mean.)
849 : */
850 536 : if (strcmp(fmtId(value), value) == 0)
851 64 : appendPQExpBufferStr(buffer, value);
852 : else
853 472 : appendStringLiteral(buffer, value, encoding, std_strings);
854 : }
855 :
856 420 : free(options);
857 :
858 420 : return true;
859 : }
860 :
861 :
862 : /*
863 : * processSQLNamePattern
864 : *
865 : * Scan a wildcard-pattern string and generate appropriate WHERE clauses
866 : * to limit the set of objects returned. The WHERE clauses are appended
867 : * to the already-partially-constructed query in buf. Returns whether
868 : * any clause was added.
869 : *
870 : * conn: connection query will be sent to (consulted for escaping rules).
871 : * buf: output parameter.
872 : * pattern: user-specified pattern option, or NULL if none ("*" is implied).
873 : * have_where: true if caller already emitted "WHERE" (clauses will be ANDed
874 : * onto the existing WHERE clause).
875 : * force_escape: always quote regexp special characters, even outside
876 : * double quotes (else they are quoted only between double quotes).
877 : * schemavar: name of query variable to match against a schema-name pattern.
878 : * Can be NULL if no schema.
879 : * namevar: name of query variable to match against an object-name pattern.
880 : * altnamevar: NULL, or name of an alternative variable to match against name.
881 : * visibilityrule: clause to use if we want to restrict to visible objects
882 : * (for example, "pg_catalog.pg_table_is_visible(p.oid)"). Can be NULL.
883 : * dbnamebuf: output parameter receiving the database name portion of the
884 : * pattern, if any. Can be NULL.
885 : * dotcnt: how many separators were parsed from the pattern, by reference.
886 : *
887 : * Formatting note: the text already present in buf should end with a newline.
888 : * The appended text, if any, will end with one too.
889 : */
890 : bool
891 6890 : processSQLNamePattern(PGconn *conn, PQExpBuffer buf, const char *pattern,
892 : bool have_where, bool force_escape,
893 : const char *schemavar, const char *namevar,
894 : const char *altnamevar, const char *visibilityrule,
895 : PQExpBuffer dbnamebuf, int *dotcnt)
896 : {
897 : PQExpBufferData schemabuf;
898 : PQExpBufferData namebuf;
899 6890 : bool added_clause = false;
900 : int dcnt;
901 :
902 : #define WHEREAND() \
903 : (appendPQExpBufferStr(buf, have_where ? " AND " : "WHERE "), \
904 : have_where = true, added_clause = true)
905 :
906 6890 : if (dotcnt == NULL)
907 12 : dotcnt = &dcnt;
908 6890 : *dotcnt = 0;
909 6890 : if (pattern == NULL)
910 : {
911 : /* Default: select all visible objects */
912 484 : if (visibilityrule)
913 : {
914 114 : WHEREAND();
915 114 : appendPQExpBuffer(buf, "%s\n", visibilityrule);
916 : }
917 484 : return added_clause;
918 : }
919 :
920 6406 : initPQExpBuffer(&schemabuf);
921 6406 : initPQExpBuffer(&namebuf);
922 :
923 : /*
924 : * Convert shell-style 'pattern' into the regular expression(s) we want to
925 : * execute. Quoting/escaping into SQL literal format will be done below
926 : * using appendStringLiteralConn().
927 : *
928 : * If the caller provided a schemavar, we want to split the pattern on
929 : * ".", otherwise not.
930 : */
931 6406 : patternToSQLRegex(PQclientEncoding(conn),
932 : (schemavar ? dbnamebuf : NULL),
933 : (schemavar ? &schemabuf : NULL),
934 : &namebuf,
935 : pattern, force_escape, true, dotcnt);
936 :
937 : /*
938 : * Now decide what we need to emit. We may run under a hostile
939 : * search_path, so qualify EVERY name. Note there will be a leading "^("
940 : * in the patterns in any case.
941 : *
942 : * We want the regex matches to use the database's default collation where
943 : * collation-sensitive behavior is required (for example, which characters
944 : * match '\w'). That happened by default before PG v12, but if the server
945 : * is >= v12 then we need to force it through explicit COLLATE clauses,
946 : * otherwise the "C" collation attached to "name" catalog columns wins.
947 : */
948 6406 : if (namevar && namebuf.len > 2)
949 : {
950 : /* We have a name pattern, so constrain the namevar(s) */
951 :
952 : /* Optimize away a "*" pattern */
953 6406 : if (strcmp(namebuf.data, "^(.*)$") != 0)
954 : {
955 6322 : WHEREAND();
956 6322 : if (altnamevar)
957 : {
958 228 : appendPQExpBuffer(buf,
959 : "(%s OPERATOR(pg_catalog.~) ", namevar);
960 228 : appendStringLiteralConn(buf, namebuf.data, conn);
961 228 : if (PQserverVersion(conn) >= 120000)
962 228 : appendPQExpBufferStr(buf, " COLLATE pg_catalog.default");
963 228 : appendPQExpBuffer(buf,
964 : "\n OR %s OPERATOR(pg_catalog.~) ",
965 : altnamevar);
966 228 : appendStringLiteralConn(buf, namebuf.data, conn);
967 228 : if (PQserverVersion(conn) >= 120000)
968 228 : appendPQExpBufferStr(buf, " COLLATE pg_catalog.default");
969 228 : appendPQExpBufferStr(buf, ")\n");
970 : }
971 : else
972 : {
973 6094 : appendPQExpBuffer(buf, "%s OPERATOR(pg_catalog.~) ", namevar);
974 6094 : appendStringLiteralConn(buf, namebuf.data, conn);
975 6094 : if (PQserverVersion(conn) >= 120000)
976 6094 : appendPQExpBufferStr(buf, " COLLATE pg_catalog.default");
977 6094 : appendPQExpBufferChar(buf, '\n');
978 : }
979 : }
980 : }
981 :
982 6406 : if (schemavar && schemabuf.len > 2)
983 : {
984 : /* We have a schema pattern, so constrain the schemavar */
985 :
986 : /* Optimize away a "*" pattern */
987 1436 : if (strcmp(schemabuf.data, "^(.*)$") != 0 && schemavar)
988 : {
989 1430 : WHEREAND();
990 1430 : appendPQExpBuffer(buf, "%s OPERATOR(pg_catalog.~) ", schemavar);
991 1430 : appendStringLiteralConn(buf, schemabuf.data, conn);
992 1430 : if (PQserverVersion(conn) >= 120000)
993 1430 : appendPQExpBufferStr(buf, " COLLATE pg_catalog.default");
994 1430 : appendPQExpBufferChar(buf, '\n');
995 : }
996 : }
997 : else
998 : {
999 : /* No schema pattern given, so select only visible objects */
1000 4970 : if (visibilityrule)
1001 : {
1002 3942 : WHEREAND();
1003 3942 : appendPQExpBuffer(buf, "%s\n", visibilityrule);
1004 : }
1005 : }
1006 :
1007 6406 : termPQExpBuffer(&schemabuf);
1008 6406 : termPQExpBuffer(&namebuf);
1009 :
1010 6406 : return added_clause;
1011 : #undef WHEREAND
1012 : }
1013 :
1014 : /*
1015 : * Transform a possibly qualified shell-style object name pattern into up to
1016 : * three SQL-style regular expressions, converting quotes, lower-casing
1017 : * unquoted letters, and adjusting shell-style wildcard characters into regexp
1018 : * notation.
1019 : *
1020 : * If the dbnamebuf and schemabuf arguments are non-NULL, and the pattern
1021 : * contains two or more dbname/schema/name separators, we parse the portions of
1022 : * the pattern prior to the first and second separators into dbnamebuf and
1023 : * schemabuf, and the rest into namebuf.
1024 : *
1025 : * If dbnamebuf is NULL and schemabuf is non-NULL, and the pattern contains at
1026 : * least one separator, we parse the first portion into schemabuf and the rest
1027 : * into namebuf.
1028 : *
1029 : * Otherwise, we parse all the pattern into namebuf.
1030 : *
1031 : * If the pattern contains more dotted parts than buffers to parse into, the
1032 : * extra dots will be treated as literal characters and written into the
1033 : * namebuf, though they will be counted. Callers should always check the value
1034 : * returned by reference in dotcnt and handle this error case appropriately.
1035 : *
1036 : * We surround the regexps with "^(...)$" to force them to match whole strings,
1037 : * as per SQL practice. We have to have parens in case strings contain "|",
1038 : * else the "^" and "$" will be bound into the first and last alternatives
1039 : * which is not what we want. Whether this is done for dbnamebuf is controlled
1040 : * by the want_literal_dbname parameter.
1041 : *
1042 : * The regexps we parse into the buffers are appended to the data (if any)
1043 : * already present. If we parse fewer fields than the number of buffers we
1044 : * were given, the extra buffers are unaltered.
1045 : *
1046 : * encoding: the character encoding for the given pattern
1047 : * dbnamebuf: output parameter receiving the database name portion of the
1048 : * pattern, if any. Can be NULL.
1049 : * schemabuf: output parameter receiving the schema name portion of the
1050 : * pattern, if any. Can be NULL.
1051 : * namebuf: output parameter receiving the database name portion of the
1052 : * pattern, if any. Can be NULL.
1053 : * pattern: user-specified pattern option, or NULL if none ("*" is implied).
1054 : * force_escape: always quote regexp special characters, even outside
1055 : * double quotes (else they are quoted only between double quotes).
1056 : * want_literal_dbname: if true, regexp special characters within the database
1057 : * name portion of the pattern will not be escaped, nor will the dbname be
1058 : * converted into a regular expression.
1059 : * dotcnt: output parameter receiving the number of separators parsed from the
1060 : * pattern.
1061 : */
1062 : void
1063 6610 : patternToSQLRegex(int encoding, PQExpBuffer dbnamebuf, PQExpBuffer schemabuf,
1064 : PQExpBuffer namebuf, const char *pattern, bool force_escape,
1065 : bool want_literal_dbname, int *dotcnt)
1066 : {
1067 : PQExpBufferData buf[3];
1068 : PQExpBufferData left_literal;
1069 : PQExpBuffer curbuf;
1070 : PQExpBuffer maxbuf;
1071 : int i;
1072 : bool inquotes;
1073 : bool left;
1074 : const char *cp;
1075 :
1076 : Assert(pattern != NULL);
1077 : Assert(namebuf != NULL);
1078 :
1079 : /* callers should never expect "dbname.relname" format */
1080 : Assert(dbnamebuf == NULL || schemabuf != NULL);
1081 : Assert(dotcnt != NULL);
1082 :
1083 6610 : *dotcnt = 0;
1084 6610 : inquotes = false;
1085 6610 : cp = pattern;
1086 :
1087 6610 : if (dbnamebuf != NULL)
1088 5468 : maxbuf = &buf[2];
1089 1142 : else if (schemabuf != NULL)
1090 58 : maxbuf = &buf[1];
1091 : else
1092 1084 : maxbuf = &buf[0];
1093 :
1094 6610 : curbuf = &buf[0];
1095 6610 : if (want_literal_dbname)
1096 : {
1097 6406 : left = true;
1098 6406 : initPQExpBuffer(&left_literal);
1099 : }
1100 : else
1101 204 : left = false;
1102 6610 : initPQExpBuffer(curbuf);
1103 6610 : appendPQExpBufferStr(curbuf, "^(");
1104 118764 : while (*cp)
1105 : {
1106 112154 : char ch = *cp;
1107 :
1108 112154 : if (ch == '"')
1109 : {
1110 3214 : if (inquotes && cp[1] == '"')
1111 : {
1112 : /* emit one quote, stay in inquotes mode */
1113 6 : appendPQExpBufferChar(curbuf, '"');
1114 6 : if (left)
1115 6 : appendPQExpBufferChar(&left_literal, '"');
1116 6 : cp++;
1117 : }
1118 : else
1119 3208 : inquotes = !inquotes;
1120 3214 : cp++;
1121 : }
1122 108940 : else if (!inquotes && isupper((unsigned char) ch))
1123 : {
1124 240 : appendPQExpBufferChar(curbuf,
1125 240 : pg_tolower((unsigned char) ch));
1126 240 : if (left)
1127 150 : appendPQExpBufferChar(&left_literal,
1128 150 : pg_tolower((unsigned char) ch));
1129 240 : cp++;
1130 : }
1131 108700 : else if (!inquotes && ch == '*')
1132 : {
1133 410 : appendPQExpBufferStr(curbuf, ".*");
1134 410 : if (left)
1135 304 : appendPQExpBufferChar(&left_literal, '*');
1136 410 : cp++;
1137 : }
1138 108290 : else if (!inquotes && ch == '?')
1139 : {
1140 6 : appendPQExpBufferChar(curbuf, '.');
1141 6 : if (left)
1142 6 : appendPQExpBufferChar(&left_literal, '?');
1143 6 : cp++;
1144 : }
1145 108284 : else if (!inquotes && ch == '.')
1146 : {
1147 2826 : left = false;
1148 2826 : if (dotcnt)
1149 2826 : (*dotcnt)++;
1150 2826 : if (curbuf < maxbuf)
1151 : {
1152 2242 : appendPQExpBufferStr(curbuf, ")$");
1153 2242 : curbuf++;
1154 2242 : initPQExpBuffer(curbuf);
1155 2242 : appendPQExpBufferStr(curbuf, "^(");
1156 2242 : cp++;
1157 : }
1158 : else
1159 584 : appendPQExpBufferChar(curbuf, *cp++);
1160 : }
1161 105458 : else if (ch == '$')
1162 : {
1163 : /*
1164 : * Dollar is always quoted, whether inside quotes or not. The
1165 : * reason is that it's allowed in SQL identifiers, so there's a
1166 : * significant use-case for treating it literally, while because
1167 : * we anchor the pattern automatically there is no use-case for
1168 : * having it possess its regexp meaning.
1169 : */
1170 12 : appendPQExpBufferStr(curbuf, "\\$");
1171 12 : if (left)
1172 12 : appendPQExpBufferChar(&left_literal, '$');
1173 12 : cp++;
1174 : }
1175 : else
1176 : {
1177 : /*
1178 : * Ordinary data character, transfer to pattern
1179 : *
1180 : * Inside double quotes, or at all times if force_escape is true,
1181 : * quote regexp special characters with a backslash to avoid
1182 : * regexp errors. Outside quotes, however, let them pass through
1183 : * as-is; this lets knowledgeable users build regexp expressions
1184 : * that are more powerful than shell-style patterns.
1185 : *
1186 : * As an exception to that, though, always quote "[]", as that's
1187 : * much more likely to be an attempt to write an array type name
1188 : * than it is to be the start of a regexp bracket expression.
1189 : */
1190 105446 : if ((inquotes || force_escape) &&
1191 29992 : strchr("|*+?()[]{}.^$\\", ch))
1192 3874 : appendPQExpBufferChar(curbuf, '\\');
1193 101572 : else if (ch == '[' && cp[1] == ']')
1194 6 : appendPQExpBufferChar(curbuf, '\\');
1195 105446 : i = PQmblenBounded(cp, encoding);
1196 210892 : while (i--)
1197 : {
1198 105446 : if (left)
1199 72018 : appendPQExpBufferChar(&left_literal, *cp);
1200 105446 : appendPQExpBufferChar(curbuf, *cp++);
1201 : }
1202 : }
1203 : }
1204 6610 : appendPQExpBufferStr(curbuf, ")$");
1205 :
1206 6610 : if (namebuf)
1207 : {
1208 6610 : appendPQExpBufferStr(namebuf, curbuf->data);
1209 6610 : termPQExpBuffer(curbuf);
1210 6610 : curbuf--;
1211 : }
1212 :
1213 6610 : if (schemabuf && curbuf >= buf)
1214 : {
1215 1488 : appendPQExpBufferStr(schemabuf, curbuf->data);
1216 1488 : termPQExpBuffer(curbuf);
1217 1488 : curbuf--;
1218 : }
1219 :
1220 6610 : if (dbnamebuf && curbuf >= buf)
1221 : {
1222 754 : if (want_literal_dbname)
1223 720 : appendPQExpBufferStr(dbnamebuf, left_literal.data);
1224 : else
1225 34 : appendPQExpBufferStr(dbnamebuf, curbuf->data);
1226 754 : termPQExpBuffer(curbuf);
1227 : }
1228 :
1229 6610 : if (want_literal_dbname)
1230 6406 : termPQExpBuffer(&left_literal);
1231 6610 : }
|