LCOV - code coverage report
Current view: top level - src/fe_utils - string_utils.c (source / functions) | Hit | Total | Coverage
Test: PostgreSQL 18devel | Lines: | 405 | 438 | 92.5 %
Date: 2025-02-21 17:14:59 | Functions: | 20 | 21 | 95.2 %
Legend: Lines: hit | not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * String-processing utility routines for frontend code
       4             :  *
       5             :  * Assorted utility functions that are useful in constructing SQL queries
       6             :  * and interpreting backend output.
       7             :  *
       8             :  *
       9             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
      10             :  * Portions Copyright (c) 1994, Regents of the University of California
      11             :  *
      12             :  * src/fe_utils/string_utils.c
      13             :  *
      14             :  *-------------------------------------------------------------------------
      15             :  */
      16             : #include "postgres_fe.h"
      17             : 
      18             : #include <ctype.h>
      19             : 
      20             : #include "common/keywords.h"
      21             : #include "fe_utils/string_utils.h"
      22             : #include "mb/pg_wchar.h"
      23             : 
      24             : static PQExpBuffer defaultGetLocalPQExpBuffer(void);
      25             : 
      26             : /* Globals exported by this file */
      27             : int         quote_all_identifiers = 0;
      28             : PQExpBuffer (*getLocalPQExpBuffer) (void) = defaultGetLocalPQExpBuffer;
      29             : 
      30             : static int  fmtIdEncoding = -1;
      31             : 
      32             : 
      33             : /*
      34             :  * Returns a temporary PQExpBuffer, valid until the next call to the function.
      35             :  * This is used by fmtId and fmtQualifiedId.
      36             :  *
      37             :  * Non-reentrant and non-thread-safe but reduces memory leakage. You can
      38             :  * replace this with a custom version by setting the getLocalPQExpBuffer
      39             :  * function pointer.
      40             :  */
      41             : static PQExpBuffer
      42      671686 : defaultGetLocalPQExpBuffer(void)
      43             : {
      44             :     static PQExpBuffer id_return = NULL;
      45             : 
      46      671686 :     if (id_return)              /* first time through? */
      47             :     {
      48             :         /* same buffer, just wipe contents */
      49      670930 :         resetPQExpBuffer(id_return);
      50             :     }
      51             :     else
      52             :     {
      53             :         /* new buffer */
      54         756 :         id_return = createPQExpBuffer();
      55             :     }
      56             : 
      57      671686 :     return id_return;
      58             : }
      59             : 
      60             : /*
      61             :  * Set the encoding that fmtId() and fmtQualifiedId() use.
      62             :  *
      63             :  * This is not safe against multiple connections having different encodings,
      64             :  * but there is no other real way to address the need to know the encoding for
      65             :  * fmtId()/fmtQualifiedId() input for safe escaping. Eventually we should get
      66             :  * rid of fmtId().
      67             :  */
      68             : void
      69       19872 : setFmtEncoding(int encoding)
      70             : {
      71       19872 :     fmtIdEncoding = encoding;
      72       19872 : }
      73             : 
      74             : /*
      75             :  * Return the currently configured encoding for fmtId() and fmtQualifiedId().
      76             :  */
      77             : static int
      78      462030 : getFmtEncoding(void)
      79             : {
      80      462030 :     if (fmtIdEncoding != -1)
      81      462030 :         return fmtIdEncoding;
      82             : 
      83             :     /*
      84             :      * In assertion builds it seems best to fail hard if the encoding was not
      85             :      * set, to make it easier to find places with missing calls. But in
      86             :      * production builds that seems like a bad idea, thus we instead just
      87             :      * default to UTF-8.
      88             :      */
      89             :     Assert(fmtIdEncoding != -1);
      90             : 
      91           0 :     return PG_UTF8;
      92             : }
      93             : 
      94             : /*
      95             :  *  Quotes input string if it's not a legitimate SQL identifier as-is.
      96             :  *
      97             :  *  Note that the returned string must be used before calling fmtIdEnc again,
      98             :  *  since we re-use the same return buffer each time.
      99             :  */
     100             : const char *
     101      570406 : fmtIdEnc(const char *rawid, int encoding)
     102             : {
     103      570406 :     PQExpBuffer id_return = getLocalPQExpBuffer();
     104             : 
     105             :     const char *cp;
     106      570406 :     bool        need_quotes = false;
     107      570406 :     size_t      remaining = strlen(rawid);
     108             : 
     109             :     /*
     110             :      * These checks need to match the identifier production in scan.l. Don't
     111             :      * use islower() etc.
     112             :      */
     113      570406 :     if (quote_all_identifiers)
     114       35050 :         need_quotes = true;
     115             :     /* slightly different rules for first character */
     116      535356 :     else if (!((rawid[0] >= 'a' && rawid[0] <= 'z') || rawid[0] == '_'))
     117        1286 :         need_quotes = true;
     118             :     else
     119             :     {
     120             :         /* otherwise check the entire string */
     121      534070 :         cp = rawid;
     122     5850366 :         for (size_t i = 0; i < remaining; i++, cp++)
     123             :         {
     124     5335736 :             if (!((*cp >= 'a' && *cp <= 'z')
     125      720572 :                   || (*cp >= '0' && *cp <= '9')
     126      491556 :                   || (*cp == '_')))
     127             :             {
     128       19440 :                 need_quotes = true;
     129       19440 :                 break;
     130             :             }
     131             :         }
     132             :     }
     133             : 
     134      570406 :     if (!need_quotes)
     135             :     {
     136             :         /*
     137             :          * Check for keyword.  We quote keywords except for unreserved ones.
     138             :          * (In some cases we could avoid quoting a col_name or type_func_name
     139             :          * keyword, but it seems much harder than it's worth to tell that.)
     140             :          *
     141             :          * Note: ScanKeywordLookup() does case-insensitive comparison, but
     142             :          * that's fine, since we already know we have all-lower-case.
     143             :          */
     144      514630 :         int         kwnum = ScanKeywordLookup(rawid, &ScanKeywords);
     145             : 
     146      514630 :         if (kwnum >= 0 && ScanKeywordCategories[kwnum] != UNRESERVED_KEYWORD)
     147        1440 :             need_quotes = true;
     148             :     }
     149             : 
     150      570406 :     if (!need_quotes)
     151             :     {
     152             :         /* no quoting needed */
     153      513190 :         appendPQExpBufferStr(id_return, rawid);
     154             :     }
     155             :     else
     156             :     {
     157       57216 :         appendPQExpBufferChar(id_return, '"');
     158             : 
     159       57216 :         cp = &rawid[0];
     160      635010 :         while (remaining > 0)
     161             :         {
     162             :             int         charlen;
     163             : 
     164             :             /* Fast path for plain ASCII */
     165      577794 :             if (!IS_HIGHBIT_SET(*cp))
     166             :             {
     167             :                 /*
     168             :                  * Did we find a double-quote in the string? Then make this a
     169             :                  * double double-quote per SQL99. Before, we put in a
     170             :                  * backslash/double-quote pair. - thomas 2000-08-05
     171             :                  */
     172      572320 :                 if (*cp == '"')
     173         420 :                     appendPQExpBufferChar(id_return, '"');
     174      572320 :                 appendPQExpBufferChar(id_return, *cp);
     175      572320 :                 remaining--;
     176      572320 :                 cp++;
     177      572320 :                 continue;
     178             :             }
     179             : 
     180             :             /* Slow path for possible multibyte characters */
     181        5474 :             charlen = pg_encoding_mblen(encoding, cp);
     182             : 
     183       10904 :             if (remaining < charlen ||
     184        5430 :                 pg_encoding_verifymbchar(encoding, cp, charlen) == -1)
     185             :             {
     186             :                 /*
     187             :                  * Multibyte character is invalid.  It's important to verify
     188             :                  * that as invalid multibyte characters could e.g. be used to
     189             :                  * "skip" over quote characters, e.g. when parsing
     190             :                  * character-by-character.
     191             :                  *
     192             :                  * Replace the character's first byte with an invalid
     193             :                  * sequence. The invalid sequence ensures that the escaped
     194             :                  * string will trigger an error on the server-side, even if we
     195             :                  * can't directly report an error here.
     196             :                  *
     197             :                  * It would be a bit faster to verify the whole string the
     198             :                  * first time we encounter a set highbit, but this way we can
     199             :                  * replace just the invalid data, which probably makes it
     200             :                  * easier for users to find the invalidly encoded portion of a
     201             :                  * larger string.
     202             :                  */
     203          70 :                 if (enlargePQExpBuffer(id_return, 2))
     204             :                 {
     205          70 :                     pg_encoding_set_invalid(encoding,
     206          70 :                                             id_return->data + id_return->len);
     207          70 :                     id_return->len += 2;
     208          70 :                     id_return->data[id_return->len] = '\0';
     209             :                 }
     210             : 
     211             :                 /*
     212             :                  * Handle the following bytes as if this byte didn't exist.
     213             :                  * That's safer in case the subsequent bytes contain
     214             :                  * characters that are significant for the caller (e.g. '>' in
     215             :                  * html).
     216             :                  */
     217          70 :                 remaining--;
     218          70 :                 cp++;
     219             :             }
     220             :             else
     221             :             {
     222       10834 :                 for (int i = 0; i < charlen; i++)
     223             :                 {
     224        5430 :                     appendPQExpBufferChar(id_return, *cp);
     225        5430 :                     remaining--;
     226        5430 :                     cp++;
     227             :                 }
     228             :             }
     229             :         }
     230             : 
     231       57216 :         appendPQExpBufferChar(id_return, '"');
     232             :     }
     233             : 
     234      570406 :     return id_return->data;
     235             : }
     236             : 
     237             : /*
     238             :  *  Quotes input string if it's not a legitimate SQL identifier as-is.
     239             :  *
     240             :  *  Note that the returned string must be used before calling fmtId again,
     241             :  *  since we re-use the same return buffer each time.
     242             :  *
     243             :  *  NB: This assumes setFmtEncoding() previously has been called to configure
     244             :  *  the encoding of rawid. It is preferable to use fmtIdEnc() with an
     245             :  *  explicit encoding.
     246             :  */
     247             : const char *
     248      367700 : fmtId(const char *rawid)
     249             : {
     250      367700 :     return fmtIdEnc(rawid, getFmtEncoding());
     251             : }
     252             : 
     253             : /*
     254             :  * fmtQualifiedIdEnc - construct a schema-qualified name, with quoting as
     255             :  * needed.
     256             :  *
     257             :  * Like fmtId, use the result before calling again.
     258             :  *
     259             :  * Since we call fmtIdEnc and it also uses getLocalPQExpBuffer() we cannot
     260             :  * use that buffer until we're finished with calling fmtIdEnc().
     261             :  */
     262             : const char *
     263      101280 : fmtQualifiedIdEnc(const char *schema, const char *id, int encoding)
     264             : {
     265             :     PQExpBuffer id_return;
     266      101280 :     PQExpBuffer lcl_pqexp = createPQExpBuffer();
     267             : 
     268             :     /* Some callers might fail to provide a schema name */
     269      101280 :     if (schema && *schema)
     270             :     {
     271      101280 :         appendPQExpBuffer(lcl_pqexp, "%s.", fmtIdEnc(schema, encoding));
     272             :     }
     273      101280 :     appendPQExpBufferStr(lcl_pqexp, fmtIdEnc(id, encoding));
     274             : 
     275      101280 :     id_return = getLocalPQExpBuffer();
     276             : 
     277      101280 :     appendPQExpBufferStr(id_return, lcl_pqexp->data);
     278      101280 :     destroyPQExpBuffer(lcl_pqexp);
     279             : 
     280      101280 :     return id_return->data;
     281             : }
     282             : 
     283             : /*
     284             :  * fmtQualifiedId - construct a schema-qualified name, with quoting as needed.
     285             :  *
     286             :  * Like fmtId, use the result before calling again.
     287             :  *
     288             :  * Since we call fmtId and it also uses getLocalPQExpBuffer() we cannot
     289             :  * use that buffer until we're finished with calling fmtId().
     290             :  *
     291             :  * NB: This assumes setFmtEncoding() previously has been called to configure
     292             :  * the encoding of schema/id. It is preferable to use fmtQualifiedIdEnc()
     293             :  * with an explicit encoding.
     294             :  */
     295             : const char *
     296       94330 : fmtQualifiedId(const char *schema, const char *id)
     297             : {
     298       94330 :     return fmtQualifiedIdEnc(schema, id, getFmtEncoding());
     299             : }
     300             : 
     301             : 
     302             : /*
     303             :  * Format a Postgres version number (in the PG_VERSION_NUM integer format
     304             :  * returned by PQserverVersion()) as a string.  This exists mainly to
     305             :  * encapsulate knowledge about two-part vs. three-part version numbers.
     306             :  *
     307             :  * For reentrancy, caller must supply the buffer the string is put in.
     308             :  * Recommended size of the buffer is 32 bytes.
     309             :  *
     310             :  * Returns address of 'buf', as a notational convenience.
     311             :  */
     312             : char *
     313           0 : formatPGVersionNumber(int version_number, bool include_minor,
     314             :                       char *buf, size_t buflen)
     315             : {
     316           0 :     if (version_number >= 100000)
     317             :     {
     318             :         /* New two-part style */
     319           0 :         if (include_minor)
     320           0 :             snprintf(buf, buflen, "%d.%d", version_number / 10000,
     321             :                      version_number % 10000);
     322             :         else
     323           0 :             snprintf(buf, buflen, "%d", version_number / 10000);
     324             :     }
     325             :     else
     326             :     {
     327             :         /* Old three-part style */
     328           0 :         if (include_minor)
     329           0 :             snprintf(buf, buflen, "%d.%d.%d", version_number / 10000,
     330           0 :                      (version_number / 100) % 100,
     331             :                      version_number % 100);
     332             :         else
     333           0 :             snprintf(buf, buflen, "%d.%d", version_number / 10000,
     334           0 :                      (version_number / 100) % 100);
     335             :     }
     336           0 :     return buf;
     337             : }
     338             : 
     339             : 
     340             : /*
     341             :  * Convert a string value to an SQL string literal and append it to
     342             :  * the given buffer.  We assume the specified client_encoding and
     343             :  * standard_conforming_strings settings.
     344             :  *
     345             :  * This is essentially equivalent to libpq's PQescapeStringInternal,
     346             :  * except for the output buffer structure.  We need it in situations
     347             :  * where we do not have a PGconn available.  Where we do,
     348             :  * appendStringLiteralConn is a better choice.
     349             :  */
     350             : void
     351      319794 : appendStringLiteral(PQExpBuffer buf, const char *str,
     352             :                     int encoding, bool std_strings)
     353             : {
     354      319794 :     size_t      length = strlen(str);
     355      319794 :     const char *source = str;
     356             :     char       *target;
     357      319794 :     size_t      remaining = length;
     358             : 
     359      319794 :     if (!enlargePQExpBuffer(buf, 2 * length + 2))
     360           0 :         return;
     361             : 
     362      319794 :     target = buf->data + buf->len;
     363      319794 :     *target++ = '\'';
     364             : 
     365     5672610 :     while (remaining > 0)
     366             :     {
     367     5352816 :         char        c = *source;
     368             :         int         charlen;
     369             :         int         i;
     370             : 
     371             :         /* Fast path for plain ASCII */
     372     5352816 :         if (!IS_HIGHBIT_SET(c))
     373             :         {
     374             :             /* Apply quoting if needed */
     375     5352410 :             if (SQL_STR_DOUBLE(c, !std_strings))
     376        4692 :                 *target++ = c;
     377             :             /* Copy the character */
     378     5352410 :             *target++ = c;
     379     5352410 :             source++;
     380     5352410 :             remaining--;
     381     5352410 :             continue;
     382             :         }
     383             : 
     384             :         /* Slow path for possible multibyte characters */
     385         406 :         charlen = PQmblen(source, encoding);
     386             : 
     387         768 :         if (remaining < charlen ||
     388         362 :             pg_encoding_verifymbchar(encoding, source, charlen) == -1)
     389             :         {
     390             :             /*
     391             :              * Multibyte character is invalid.  It's important to verify that
     392             :              * as invalid multibyte characters could e.g. be used to "skip"
     393             :              * over quote characters, e.g. when parsing
     394             :              * character-by-character.
     395             :              *
     396             :              * Replace the character's first byte with an invalid sequence.
     397             :              * The invalid sequence ensures that the escaped string will
     398             :              * trigger an error on the server-side, even if we can't directly
     399             :              * report an error here.
     400             :              *
     401             :              * We know there's enough space for the invalid sequence because
     402             :              * the "target" buffer is 2 * length + 2 long, and at worst we're
     403             :              * replacing a single input byte with two invalid bytes.
     404             :              *
     405             :              * It would be a bit faster to verify the whole string the first
     406             :              * time we encounter a set highbit, but this way we can replace
     407             :              * just the invalid data, which probably makes it easier for users
     408             :              * to find the invalidly encoded portion of a larger string.
     409             :              */
     410          70 :             pg_encoding_set_invalid(encoding, target);
     411          70 :             target += 2;
     412             : 
     413             :             /*
     414             :              * Handle the following bytes as if this byte didn't exist. That's
     415             :              * safer in case the subsequent bytes contain important characters
     416             :              * for the caller (e.g. '>' in html).
     417             :              */
     418          70 :             source++;
     419          70 :             remaining--;
     420             :         }
     421             :         else
     422             :         {
     423             :             /* Copy the character */
     424         698 :             for (i = 0; i < charlen; i++)
     425             :             {
     426         362 :                 *target++ = *source++;
     427         362 :                 remaining--;
     428             :             }
     429             :         }
     430             :     }
     431             : 
     432             :     /* Write the terminating quote and NUL character. */
     433      319794 :     *target++ = '\'';
     434      319794 :     *target = '\0';
     435             : 
     436      319794 :     buf->len = target - buf->data;
     437             : }
     438             : 
     439             : 
     440             : /*
     441             :  * Convert a string value to an SQL string literal and append it to
     442             :  * the given buffer.  Encoding and string syntax rules are as indicated
     443             :  * by current settings of the PGconn.
     444             :  */
     445             : void
     446        9192 : appendStringLiteralConn(PQExpBuffer buf, const char *str, PGconn *conn)
     447             : {
     448        9192 :     size_t      length = strlen(str);
     449             : 
     450             :     /*
     451             :      * XXX This is a kluge to silence escape_string_warning in our utility
     452             :      * programs.  It should go away someday.
     453             :      */
     454        9192 :     if (strchr(str, '\\') != NULL && PQserverVersion(conn) >= 80100)
     455             :     {
     456             :         /* ensure we are not adjacent to an identifier */
     457        1432 :         if (buf->len > 0 && buf->data[buf->len - 1] != ' ')
     458           0 :             appendPQExpBufferChar(buf, ' ');
     459        1432 :         appendPQExpBufferChar(buf, ESCAPE_STRING_SYNTAX);
     460        1432 :         appendStringLiteral(buf, str, PQclientEncoding(conn), false);
     461        1432 :         return;
     462             :     }
     463             :     /* XXX end kluge */
     464             : 
     465        7760 :     if (!enlargePQExpBuffer(buf, 2 * length + 2))
     466           0 :         return;
     467        7760 :     appendPQExpBufferChar(buf, '\'');
     468        7760 :     buf->len += PQescapeStringConn(conn, buf->data + buf->len,
     469             :                                    str, length, NULL);
     470        7760 :     appendPQExpBufferChar(buf, '\'');
     471             : }
     472             : 
     473             : 
     474             : /*
     475             :  * Convert a string value to a dollar quoted literal and append it to
     476             :  * the given buffer. If the dqprefix parameter is not NULL then the
     477             :  * dollar quote delimiter will begin with that (after the opening $).
     478             :  *
     479             :  * No escaping is done at all on str, in compliance with the rules
     480             :  * for parsing dollar quoted strings.  Also, we need not worry about
     481             :  * encoding issues.
     482             :  */
     483             : void
     484        3174 : appendStringLiteralDQ(PQExpBuffer buf, const char *str, const char *dqprefix)
     485             : {
     486             :     static const char suffixes[] = "_XXXXXXX";
     487        3174 :     int         nextchar = 0;
     488        3174 :     PQExpBuffer delimBuf = createPQExpBuffer();
     489             : 
     490             :     /* start with $ + dqprefix if not NULL */
     491        3174 :     appendPQExpBufferChar(delimBuf, '$');
     492        3174 :     if (dqprefix)
     493           0 :         appendPQExpBufferStr(delimBuf, dqprefix);
     494             : 
     495             :     /*
     496             :      * Make sure we choose a delimiter which (without the trailing $) is not
     497             :      * present in the string being quoted. We don't check with the trailing $
     498             :      * because a string ending in $foo must not be quoted with $foo$.
     499             :      */
     500        4196 :     while (strstr(str, delimBuf->data) != NULL)
     501             :     {
     502        1022 :         appendPQExpBufferChar(delimBuf, suffixes[nextchar++]);
     503        1022 :         nextchar %= sizeof(suffixes) - 1;
     504             :     }
     505             : 
     506             :     /* add trailing $ */
     507        3174 :     appendPQExpBufferChar(delimBuf, '$');
     508             : 
     509             :     /* quote it and we are all done */
     510        3174 :     appendPQExpBufferStr(buf, delimBuf->data);
     511        3174 :     appendPQExpBufferStr(buf, str);
     512        3174 :     appendPQExpBufferStr(buf, delimBuf->data);
     513             : 
     514        3174 :     destroyPQExpBuffer(delimBuf);
     515        3174 : }
     516             : 
     517             : 
     518             : /*
     519             :  * Convert a bytea value (presented as raw bytes) to an SQL string literal
     520             :  * and append it to the given buffer.  We assume the specified
     521             :  * standard_conforming_strings setting.
     522             :  *
     523             :  * This is needed in situations where we do not have a PGconn available.
     524             :  * Where we do, PQescapeByteaConn is a better choice.
     525             :  */
     526             : void
     527          82 : appendByteaLiteral(PQExpBuffer buf, const unsigned char *str, size_t length,
     528             :                    bool std_strings)
     529             : {
     530          82 :     const unsigned char *source = str;
     531             :     char       *target;
     532             : 
     533             :     static const char hextbl[] = "0123456789abcdef";
     534             : 
     535             :     /*
     536             :      * This implementation is hard-wired to produce hex-format output. We do
     537             :      * not know the server version the output will be loaded into, so making
     538             :      * an intelligent format choice is impossible.  It might be better to
     539             :      * always use the old escaped format.
     540             :      */
     541          82 :     if (!enlargePQExpBuffer(buf, 2 * length + 5))
     542           0 :         return;
     543             : 
     544          82 :     target = buf->data + buf->len;
     545          82 :     *target++ = '\'';
     546          82 :     if (!std_strings)
     547           0 :         *target++ = '\\';
     548          82 :     *target++ = '\\';
     549          82 :     *target++ = 'x';
     550             : 
     551        8086 :     while (length-- > 0)
     552             :     {
     553        8004 :         unsigned char c = *source++;
     554             : 
     555        8004 :         *target++ = hextbl[(c >> 4) & 0xF];
     556        8004 :         *target++ = hextbl[c & 0xF];
     557             :     }
     558             : 
     559             :     /* Write the terminating quote and NUL character. */
     560          82 :     *target++ = '\'';
     561          82 :     *target = '\0';
     562             : 
     563          82 :     buf->len = target - buf->data;
     564             : }
     565             : 
     566             : 
     567             : /*
     568             :  * Append the given string to the shell command being built in the buffer,
     569             :  * with shell-style quoting as needed to create exactly one argument.
     570             :  *
     571             :  * Forbid LF or CR characters, which have scant practical use beyond designing
     572             :  * security breaches.  The Windows command shell is unusable as a conduit for
     573             :  * arguments containing LF or CR characters.  A future major release should
     574             :  * reject those characters in CREATE ROLE and CREATE DATABASE, because use
     575             :  * there eventually leads to errors here.
     576             :  *
     577             :  * appendShellString() simply prints an error and dies if LF or CR appears.
     578             :  * appendShellStringNoError() omits those characters from the result, and
     579             :  * returns false if there were any.
     580             :  */
     581             : void
     582         648 : appendShellString(PQExpBuffer buf, const char *str)
     583             : {
     584         648 :     if (!appendShellStringNoError(buf, str))
     585             :     {
     586           2 :         fprintf(stderr,
     587           2 :                 _("shell command argument contains a newline or carriage return: \"%s\"\n"),
     588             :                 str);
     589           2 :         exit(EXIT_FAILURE);
     590             :     }
     591         646 : }
     592             : 
     593             : bool
     594         648 : appendShellStringNoError(PQExpBuffer buf, const char *str)
     595             : {
     596             : #ifdef WIN32
     597             :     int         backslash_run_length = 0;
     598             : #endif
     599         648 :     bool        ok = true;
     600             :     const char *p;
     601             : 
     602             :     /*
     603             :      * Don't bother with adding quotes if the string is nonempty and clearly
     604             :      * contains only safe characters.
     605             :      */
     606         648 :     if (*str != '\0' &&
     607         648 :         strspn(str, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_./:") == strlen(str))
     608             :     {
     609         524 :         appendPQExpBufferStr(buf, str);
     610         524 :         return ok;
     611             :     }
     612             : 
     613             : #ifndef WIN32
     614         124 :     appendPQExpBufferChar(buf, '\'');
     615        6088 :     for (p = str; *p; p++)
     616             :     {
     617        5964 :         if (*p == '\n' || *p == '\r')
     618             :         {
     619           4 :             ok = false;
     620           4 :             continue;
     621             :         }
     622             : 
     623        5960 :         if (*p == '\'')
     624         172 :             appendPQExpBufferStr(buf, "'\"'\"'");
     625             :         else
     626        5788 :             appendPQExpBufferChar(buf, *p);
     627             :     }
     628         124 :     appendPQExpBufferChar(buf, '\'');
     629             : #else                           /* WIN32 */
     630             : 
     631             :     /*
     632             :      * A Windows system() argument experiences two layers of interpretation.
     633             :      * First, cmd.exe interprets the string.  Its behavior is undocumented,
     634             :      * but a caret escapes any byte except LF or CR that would otherwise have
     635             :      * special meaning.  Handling of a caret before LF or CR differs between
     636             :      * "cmd.exe /c" and other modes, and it is unusable here.
     637             :      *
     638             :      * Second, the new process parses its command line to construct argv (see
     639             :      * https://msdn.microsoft.com/en-us/library/17w5ykft.aspx).  This treats
     640             :      * backslash-double quote sequences specially.
     641             :      */
     642             :     appendPQExpBufferStr(buf, "^\"");
     643             :     for (p = str; *p; p++)
     644             :     {
     645             :         if (*p == '\n' || *p == '\r')
     646             :         {
     647             :             ok = false;
     648             :             continue;
     649             :         }
     650             : 
     651             :         /* Change N backslashes before a double quote to 2N+1 backslashes. */
     652             :         if (*p == '"')
     653             :         {
     654             :             while (backslash_run_length)
     655             :             {
     656             :                 appendPQExpBufferStr(buf, "^\\");
     657             :                 backslash_run_length--;
     658             :             }
     659             :             appendPQExpBufferStr(buf, "^\\");
     660             :         }
     661             :         else if (*p == '\\')
     662             :             backslash_run_length++;
     663             :         else
     664             :             backslash_run_length = 0;
     665             : 
     666             :         /*
     667             :          * Decline to caret-escape the most mundane characters, to ease
     668             :          * debugging and lest we approach the command length limit.
     669             :          */
     670             :         if (!((*p >= 'a' && *p <= 'z') ||
     671             :               (*p >= 'A' && *p <= 'Z') ||
     672             :               (*p >= '0' && *p <= '9')))
     673             :             appendPQExpBufferChar(buf, '^');
     674             :         appendPQExpBufferChar(buf, *p);
     675             :     }
     676             : 
     677             :     /*
     678             :      * Change N backslashes at end of argument to 2N backslashes, because they
     679             :      * precede the double quote that terminates the argument.
     680             :      */
     681             :     while (backslash_run_length)
     682             :     {
     683             :         appendPQExpBufferStr(buf, "^\\");
     684             :         backslash_run_length--;
     685             :     }
     686             :     appendPQExpBufferStr(buf, "^\"");
     687             : #endif                          /* WIN32 */
     688             : 
     689         124 :     return ok;
     690             : }
     691             : 
     692             : 
     693             : /*
     694             :  * Append the given string to the buffer, with suitable quoting for passing
     695             :  * the string as a value in a keyword/value pair in a libpq connection string.
     696             :  */
     697             : void
     698        2386 : appendConnStrVal(PQExpBuffer buf, const char *str)
     699             : {
     700             :     const char *s;
     701             :     bool        needquotes;
     702             : 
     703             :     /*
     704             :      * If the string is one or more plain ASCII characters, no need to quote
     705             :      * it. This is quite conservative, but better safe than sorry.
     706             :      */
     707        2386 :     needquotes = true;
     708       16822 :     for (s = str; *s; s++)
     709             :     {
     710       15266 :         if (!((*s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') ||
     711        2168 :               (*s >= '0' && *s <= '9') || *s == '_' || *s == '.'))
     712             :         {
     713         830 :             needquotes = true;
     714         830 :             break;
     715             :         }
     716       14436 :         needquotes = false;
     717             :     }
     718             : 
     719        2386 :     if (needquotes)
     720             :     {
     721         830 :         appendPQExpBufferChar(buf, '\'');
     722       21388 :         while (*str)
     723             :         {
     724             :             /* ' and \ must be escaped to \' and \\ */
     725       20558 :             if (*str == '\'' || *str == '\\')
     726         572 :                 appendPQExpBufferChar(buf, '\\');
     727             : 
     728       20558 :             appendPQExpBufferChar(buf, *str);
     729       20558 :             str++;
     730             :         }
     731         830 :         appendPQExpBufferChar(buf, '\'');
     732             :     }
     733             :     else
     734        1556 :         appendPQExpBufferStr(buf, str);
     735        2386 : }
     736             : 
     737             : 
     738             : /*
     739             :  * Append a psql meta-command that connects to the given database with the
     740             :  * then-current connection's user, host and port.
     741             :  */
     742             : void
     743          70 : appendPsqlMetaConnect(PQExpBuffer buf, const char *dbname)
     744             : {
     745             :     const char *s;
     746             :     bool complex;
     747             : 
     748             :     /*
     749             :      * If the name is plain ASCII characters, emit a trivial "\connect "foo"".
     750             :      * For other names, even many not technically requiring it, skip to the
     751             :      * general case.  No database has a zero-length name.
     752             :      */
     753          70 :     complex = false;
     754             : 
     755        1822 :     for (s = dbname; *s; s++)
     756             :     {
     757        1752 :         if (*s == '\n' || *s == '\r')
     758             :         {
     759           0 :             fprintf(stderr,
     760           0 :                     _("database name contains a newline or carriage return: \"%s\"\n"),
     761             :                     dbname);
     762           0 :             exit(EXIT_FAILURE);
     763             :         }
     764             : 
     765        1752 :         if (!((*s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') ||
     766         770 :               (*s >= '0' && *s <= '9') || *s == '_' || *s == '.'))
     767             :         {
     768         654 :             complex = true;
     769             :         }
     770             :     }
     771             : 
     772          70 :     if (complex)
     773             :     {
     774             :         PQExpBufferData connstr;
     775             : 
     776          20 :         initPQExpBuffer(&connstr);
     777             : 
     778             :         /*
     779             :          * Force the target psql's encoding to SQL_ASCII.  We don't really
     780             :          * know the encoding of the database name, and it doesn't matter as
     781             :          * long as psql will forward it to the server unchanged.
     782             :          */
     783          20 :         appendPQExpBufferStr(buf, "\\encoding SQL_ASCII\n");
     784          20 :         appendPQExpBufferStr(buf, "\\connect -reuse-previous=on ");
     785             : 
     786          20 :         appendPQExpBufferStr(&connstr, "dbname=");
     787          20 :         appendConnStrVal(&connstr, dbname);
     788             : 
     789             :         /*
     790             :          * As long as the name does not contain a newline, SQL identifier
     791             :          * quoting satisfies the psql meta-command parser.  Prefer not to
     792             :          * involve psql-interpreted single quotes, which behaved differently
     793             :          * before PostgreSQL 9.2.
     794             :          */
     795          20 :         appendPQExpBufferStr(buf, fmtIdEnc(connstr.data, PG_SQL_ASCII));
     796             : 
     797          20 :         termPQExpBuffer(&connstr);
     798             :     }
     799             :     else
     800             :     {
     801          50 :         appendPQExpBufferStr(buf, "\\connect ");
     802          50 :         appendPQExpBufferStr(buf, fmtIdEnc(dbname, PG_SQL_ASCII));
     803             :     }
     804          70 :     appendPQExpBufferChar(buf, '\n');
     805          70 : }
     806             : 
     807             : 
     808             : /*
     809             :  * Deconstruct the text representation of a 1-dimensional Postgres array
     810             :  * into individual items.
     811             :  *
     812             :  * On success, returns true and sets *itemarray and *nitems to describe
     813             :  * an array of individual strings.  On parse failure, returns false;
     814             :  * *itemarray may exist or be NULL.
     815             :  *
     816             :  * NOTE: free'ing itemarray is sufficient to deallocate the working storage.
     817             :  */
     818             : bool
     819       92896 : parsePGArray(const char *atext, char ***itemarray, int *nitems)
     820             : {
     821             :     int         inputlen;
     822             :     char      **items;
     823             :     char       *strings;
     824             :     int         curitem;
     825             : 
     826             :     /*
     827             :      * We expect input in the form of "{item,item,item}" where any item is
     828             :      * either raw data, or surrounded by double quotes (in which case embedded
     829             :      * characters including backslashes and quotes are backslashed).
     830             :      *
     831             :      * We build the result as an array of pointers followed by the actual
     832             :      * string data, all in one malloc block for convenience of deallocation.
     833             :      * The worst-case storage need is not more than one pointer and one
     834             :      * character for each input character (consider "{,,,,,,,,,,}").
     835             :      */
     836       92896 :     *itemarray = NULL;
     837       92896 :     *nitems = 0;
     838       92896 :     inputlen = strlen(atext);
     839       92896 :     if (inputlen < 2 || atext[0] != '{' || atext[inputlen - 1] != '}')
     840           0 :         return false;           /* bad input */
     841       92896 :     items = (char **) malloc(inputlen * (sizeof(char *) + sizeof(char)));
     842       92896 :     if (items == NULL)
     843           0 :         return false;           /* out of memory */
     844       92896 :     *itemarray = items;
     845       92896 :     strings = (char *) (items + inputlen);
     846             : 
     847       92896 :     atext++;                    /* advance over initial '{' */
     848       92896 :     curitem = 0;
     849      254466 :     while (*atext != '}')
     850             :     {
     851      161570 :         if (*atext == '\0')
     852           0 :             return false;       /* premature end of string */
     853      161570 :         items[curitem] = strings;
     854     3271414 :         while (*atext != '}' && *atext != ',')
     855             :         {
     856     3109844 :             if (*atext == '\0')
     857           0 :                 return false;   /* premature end of string */
     858     3109844 :             if (*atext != '"')
     859     3109454 :                 *strings++ = *atext++;  /* copy unquoted data */
     860             :             else
     861             :             {
     862             :                 /* process quoted substring */
     863         390 :                 atext++;
     864       13268 :                 while (*atext != '"')
     865             :                 {
     866       12878 :                     if (*atext == '\0')
     867           0 :                         return false;   /* premature end of string */
     868       12878 :                     if (*atext == '\\')
     869             :                     {
     870        1950 :                         atext++;
     871        1950 :                         if (*atext == '\0')
     872           0 :                             return false;   /* premature end of string */
     873             :                     }
     874       12878 :                     *strings++ = *atext++;  /* copy quoted data */
     875             :                 }
     876         390 :                 atext++;
     877             :             }
     878             :         }
     879      161570 :         *strings++ = '\0';
     880      161570 :         if (*atext == ',')
     881       71446 :             atext++;
     882      161570 :         curitem++;
     883             :     }
     884       92896 :     if (atext[1] != '\0')
     885           0 :         return false;           /* bogus syntax (embedded '}') */
     886       92896 :     *nitems = curitem;
     887       92896 :     return true;
     888             : }
     889             : 
     890             : 
     891             : /*
     892             :  * Append one element to the text representation of a 1-dimensional Postgres
     893             :  * array.
     894             :  *
     895             :  * The caller must provide the initial '{' and closing '}' of the array.
     896             :  * This function handles all else, including insertion of commas and
     897             :  * quoting of values.
     898             :  *
     899             :  * We assume that typdelim is ','.
     900             :  */
     901             : void
     902         620 : appendPGArray(PQExpBuffer buffer, const char *value)
     903             : {
     904             :     bool        needquote;
     905             :     const char *tmp;
     906             : 
     907         620 :     if (buffer->data[buffer->len - 1] != '{')
     908         310 :         appendPQExpBufferChar(buffer, ',');
     909             : 
     910             :     /* Decide if we need quotes; this should match array_out()'s choices. */
     911         620 :     if (value[0] == '\0')
     912           0 :         needquote = true;       /* force quotes for empty string */
     913         620 :     else if (pg_strcasecmp(value, "NULL") == 0)
     914           0 :         needquote = true;       /* force quotes for literal NULL */
     915             :     else
     916         620 :         needquote = false;
     917             : 
     918         620 :     if (!needquote)
     919             :     {
     920       12880 :         for (tmp = value; *tmp; tmp++)
     921             :         {
     922       12468 :             char        ch = *tmp;
     923             : 
     924       12468 :             if (ch == '"' || ch == '\\' ||
     925       12260 :                 ch == '{' || ch == '}' || ch == ',' ||
     926             :             /* these match scanner_isspace(): */
     927       12260 :                 ch == ' ' || ch == '\t' || ch == '\n' ||
     928       12260 :                 ch == '\r' || ch == '\v' || ch == '\f')
     929             :             {
     930         208 :                 needquote = true;
     931         208 :                 break;
     932             :             }
     933             :         }
     934             :     }
     935             : 
     936         620 :     if (needquote)
     937             :     {
     938         208 :         appendPQExpBufferChar(buffer, '"');
     939        9048 :         for (tmp = value; *tmp; tmp++)
     940             :         {
     941        8840 :             char        ch = *tmp;
     942             : 
     943        8840 :             if (ch == '"' || ch == '\\')
     944        1560 :                 appendPQExpBufferChar(buffer, '\\');
     945        8840 :             appendPQExpBufferChar(buffer, ch);
     946             :         }
     947         208 :         appendPQExpBufferChar(buffer, '"');
     948             :     }
     949             :     else
     950         412 :         appendPQExpBufferStr(buffer, value);
     951         620 : }
     952             : 
     953             : 
     954             : /*
     955             :  * Format a reloptions array and append it to the given buffer.
     956             :  *
     957             :  * "prefix" is prepended to the option names; typically it's "" or "toast.".
     958             :  *
     959             :  * Returns false if the reloptions array could not be parsed (in which case
     960             :  * nothing will have been appended to the buffer), or true on success.
     961             :  *
     962             :  * Note: this logic should generally match the backend's flatten_reloptions()
     963             :  * (in adt/ruleutils.c).
     964             :  */
     965             : bool
     966         424 : appendReloptionsArray(PQExpBuffer buffer, const char *reloptions,
     967             :                       const char *prefix, int encoding, bool std_strings)
     968             : {
     969             :     char      **options;
     970             :     int         noptions;
     971             :     int         i;
     972             : 
     973         424 :     if (!parsePGArray(reloptions, &options, &noptions))
     974             :     {
     975           0 :         free(options);
     976           0 :         return false;
     977             :     }
     978             : 
     979         966 :     for (i = 0; i < noptions; i++)
     980             :     {
     981         542 :         char       *option = options[i];
     982             :         char       *name;
     983             :         char       *separator;
     984             :         char       *value;
     985             : 
     986             :         /*
     987             :          * Each array element should have the form name=value.  If the "=" is
     988             :          * missing for some reason, treat it like an empty value.
     989             :          */
     990         542 :         name = option;
     991         542 :         separator = strchr(option, '=');
     992         542 :         if (separator)
     993             :         {
     994         542 :             *separator = '\0';
     995         542 :             value = separator + 1;
     996             :         }
     997             :         else
     998           0 :             value = "";
     999             : 
    1000         542 :         if (i > 0)
    1001         118 :             appendPQExpBufferStr(buffer, ", ");
    1002         542 :         appendPQExpBuffer(buffer, "%s%s=", prefix, fmtId(name));
    1003             : 
    1004             :         /*
    1005             :          * In general we need to quote the value; but to avoid unnecessary
    1006             :          * clutter, do not quote if it is an identifier that would not need
    1007             :          * quoting.  (We could also allow numbers, but that is a bit trickier
    1008             :          * than it looks --- for example, are leading zeroes significant?  We
    1009             :          * don't want to assume very much here about what custom reloptions
    1010             :          * might mean.)
    1011             :          */
    1012         542 :         if (strcmp(fmtId(value), value) == 0)
    1013          64 :             appendPQExpBufferStr(buffer, value);
    1014             :         else
    1015         478 :             appendStringLiteral(buffer, value, encoding, std_strings);
    1016             :     }
    1017             : 
    1018         424 :     free(options);
    1019             : 
    1020         424 :     return true;
    1021             : }
    1022             : 
    1023             : 
    1024             : /*
    1025             :  * processSQLNamePattern
    1026             :  *
    1027             :  * Scan a wildcard-pattern string and generate appropriate WHERE clauses
    1028             :  * to limit the set of objects returned.  The WHERE clauses are appended
    1029             :  * to the already-partially-constructed query in buf.  Returns whether
    1030             :  * any clause was added.
    1031             :  *
    1032             :  * conn: connection query will be sent to (consulted for escaping rules).
    1033             :  * buf: output parameter.
    1034             :  * pattern: user-specified pattern option, or NULL if none ("*" is implied).
    1035             :  * have_where: true if caller already emitted "WHERE" (clauses will be ANDed
    1036             :  * onto the existing WHERE clause).
    1037             :  * force_escape: always quote regexp special characters, even outside
    1038             :  * double quotes (else they are quoted only between double quotes).
    1039             :  * schemavar: name of query variable to match against a schema-name pattern.
    1040             :  * Can be NULL if no schema.
    1041             :  * namevar: name of query variable to match against an object-name pattern.
    1042             :  * altnamevar: NULL, or name of an alternative variable to match against name.
    1043             :  * visibilityrule: clause to use if we want to restrict to visible objects
    1044             :  * (for example, "pg_catalog.pg_table_is_visible(p.oid)").  Can be NULL.
    1045             :  * dbnamebuf: output parameter receiving the database name portion of the
    1046             :  * pattern, if any.  Can be NULL.
    1047             :  * dotcnt: how many separators were parsed from the pattern, by reference.
    1048             :  *
    1049             :  * Formatting note: the text already present in buf should end with a newline.
    1050             :  * The appended text, if any, will end with one too.
    1051             :  */
    1052             : bool
    1053        7130 : processSQLNamePattern(PGconn *conn, PQExpBuffer buf, const char *pattern,
    1054             :                       bool have_where, bool force_escape,
    1055             :                       const char *schemavar, const char *namevar,
    1056             :                       const char *altnamevar, const char *visibilityrule,
    1057             :                       PQExpBuffer dbnamebuf, int *dotcnt)
    1058             : {
    1059             :     PQExpBufferData schemabuf;
    1060             :     PQExpBufferData namebuf;
    1061        7130 :     bool        added_clause = false;
    1062             :     int         dcnt;
    1063             : 
    1064             : #define WHEREAND() \
    1065             :     (appendPQExpBufferStr(buf, have_where ? "  AND " : "WHERE "), \
    1066             :      have_where = true, added_clause = true)
    1067             : 
    1068        7130 :     if (dotcnt == NULL)
    1069          12 :         dotcnt = &dcnt;
    1070        7130 :     *dotcnt = 0;
    1071        7130 :     if (pattern == NULL)
    1072             :     {
    1073             :         /* Default: select all visible objects */
    1074         490 :         if (visibilityrule)
    1075             :         {
    1076         120 :             WHEREAND();
    1077         120 :             appendPQExpBuffer(buf, "%s\n", visibilityrule);
    1078             :         }
    1079         490 :         return added_clause;
    1080             :     }
    1081             : 
    1082        6640 :     initPQExpBuffer(&schemabuf);
    1083        6640 :     initPQExpBuffer(&namebuf);
    1084             : 
    1085             :     /*
    1086             :      * Convert shell-style 'pattern' into the regular expression(s) we want to
    1087             :      * execute.  Quoting/escaping into SQL literal format will be done below
    1088             :      * using appendStringLiteralConn().
    1089             :      *
    1090             :      * If the caller provided a schemavar, we want to split the pattern on
    1091             :      * ".", otherwise not.
    1092             :      */
    1093        6640 :     patternToSQLRegex(PQclientEncoding(conn),
    1094             :                       (schemavar ? dbnamebuf : NULL),
    1095             :                       (schemavar ? &schemabuf : NULL),
    1096             :                       &namebuf,
    1097             :                       pattern, force_escape, true, dotcnt);
    1098             : 
    1099             :     /*
    1100             :      * Now decide what we need to emit.  We may run under a hostile
    1101             :      * search_path, so qualify EVERY name.  Note there will be a leading "^("
    1102             :      * in the patterns in any case.
    1103             :      *
    1104             :      * We want the regex matches to use the database's default collation where
    1105             :      * collation-sensitive behavior is required (for example, which characters
    1106             :      * match '\w').  That happened by default before PG v12, but if the server
    1107             :      * is >= v12 then we need to force it through explicit COLLATE clauses,
    1108             :      * otherwise the "C" collation attached to "name" catalog columns wins.
    1109             :      */
    1110        6640 :     if (namevar && namebuf.len > 2)
    1111             :     {
    1112             :         /* We have a name pattern, so constrain the namevar(s) */
    1113             : 
    1114             :         /* Optimize away a "*" pattern */
    1115        6640 :         if (strcmp(namebuf.data, "^(.*)$") != 0)
    1116             :         {
    1117        6556 :             WHEREAND();
    1118        6556 :             if (altnamevar)
    1119             :             {
    1120         228 :                 appendPQExpBuffer(buf,
    1121             :                                   "(%s OPERATOR(pg_catalog.~) ", namevar);
    1122         228 :                 appendStringLiteralConn(buf, namebuf.data, conn);
    1123         228 :                 if (PQserverVersion(conn) >= 120000)
    1124         228 :                     appendPQExpBufferStr(buf, " COLLATE pg_catalog.default");
    1125         228 :                 appendPQExpBuffer(buf,
    1126             :                                   "\n        OR %s OPERATOR(pg_catalog.~) ",
    1127             :                                   altnamevar);
    1128         228 :                 appendStringLiteralConn(buf, namebuf.data, conn);
    1129         228 :                 if (PQserverVersion(conn) >= 120000)
    1130         228 :                     appendPQExpBufferStr(buf, " COLLATE pg_catalog.default");
    1131         228 :                 appendPQExpBufferStr(buf, ")\n");
    1132             :             }
    1133             :             else
    1134             :             {
    1135        6328 :                 appendPQExpBuffer(buf, "%s OPERATOR(pg_catalog.~) ", namevar);
    1136        6328 :                 appendStringLiteralConn(buf, namebuf.data, conn);
    1137        6328 :                 if (PQserverVersion(conn) >= 120000)
    1138        6328 :                     appendPQExpBufferStr(buf, " COLLATE pg_catalog.default");
    1139        6328 :                 appendPQExpBufferChar(buf, '\n');
    1140             :             }
    1141             :         }
    1142             :     }
    1143             : 
    1144        6640 :     if (schemavar && schemabuf.len > 2)
    1145             :     {
    1146             :         /* We have a schema pattern, so constrain the schemavar */
    1147             : 
    1148             :         /* Optimize away a "*" pattern */
    1149        1436 :         if (strcmp(schemabuf.data, "^(.*)$") != 0 && schemavar)
    1150             :         {
    1151        1430 :             WHEREAND();
    1152        1430 :             appendPQExpBuffer(buf, "%s OPERATOR(pg_catalog.~) ", schemavar);
    1153        1430 :             appendStringLiteralConn(buf, schemabuf.data, conn);
    1154        1430 :             if (PQserverVersion(conn) >= 120000)
    1155        1430 :                 appendPQExpBufferStr(buf, " COLLATE pg_catalog.default");
    1156        1430 :             appendPQExpBufferChar(buf, '\n');
    1157             :         }
    1158             :     }
    1159             :     else
    1160             :     {
    1161             :         /* No schema pattern given, so select only visible objects */
    1162        5204 :         if (visibilityrule)
    1163             :         {
    1164        4158 :             WHEREAND();
    1165        4158 :             appendPQExpBuffer(buf, "%s\n", visibilityrule);
    1166             :         }
    1167             :     }
    1168             : 
    1169        6640 :     termPQExpBuffer(&schemabuf);
    1170        6640 :     termPQExpBuffer(&namebuf);
    1171             : 
    1172        6640 :     return added_clause;
    1173             : #undef WHEREAND
    1174             : }
    1175             : 
    1176             : /*
    1177             :  * Transform a possibly qualified shell-style object name pattern into up to
    1178             :  * three SQL-style regular expressions, converting quotes, lower-casing
    1179             :  * unquoted letters, and adjusting shell-style wildcard characters into regexp
    1180             :  * notation.
    1181             :  *
    1182             :  * If the dbnamebuf and schemabuf arguments are non-NULL, and the pattern
    1183             :  * contains two or more dbname/schema/name separators, we parse the portions of
    1184             :  * the pattern prior to the first and second separators into dbnamebuf and
    1185             :  * schemabuf, and the rest into namebuf.
    1186             :  *
    1187             :  * If dbnamebuf is NULL and schemabuf is non-NULL, and the pattern contains at
    1188             :  * least one separator, we parse the first portion into schemabuf and the rest
    1189             :  * into namebuf.
    1190             :  *
    1191             :  * Otherwise, we parse all the pattern into namebuf.
    1192             :  *
    1193             :  * If the pattern contains more dotted parts than buffers to parse into, the
    1194             :  * extra dots will be treated as literal characters and written into the
    1195             :  * namebuf, though they will be counted.  Callers should always check the value
    1196             :  * returned by reference in dotcnt and handle this error case appropriately.
    1197             :  *
    1198             :  * We surround the regexps with "^(...)$" to force them to match whole strings,
    1199             :  * as per SQL practice.  We have to have parens in case strings contain "|",
    1200             :  * else the "^" and "$" will be bound into the first and last alternatives
    1201             :  * which is not what we want.  Whether this is done for dbnamebuf is controlled
    1202             :  * by the want_literal_dbname parameter.
    1203             :  *
    1204             :  * The regexps we parse into the buffers are appended to the data (if any)
    1205             :  * already present.  If we parse fewer fields than the number of buffers we
    1206             :  * were given, the extra buffers are unaltered.
    1207             :  *
    1208             :  * encoding: the character encoding for the given pattern
    1209             :  * dbnamebuf: output parameter receiving the database name portion of the
    1210             :  * pattern, if any.  Can be NULL.
    1211             :  * schemabuf: output parameter receiving the schema name portion of the
    1212             :  * pattern, if any.  Can be NULL.
    1213             :  * namebuf: output parameter receiving the object name portion of the
    1214             :  * pattern (everything not parsed into dbnamebuf/schemabuf).  Must not be NULL.
    1215             :  * pattern: user-specified pattern option.  Must not be NULL.
    1216             :  * force_escape: always quote regexp special characters, even outside
    1217             :  * double quotes (else they are quoted only between double quotes).
    1218             :  * want_literal_dbname: if true, regexp special characters within the database
    1219             :  * name portion of the pattern will not be escaped, nor will the dbname be
    1220             :  * converted into a regular expression.
    1221             :  * dotcnt: output parameter receiving the number of separators parsed from the
    1222             :  * pattern.  Must not be NULL.
    1223             :  */
    1224             : void
    1225        6844 : patternToSQLRegex(int encoding, PQExpBuffer dbnamebuf, PQExpBuffer schemabuf,
    1226             :                   PQExpBuffer namebuf, const char *pattern, bool force_escape,
    1227             :                   bool want_literal_dbname, int *dotcnt)
    1228             : {
    1229             :     PQExpBufferData buf[3];     /* scratch regexps, one per dotted part */
    1230             :     PQExpBufferData left_literal;   /* unescaped copy of the part before the first unquoted dot */
    1231             :     PQExpBuffer curbuf;         /* element of buf[] currently being filled */
    1232             :     PQExpBuffer maxbuf;         /* last element of buf[] we may advance to */
    1233             :     int         i;
    1234             :     bool        inquotes;       /* inside a double-quoted section? */
    1235             :     bool        left;           /* still copying into left_literal? */
    1236             :     const char *cp;             /* scan position within pattern */
    1237             : 
    1238             :     Assert(pattern != NULL);
    1239             :     Assert(namebuf != NULL);
    1240             : 
    1241             :     /* callers should never expect "dbname.relname" format */
    1242             :     Assert(dbnamebuf == NULL || schemabuf != NULL);
    1243             :     Assert(dotcnt != NULL);
    1244             : 
    1245        6844 :     *dotcnt = 0;
    1246        6844 :     inquotes = false;
    1247        6844 :     cp = pattern;
    1248             : 
    1249        6844 :     if (dbnamebuf != NULL)
    1250        5690 :         maxbuf = &buf[2];       /* up to three dotted parts */
    1251        1154 :     else if (schemabuf != NULL)
    1252          58 :         maxbuf = &buf[1];       /* up to two dotted parts */
    1253             :     else
    1254        1096 :         maxbuf = &buf[0];       /* name only */
    1255             : 
    1256        6844 :     curbuf = &buf[0];
    1257        6844 :     if (want_literal_dbname)
    1258             :     {
    1259        6640 :         left = true;
    1260        6640 :         initPQExpBuffer(&left_literal);
    1261             :     }
    1262             :     else
    1263         204 :         left = false;           /* left_literal stays uninitialized and is never touched */
    1264        6844 :     initPQExpBuffer(curbuf);
    1265        6844 :     appendPQExpBufferStr(curbuf, "^(");     /* open the anchoring group */
    1266      121206 :     while (*cp)
    1267             :     {
    1268      114362 :         char        ch = *cp;
    1269             : 
    1270      114362 :         if (ch == '"')
    1271             :         {
    1272        3214 :             if (inquotes && cp[1] == '"')
    1273             :             {
    1274             :                 /* emit one quote, stay in inquotes mode */
    1275           6 :                 appendPQExpBufferChar(curbuf, '"');
    1276           6 :                 if (left)
    1277           6 :                     appendPQExpBufferChar(&left_literal, '"');
    1278           6 :                 cp++;           /* skip the first quote of the pair */
    1279             :             }
    1280             :             else
    1281        3208 :                 inquotes = !inquotes;
    1282        3214 :             cp++;
    1283             :         }
    1284      111148 :         else if (!inquotes && isupper((unsigned char) ch))
    1285             :         {
    1286         240 :             appendPQExpBufferChar(curbuf,
    1287         240 :                                   pg_tolower((unsigned char) ch));
    1288         240 :             if (left)
    1289         150 :                 appendPQExpBufferChar(&left_literal,
    1290         150 :                                       pg_tolower((unsigned char) ch));
    1291         240 :             cp++;
    1292             :         }
    1293      110908 :         else if (!inquotes && ch == '*')
    1294             :         {
    1295         416 :             appendPQExpBufferStr(curbuf, ".*");     /* shell "*" becomes regexp ".*" */
    1296         416 :             if (left)
    1297         310 :                 appendPQExpBufferChar(&left_literal, '*');
    1298         416 :             cp++;
    1299             :         }
    1300      110492 :         else if (!inquotes && ch == '?')
    1301             :         {
    1302           6 :             appendPQExpBufferChar(curbuf, '.');     /* shell "?" becomes regexp "." */
    1303           6 :             if (left)
    1304           6 :                 appendPQExpBufferChar(&left_literal, '?');
    1305           6 :             cp++;
    1306             :         }
    1307      110486 :         else if (!inquotes && ch == '.')
    1308             :         {
    1309        2826 :             left = false;       /* the leftmost part has ended */
    1310        2826 :             if (dotcnt)         /* redundant: dotcnt was already dereferenced above */
    1311        2826 :                 (*dotcnt)++;
    1312        2826 :             if (curbuf < maxbuf)    /* a buffer remains: close this regexp, start the next */
    1313             :             {
    1314        2242 :                 appendPQExpBufferStr(curbuf, ")$");
    1315        2242 :                 curbuf++;
    1316        2242 :                 initPQExpBuffer(curbuf);
    1317        2242 :                 appendPQExpBufferStr(curbuf, "^(");
    1318        2242 :                 cp++;
    1319             :             }
    1320             :             else
    1321         584 :                 appendPQExpBufferChar(curbuf, *cp++);   /* no buffer left: dot is copied into the current regexp as-is */
    1322             :         }
    1323      107660 :         else if (ch == '$')
    1324             :         {
    1325             :             /*
    1326             :              * Dollar is always quoted, whether inside quotes or not. The
    1327             :              * reason is that it's allowed in SQL identifiers, so there's a
    1328             :              * significant use-case for treating it literally, while because
    1329             :              * we anchor the pattern automatically there is no use-case for
    1330             :              * having it possess its regexp meaning.
    1331             :              */
    1332          12 :             appendPQExpBufferStr(curbuf, "\\$");
    1333          12 :             if (left)
    1334          12 :                 appendPQExpBufferChar(&left_literal, '$');
    1335          12 :             cp++;
    1336             :         }
    1337             :         else
    1338             :         {
    1339             :             /*
    1340             :              * Ordinary data character, transfer to pattern
    1341             :              *
    1342             :              * Inside double quotes, or at all times if force_escape is true,
    1343             :              * quote regexp special characters with a backslash to avoid
    1344             :              * regexp errors.  Outside quotes, however, let them pass through
    1345             :              * as-is; this lets knowledgeable users build regexp expressions
    1346             :              * that are more powerful than shell-style patterns.
    1347             :              *
    1348             :              * As an exception to that, though, always quote "[]", as that's
    1349             :              * much more likely to be an attempt to write an array type name
    1350             :              * than it is to be the start of a regexp bracket expression.
    1351             :              */
    1352      107648 :             if ((inquotes || force_escape) &&
    1353       29992 :                 strchr("|*+?()[]{}.^$\\", ch))
    1354        3874 :                 appendPQExpBufferChar(curbuf, '\\');
    1355      103774 :             else if (ch == '[' && cp[1] == ']')
    1356           6 :                 appendPQExpBufferChar(curbuf, '\\');
    1357      107648 :             i = PQmblenBounded(cp, encoding);   /* i = number of bytes copied for this character */
    1358      215296 :             while (i--)
    1359             :             {
    1360      107648 :                 if (left)
    1361       74220 :                     appendPQExpBufferChar(&left_literal, *cp);
    1362      107648 :                 appendPQExpBufferChar(curbuf, *cp++);
    1363             :             }
    1364             :         }
    1365             :     }
    1366        6844 :     appendPQExpBufferStr(curbuf, ")$");     /* close the last regexp */
    1367             : 
    1368        6844 :     if (namebuf)                /* expected to always hold; see Assert(namebuf != NULL) above */
    1369             :     {
    1370        6844 :         appendPQExpBufferStr(namebuf, curbuf->data);    /* rightmost part is the name */
    1371        6844 :         termPQExpBuffer(curbuf);
    1372        6844 :         curbuf--;               /* NOTE(review): when curbuf == buf this forms a pointer before the array start, which is undefined in ISO C; the ">= buf" tests below depend on it */
    1373             :     }
    1374             : 
    1375        6844 :     if (schemabuf && curbuf >= buf)     /* a part preceding the name was parsed */
    1376             :     {
    1377        1488 :         appendPQExpBufferStr(schemabuf, curbuf->data);
    1378        1488 :         termPQExpBuffer(curbuf);
    1379        1488 :         curbuf--;
    1380             :     }
    1381             : 
    1382        6844 :     if (dbnamebuf && curbuf >= buf)     /* a third, leftmost part was parsed */
    1383             :     {
    1384         754 :         if (want_literal_dbname)
    1385         720 :             appendPQExpBufferStr(dbnamebuf, left_literal.data);     /* unescaped text, not a regexp */
    1386             :         else
    1387          34 :             appendPQExpBufferStr(dbnamebuf, curbuf->data);
    1388         754 :         termPQExpBuffer(curbuf);
    1389             :     }
    1390             : 
    1391        6844 :     if (want_literal_dbname)
    1392        6640 :         termPQExpBuffer(&left_literal);     /* only initialized in this case */
    1393        6844 : }

Generated by: LCOV version 1.14