LCOV - code coverage report
Current view: top level - src/fe_utils - string_utils.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 91.6 % 438 401
Test Date: 2026-02-17 17:20:33 Functions: 95.2 % 21 20
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * String-processing utility routines for frontend code
       4              :  *
       5              :  * Assorted utility functions that are useful in constructing SQL queries
       6              :  * and interpreting backend output.
       7              :  *
       8              :  *
       9              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      10              :  * Portions Copyright (c) 1994, Regents of the University of California
      11              :  *
      12              :  * src/fe_utils/string_utils.c
      13              :  *
      14              :  *-------------------------------------------------------------------------
      15              :  */
      16              : #include "postgres_fe.h"
      17              : 
      18              : #include <ctype.h>
      19              : 
      20              : #include "common/keywords.h"
      21              : #include "fe_utils/string_utils.h"
      22              : #include "mb/pg_wchar.h"
      23              : 
      24              : static PQExpBuffer defaultGetLocalPQExpBuffer(void);
      25              : 
      26              : /* Globals exported by this file */
      27              : int         quote_all_identifiers = 0;  /* nonzero: fmtIdEnc() quotes every identifier */
      28              : PQExpBuffer (*getLocalPQExpBuffer) (void) = defaultGetLocalPQExpBuffer;
      29              : 
      30              : static int  fmtIdEncoding = -1; /* -1 means "not set yet"; see setFmtEncoding() */
      31              : 
      32              : 
      33              : /*
      34              :  * Returns a temporary PQExpBuffer, valid only until the next call to this
      35              :  * function.  fmtIdEnc() and fmtQualifiedIdEnc() obtain it through the
      36              :  * getLocalPQExpBuffer function pointer, which defaults to this function.
      37              :  * Non-reentrant and non-thread-safe (one static buffer is reused for every
      38              :  * call), but that reuse reduces memory leakage.  Assign a custom function to
      39              :  * getLocalPQExpBuffer to replace this behaviour.
      40              :  */
      41              : static PQExpBuffer
      42       354035 : defaultGetLocalPQExpBuffer(void)
      43              : {
      44              :     static PQExpBuffer id_return = NULL;
      45              : 
      46       354035 :     if (id_return)              /* already created by an earlier call? */
      47              :     {
      48              :         /* same buffer, just wipe contents */
      49       353575 :         resetPQExpBuffer(id_return);
      50              :     }
      51              :     else
      52              :     {
      53              :         /* first call: create the buffer that all later calls reuse */
      54          460 :         id_return = createPQExpBuffer();
      55              :     }
      56              : 
      57       354035 :     return id_return;
      58              : }
      59              : 
      60              : /*
      61              :  * Set the encoding that fmtId() and fmtQualifiedId() assume for their input.
      62              :  *
      63              :  * The value is kept in one file-level variable, so this is not safe when
      64              :  * several connections use different encodings; there is no other real way to
      65              :  * tell fmtId()/fmtQualifiedId() the input encoding needed for safe escaping.
      66              :  * Eventually we should get rid of fmtId().
      67              :  */
      68              : void
      69        10265 : setFmtEncoding(int encoding)
      70              : {
      71        10265 :     fmtIdEncoding = encoding;   /* read back by getFmtEncoding() */
      72        10265 : }
      73              : 
      74              : /*
      75              :  * Return the encoding stored by setFmtEncoding() for fmtId()/fmtQualifiedId().
      76              :  */
      77              : static int
      78       238604 : getFmtEncoding(void)
      79              : {
      80       238604 :     if (fmtIdEncoding != -1)
      81       238604 :         return fmtIdEncoding;
      82              : 
      83              :     /*
      84              :      * In assertion builds it seems best to fail hard if the encoding was not
      85              :      * set, to make it easier to find places with missing calls. But in
      86              :      * production builds that seems like a bad idea, thus we instead just
      87              :      * default to UTF-8.
      88              :      */
      89              :     Assert(fmtIdEncoding != -1);
      90              : 
      91            0 :     return PG_UTF8;             /* fallback when the encoding was never set */
      92              : }
      93              : 
      94              : /*
      95              :  *  Returns rawid, double-quoted if it is not a legitimate SQL identifier as-is,
      96              :  *  is a keyword other than an unreserved one, or quote_all_identifiers is set.
      97              :  *  'encoding' is the encoding of rawid.  Use the returned string before calling
      98              :  *  fmtIdEnc again, since we re-use the same return buffer each time.
      99              :  */
     100              : const char *
     101       299096 : fmtIdEnc(const char *rawid, int encoding)
     102              : {
     103       299096 :     PQExpBuffer id_return = getLocalPQExpBuffer();
     104              : 
     105              :     const char *cp;
     106       299096 :     bool        need_quotes = false;
     107       299096 :     size_t      remaining = strlen(rawid);  /* byte length; counts down while copying */
     108              : 
     109              :     /*
     110              :      * These checks need to match the identifier production in scan.l. Don't
     111              :      * use islower() etc.
     112              :      */
     113       299096 :     if (quote_all_identifiers)
     114        22146 :         need_quotes = true;
     115              :     /* slightly different rules for first character: digits are not allowed */
     116       276950 :     else if (!((rawid[0] >= 'a' && rawid[0] <= 'z') || rawid[0] == '_'))
     117          705 :         need_quotes = true;
     118              :     else
     119              :     {
     120              :         /* otherwise check the entire string */
     121       276245 :         cp = rawid;
     122      2983815 :         for (size_t i = 0; i < remaining; i++, cp++)
     123              :         {
     124      2718631 :             if (!((*cp >= 'a' && *cp <= 'z')
     125       361174 :                   || (*cp >= '0' && *cp <= '9')
     126       248967 :                   || (*cp == '_')))
     127              :             {
     128        11061 :                 need_quotes = true;
     129        11061 :                 break;
     130              :             }
     131              :         }
     132              :     }
     133              : 
     134       299096 :     if (!need_quotes)
     135              :     {
     136              :         /*
     137              :          * Check for keyword.  We quote keywords except for unreserved ones.
     138              :          * (In some cases we could avoid quoting a col_name or type_func_name
     139              :          * keyword, but it seems much harder than it's worth to tell that.)
     140              :          *
     141              :          * Note: ScanKeywordLookup() does case-insensitive comparison, but
     142              :          * that's fine, since we already know we have all-lower-case.
     143              :          */
     144       265184 :         int         kwnum = ScanKeywordLookup(rawid, &ScanKeywords);
     145              : 
     146       265184 :         if (kwnum >= 0 && ScanKeywordCategories[kwnum] != UNRESERVED_KEYWORD)
     147          728 :             need_quotes = true;
     148              :     }
     149              : 
     150       299096 :     if (!need_quotes)
     151              :     {
     152              :         /* no quoting needed */
     153       264456 :         appendPQExpBufferStr(id_return, rawid);
     154              :     }
     155              :     else
     156              :     {
     157        34640 :         appendPQExpBufferChar(id_return, '"');
     158              : 
     159        34640 :         cp = &rawid[0];
     160       407158 :         while (remaining > 0)
     161              :         {
     162              :             int         charlen;
     163              : 
     164              :             /* Fast path for plain ASCII */
     165       372518 :             if (!IS_HIGHBIT_SET(*cp))
     166              :             {
     167              :                 /*
     168              :                  * Did we find a double-quote in the string? Then make this a
     169              :                  * double double-quote per SQL99. Before, we put in a
     170              :                  * backslash/double-quote pair. - thomas 2000-08-05
     171              :                  */
     172       369775 :                 if (*cp == '"')
     173          334 :                     appendPQExpBufferChar(id_return, '"');
     174       369775 :                 appendPQExpBufferChar(id_return, *cp);
     175       369775 :                 remaining--;
     176       369775 :                 cp++;
     177       369775 :                 continue;
     178              :             }
     179              : 
     180              :             /* Slow path for possible multibyte characters */
     181         2743 :             charlen = pg_encoding_mblen(encoding, cp);
     182              : 
     183         5458 :             if (remaining < charlen ||
     184         2715 :                 pg_encoding_verifymbchar(encoding, cp, charlen) == -1)
     185              :             {
     186              :                 /*
     187              :                  * Multibyte character is invalid.  It's important to verify
     188              :                  * that as invalid multibyte characters could e.g. be used to
     189              :                  * "skip" over quote characters, e.g. when parsing
     190              :                  * character-by-character.
     191              :                  *
     192              :                  * Replace the character's first byte with an invalid
     193              :                  * sequence. The invalid sequence ensures that the escaped
     194              :                  * string will trigger an error on the server-side, even if we
     195              :                  * can't directly report an error here.
     196              :                  *
     197              :                  * It would be a bit faster to verify the whole string the
     198              :                  * first time we encounter a set highbit, but this way we can
     199              :                  * replace just the invalid data, which probably makes it
     200              :                  * easier for users to find the invalidly encoded portion of a
     201              :                  * larger string.
     202              :                  */
     203           41 :                 if (enlargePQExpBuffer(id_return, 2))
     204              :                 {
     205           41 :                     pg_encoding_set_invalid(encoding,
     206           41 :                                             id_return->data + id_return->len);
     207           41 :                     id_return->len += 2;    /* the invalid sequence is 2 bytes long */
     208           41 :                     id_return->data[id_return->len] = '\0'; /* keep the buffer NUL-terminated */
     209              :                 }
     210              : 
     211              :                 /*
     212              :                  * Handle the following bytes as if this byte didn't exist.
     213              :                  * That's safer in case the subsequent bytes contain
     214              :                  * characters that are significant for the caller (e.g. '>' in
     215              :                  * html).
     216              :                  */
     217           41 :                 remaining--;
     218           41 :                 cp++;
     219              :             }
     220              :             else
     221              :             {
     222         5417 :                 for (int i = 0; i < charlen; i++)
     223              :                 {
     224         2715 :                     appendPQExpBufferChar(id_return, *cp);
     225         2715 :                     remaining--;
     226         2715 :                     cp++;
     227              :                 }
     228              :             }
     229              :         }
     230              : 
     231        34640 :         appendPQExpBufferChar(id_return, '"');
     232              :     }
     233              : 
     234       299096 :     return id_return->data;
     235              : }
     236              : 
     237              : /*
     238              :  *  Quotes input string if it's not a legitimate SQL identifier as-is.
     239              :  *  Thin wrapper: calls fmtIdEnc() with the encoding from getFmtEncoding().
     240              :  *  Note that the returned string must be used before calling fmtId or
     241              :  *  fmtIdEnc again, since they re-use the same return buffer each time.
     242              :  *
     243              :  *  NB: This assumes setFmtEncoding() previously has been called to configure
     244              :  *  the encoding of rawid. It is preferable to use fmtIdEnc() with an
     245              :  *  explicit encoding.
     246              :  */
     247              : const char *
     248       189145 : fmtId(const char *rawid)
     249              : {
     250       189145 :     return fmtIdEnc(rawid, getFmtEncoding());
     251              : }
     252              : 
     253              : /*
     254              :  * fmtQualifiedIdEnc - construct a schema-qualified name, with quoting as
     255              :  * needed.  The "schema." prefix is omitted if schema is NULL or empty.
     256              :  *
     257              :  * Like fmtIdEnc, use the result before calling this or fmtIdEnc again.
     258              :  *
     259              :  * Since we call fmtIdEnc and it also uses getLocalPQExpBuffer() we cannot
     260              :  * use that buffer until we're finished with calling fmtIdEnc().
     261              :  */
     262              : const char *
     263        54939 : fmtQualifiedIdEnc(const char *schema, const char *id, int encoding)
     264              : {
     265              :     PQExpBuffer id_return;
     266        54939 :     PQExpBuffer lcl_pqexp = createPQExpBuffer();    /* private scratch buffer */
     267              : 
     268              :     /* Some callers might fail to provide a schema name */
     269        54939 :     if (schema && *schema)
     270              :     {
     271        54939 :         appendPQExpBuffer(lcl_pqexp, "%s.", fmtIdEnc(schema, encoding));
     272              :     }
     273        54939 :     appendPQExpBufferStr(lcl_pqexp, fmtIdEnc(id, encoding));
     274              : 
     275        54939 :     id_return = getLocalPQExpBuffer();  /* safe now: both fmtIdEnc() results are copied */
     276              : 
     277        54939 :     appendPQExpBufferStr(id_return, lcl_pqexp->data);
     278        54939 :     destroyPQExpBuffer(lcl_pqexp);
     279              : 
     280        54939 :     return id_return->data;
     281              : }
     282              : 
     283              : /*
     284              :  * fmtQualifiedId - construct a schema-qualified name, with quoting as needed.
     285              :  *
     286              :  * Like fmtId, use the result before calling again.
     287              :  *
     288              :  * This simply calls fmtQualifiedIdEnc() with the encoding returned by
     289              :  * getFmtEncoding().
     290              :  *
     291              :  * NB: This assumes setFmtEncoding() previously has been called to configure
     292              :  * the encoding of schema/id. It is preferable to use fmtQualifiedIdEnc()
     293              :  * with an explicit encoding.
     294              :  */
     295              : const char *
     296        49459 : fmtQualifiedId(const char *schema, const char *id)
     297              : {
     298        49459 :     return fmtQualifiedIdEnc(schema, id, getFmtEncoding());
     299              : }
     300              : 
     301              : 
     302              : /*
     303              :  * Format a Postgres version number (in the PG_VERSION_NUM integer format
     304              :  * returned by PQserverVersion()) as a string.  This exists mainly to
     305              :  * encapsulate knowledge about two-part (version_number >= 100000) vs.
     306              :  * three-part version numbers.  The last part is omitted unless include_minor.
     307              :  *
     308              :  * For reentrancy, caller must supply the buffer (buf, buflen bytes) the string
     309              :  * is put in.  Recommended size of the buffer is 32 bytes.
     310              :  * Returns address of 'buf', as a notational convenience.
     311              :  */
     312              : char *
     313            0 : formatPGVersionNumber(int version_number, bool include_minor,
     314              :                       char *buf, size_t buflen)
     315              : {
     316            0 :     if (version_number >= 100000)
     317              :     {
     318              :         /* New two-part style: first part is version_number / 10000 */
     319            0 :         if (include_minor)
     320            0 :             snprintf(buf, buflen, "%d.%d", version_number / 10000,
     321              :                      version_number % 10000);
     322              :         else
     323            0 :             snprintf(buf, buflen, "%d", version_number / 10000);
     324              :     }
     325              :     else
     326              :     {
     327              :         /* Old three-part style: two decimal digits each for parts two and three */
     328            0 :         if (include_minor)
     329            0 :             snprintf(buf, buflen, "%d.%d.%d", version_number / 10000,
     330            0 :                      (version_number / 100) % 100,
     331              :                      version_number % 100);
     332              :         else
     333            0 :             snprintf(buf, buflen, "%d.%d", version_number / 10000,
     334            0 :                      (version_number / 100) % 100);
     335              :     }
     336            0 :     return buf;
     337              : }
     338              : 
     339              : 
     340              : /*
     341              :  * Convert a string value to an SQL string literal and append it to
     342              :  * the given buffer.  We assume the specified client_encoding (encoding)
     343              :  * and standard_conforming_strings (std_strings) settings.  Nothing is
     344              :  * appended if enlargePQExpBuffer() fails.
     345              :  * This is essentially equivalent to libpq's PQescapeStringInternal,
     346              :  * except for the output buffer structure.  We need it in situations
     347              :  * where we do not have a PGconn available.  Where we do,
     348              :  * appendStringLiteralConn is a better choice.
     349              :  */
     350              : void
     351        37711 : appendStringLiteral(PQExpBuffer buf, const char *str,
     352              :                     int encoding, bool std_strings)
     353              : {
     354        37711 :     size_t      length = strlen(str);
     355        37711 :     const char *source = str;
     356              :     char       *target;
     357        37711 :     size_t      remaining = length;
     358              :     /* worst case: each input byte becomes two, plus the two enclosing quotes */
     359        37711 :     if (!enlargePQExpBuffer(buf, 2 * length + 2))
     360            0 :         return;
     361              : 
     362        37711 :     target = buf->data + buf->len;
     363        37711 :     *target++ = '\'';
     364              : 
     365       906049 :     while (remaining > 0)
     366              :     {
     367       868338 :         char        c = *source;
     368              :         int         charlen;
     369              :         int         i;
     370              : 
     371              :         /* Fast path for plain ASCII */
     372       868338 :         if (!IS_HIGHBIT_SET(c))
     373              :         {
     374              :             /* Apply quoting if needed: the character is emitted twice */
     375       868283 :             if (SQL_STR_DOUBLE(c, !std_strings))
     376          231 :                 *target++ = c;
     377              :             /* Copy the character */
     378       868283 :             *target++ = c;
     379       868283 :             source++;
     380       868283 :             remaining--;
     381       868283 :             continue;
     382              :         }
     383              : 
     384              :         /* Slow path for possible multibyte characters */
     385           55 :         charlen = PQmblen(source, encoding);
     386              : 
     387           82 :         if (remaining < charlen ||
     388           27 :             pg_encoding_verifymbchar(encoding, source, charlen) == -1)
     389              :         {
     390              :             /*
     391              :              * Multibyte character is invalid.  It's important to verify that
     392              :              * as invalid multibyte characters could e.g. be used to "skip"
     393              :              * over quote characters, e.g. when parsing
     394              :              * character-by-character.
     395              :              *
     396              :              * Replace the character's first byte with an invalid sequence.
     397              :              * The invalid sequence ensures that the escaped string will
     398              :              * trigger an error on the server-side, even if we can't directly
     399              :              * report an error here.
     400              :              *
     401              :              * We know there's enough space for the invalid sequence because
     402              :              * the "target" buffer is 2 * length + 2 long, and at worst we're
     403              :              * replacing a single input byte with two invalid bytes.
     404              :              *
     405              :              * It would be a bit faster to verify the whole string the first
     406              :              * time we encounter a set highbit, but this way we can replace
     407              :              * just the invalid data, which probably makes it easier for users
     408              :              * to find the invalidly encoded portion of a larger string.
     409              :              */
     410           41 :             pg_encoding_set_invalid(encoding, target);
     411           41 :             target += 2;
     412              : 
     413              :             /*
     414              :              * Handle the following bytes as if this byte didn't exist. That's
     415              :              * safer in case the subsequent bytes contain important characters
     416              :              * for the caller (e.g. '>' in html).
     417              :              */
     418           41 :             source++;
     419           41 :             remaining--;
     420              :         }
     421              :         else
     422              :         {
     423              :             /* Copy the character */
     424           41 :             for (i = 0; i < charlen; i++)
     425              :             {
     426           27 :                 *target++ = *source++;
     427           27 :                 remaining--;
     428              :             }
     429              :         }
     430              :     }
     431              : 
     432              :     /* Write the terminating quote and NUL character. */
     433        37711 :     *target++ = '\'';
     434        37711 :     *target = '\0';
     435              : 
     436        37711 :     buf->len = target - buf->data;  /* the NUL is not counted in len */
     437              : }
     438              : 
     439              : 
     440              : /*
     441              :  * Convert a string value to an SQL string literal and append it to
     442              :  * the given buffer.  Encoding and string syntax rules are as indicated
     443              :  * by current settings of the PGconn.
     444              :  */
     445              : void
     446         4739 : appendStringLiteralConn(PQExpBuffer buf, const char *str, PGconn *conn)
     447              : {
     448         4739 :     size_t      length = strlen(str);
     449              : 
     450              :     /*
     451              :      * XXX This is a kluge to silence escape_string_warning in our utility
     452              :      * programs.  It can go away once pre-v19 servers are out of support.
     453              :      */
     454         4739 :     if (strchr(str, '\\') != NULL && PQserverVersion(conn) < 190000)
     455              :     {
     456              :         /* ensure we are not adjacent to an identifier */
     457            0 :         if (buf->len > 0 && buf->data[buf->len - 1] != ' ')
     458            0 :             appendPQExpBufferChar(buf, ' ');
     459            0 :         appendPQExpBufferChar(buf, ESCAPE_STRING_SYNTAX);
     460            0 :         appendStringLiteral(buf, str, PQclientEncoding(conn), false);   /* std_strings = false */
     461            0 :         return;
     462              :     }
     463              :     /* XXX end kluge */
     464              : 
     465         4739 :     if (!enlargePQExpBuffer(buf, 2 * length + 2))   /* same sizing as appendStringLiteral() */
     466            0 :         return;
     467         4739 :     appendPQExpBufferChar(buf, '\'');
     468              :     /* escape straight into buf; the return value is taken as the bytes added */
     469         4739 :     buf->len += PQescapeStringConn(conn, buf->data + buf->len,
     470              :                                    str, length, NULL);
     471         4739 :     appendPQExpBufferChar(buf, '\'');
     472              : }
     472              : 
     473              : 
     474              : /*
     475              :  * Convert a string value to a dollar quoted literal and append it to
     476              :  * the given buffer. If the dqprefix parameter is not NULL then the
     477              :  * dollar quote delimiter will begin with that (after the opening $).
     478              :  *
     479              :  * No escaping is done at all on str, in compliance with the rules
     480              :  * for parsing dollar quoted strings.  Also, we need not worry about
     481              :  * encoding issues.
     482              :  */
     483              : void
     484         1571 : appendStringLiteralDQ(PQExpBuffer buf, const char *str, const char *dqprefix)
     485              : {
     486              :     static const char suffixes[] = "_XXXXXXX";  /* appended cyclically to grow the delimiter */
     487         1571 :     int         nextchar = 0;
     488         1571 :     PQExpBuffer delimBuf = createPQExpBuffer();
     489              : 
     490              :     /* start with $ + dqprefix if not NULL */
     491         1571 :     appendPQExpBufferChar(delimBuf, '$');
     492         1571 :     if (dqprefix)
     493            0 :         appendPQExpBufferStr(delimBuf, dqprefix);
     494              : 
     495              :     /*
     496              :      * Make sure we choose a delimiter which (without the trailing $) is not
     497              :      * present in the string being quoted. We don't check with the trailing $
     498              :      * because a string ending in $foo must not be quoted with $foo$.
     499              :      */
     500         2082 :     while (strstr(str, delimBuf->data) != NULL)
     501              :     {
     502          511 :         appendPQExpBufferChar(delimBuf, suffixes[nextchar++]);
     503          511 :         nextchar %= sizeof(suffixes) - 1;   /* wrap around before the terminating NUL */
     504              :     }
     505              : 
     506              :     /* add trailing $ */
     507         1571 :     appendPQExpBufferChar(delimBuf, '$');
     508              : 
     509              :     /* emit delimiter, the unmodified string, delimiter; and we are all done */
     510         1571 :     appendPQExpBufferStr(buf, delimBuf->data);
     511         1571 :     appendPQExpBufferStr(buf, str);
     512         1571 :     appendPQExpBufferStr(buf, delimBuf->data);
     513              : 
     514         1571 :     destroyPQExpBuffer(delimBuf);
     515         1571 : }
     516              : 
     517              : 
     518              : /*
     519              :  * Convert a bytea value (presented as raw bytes) to an SQL string literal
     520              :  * and append it to the given buffer.  We assume the specified
     521              :  * standard_conforming_strings setting.
     522              :  *
     523              :  * This is needed in situations where we do not have a PGconn available.
     524              :  * Where we do, PQescapeByteaConn is a better choice.
     525              :  */
     526              : void
     527           45 : appendByteaLiteral(PQExpBuffer buf, const unsigned char *str, size_t length,
     528              :                    bool std_strings)
     529              : {
     530           45 :     const unsigned char *source = str;
     531              :     char       *target;
     532              : 
     533              :     static const char hextbl[] = "0123456789abcdef";
     534              : 
     535              :     /*
     536              :      * This implementation is hard-wired to produce hex-format output, since
     537              :      * we cannot know the server version the output will be loaded into.  It
     538              :      * might be better to always use the old escaped format.  Space needed:
     539              :      * 2 hex digits per byte plus up to 5 bytes for quotes, backslashes, 'x'.
     540              :      */
     541           45 :     if (!enlargePQExpBuffer(buf, 2 * length + 5))
     542            0 :         return;
     543              : 
     544           45 :     target = buf->data + buf->len;
     545           45 :     *target++ = '\'';
     546           45 :     if (!std_strings)           /* then the backslash is written twice */
     547            0 :         *target++ = '\\';
     548           45 :     *target++ = '\\';
     549           45 :     *target++ = 'x';
     550              : 
     551         4119 :     while (length-- > 0)
     552              :     {
     553         4074 :         unsigned char c = *source++;
     554              : 
     555         4074 :         *target++ = hextbl[(c >> 4) & 0xF]; /* high nibble first */
     556         4074 :         *target++ = hextbl[c & 0xF];
     557              :     }
     558              : 
     559              :     /* Write the terminating quote and NUL character. */
     560           45 :     *target++ = '\'';
     561           45 :     *target = '\0';
     562              : 
     563           45 :     buf->len = target - buf->data;  /* the NUL is not counted in len */
     564              : }
     565              : 
     566              : 
     567              : /*
     568              :  * Append the given string to the shell command being built in the buffer,
     569              :  * with shell-style quoting as needed to create exactly one argument.
     570              :  *
     571              :  * Forbid LF or CR characters, which have scant practical use beyond designing
     572              :  * security breaches.  The Windows command shell is unusable as a conduit for
     573              :  * arguments containing LF or CR characters.  A future major release should
     574              :  * reject those characters in CREATE ROLE and CREATE DATABASE, because use
     575              :  * there eventually leads to errors here.
     576              :  *
     577              :  * appendShellString() prints an error to stderr and exits with EXIT_FAILURE if
     578              :  * LF or CR appears.  appendShellStringNoError() omits those characters from
     579              :  * the result, and returns false if there were any.
     580              :  */
     581              : void
     582          522 : appendShellString(PQExpBuffer buf, const char *str)
     583              : {
     584          522 :     if (!appendShellStringNoError(buf, str))
     585              :     {
     586            2 :         fprintf(stderr,
     587            2 :                 _("shell command argument contains a newline or carriage return: \"%s\"\n"),
     588              :                 str);
     589            2 :         exit(EXIT_FAILURE);
     590              :     }
     591          520 : }
     592              : 
     593              : bool
     594          522 : appendShellStringNoError(PQExpBuffer buf, const char *str)
     595              : {
     596              : #ifdef WIN32
     597              :     int         backslash_run_length = 0;
     598              : #endif
     599          522 :     bool        ok = true;
     600              :     const char *p;
     601              : 
     602              :     /*
     603              :      * Don't bother with adding quotes if the string is nonempty and clearly
     604              :      * contains only safe characters.
     605              :      */
     606          522 :     if (*str != '\0' &&
     607          522 :         strspn(str, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_./:") == strlen(str))
     608              :     {
     609          434 :         appendPQExpBufferStr(buf, str);
     610          434 :         return ok;
     611              :     }
     612              : 
     613              : #ifndef WIN32
     614           88 :     appendPQExpBufferChar(buf, '\'');
     615         3489 :     for (p = str; *p; p++)
     616              :     {
     617         3401 :         if (*p == '\n' || *p == '\r')
     618              :         {
     619            3 :             ok = false;
     620            3 :             continue;
     621              :         }
     622              : 
     623         3398 :         if (*p == '\'')
     624           88 :             appendPQExpBufferStr(buf, "'\"'\"'");
     625              :         else
     626         3310 :             appendPQExpBufferChar(buf, *p);
     627              :     }
     628           88 :     appendPQExpBufferChar(buf, '\'');
     629              : #else                           /* WIN32 */
     630              : 
     631              :     /*
     632              :      * A Windows system() argument experiences two layers of interpretation.
     633              :      * First, cmd.exe interprets the string.  Its behavior is undocumented,
     634              :      * but a caret escapes any byte except LF or CR that would otherwise have
     635              :      * special meaning.  Handling of a caret before LF or CR differs between
     636              :      * "cmd.exe /c" and other modes, and it is unusable here.
     637              :      *
     638              :      * Second, the new process parses its command line to construct argv (see
     639              :      * https://msdn.microsoft.com/en-us/library/17w5ykft.aspx).  This treats
     640              :      * backslash-double quote sequences specially.
     641              :      */
     642              :     appendPQExpBufferStr(buf, "^\"");
     643              :     for (p = str; *p; p++)
     644              :     {
     645              :         if (*p == '\n' || *p == '\r')
     646              :         {
     647              :             ok = false;
     648              :             continue;
     649              :         }
     650              : 
     651              :         /* Change N backslashes before a double quote to 2N+1 backslashes. */
     652              :         if (*p == '"')
     653              :         {
     654              :             while (backslash_run_length)
     655              :             {
     656              :                 appendPQExpBufferStr(buf, "^\\");
     657              :                 backslash_run_length--;
     658              :             }
     659              :             appendPQExpBufferStr(buf, "^\\");
     660              :         }
     661              :         else if (*p == '\\')
     662              :             backslash_run_length++;
     663              :         else
     664              :             backslash_run_length = 0;
     665              : 
     666              :         /*
     667              :          * Decline to caret-escape the most mundane characters, to ease
     668              :          * debugging and lest we approach the command length limit.
     669              :          */
     670              :         if (!((*p >= 'a' && *p <= 'z') ||
     671              :               (*p >= 'A' && *p <= 'Z') ||
     672              :               (*p >= '0' && *p <= '9')))
     673              :             appendPQExpBufferChar(buf, '^');
     674              :         appendPQExpBufferChar(buf, *p);
     675              :     }
     676              : 
     677              :     /*
     678              :      * Change N backslashes at end of argument to 2N backslashes, because they
     679              :      * precede the double quote that terminates the argument.
     680              :      */
     681              :     while (backslash_run_length)
     682              :     {
     683              :         appendPQExpBufferStr(buf, "^\\");
     684              :         backslash_run_length--;
     685              :     }
     686              :     appendPQExpBufferStr(buf, "^\"");
     687              : #endif                          /* WIN32 */
     688              : 
     689           88 :     return ok;
     690              : }
     691              : 
     692              : 
     693              : /*
     694              :  * Append the given string to the buffer, with suitable quoting for passing
     695              :  * the string as a value in a keyword/value pair in a libpq connection string.
     696              :  */
     697              : void
     698         2343 : appendConnStrVal(PQExpBuffer buf, const char *str)
     699              : {
     700              :     const char *s;
     701              :     bool        needquotes;
     702              : 
     703              :     /*
     704              :      * If the string is one or more plain ASCII characters, no need to quote
     705              :      * it. This is quite conservative, but better safe than sorry.
     706              :      */
     707         2343 :     needquotes = true;
     708        15979 :     for (s = str; *s; s++)
     709              :     {
     710        14423 :         if (!((*s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') ||
     711         1843 :               (*s >= '0' && *s <= '9') || *s == '_' || *s == '.'))
     712              :         {
     713          787 :             needquotes = true;
     714          787 :             break;
     715              :         }
     716        13636 :         needquotes = false;
     717              :     }
     718              : 
     719         2343 :     if (needquotes)
     720              :     {
     721          787 :         appendPQExpBufferChar(buf, '\'');
     722        16825 :         while (*str)
     723              :         {
     724              :             /* ' and \ must be escaped by to \' and \\ */
     725        16038 :             if (*str == '\'' || *str == '\\')
     726          302 :                 appendPQExpBufferChar(buf, '\\');
     727              : 
     728        16038 :             appendPQExpBufferChar(buf, *str);
     729        16038 :             str++;
     730              :         }
     731          787 :         appendPQExpBufferChar(buf, '\'');
     732              :     }
     733              :     else
     734         1556 :         appendPQExpBufferStr(buf, str);
     735         2343 : }
     736              : 
     737              : 
     738              : /*
     739              :  * Append a psql meta-command that connects to the given database with the
     740              :  * then-current connection's user, host and port.
     741              :  */
     742              : void
     743           35 : appendPsqlMetaConnect(PQExpBuffer buf, const char *dbname)
     744              : {
     745              :     const char *s;
     746              :     bool complex;
     747              : 
     748              :     /*
     749              :      * If the name is plain ASCII characters, emit a trivial "\connect "foo"".
     750              :      * For other names, even many not technically requiring it, skip to the
     751              :      * general case.  No database has a zero-length name.
     752              :      */
     753           35 :     complex = false;
     754              : 
     755          911 :     for (s = dbname; *s; s++)
     756              :     {
     757          876 :         if (*s == '\n' || *s == '\r')
     758              :         {
     759            0 :             fprintf(stderr,
     760            0 :                     _("database name contains a newline or carriage return: \"%s\"\n"),
     761              :                     dbname);
     762            0 :             exit(EXIT_FAILURE);
     763              :         }
     764              : 
     765          876 :         if (!((*s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') ||
     766          385 :               (*s >= '0' && *s <= '9') || *s == '_' || *s == '.'))
     767              :         {
     768          327 :             complex = true;
     769              :         }
     770              :     }
     771              : 
     772           35 :     if (complex)
     773              :     {
     774              :         PQExpBufferData connstr;
     775              : 
     776           10 :         initPQExpBuffer(&connstr);
     777              : 
     778              :         /*
     779              :          * Force the target psql's encoding to SQL_ASCII.  We don't really
     780              :          * know the encoding of the database name, and it doesn't matter as
     781              :          * long as psql will forward it to the server unchanged.
     782              :          */
     783           10 :         appendPQExpBufferStr(buf, "\\encoding SQL_ASCII\n");
     784           10 :         appendPQExpBufferStr(buf, "\\connect -reuse-previous=on ");
     785              : 
     786           10 :         appendPQExpBufferStr(&connstr, "dbname=");
     787           10 :         appendConnStrVal(&connstr, dbname);
     788              : 
     789              :         /*
     790              :          * As long as the name does not contain a newline, SQL identifier
     791              :          * quoting satisfies the psql meta-command parser.  Prefer not to
     792              :          * involve psql-interpreted single quotes, which behaved differently
     793              :          * before PostgreSQL 9.2.
     794              :          */
     795           10 :         appendPQExpBufferStr(buf, fmtIdEnc(connstr.data, PG_SQL_ASCII));
     796              : 
     797           10 :         termPQExpBuffer(&connstr);
     798              :     }
     799              :     else
     800              :     {
     801           25 :         appendPQExpBufferStr(buf, "\\connect ");
     802           25 :         appendPQExpBufferStr(buf, fmtIdEnc(dbname, PG_SQL_ASCII));
     803              :     }
     804           35 :     appendPQExpBufferChar(buf, '\n');
     805           35 : }
     806              : 
     807              : 
     808              : /*
     809              :  * Deconstruct the text representation of a 1-dimensional Postgres array
     810              :  * into individual items.
     811              :  *
     812              :  * On success, returns true and sets *itemarray and *nitems to describe
     813              :  * an array of individual strings.  On parse failure, returns false;
     814              :  * *itemarray may exist or be NULL.
     815              :  *
     816              :  * NOTE: free'ing itemarray is sufficient to deallocate the working storage.
     817              :  */
     818              : bool
     819        58341 : parsePGArray(const char *atext, char ***itemarray, int *nitems)
     820              : {
     821              :     int         inputlen;
     822              :     char      **items;
     823              :     char       *strings;
     824              :     int         curitem;
     825              : 
     826              :     /*
     827              :      * We expect input in the form of "{item,item,item}" where any item is
     828              :      * either raw data, or surrounded by double quotes (in which case embedded
     829              :      * characters including backslashes and quotes are backslashed).
     830              :      *
     831              :      * We build the result as an array of pointers followed by the actual
     832              :      * string data, all in one malloc block for convenience of deallocation.
     833              :      * The worst-case storage need is not more than one pointer and one
     834              :      * character for each input character (consider "{,,,,,,,,,,}").
     835              :      */
     836        58341 :     *itemarray = NULL;
     837        58341 :     *nitems = 0;
     838        58341 :     inputlen = strlen(atext);
     839        58341 :     if (inputlen < 2 || atext[0] != '{' || atext[inputlen - 1] != '}')
     840            0 :         return false;           /* bad input */
     841        58341 :     items = (char **) malloc(inputlen * (sizeof(char *) + sizeof(char)));
     842        58341 :     if (items == NULL)
     843            0 :         return false;           /* out of memory */
     844        58341 :     *itemarray = items;
     845        58341 :     strings = (char *) (items + inputlen);
     846              : 
     847        58341 :     atext++;                    /* advance over initial '{' */
     848        58341 :     curitem = 0;
     849       160385 :     while (*atext != '}')
     850              :     {
     851       102044 :         if (*atext == '\0')
     852            0 :             return false;       /* premature end of string */
     853       102044 :         items[curitem] = strings;
     854      2057387 :         while (*atext != '}' && *atext != ',')
     855              :         {
     856      1955343 :             if (*atext == '\0')
     857            0 :                 return false;   /* premature end of string */
     858      1955343 :             if (*atext != '"')
     859      1955156 :                 *strings++ = *atext++;  /* copy unquoted data */
     860              :             else
     861              :             {
     862              :                 /* process quoted substring */
     863          187 :                 atext++;
     864         6324 :                 while (*atext != '"')
     865              :                 {
     866         6137 :                     if (*atext == '\0')
     867            0 :                         return false;   /* premature end of string */
     868         6137 :                     if (*atext == '\\')
     869              :                     {
     870          925 :                         atext++;
     871          925 :                         if (*atext == '\0')
     872            0 :                             return false;   /* premature end of string */
     873              :                     }
     874         6137 :                     *strings++ = *atext++;  /* copy quoted data */
     875              :                 }
     876          187 :                 atext++;
     877              :             }
     878              :         }
     879       102044 :         *strings++ = '\0';
     880       102044 :         if (*atext == ',')
     881        45049 :             atext++;
     882       102044 :         curitem++;
     883              :     }
     884        58341 :     if (atext[1] != '\0')
     885            0 :         return false;           /* bogus syntax (embedded '}') */
     886        58341 :     *nitems = curitem;
     887        58341 :     return true;
     888              : }
     889              : 
     890              : 
     891              : /*
     892              :  * Append one element to the text representation of a 1-dimensional Postgres
     893              :  * array.
     894              :  *
     895              :  * The caller must provide the initial '{' and closing '}' of the array.
     896              :  * This function handles all else, including insertion of commas and
     897              :  * quoting of values.
     898              :  *
     899              :  * We assume that typdelim is ','.
     900              :  */
     901              : void
     902         6888 : appendPGArray(PQExpBuffer buffer, const char *value)
     903              : {
     904              :     bool        needquote;
     905              :     const char *tmp;
     906              : 
     907         6888 :     if (buffer->data[buffer->len - 1] != '{')
     908         6493 :         appendPQExpBufferChar(buffer, ',');
     909              : 
     910              :     /* Decide if we need quotes; this should match array_out()'s choices. */
     911         6888 :     if (value[0] == '\0')
     912            0 :         needquote = true;       /* force quotes for empty string */
     913         6888 :     else if (pg_strcasecmp(value, "NULL") == 0)
     914            0 :         needquote = true;       /* force quotes for literal NULL */
     915              :     else
     916         6888 :         needquote = false;
     917              : 
     918         6888 :     if (!needquote)
     919              :     {
     920       104368 :         for (tmp = value; *tmp; tmp++)
     921              :         {
     922        97641 :             char        ch = *tmp;
     923              : 
     924        97641 :             if (ch == '"' || ch == '\\' ||
     925        97510 :                 ch == '{' || ch == '}' || ch == ',' ||
     926              :             /* these match scanner_isspace(): */
     927        97510 :                 ch == ' ' || ch == '\t' || ch == '\n' ||
     928        97480 :                 ch == '\r' || ch == '\v' || ch == '\f')
     929              :             {
     930          161 :                 needquote = true;
     931          161 :                 break;
     932              :             }
     933              :         }
     934              :     }
     935              : 
     936         6888 :     if (needquote)
     937              :     {
     938          161 :         appendPQExpBufferChar(buffer, '"');
     939         5850 :         for (tmp = value; *tmp; tmp++)
     940              :         {
     941         5689 :             char        ch = *tmp;
     942              : 
     943         5689 :             if (ch == '"' || ch == '\\')
     944          812 :                 appendPQExpBufferChar(buffer, '\\');
     945         5689 :             appendPQExpBufferChar(buffer, ch);
     946              :         }
     947          161 :         appendPQExpBufferChar(buffer, '"');
     948              :     }
     949              :     else
     950         6727 :         appendPQExpBufferStr(buffer, value);
     951         6888 : }
     952              : 
     953              : 
     954              : /*
     955              :  * Format a reloptions array and append it to the given buffer.
     956              :  *
     957              :  * "prefix" is prepended to the option names; typically it's "" or "toast.".
     958              :  *
     959              :  * Returns false if the reloptions array could not be parsed (in which case
     960              :  * nothing will have been appended to the buffer), or true on success.
     961              :  *
     962              :  * Note: this logic should generally match the backend's flatten_reloptions()
     963              :  * (in adt/ruleutils.c).
     964              :  */
     965              : bool
     966          219 : appendReloptionsArray(PQExpBuffer buffer, const char *reloptions,
     967              :                       const char *prefix, int encoding, bool std_strings)
     968              : {
     969              :     char      **options;
     970              :     int         noptions;
     971              :     int         i;
     972              : 
     973          219 :     if (!parsePGArray(reloptions, &options, &noptions))
     974              :     {
     975            0 :         free(options);
     976            0 :         return false;
     977              :     }
     978              : 
     979          495 :     for (i = 0; i < noptions; i++)
     980              :     {
     981          276 :         char       *option = options[i];
     982              :         char       *name;
     983              :         char       *separator;
     984              :         char       *value;
     985              : 
     986              :         /*
     987              :          * Each array element should have the form name=value.  If the "=" is
     988              :          * missing for some reason, treat it like an empty value.
     989              :          */
     990          276 :         name = option;
     991          276 :         separator = strchr(option, '=');
     992          276 :         if (separator)
     993              :         {
     994          276 :             *separator = '\0';
     995          276 :             value = separator + 1;
     996              :         }
     997              :         else
     998            0 :             value = "";
     999              : 
    1000          276 :         if (i > 0)
    1001           57 :             appendPQExpBufferStr(buffer, ", ");
    1002          276 :         appendPQExpBuffer(buffer, "%s%s=", prefix, fmtId(name));
    1003              : 
    1004              :         /*
    1005              :          * In general we need to quote the value; but to avoid unnecessary
    1006              :          * clutter, do not quote if it is an identifier that would not need
    1007              :          * quoting.  (We could also allow numbers, but that is a bit trickier
    1008              :          * than it looks --- for example, are leading zeroes significant?  We
    1009              :          * don't want to assume very much here about what custom reloptions
    1010              :          * might mean.)
    1011              :          */
    1012          276 :         if (strcmp(fmtId(value), value) == 0)
    1013           32 :             appendPQExpBufferStr(buffer, value);
    1014              :         else
    1015          244 :             appendStringLiteral(buffer, value, encoding, std_strings);
    1016              :     }
    1017              : 
    1018          219 :     free(options);
    1019              : 
    1020          219 :     return true;
    1021              : }
    1022              : 
    1023              : 
    1024              : /*
    1025              :  * processSQLNamePattern
    1026              :  *
    1027              :  * Scan a wildcard-pattern string and generate appropriate WHERE clauses
    1028              :  * to limit the set of objects returned.  The WHERE clauses are appended
    1029              :  * to the already-partially-constructed query in buf.  Returns whether
    1030              :  * any clause was added.
    1031              :  *
    1032              :  * conn: connection query will be sent to (consulted for escaping rules).
    1033              :  * buf: output parameter.
    1034              :  * pattern: user-specified pattern option, or NULL if none ("*" is implied).
    1035              :  * have_where: true if caller already emitted "WHERE" (clauses will be ANDed
    1036              :  * onto the existing WHERE clause).
    1037              :  * force_escape: always quote regexp special characters, even outside
    1038              :  * double quotes (else they are quoted only between double quotes).
    1039              :  * schemavar: name of query variable to match against a schema-name pattern.
    1040              :  * Can be NULL if no schema.
    1041              :  * namevar: name of query variable to match against an object-name pattern.
    1042              :  * altnamevar: NULL, or name of an alternative variable to match against name.
    1043              :  * visibilityrule: clause to use if we want to restrict to visible objects
    1044              :  * (for example, "pg_catalog.pg_table_is_visible(p.oid)").  Can be NULL.
    1045              :  * dbnamebuf: output parameter receiving the database name portion of the
    1046              :  * pattern, if any.  Can be NULL.
    1047              :  * dotcnt: how many separators were parsed from the pattern, by reference.
    1048              :  *
    1049              :  * Formatting note: the text already present in buf should end with a newline.
    1050              :  * The appended text, if any, will end with one too.
    1051              :  */
    1052              : bool
    1053         3664 : processSQLNamePattern(PGconn *conn, PQExpBuffer buf, const char *pattern,
    1054              :                       bool have_where, bool force_escape,
    1055              :                       const char *schemavar, const char *namevar,
    1056              :                       const char *altnamevar, const char *visibilityrule,
    1057              :                       PQExpBuffer dbnamebuf, int *dotcnt)
    1058              : {
    1059              :     PQExpBufferData schemabuf;
    1060              :     PQExpBufferData namebuf;
    1061         3664 :     bool        added_clause = false;
    1062              :     int         dcnt;
    1063              : 
    1064              : #define WHEREAND() \
    1065              :     (appendPQExpBufferStr(buf, have_where ? "  AND " : "WHERE "), \
    1066              :      have_where = true, added_clause = true)
    1067              : 
    1068         3664 :     if (dotcnt == NULL)
    1069            6 :         dotcnt = &dcnt;
    1070         3664 :     *dotcnt = 0;
    1071         3664 :     if (pattern == NULL)
    1072              :     {
    1073              :         /* Default: select all visible objects */
    1074          254 :         if (visibilityrule)
    1075              :         {
    1076           60 :             WHEREAND();
    1077           60 :             appendPQExpBuffer(buf, "%s\n", visibilityrule);
    1078              :         }
    1079          254 :         return added_clause;
    1080              :     }
    1081              : 
    1082         3410 :     initPQExpBuffer(&schemabuf);
    1083         3410 :     initPQExpBuffer(&namebuf);
    1084              : 
    1085              :     /*
    1086              :      * Convert shell-style 'pattern' into the regular expression(s) we want to
    1087              :      * execute.  Quoting/escaping into SQL literal format will be done below
    1088              :      * using appendStringLiteralConn().
    1089              :      *
    1090              :      * If the caller provided a schemavar, we want to split the pattern on
    1091              :      * ".", otherwise not.
    1092              :      */
    1093         3410 :     patternToSQLRegex(PQclientEncoding(conn),
    1094              :                       (schemavar ? dbnamebuf : NULL),
    1095              :                       (schemavar ? &schemabuf : NULL),
    1096              :                       &namebuf,
    1097              :                       pattern, force_escape, true, dotcnt);
    1098              : 
    1099              :     /*
    1100              :      * Now decide what we need to emit.  We may run under a hostile
    1101              :      * search_path, so qualify EVERY name.  Note there will be a leading "^("
    1102              :      * in the patterns in any case.
    1103              :      *
    1104              :      * We want the regex matches to use the database's default collation where
    1105              :      * collation-sensitive behavior is required (for example, which characters
    1106              :      * match '\w').  That happened by default before PG v12, but if the server
    1107              :      * is >= v12 then we need to force it through explicit COLLATE clauses,
    1108              :      * otherwise the "C" collation attached to "name" catalog columns wins.
    1109              :      */
    1110         3410 :     if (namevar && namebuf.len > 2)
    1111              :     {
    1112              :         /* We have a name pattern, so constrain the namevar(s) */
    1113              : 
    1114              :         /* Optimize away a "*" pattern */
    1115         3410 :         if (strcmp(namebuf.data, "^(.*)$") != 0)
    1116              :         {
    1117         3368 :             WHEREAND();
    1118         3368 :             if (altnamevar)
    1119              :             {
    1120          114 :                 appendPQExpBuffer(buf,
    1121              :                                   "(%s OPERATOR(pg_catalog.~) ", namevar);
    1122          114 :                 appendStringLiteralConn(buf, namebuf.data, conn);
    1123          114 :                 if (PQserverVersion(conn) >= 120000)
    1124          114 :                     appendPQExpBufferStr(buf, " COLLATE pg_catalog.default");
    1125          114 :                 appendPQExpBuffer(buf,
    1126              :                                   "\n        OR %s OPERATOR(pg_catalog.~) ",
    1127              :                                   altnamevar);
    1128          114 :                 appendStringLiteralConn(buf, namebuf.data, conn);
    1129          114 :                 if (PQserverVersion(conn) >= 120000)
    1130          114 :                     appendPQExpBufferStr(buf, " COLLATE pg_catalog.default");
    1131          114 :                 appendPQExpBufferStr(buf, ")\n");
    1132              :             }
    1133              :             else
    1134              :             {
    1135         3254 :                 appendPQExpBuffer(buf, "%s OPERATOR(pg_catalog.~) ", namevar);
    1136         3254 :                 appendStringLiteralConn(buf, namebuf.data, conn);
    1137         3254 :                 if (PQserverVersion(conn) >= 120000)
    1138         3254 :                     appendPQExpBufferStr(buf, " COLLATE pg_catalog.default");
    1139         3254 :                 appendPQExpBufferChar(buf, '\n');
    1140              :             }
    1141              :         }
    1142              :     }
    1143              : 
    1144         3410 :     if (schemavar && schemabuf.len > 2)
    1145              :     {
    1146              :         /* We have a schema pattern, so constrain the schemavar */
    1147              : 
    1148              :         /* Optimize away a "*" pattern */
    1149         1441 :         if (strcmp(schemabuf.data, "^(.*)$") != 0 && schemavar)
    1150              :         {
    1151          719 :             WHEREAND();
    1152          719 :             appendPQExpBuffer(buf, "%s OPERATOR(pg_catalog.~) ", schemavar);
    1153          719 :             appendStringLiteralConn(buf, schemabuf.data, conn);
    1154          719 :             if (PQserverVersion(conn) >= 120000)
    1155          719 :                 appendPQExpBufferStr(buf, " COLLATE pg_catalog.default");
    1156          719 :             appendPQExpBufferChar(buf, '\n');
    1157              :         }
    1158              :     }
    1159              :     else
    1160              :     {
    1161              :         /* No schema pattern given, so select only visible objects */
    1162         2688 :         if (visibilityrule)
    1163              :         {
    1164         2161 :             WHEREAND();
    1165         2161 :             appendPQExpBuffer(buf, "%s\n", visibilityrule);
    1166              :         }
    1167              :     }
    1168              : 
    1169         3410 :     termPQExpBuffer(&schemabuf);
    1170         3410 :     termPQExpBuffer(&namebuf);
    1171              : 
    1172         3410 :     return added_clause;
    1173              : #undef WHEREAND
    1174              : }
    1175              : 
    1176              : /*
    1177              :  * Transform a possibly qualified shell-style object name pattern into up to
    1178              :  * three SQL-style regular expressions, converting quotes, lower-casing
    1179              :  * unquoted letters, and adjusting shell-style wildcard characters into regexp
    1180              :  * notation.
    1181              :  *
    1182              :  * If the dbnamebuf and schemabuf arguments are non-NULL, and the pattern
    1183              :  * contains two or more dbname/schema/name separators, we parse the portions of
    1184              :  * the pattern prior to the first and second separators into dbnamebuf and
    1185              :  * schemabuf, and the rest into namebuf.
    1186              :  *
    1187              :  * If dbnamebuf is NULL and schemabuf is non-NULL, and the pattern contains at
    1188              :  * least one separator, we parse the first portion into schemabuf and the rest
    1189              :  * into namebuf.
    1190              :  *
    1191              :  * Otherwise, we parse all the pattern into namebuf.
    1192              :  *
    1193              :  * If the pattern contains more dotted parts than buffers to parse into, the
    1194              :  * extra dots will be treated as literal characters and written into the
    1195              :  * namebuf, though they will be counted.  Callers should always check the value
    1196              :  * returned by reference in dotcnt and handle this error case appropriately.
    1197              :  *
    1198              :  * We surround the regexps with "^(...)$" to force them to match whole strings,
    1199              :  * as per SQL practice.  We have to have parens in case strings contain "|",
    1200              :  * else the "^" and "$" will be bound into the first and last alternatives
    1201              :  * which is not what we want.  Whether this is done for dbnamebuf is controlled
    1202              :  * by the want_literal_dbname parameter.
    1203              :  *
    1204              :  * The regexps we parse into the buffers are appended to the data (if any)
    1205              :  * already present.  If we parse fewer fields than the number of buffers we
    1206              :  * were given, the extra buffers are unaltered.
    1207              :  *
    1208              :  * encoding: the character encoding for the given pattern
    1209              :  * dbnamebuf: output parameter receiving the database name portion of the
    1210              :  * pattern, if any.  Can be NULL.
    1211              :  * schemabuf: output parameter receiving the schema name portion of the
    1212              :  * pattern, if any.  Can be NULL.
    1213              :  * namebuf: output parameter receiving the object name portion of the
    1214              :  * pattern.  Must not be NULL.
    1215              :  * pattern: user-specified pattern to parse.  Must not be NULL.
    1216              :  * force_escape: always quote regexp special characters, even outside
    1217              :  * double quotes (else they are quoted only between double quotes).
    1218              :  * want_literal_dbname: if true, regexp special characters within the database
    1219              :  * name portion of the pattern will not be escaped, nor will the dbname be
    1220              :  * converted into a regular expression.
    1221              :  * dotcnt: output parameter receiving the number of separators parsed from the
    1222              :  * pattern.
    1223              :  */
    1224              : void
    1225         3512 : patternToSQLRegex(int encoding, PQExpBuffer dbnamebuf, PQExpBuffer schemabuf,
    1226              :                   PQExpBuffer namebuf, const char *pattern, bool force_escape,
    1227              :                   bool want_literal_dbname, int *dotcnt)
    1228              : {
    1229              :     PQExpBufferData buf[3];     /* scratch regex buffers, one per dotted part */
    1230              :     PQExpBufferData left_literal;   /* non-regex copy of the text before the first separator */
    1231              :     PQExpBuffer curbuf;         /* scratch buffer currently being filled */
    1232              :     PQExpBuffer maxbuf;         /* last scratch buffer we are allowed to advance to */
    1233              :     int         i;              /* byte countdown for the character being copied */
    1234              :     bool        inquotes;       /* currently inside a double-quoted section? */
    1235              :     bool        left;           /* still appending to left_literal? */
    1236              :     const char *cp;             /* current scan position in pattern */
    1237              : 
    1238              :     Assert(pattern != NULL);
    1239              :     Assert(namebuf != NULL);
    1240              : 
    1241              :     /* callers should never expect "dbname.relname" format */
    1242              :     Assert(dbnamebuf == NULL || schemabuf != NULL);
    1243              :     Assert(dotcnt != NULL);
    1244              : 
    1245         3512 :     *dotcnt = 0;
    1246         3512 :     inquotes = false;
    1247         3512 :     cp = pattern;
    1248              : 
    1249         3512 :     if (dbnamebuf != NULL)
    1250         2931 :         maxbuf = &buf[2];       /* database, schema and name parts */
    1251          581 :     else if (schemabuf != NULL)
    1252           29 :         maxbuf = &buf[1];       /* schema and name parts */
    1253              :     else
    1254          552 :         maxbuf = &buf[0];       /* name part only */
    1255              : 
    1256         3512 :     curbuf = &buf[0];
    1257         3512 :     if (want_literal_dbname)
    1258              :     {
    1259         3410 :         left = true;
    1260         3410 :         initPQExpBuffer(&left_literal);
    1261              :     }
    1262              :     else
    1263          102 :         left = false;           /* left_literal stays uninitialized and unused */
    1264         3512 :     initPQExpBuffer(curbuf);
    1265         3512 :     appendPQExpBufferStr(curbuf, "^(");     /* open the anchored group for the first part */
    1266        61778 :     while (*cp)
    1267              :     {
    1268        58266 :         char        ch = *cp;
    1269              : 
    1270        58266 :         if (ch == '"')
    1271              :         {
    1272         1607 :             if (inquotes && cp[1] == '"')
    1273              :             {
    1274              :                 /* emit one quote, stay in inquotes mode */
    1275            3 :                 appendPQExpBufferChar(curbuf, '"');
    1276            3 :                 if (left)
    1277            3 :                     appendPQExpBufferChar(&left_literal, '"');
    1278            3 :                 cp++;           /* skip the first quote of the doubled pair */
    1279              :             }
    1280              :             else
    1281         1604 :                 inquotes = !inquotes;   /* lone quote only toggles quoting; nothing is emitted */
    1282         1607 :             cp++;
    1283              :         }
    1284        56659 :         else if (!inquotes && isupper((unsigned char) ch))
    1285              :         {
    1286          120 :             appendPQExpBufferChar(curbuf,
    1287          120 :                                   pg_tolower((unsigned char) ch));  /* fold unquoted upper-case letters */
    1288          120 :             if (left)
    1289           75 :                 appendPQExpBufferChar(&left_literal,
    1290           75 :                                       pg_tolower((unsigned char) ch));
    1291          120 :             cp++;
    1292              :         }
    1293        56539 :         else if (!inquotes && ch == '*')    /* shell wildcard: any string */
    1294              :         {
    1295          214 :             appendPQExpBufferStr(curbuf, ".*");
    1296          214 :             if (left)
    1297          161 :                 appendPQExpBufferChar(&left_literal, '*');  /* literal copy keeps the raw character */
    1298          214 :             cp++;
    1299              :         }
    1300        56325 :         else if (!inquotes && ch == '?')    /* shell wildcard: any single character */
    1301              :         {
    1302            3 :             appendPQExpBufferChar(curbuf, '.');
    1303            3 :             if (left)
    1304            3 :                 appendPQExpBufferChar(&left_literal, '?');  /* literal copy keeps the raw character */
    1305            3 :             cp++;
    1306              :         }
    1307        56322 :         else if (!inquotes && ch == '.')    /* unquoted dot: part separator */
    1308              :         {
    1309         1417 :             left = false;       /* left_literal stops growing at the first separator */
    1310         1417 :             if (dotcnt)         /* NOTE(review): redundant, dotcnt was already dereferenced above */
    1311         1417 :                 (*dotcnt)++;
    1312         1417 :             if (curbuf < maxbuf)
    1313              :             {
    1314         1125 :                 appendPQExpBufferStr(curbuf, ")$");     /* close the finished part */
    1315         1125 :                 curbuf++;
    1316         1125 :                 initPQExpBuffer(curbuf);
    1317         1125 :                 appendPQExpBufferStr(curbuf, "^(");     /* and open the next one */
    1318         1125 :                 cp++;
    1319              :             }
    1320              :             else
    1321          292 :                 appendPQExpBufferChar(curbuf, *cp++);   /* no buffer left: the dot goes into the current part as-is */
    1322              :         }
    1323        54905 :         else if (ch == '$')
    1324              :         {
    1325              :             /*
    1326              :              * Dollar is always quoted, whether inside quotes or not. The
    1327              :              * reason is that it's allowed in SQL identifiers, so there's a
    1328              :              * significant use-case for treating it literally, while because
    1329              :              * we anchor the pattern automatically there is no use-case for
    1330              :              * having it possess its regexp meaning.
    1331              :              */
    1332            6 :             appendPQExpBufferStr(curbuf, "\\$");
    1333            6 :             if (left)
    1334            6 :                 appendPQExpBufferChar(&left_literal, '$');
    1335            6 :             cp++;
    1336              :         }
    1337              :         else
    1338              :         {
    1339              :             /*
    1340              :              * Ordinary data character, transfer to pattern
    1341              :              *
    1342              :              * Inside double quotes, or at all times if force_escape is true,
    1343              :              * quote regexp special characters with a backslash to avoid
    1344              :              * regexp errors.  Outside quotes, however, let them pass through
    1345              :              * as-is; this lets knowledgeable users build regexp expressions
    1346              :              * that are more powerful than shell-style patterns.
    1347              :              *
    1348              :              * As an exception to that, though, always quote "[]", as that's
    1349              :              * much more likely to be an attempt to write an array type name
    1350              :              * than it is to be the start of a regexp bracket expression.
    1351              :              */
    1352        54899 :             if ((inquotes || force_escape) &&
    1353        14996 :                 strchr("|*+?()[]{}.^$\\", ch))
    1354         1937 :                 appendPQExpBufferChar(curbuf, '\\');    /* backslash goes only into the regex, not left_literal */
    1355        52962 :             else if (ch == '[' && cp[1] == ']')
    1356            3 :                 appendPQExpBufferChar(curbuf, '\\');
    1357        54899 :             i = PQmblenBounded(cp, encoding);   /* presumably the byte length of this character -- confirm in libpq docs */
    1358       109798 :             while (i--)         /* copy every byte of the character to both outputs */
    1359              :             {
    1360        54899 :                 if (left)
    1361        38123 :                     appendPQExpBufferChar(&left_literal, *cp);
    1362        54899 :                 appendPQExpBufferChar(curbuf, *cp++);
    1363              :             }
    1364              :         }
    1365              :     }
    1366         3512 :     appendPQExpBufferStr(curbuf, ")$");     /* close the last part */
    1367              : 
    1368         3512 :     if (namebuf)                /* NOTE(review): redundant, namebuf is asserted non-NULL above */
    1369              :     {
    1370         3512 :         appendPQExpBufferStr(namebuf, curbuf->data);    /* the rightmost part is always the name */
    1371         3512 :         termPQExpBuffer(curbuf);
    1372         3512 :         curbuf--;               /* NOTE(review): with no separator consumed this now points before buf[0] */
    1373              :     }
    1374              : 
    1375         3512 :     if (schemabuf && curbuf >= buf)     /* a part to the left of the name exists */
    1376              :     {
    1377          748 :         appendPQExpBufferStr(schemabuf, curbuf->data);
    1378          748 :         termPQExpBuffer(curbuf);
    1379          748 :         curbuf--;
    1380              :     }
    1381              : 
    1382         3512 :     if (dbnamebuf && curbuf >= buf)     /* a part to the left of the schema exists */
    1383              :     {
    1384          377 :         if (want_literal_dbname)
    1385          360 :             appendPQExpBufferStr(dbnamebuf, left_literal.data);     /* hand back the non-regex text */
    1386              :         else
    1387           17 :             appendPQExpBufferStr(dbnamebuf, curbuf->data);          /* hand back the anchored regex */
    1388          377 :         termPQExpBuffer(curbuf);
    1389              :     }
    1390              : 
    1391         3512 :     if (want_literal_dbname)    /* left_literal was initialized only in this case */
    1392         3410 :         termPQExpBuffer(&left_literal);
    1393         3512 : }
        

Generated by: LCOV version 2.0-1