LCOV - code coverage report
Current view: top level - src/test/modules/test_escape - test_escape.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18beta1 Lines: 248 294 84.4 %
Date: 2025-05-17 06:15:42 Functions: 17 18 94.4 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * test_escape.c Test escape functions
       3             :  *
       4             :  * Copyright (c) 2022-2025, PostgreSQL Global Development Group
       5             :  *
       6             :  * IDENTIFICATION
       7             :  *      src/test/modules/test_escape/test_escape.c
       8             :  */
       9             : 
      10             : #include "postgres_fe.h"
      11             : 
      12             : #include <string.h>
      13             : #include <stdio.h>
      14             : 
      15             : #include "common/jsonapi.h"
      16             : #include "fe_utils/psqlscan.h"
      17             : #include "fe_utils/string_utils.h"
      18             : #include "getopt_long.h"
      19             : #include "libpq-fe.h"
      20             : #include "mb/pg_wchar.h"
      21             : #include "utils/memdebug.h"
      22             : 
      23             : 
      24             : typedef struct pe_test_config
      25             : {
      26             :     int         verbosity;
      27             :     bool        force_unsupported;
      28             :     const char *conninfo;
      29             :     PGconn     *conn;
      30             : 
      31             :     int         test_count;
      32             :     int         failure_count;
      33             : } pe_test_config;
      34             : 
      35             : #define NEVER_ACCESS_STR "\xff never-to-be-touched"
      36             : 
      37             : 
      38             : /*
      39             :  * An escape function to be tested by this test.
      40             :  */
      41             : typedef struct pe_test_escape_func
      42             : {
      43             :     const char *name;
      44             : 
      45             :     /*
      46             :      * Can the escape method report errors? If so, we validate that it does in
      47             :      * case of various invalid inputs.
      48             :      */
      49             :     bool        reports_errors;
      50             : 
      51             :     /*
      52             :      * Is the escape method known to not handle invalidly encoded input? If
      53             :      * so, we don't run the test unless --force-unsupported is used.
      54             :      */
      55             :     bool        supports_only_valid;
      56             : 
      57             :     /*
      58             :      * Is the escape method known to only handle encodings where no byte of a
      59             :      * multi-byte character is valid ascii?
      60             :      */
      61             :     bool        supports_only_ascii_overlap;
      62             : 
      63             :     /*
      64             :      * Does the escape function have a length input?
      65             :      */
      66             :     bool        supports_input_length;
      67             : 
      68             :     bool        (*escape) (PGconn *conn, PQExpBuffer target,
      69             :                            const char *unescaped, size_t unescaped_len,
      70             :                            PQExpBuffer escape_err);
      71             : } pe_test_escape_func;
      72             : 
      73             : /*
      74             :  * A single test input for this test.
      75             :  */
      76             : typedef struct pe_test_vector
      77             : {
      78             :     const char *client_encoding;
      79             :     size_t      escape_len;
      80             :     const char *escape;
      81             : } pe_test_vector;
      82             : 
      83             : 
      84             : /*
      85             :  * Callback functions from flex lexer. Not currently used by the test.
      86             :  */
      87             : static const PsqlScanCallbacks test_scan_callbacks = {
      88             :     NULL
      89             : };
      90             : 
      91             : 
      92             : /*
      93             :  * Append len bytes of str to buf: newline as \n, NUL as \0, other bytes
      94             :  * outside ' '..'~' as \x plus two hex digits, everything else verbatim.
      95             :  *
      96             :  * The output format could stand to be improved significantly; it is not
      97             :  * unambiguous (e.g. a backslash in the input is not itself escaped).
      98             :  */
      99             : static void
     100        2980 : escapify(PQExpBuffer buf, const char *str, size_t len)
     101             : {
     102       17132 :     for (size_t i = 0; i < len; i++)
     103             :     {
     104       14152 :         char        c = *str;
     105             : 
     106       14152 :         if (c == '\n')
     107           0 :             appendPQExpBufferStr(buf, "\\n");
     108       14152 :         else if (c == '\0')
     109         360 :             appendPQExpBufferStr(buf, "\\0");
     110       13792 :         else if (c < ' ' || c > '~')
     111        2484 :             appendPQExpBuffer(buf, "\\x%02x", (uint8_t) c); /* zero-padded */
     112             :         else
     113       11308 :             appendPQExpBufferChar(buf, c);
     114       14152 :         str++;
     115             :     }
     116        2980 : }
     117             : 
     118             : static void
     119        2466 : report_result(pe_test_config *tc,
     120             :               bool success,
     121             :               const char *testname,
     122             :               const char *details,
     123             :               const char *subname,
     124             :               const char *resultdesc)
     125             : {
     126        2466 :     int         test_id = ++tc->test_count;
     127        2466 :     bool        print_details = true;
     128        2466 :     bool        print_result = true;
     129             : 
     130        2466 :     if (success)
     131             :     {
     132        2466 :         if (tc->verbosity <= 0)
     133        2466 :             print_details = false;
     134        2466 :         if (tc->verbosity < 0)
     135           0 :             print_result = false;
     136             :     }
     137             :     else
     138           0 :         tc->failure_count++;
     139             : 
     140        2466 :     if (print_details)
     141           0 :         printf("%s", details);
     142             : 
     143        2466 :     if (print_result)
     144        2466 :         printf("%s %d - %s: %s: %s\n",
     145             :                success ? "ok" : "not ok",
     146             :                test_id, testname,
     147             :                subname,
     148             :                resultdesc);
     149        2466 : }
     150             : 
     151             : /*
     152             :  * Return true for encodings in which bytes in a multi-byte character look
     153             :  * like valid ascii characters.
     154             :  */
     155             : static bool
     156         130 : encoding_conflicts_ascii(int encoding)
     157             : {
     158             :     /*
     159             :      * We don't store this property directly anywhere, but whether an encoding
     160             :      * is a client-only encoding is a good proxy.
     161             :      */
     162         130 :     if (encoding > PG_ENCODING_BE_LAST)
     163          58 :         return true;
     164          72 :     return false;
     165             : }
     166             : 
     167             : 
     168             : /*
     169             :  * Confirm escaping doesn't read past the end of an allocation.  Consider the
     170             :  * result of malloc(4096), in the absence of freelist entries satisfying the
     171             :  * allocation.  On OpenBSD, reading one byte past the end of that object
     172             :  * yields SIGSEGV.
     173             :  *
     174             :  * Run this test before the program's other tests, so freelists are minimal.
     175             :  * len=4096 didn't SIGSEGV, likely due to free() calls in libpq.  len=8192
     176             :  * did.  Use 128 KiB, to somewhat insulate the outcome from distant new free()
     177             :  * calls and libc changes.
     178             :  */
     179             : static void
     180           2 : test_gb18030_page_multiple(pe_test_config *tc)
     181             : {
     182             :     PQExpBuffer testname;
     183           2 :     size_t      input_len = 0x20000;
     184             :     char       *input;
     185             : 
     186             :     /* prepare input */
     187           2 :     input = pg_malloc(input_len);
     188           2 :     memset(input, '-', input_len - 1);
     189           2 :     input[input_len - 1] = 0xfe;
     190             : 
     191             :     /* name to describe the test */
     192           2 :     testname = createPQExpBuffer();
     193           2 :     appendPQExpBuffer(testname, ">repeat(%c, %zu)", input[0], input_len - 1);
     194           2 :     escapify(testname, input + input_len - 1, 1);
     195           2 :     appendPQExpBuffer(testname, "< - GB18030 - PQescapeLiteral");
     196             : 
     197             :     /* test itself */
     198           2 :     PQsetClientEncoding(tc->conn, "GB18030");
     199           2 :     report_result(tc, PQescapeLiteral(tc->conn, input, input_len) == NULL,
     200           2 :                   testname->data, "",
     201             :                   "input validity vs escape success", "ok");
     202             : 
     203           2 :     destroyPQExpBuffer(testname);
     204           2 :     pg_free(input);
     205           2 : }
     206             : 
     207             : /*
     208             :  * Confirm json parsing doesn't read past the end of an allocation.  This
     209             :  * exercises wchar.c infrastructure like the true "escape" tests do, but this
     210             :  * isn't an "escape" test.
     211             :  */
     212             : static void
     213           2 : test_gb18030_json(pe_test_config *tc)
     214             : {
     215             :     PQExpBuffer raw_buf;
     216             :     PQExpBuffer testname;
     217           2 :     const char  input[] = "{\"\\u\xFE";
     218           2 :     size_t      input_len = sizeof(input) - 1;
     219             :     JsonLexContext *lex;
     220           2 :     JsonSemAction sem = {0};    /* no callbacks */
     221             :     JsonParseErrorType json_error;
     222             : 
     223             :     /* prepare input like test_one_vector_escape() does */
     224           2 :     raw_buf = createPQExpBuffer();
     225           2 :     appendBinaryPQExpBuffer(raw_buf, input, input_len);
     226           2 :     appendPQExpBufferStr(raw_buf, NEVER_ACCESS_STR);
     227             :     VALGRIND_MAKE_MEM_NOACCESS(&raw_buf->data[input_len],
     228             :                                raw_buf->len - input_len);
     229             : 
     230             :     /* name to describe the test */
     231           2 :     testname = createPQExpBuffer();
     232           2 :     appendPQExpBuffer(testname, ">");
     233           2 :     escapify(testname, input, input_len);
     234           2 :     appendPQExpBuffer(testname, "< - GB18030 - pg_parse_json");
     235             : 
     236             :     /* test itself */
     237           2 :     lex = makeJsonLexContextCstringLen(NULL, raw_buf->data, input_len,
     238             :                                        PG_GB18030, false);
     239           2 :     json_error = pg_parse_json(lex, &sem);
     240           2 :     report_result(tc, json_error == JSON_UNICODE_ESCAPE_FORMAT,
     241           2 :                   testname->data, "",
     242           2 :                   "diagnosed", json_errdetail(json_error, lex));
     243             : 
     244           2 :     freeJsonLexContext(lex);
     245           2 :     destroyPQExpBuffer(testname);
     246           2 :     destroyPQExpBuffer(raw_buf);
     247           2 : }
     248             : 
     249             : 
     250             : static bool
     251         130 : escape_literal(PGconn *conn, PQExpBuffer target,
     252             :                const char *unescaped, size_t unescaped_len,
     253             :                PQExpBuffer escape_err)
     254             : {
     255             :     char       *escaped;
     256             : 
     257         130 :     escaped = PQescapeLiteral(conn, unescaped, unescaped_len);
     258         130 :     if (!escaped)
     259             :     {
     260          82 :         appendPQExpBufferStr(escape_err, PQerrorMessage(conn));
     261          82 :         escape_err->data[escape_err->len - 1] = 0;
     262          82 :         escape_err->len--;
     263          82 :         return false;
     264             :     }
     265             :     else
     266             :     {
     267          48 :         appendPQExpBufferStr(target, escaped);
     268          48 :         PQfreemem(escaped);
     269          48 :         return true;
     270             :     }
     271             : }
     272             : 
     273             : static bool
     274         130 : escape_identifier(PGconn *conn, PQExpBuffer target,
     275             :                   const char *unescaped, size_t unescaped_len,
     276             :                   PQExpBuffer escape_err)
     277             : {
     278             :     char       *escaped;
     279             : 
     280         130 :     escaped = PQescapeIdentifier(conn, unescaped, unescaped_len);
     281         130 :     if (!escaped)
     282             :     {
     283          82 :         appendPQExpBufferStr(escape_err, PQerrorMessage(conn));
     284          82 :         escape_err->data[escape_err->len - 1] = 0;
     285          82 :         escape_err->len--;
     286          82 :         return false;
     287             :     }
     288             :     else
     289             :     {
     290          48 :         appendPQExpBufferStr(target, escaped);
     291          48 :         PQfreemem(escaped);
     292          48 :         return true;
     293             :     }
     294             : }
     295             : 
     296             : static bool
     297         130 : escape_string_conn(PGconn *conn, PQExpBuffer target,
     298             :                    const char *unescaped, size_t unescaped_len,
     299             :                    PQExpBuffer escape_err)
     300             : {
     301             :     int         error;
     302             :     size_t      sz;
     303             : 
     304         130 :     appendPQExpBufferChar(target, '\'');
     305         130 :     enlargePQExpBuffer(target, unescaped_len * 2 + 1);
     306         130 :     sz = PQescapeStringConn(conn, target->data + target->len,
     307             :                             unescaped, unescaped_len,
     308             :                             &error);
     309             : 
     310         130 :     target->len += sz;
     311         130 :     appendPQExpBufferChar(target, '\'');
     312             : 
     313         130 :     if (error)
     314             :     {
     315          82 :         appendPQExpBufferStr(escape_err, PQerrorMessage(conn));
     316          82 :         escape_err->data[escape_err->len - 1] = 0;
     317          82 :         escape_err->len--;
     318          82 :         return false;
     319             :     }
     320             :     else
     321             :     {
     322          48 :         return true;
     323             :     }
     324             : }
     325             : 
     326             : static bool
     327         130 : escape_string(PGconn *conn, PQExpBuffer target,
     328             :               const char *unescaped, size_t unescaped_len,
     329             :               PQExpBuffer escape_err)
     330             : {
     331             :     size_t      sz;
     332             : 
     333         130 :     appendPQExpBufferChar(target, '\'');
     334         130 :     enlargePQExpBuffer(target, unescaped_len * 2 + 1);
     335         130 :     sz = PQescapeString(target->data + target->len,
     336             :                         unescaped, unescaped_len);
     337         130 :     target->len += sz;
     338         130 :     appendPQExpBufferChar(target, '\'');
     339             : 
     340             : 
     341         130 :     return true;
     342             : }
     343             : 
     344             : /*
     345             :  * Escape via s/'/''/ over exactly unescaped_len bytes.  Non-core drivers
     346             :  * invariably wrap libpq or use this method.  It suffices iff the input passes
     347             :  * encoding validation, so it's marked as supports_only_valid.
     348             :  */
     349             : static bool
     350          20 : escape_replace(PGconn *conn, PQExpBuffer target,
     351             :                const char *unescaped, size_t unescaped_len,
     352             :                PQExpBuffer escape_err)
     353             : {
     354          20 :     const char *s = unescaped;
     355             : 
     356          20 :     appendPQExpBufferChar(target, '\'');
     357             : 
     358          50 :     for (size_t i = 0; i < unescaped_len; i++)
     359             :     {
     360          30 :         char        c = *s;
     361             : 
     362          30 :         if (c == '\'')
     363             :         {
     364           8 :             appendPQExpBufferStr(target, "''");
     365             :         }
     366             :         else
     367          22 :             appendPQExpBufferChar(target, c);
     368          30 :         s++;
     369             :     }
     370          20 :     appendPQExpBufferChar(target, '\'');
     371             : 
     372          20 :     return true;
     373             : }
     374             : 
     375             : static bool
     376         130 : escape_append_literal(PGconn *conn, PQExpBuffer target,
     377             :                       const char *unescaped, size_t unescaped_len,
     378             :                       PQExpBuffer escape_err)
     379             : {
     380         130 :     appendStringLiteral(target, unescaped, PQclientEncoding(conn), 1);
     381             : 
     382         130 :     return true;
     383             : }
     384             : 
     385             : static bool
     386         130 : escape_fmt_id(PGconn *conn, PQExpBuffer target,
     387             :               const char *unescaped, size_t unescaped_len,
     388             :               PQExpBuffer escape_err)
     389             : {
     390         130 :     setFmtEncoding(PQclientEncoding(conn));
     391         130 :     appendPQExpBufferStr(target, fmtId(unescaped));
     392             : 
     393         130 :     return true;
     394             : }
     395             : 
     396             : static pe_test_escape_func pe_test_escape_funcs[] =
     397             : {
     398             :     {
     399             :         .name = "PQescapeLiteral",
     400             :         .reports_errors = true,
     401             :         .supports_input_length = true,
     402             :         .escape = escape_literal,
     403             :     },
     404             :     {
     405             :         .name = "PQescapeIdentifier",
     406             :         .reports_errors = true,
     407             :         .supports_input_length = true,
     408             :         .escape = escape_identifier
     409             :     },
     410             :     {
     411             :         .name = "PQescapeStringConn",
     412             :         .reports_errors = true,
     413             :         .supports_input_length = true,
     414             :         .escape = escape_string_conn
     415             :     },
     416             :     {
     417             :         .name = "PQescapeString",
     418             :         .reports_errors = false,
     419             :         .supports_input_length = true,
     420             :         .escape = escape_string
     421             :     },
     422             :     {
     423             :         .name = "replace",
     424             :         .reports_errors = false,
     425             :         .supports_only_valid = true,
     426             :         .supports_only_ascii_overlap = true,
     427             :         .supports_input_length = true,
     428             :         .escape = escape_replace
     429             :     },
     430             :     {
     431             :         .name = "appendStringLiteral",
     432             :         .reports_errors = false,
     433             :         .escape = escape_append_literal
     434             :     },
     435             :     {
     436             :         .name = "fmtId",
     437             :         .reports_errors = false,
     438             :         .escape = escape_fmt_id
     439             :     },
     440             : };
     441             : 
     442             : 
     443             : #define TV(enc, string) {.client_encoding = (enc), .escape=string, .escape_len=sizeof(string) - 1, }
     444             : #define TV_LEN(enc, string, len) {.client_encoding = (enc), .escape=string, .escape_len=len, }
     445             : static pe_test_vector pe_test_vectors[] =
     446             : {
     447             :     /* expected to work sanity checks */
     448             :     TV("UTF-8", "1"),
     449             :     TV("UTF-8", "'"),
     450             :     TV("UTF-8", "\""),
     451             : 
     452             :     TV("UTF-8", "\'"),
     453             :     TV("UTF-8", "\""),
     454             : 
     455             :     TV("UTF-8", "\\"),
     456             : 
     457             :     TV("UTF-8", "\\'"),
     458             :     TV("UTF-8", "\\\""),
     459             : 
     460             :     /* trailing multi-byte character, paddable in available space */
     461             :     TV("UTF-8", "1\xC0"),
     462             :     TV("UTF-8", "1\xE0 "),
     463             :     TV("UTF-8", "1\xF0 "),
     464             :     TV("UTF-8", "1\xF0  "),
     465             :     TV("UTF-8", "1\xF0   "),
     466             : 
     467             :     /* trailing multi-byte character, not enough space to pad */
     468             :     TV("UTF-8", "1\xE0"),
     469             :     TV("UTF-8", "1\xF0"),
     470             :     TV("UTF-8", "\xF0"),
     471             : 
     472             :     /* try to smuggle in something in invalid characters */
     473             :     TV("UTF-8", "1\xE0'"),
     474             :     TV("UTF-8", "1\xE0\""),
     475             :     TV("UTF-8", "1\xF0'"),
     476             :     TV("UTF-8", "1\xF0\""),
     477             :     TV("UTF-8", "1\xF0'; "),
     478             :     TV("UTF-8", "1\xF0\"; "),
     479             :     TV("UTF-8", "1\xF0';;;;"),
     480             :     TV("UTF-8", "1\xF0  ';;;;"),
     481             :     TV("UTF-8", "1\xF0  \";;;;"),
     482             :     TV("UTF-8", "1\xE0'; \\l ; "),
     483             :     TV("UTF-8", "1\xE0\"; \\l ; "),
     484             : 
     485             :     /* null byte handling */
     486             :     TV("UTF-8", "some\0thing"),
     487             :     TV("UTF-8", "some\0"),
     488             :     TV("UTF-8", "some\xF0'\0"),
     489             :     TV("UTF-8", "some\xF0'\0'"),
     490             :     TV("UTF-8", "some\xF0" "ab\0'"),
     491             : 
     492             :     /* GB18030's 4 byte encoding restricts the 2nd byte to limited values */
     493             :     TV("GB18030", "\x90\x31"),
     494             :     TV("GB18030", "\\\x81\x5c'"),
     495             :     TV("GB18030", "\\\x81\x5c\""),
     496             :     TV("GB18030", "\\\x81\x5c\0'"),
     497             : 
     498             :     /*
     499             :      * \x81 indicates a 2 byte char. ' and " are not a valid second byte, but
     500             :      * that requires encoding verification to know. E.g. escape_replace()
     501             :      * doesn't cope.
     502             :      */
     503             :     TV("GB18030", "\\\x81';"),
     504             :     TV("GB18030", "\\\x81\";"),
     505             : 
     506             :     /*
     507             :      * \x81 indicates a 2 byte char. \ is a valid second character.
     508             :      */
     509             :     TV("GB18030", "\\\x81\\';"),
     510             :     TV("GB18030", "\\\x81\\\";"),
     511             :     TV("GB18030", "\\\x81\0;"),
     512             :     TV("GB18030", "\\\x81\0'"),
     513             :     TV("GB18030", "\\\x81'\0"),
     514             : 
     515             :     TV("SJIS", "\xF0\x40;"),
     516             : 
     517             :     TV("SJIS", "\xF0';"),
     518             :     TV("SJIS", "\xF0\";"),
     519             :     TV("SJIS", "\xF0\0'"),
     520             :     TV("SJIS", "\\\xF0\\';"),
     521             :     TV("SJIS", "\\\xF0\\\";"),
     522             : 
     523             :     TV("gbk", "\x80';"),
     524             :     TV("gbk", "\x80"),
     525             :     TV("gbk", "\x80'"),
     526             :     TV("gbk", "\x80\""),
     527             :     TV("gbk", "\x80\\"),
     528             : 
     529             :     TV("mule_internal", "\\\x9c';\0;"),
     530             : 
     531             :     TV("sql_ascii", "1\xC0'"),
     532             : 
     533             :     /*
     534             :      * Testcases that are not null terminated for the specified input length.
     535             :      * That's interesting to verify that escape functions don't read beyond
     536             :      * the intended input length.
     537             :      *
     538             :      * One interesting special case is GB18030, which has the odd behaviour
     539             :      * of needing to read beyond the first byte to determine the length of a
     540             :      * multi-byte character.
     541             :      */
     542             :     TV_LEN("gbk", "\x80", 1),
     543             :     TV_LEN("GB18030", "\x80", 1),
     544             :     TV_LEN("GB18030", "\x80\0", 2),
     545             :     TV_LEN("GB18030", "\x80\x30", 2),
     546             :     TV_LEN("GB18030", "\x80\x30\0", 3),
     547             :     TV_LEN("GB18030", "\x80\x30\x30", 3),
     548             :     TV_LEN("GB18030", "\x80\x30\x30\0", 4),
     549             :     TV_LEN("UTF-8", "\xC3\xb6  ", 1),
     550             :     TV_LEN("UTF-8", "\xC3\xb6  ", 2),
     551             : };
     552             : 
     553             : 
     554             : static const char *
     555         636 : scan_res_s(PsqlScanResult res)
     556             : {
     557             : #define TOSTR_CASE(sym) case sym: return #sym
     558             : 
     559         636 :     switch (res)
     560             :     {
     561           0 :             TOSTR_CASE(PSCAN_SEMICOLON);
     562           0 :             TOSTR_CASE(PSCAN_BACKSLASH);
     563           0 :             TOSTR_CASE(PSCAN_INCOMPLETE);
     564         636 :             TOSTR_CASE(PSCAN_EOL);
     565             :     }
     566             : 
     567           0 :     pg_unreachable();
     568             :     return "";                    /* silence compiler */
     569             : }
     570             : 
     571             : /*
     572             :  * Verify that psql parses the input as a single statement. If this property
     573             :  * is violated, the escape function does not effectively protect against
     574             :  * smuggling in a second statement.
     575             :  */
     576             : static void
     577         636 : test_psql_parse(pe_test_config *tc, PQExpBuffer testname,
     578             :                 PQExpBuffer input_buf, PQExpBuffer details)
     579             : {
     580             :     PsqlScanState scan_state;
     581             :     PsqlScanResult scan_result;
     582             :     PQExpBuffer query_buf;
     583         636 :     promptStatus_t prompt_status = PROMPT_READY;
     584         636 :     int         matches = 0;
     585             :     bool        test_fails;
     586             :     const char *resdesc;
     587             : 
     588         636 :     query_buf = createPQExpBuffer();
     589             : 
     590         636 :     scan_state = psql_scan_create(&test_scan_callbacks);
     591             : 
     592             :     /*
     593             :      * TODO: This hardcodes standard conforming strings, it would be useful to
     594             :      * test without as well.
     595             :      */
     596         636 :     psql_scan_setup(scan_state, input_buf->data, input_buf->len,
     597         636 :                     PQclientEncoding(tc->conn), 1);
     598             : 
     599             :     do
     600             :     {
     601         636 :         resetPQExpBuffer(query_buf);
     602             : 
     603         636 :         scan_result = psql_scan(scan_state, query_buf,
     604             :                                 &prompt_status);
     605             : 
     606         636 :         appendPQExpBuffer(details,
     607             :                           "#\t\t %d: scan_result: %s prompt: %u, query_buf: ",
     608             :                           matches, scan_res_s(scan_result), prompt_status);
     609         636 :         escapify(details, query_buf->data, query_buf->len);
     610         636 :         appendPQExpBufferChar(details, '\n');
     611             : 
     612         636 :         matches++;
     613             :     }
     614         636 :     while (scan_result != PSCAN_INCOMPLETE && scan_result != PSCAN_EOL);
     615             : 
     616         636 :     psql_scan_destroy(scan_state);
     617         636 :     destroyPQExpBuffer(query_buf);
     618             : 
     619         636 :     test_fails = matches > 1 || scan_result != PSCAN_EOL;
     620             : 
     621         636 :     if (matches > 1)
     622           0 :         resdesc = "more than one match";
     623         636 :     else if (scan_result != PSCAN_EOL)
     624           0 :         resdesc = "unexpected end state";
     625             :     else
     626         636 :         resdesc = "ok";
     627             : 
     628         636 :     report_result(tc, !test_fails, testname->data, details->data,
     629             :                   "psql parse",
     630         636 :                   resdesc);
     631         636 : }
     632             : 
     633             : static void
     634         910 : test_one_vector_escape(pe_test_config *tc, const pe_test_vector *tv, const pe_test_escape_func *ef)
     635             : {
                           /*
                            * Exercise one escape function (ef) on one test vector (tv) under the
                            * connection's current client encoding, recording each check through
                            * report_result().
                            *
                            * Nothing is reported when ef only supports encodings that overlap
                            * ASCII and encoding_conflicts_ascii() is true for the client
                            * encoding, or when the input is invalidly encoded, ef only supports
                            * valid input and tc->force_unsupported is unset.
                            *
                            * Every PQExpBuffer created here is destroyed at the "out" label, which
                            * all paths (including the early skips) pass through.
                            */
     636             :     PQExpBuffer testname;
     637             :     PQExpBuffer details;
     638             :     PQExpBuffer raw_buf;
     639             :     PQExpBuffer escape_buf;
     640             :     PQExpBuffer escape_err;
     641             :     size_t      input_encoding_validlen;
     642             :     bool        input_encoding_valid;
     643             :     size_t      input_encoding0_validlen;
     644             :     bool        input_encoding0_valid;
     645             :     bool        escape_success;
     646             :     size_t      escape_encoding_length;
     647             :     bool        escape_encoding_valid;
     648             : 
     649         910 :     escape_err = createPQExpBuffer();
     650         910 :     testname = createPQExpBuffer();
     651         910 :     details = createPQExpBuffer();
     652         910 :     raw_buf = createPQExpBuffer();
     653         910 :     escape_buf = createPQExpBuffer();
     654             : 
     655        1040 :     if (ef->supports_only_ascii_overlap &&
     656         130 :         encoding_conflicts_ascii(PQclientEncoding(tc->conn)))
     657             :     {
     658          58 :         goto out;
     659             :     }
     660             : 
     661             :     /* name to describe the test */
     662         852 :     appendPQExpBufferChar(testname, '>');
     663         852 :     escapify(testname, tv->escape, tv->escape_len);
     664         852 :     appendPQExpBuffer(testname, "< - %s - %s",
     665             :                       tv->client_encoding, ef->name);
     666             : 
     667             :     /* details to describe the test, to allow for debugging */
     668         852 :     appendPQExpBuffer(details, "#\t input: %zu bytes: ",
     669             :                       tv->escape_len);
     670         852 :     escapify(details, tv->escape, tv->escape_len);
     671         852 :     appendPQExpBufferChar(details, '\n');
     672         852 :     appendPQExpBuffer(details, "#\t encoding: %s\n",
     673             :                       tv->client_encoding);
     674             : 
     675             : 
     676             :     /* check encoding of input, to compare with after the test */
     677         852 :     input_encoding_validlen = pg_encoding_verifymbstr(PQclientEncoding(tc->conn),
     678             :                                                       tv->escape,
     679         852 :                                                       tv->escape_len);
     680         852 :     input_encoding_valid = input_encoding_validlen == tv->escape_len;
     681         852 :     appendPQExpBuffer(details, "#\t input encoding valid: %d\n",
     682             :                       input_encoding_valid);
     683             : 
                           /* same check, but only up to the first zero byte of the input */
     684         852 :     input_encoding0_validlen = pg_encoding_verifymbstr(PQclientEncoding(tc->conn),
     685             :                                                        tv->escape,
     686         852 :                                                        strnlen(tv->escape, tv->escape_len));
     687         852 :     input_encoding0_valid = input_encoding0_validlen == strnlen(tv->escape, tv->escape_len);
     688         852 :     appendPQExpBuffer(details, "#\t input encoding valid till 0: %d\n",
     689             :                       input_encoding0_valid);
     690             : 
     691         852 :     appendPQExpBuffer(details, "#\t escape func: %s\n",
     692             :                       ef->name);
     693             : 
     694         852 :     if (!input_encoding_valid && ef->supports_only_valid
     695          52 :         && !tc->force_unsupported)
     696          52 :         goto out;
     697             : 
     698             : 
     699             :     /*
     700             :      * Put the to-be-escaped data into a buffer, so that we
     701             :      *
     702             :      * a) can mark memory beyond end of the string as inaccessible when using
     703             :      * valgrind
     704             :      *
     705             :      * b) can append extra data beyond the length passed to the escape
     706             :      * function, to verify that that data is not processed.
     707             :      *
     708             :      * TODO: Should we instead/additionally escape twice, once with unmodified
     709             :      * and once with appended input? That way we could compare the two.
     710             :      */
     711         800 :     appendBinaryPQExpBuffer(raw_buf, tv->escape, tv->escape_len);
     712             : 
     713         800 :     if (ef->supports_input_length)
     714             :     {
     715             :         /*
     716             :          * Append likely invalid string that does *not* contain a null byte
     717             :          * (which'd prevent some invalid accesses to later memory).
     718             :          */
     719         540 :         appendPQExpBufferStr(raw_buf, NEVER_ACCESS_STR);
     720             : 
     721             :         VALGRIND_MAKE_MEM_NOACCESS(&raw_buf->data[tv->escape_len],
     722             :                                    raw_buf->len - tv->escape_len);
     723             :     }
     724             :     else
     725             :     {
     726             :         /* append invalid string, after \0 */
     727         260 :         appendPQExpBufferChar(raw_buf, 0);
     728         260 :         appendPQExpBufferStr(raw_buf, NEVER_ACCESS_STR);
     729             : 
     730             :         VALGRIND_MAKE_MEM_NOACCESS(&raw_buf->data[tv->escape_len + 1],
     731             :                                    raw_buf->len - tv->escape_len - 1);
     732             :     }
     733             : 
     734             :     /* call the to-be-tested escape function */
     735         800 :     escape_success = ef->escape(tc->conn, escape_buf,
     736         800 :                                 raw_buf->data, tv->escape_len,
     737             :                                 escape_err);
     738         800 :     if (!escape_success)
     739             :     {
     740         246 :         appendPQExpBuffer(details, "#\t escape error: %s\n",
     741             :                           escape_err->data);
     742             :     }
     743             : 
     744         800 :     if (escape_buf->len > 0)
     745             :     {
     746             :         bool        never_str_absent;
     747             : 
     748         636 :         appendPQExpBuffer(details, "#\t escaped string: %zu bytes: ", escape_buf->len);
     749         636 :         escapify(details, escape_buf->data, escape_buf->len);
     750         636 :         appendPQExpBufferChar(details, '\n');
     751             : 
     752         636 :         escape_encoding_length = pg_encoding_verifymbstr(PQclientEncoding(tc->conn),
     753         636 :                                                          escape_buf->data,
     754         636 :                                                          escape_buf->len);
     755         636 :         escape_encoding_valid = escape_encoding_length == escape_buf->len;
     756             : 
     757         636 :         appendPQExpBuffer(details, "#\t escape encoding valid: %d\n",
     758             :                           escape_encoding_valid);
     759             : 
     760             :         /*
     761             :          * Verify that no data beyond the end of the input is included in the
     762             :          * escaped string.  It'd be better to use something like memmem()
     763             :          * here, but that's not available everywhere.
     764             :          */
     765         636 :         never_str_absent = strstr(escape_buf->data, NEVER_ACCESS_STR) == NULL;
     766         636 :         report_result(tc, never_str_absent, testname->data, details->data,
     767             :                       "escaped data beyond end of input",
     768             :                       never_str_absent ? "no" : "all secrets revealed");
     769             :     }
     770             :     else
     771             :     {
     772         164 :         escape_encoding_length = 0;
     773         164 :         escape_encoding_valid = 1;
     774             :     }
     775             : 
     776             :     /*
     777             :      * If the escape function reports errors, and the input was invalidly encoded,
     778             :      * escaping should fail.  One edge-case that we accept for now is that the
     779             :      * input could have an embedded null byte, which the escape functions will
     780             :      * just treat as a shorter string. If the encoding error is after the zero
     781             :      * byte, the output thus won't contain it.
     782             :      */
     783         800 :     if (ef->reports_errors)
     784             :     {
     785         390 :         bool        ok = true;
     786         390 :         const char *resdesc = "ok";
     787             : 
     788         390 :         if (escape_success)
     789             :         {
     790         144 :             if (!input_encoding0_valid)
     791             :             {
     792           0 :                 ok = false;
     793           0 :                 resdesc = "invalid input escaped successfully";
     794             :             }
     795         144 :             else if (!input_encoding_valid)
     796          18 :                 resdesc = "invalid input escaped successfully, due to zero byte";
     797             :         }
     798             :         else
     799             :         {
     800         246 :             if (input_encoding0_valid)
     801             :             {
     802           0 :                 ok = false;
     803           0 :                 resdesc = "valid input failed to escape";
     804             :             }
     805         246 :             else if (input_encoding_valid)
     806           0 :                 resdesc = "valid input failed to escape, due to zero byte";
     807             :         }
     808             : 
     809         390 :         report_result(tc, ok, testname->data, details->data,
     810             :                       "input validity vs escape success",
     811             :                       resdesc);
     812             :     }
     813             : 
     814             :     /*
     815             :      * If the input is invalidly encoded, the output should also be invalidly
     816             :      * encoded. We accept the same zero-byte edge case as above.
     817             :      */
     818             :     {
     819         800 :         bool        ok = true;
     820         800 :         const char *resdesc = "ok";
     821             : 
     822         800 :         if (input_encoding0_valid && !input_encoding_valid && escape_encoding_valid)
     823             :         {
     824          36 :             resdesc = "invalid input produced valid output, due to zero byte";
     825             :         }
     826         764 :         else if (input_encoding0_valid && !escape_encoding_valid)
     827             :         {
     828           0 :             ok = false;
     829           0 :             resdesc = "valid input produced invalid output";
     830             :         }
     831         764 :         else if (!input_encoding0_valid &&
     832         492 :                  (!ef->reports_errors || escape_success) &&
     833             :                  escape_encoding_valid)
     834             :         {
     835           0 :             ok = false;
     836           0 :             resdesc = "invalid input produced valid output";
     837             :         }
     838             : 
     839         800 :         report_result(tc, ok, testname->data, details->data,
     840             :                       "input and escaped encoding validity",
     841             :                       resdesc);
     842             :     }
     843             : 
     844             :     /*
     845             :      * Test psql parsing whenever we get any string back, even if the escape
     846             :      * function returned a failure.
     847             :      */
     848         800 :     if (escape_buf->len > 0)
     849             :     {
     850         636 :         test_psql_parse(tc, testname,
     851             :                         escape_buf, details);
     852             :     }
     853             : 
     854         164 : out:
     855         910 :     destroyPQExpBuffer(escape_err);
     856         910 :     destroyPQExpBuffer(details);
     857         910 :     destroyPQExpBuffer(testname);
     858         910 :     destroyPQExpBuffer(escape_buf);
     859         910 :     destroyPQExpBuffer(raw_buf);
     860         910 : }
     861             : 
     862             : static void
     863         130 : test_one_vector(pe_test_config *tc, const pe_test_vector *tv)
     864             : {
                           /*
                            * Switch the connection to the vector's client encoding, then run the
                            * vector through every entry of pe_test_escape_funcs.
                            *
                            * Failing to set the encoding is fatal: the libpq error message is
                            * printed to stderr and the program exits with status 1.
                            */
     865         130 :     if (PQsetClientEncoding(tc->conn, tv->client_encoding))
     866             :     {
     867           0 :         fprintf(stderr, "failed to set encoding to %s:\n%s\n",
     868           0 :                 tv->client_encoding, PQerrorMessage(tc->conn));
     869           0 :         exit(1);
     870             :     }
     871             : 
     872        1040 :     for (int escoff = 0; escoff < lengthof(pe_test_escape_funcs); escoff++)
     873             :     {
     874         910 :         const pe_test_escape_func *ef = &pe_test_escape_funcs[escoff];
     875             : 
     876         910 :         test_one_vector_escape(tc, tv, ef);
     877             :     }
     878         130 : }
     879             : 
     880             : static void
     881           0 : usage(const char *hint)
     882             : {
                           /*
                            * Print the command-line help text to stdout.
                            *
                            * If hint is non-NULL it is first written to stderr as an error
                            * message, and the program exits with status 1 once the help text has
                            * been printed.  With a NULL hint, control returns to the caller.
                            */
     883           0 :     if (hint)
     884           0 :         fprintf(stderr, "Error: %s\n\n", hint);
     885             : 
     886           0 :     printf("PostgreSQL escape function test\n"
     887             :            "\n"
     888             :            "Usage:\n"
     889             :            "  test_escape --conninfo=CONNINFO [OPTIONS]\n"
     890             :            "\n"
     891             :            "Options:\n"
     892             :            "  -h, --help                show this help\n"
     893             :            "  -c, --conninfo=CONNINFO   connection information to use\n"
     894             :            "  -v, --verbose             show test details even for successes\n"
     895             :            "  -q, --quiet               only show failures\n"
     896             :            "  -f, --force-unsupported   test invalid input even if unsupported\n"
     897             :         );
     898             : 
                           /* a hint means we were called because of a usage error */
     899           0 :     if (hint)
     900           0 :         exit(1);
     901           0 : }
     902             : 
     903             : int
     904           2 : main(int argc, char *argv[])
     905             : {
                           /*
                            * Entry point: parse the command line, connect using --conninfo, run
                            * the two GB18030 tests and then every entry of pe_test_vectors, and
                            * finally print the failure count and a "1..N" line with the number of
                            * tests run.
                            *
                            * Returns 1 if any test failed and 0 otherwise.  Usage errors and a
                            * failed connection exit with status 1 before any test is run.
                            */
     906           2 :     pe_test_config tc = {0};
     907             :     int         c;
     908             :     int         option_index;
     909             : 
     910             :     static const struct option long_options[] = {
     911             :         {"help", no_argument, NULL, 'h'},
     912             :         {"conninfo", required_argument, NULL, 'c'},
     913             :         {"verbose", no_argument, NULL, 'v'},
     914             :         {"quiet", no_argument, NULL, 'q'},
     915             :         {"force-unsupported", no_argument, NULL, 'f'},
     916             :         {NULL, 0, NULL, 0},
     917             :     };
     918             : 
     919           6 :     while ((c = getopt_long(argc, argv, "c:fhqv", long_options, &option_index)) != -1)
     920             :     {
                               /*
                                * NOTE(review): there is no default case, so any other value
                                * returned by getopt_long() (presumably '?' for an unrecognized
                                * option) is ignored and parsing simply continues -- confirm
                                * this is intended.
                                */
     921           2 :         switch (c)
     922             :         {
     923           0 :             case 'h':
     924           0 :                 usage(NULL);
     925           0 :                 exit(0);
     926             :                 break;
     927           2 :             case 'c':
     928           2 :                 tc.conninfo = optarg;
     929           2 :                 break;
                                   /* -v and -q adjust the same counter in opposite directions */
     930           0 :             case 'v':
     931           0 :                 tc.verbosity++;
     932           0 :                 break;
     933           0 :             case 'q':
     934           0 :                 tc.verbosity--;
     935           0 :                 break;
     936           0 :             case 'f':
     937           0 :                 tc.force_unsupported = true;
     938           0 :                 break;
     939             :         }
     940           4 :     }
     941             : 
                           /* leftover non-option arguments are a usage error (exits) */
     942           2 :     if (argc - optind >= 1)
     943           0 :         usage("unused option(s) specified");
     944             : 
                           /* a connection string is mandatory (exits if missing) */
     945           2 :     if (tc.conninfo == NULL)
     946           0 :         usage("--conninfo needs to be specified");
     947             : 
     948           2 :     tc.conn = PQconnectdb(tc.conninfo);
     949             : 
     950           2 :     if (!tc.conn || PQstatus(tc.conn) != CONNECTION_OK)
     951             :     {
     952           0 :         fprintf(stderr, "could not connect: %s\n",
     953           0 :                 PQerrorMessage(tc.conn));
     954           0 :         exit(1);
     955             :     }
     956             : 
     957           2 :     test_gb18030_page_multiple(&tc);
     958           2 :     test_gb18030_json(&tc);
     959             : 
     960         132 :     for (int i = 0; i < lengthof(pe_test_vectors); i++)
     961             :     {
     962         130 :         test_one_vector(&tc, &pe_test_vectors[i]);
     963             :     }
     964             : 
     965           2 :     PQfinish(tc.conn);
     966             : 
                           /* summary: failure count, then the total number of tests as "1..N" */
     967           2 :     printf("# %d failures\n", tc.failure_count);
     968           2 :     printf("1..%d\n", tc.test_count);
     969           2 :     return tc.failure_count > 0;
     970             : }

Generated by: LCOV version 1.14