Line data Source code
1 : /*
2 : * test_escape.c Test escape functions
3 : *
4 : * Copyright (c) 2022-2025, PostgreSQL Global Development Group
5 : *
6 : * IDENTIFICATION
7 : * src/test/modules/test_escape/test_escape.c
8 : */
9 :
10 : #include "postgres_fe.h"
11 :
12 : #include <string.h>
13 : #include <stdio.h>
14 :
15 : #include "common/jsonapi.h"
16 : #include "fe_utils/psqlscan.h"
17 : #include "fe_utils/string_utils.h"
18 : #include "getopt_long.h"
19 : #include "libpq-fe.h"
20 : #include "mb/pg_wchar.h"
21 : #include "utils/memdebug.h"
22 :
23 :
24 : typedef struct pe_test_config
25 : {
26 : int verbosity;
27 : bool force_unsupported;
28 : const char *conninfo;
29 : PGconn *conn;
30 :
31 : int test_count;
32 : int failure_count;
33 : } pe_test_config;
34 :
/*
 * Marker appended after the to-be-escaped input.  It starts with a 0xff byte
 * and contains no zero byte; if it ever shows up in escaped output, the
 * escape function read past the end of its input.
 */
#define NEVER_ACCESS_STR "\xff never-to-be-touched"
36 :
37 :
38 : /*
39 : * An escape function to be tested by this test.
40 : */
41 : typedef struct pe_test_escape_func
42 : {
43 : const char *name;
44 :
45 : /*
46 : * Can the escape method report errors? If so, we validate that it does in
47 : * case of various invalid inputs.
48 : */
49 : bool reports_errors;
50 :
51 : /*
52 : * Is the escape method known to not handle invalidly encoded input? If
53 : * so, we don't run the test unless --force-unsupported is used.
54 : */
55 : bool supports_only_valid;
56 :
57 : /*
58 : * Is the escape method known to only handle encodings where no byte in a
59 : * multi-byte characters are valid ascii.
60 : */
61 : bool supports_only_ascii_overlap;
62 :
63 : /*
64 : * Does the escape function have a length input?
65 : */
66 : bool supports_input_length;
67 :
68 : bool (*escape) (PGconn *conn, PQExpBuffer target,
69 : const char *unescaped, size_t unescaped_len,
70 : PQExpBuffer escape_err);
71 : } pe_test_escape_func;
72 :
/*
 * One input to feed to every escape function.
 */
typedef struct pe_test_vector
{
	/* client encoding to select before escaping */
	const char *client_encoding;
	/* number of bytes of "escape" to use; may cover embedded zero bytes */
	size_t		escape_len;
	/* the raw bytes to escape */
	const char *escape;
} pe_test_vector;
82 :
83 :
84 : /*
85 : * Callback functions from flex lexer. Not currently used by the test.
86 : */
87 : static const PsqlScanCallbacks test_scan_callbacks = {
88 : NULL
89 : };
90 :
91 :
92 : /*
93 : * Print the string into buf, making characters outside of plain ascii
94 : * somewhat easier to recognize.
95 : *
96 : * The output format could stand to be improved significantly, it's not at all
97 : * unambiguous.
98 : */
99 : static void
100 2980 : escapify(PQExpBuffer buf, const char *str, size_t len)
101 : {
102 17132 : for (size_t i = 0; i < len; i++)
103 : {
104 14152 : char c = *str;
105 :
106 14152 : if (c == '\n')
107 0 : appendPQExpBufferStr(buf, "\\n");
108 14152 : else if (c == '\0')
109 360 : appendPQExpBufferStr(buf, "\\0");
110 13792 : else if (c < ' ' || c > '~')
111 2484 : appendPQExpBuffer(buf, "\\x%2x", (uint8_t) c);
112 : else
113 11308 : appendPQExpBufferChar(buf, c);
114 14152 : str++;
115 : }
116 2980 : }
117 :
118 : static void
119 2466 : report_result(pe_test_config *tc,
120 : bool success,
121 : const char *testname,
122 : const char *details,
123 : const char *subname,
124 : const char *resultdesc)
125 : {
126 2466 : int test_id = ++tc->test_count;
127 2466 : bool print_details = true;
128 2466 : bool print_result = true;
129 :
130 2466 : if (success)
131 : {
132 2466 : if (tc->verbosity <= 0)
133 2466 : print_details = false;
134 2466 : if (tc->verbosity < 0)
135 0 : print_result = false;
136 : }
137 : else
138 0 : tc->failure_count++;
139 :
140 2466 : if (print_details)
141 0 : printf("%s", details);
142 :
143 2466 : if (print_result)
144 2466 : printf("%s %d - %s: %s: %s\n",
145 : success ? "ok" : "not ok",
146 : test_id, testname,
147 : subname,
148 : resultdesc);
149 2466 : }
150 :
151 : /*
152 : * Return true for encodings in which bytes in a multi-byte character look
153 : * like valid ascii characters.
154 : */
155 : static bool
156 130 : encoding_conflicts_ascii(int encoding)
157 : {
158 : /*
159 : * We don't store this property directly anywhere, but whether an encoding
160 : * is a client-only encoding is a good proxy.
161 : */
162 130 : if (encoding > PG_ENCODING_BE_LAST)
163 58 : return true;
164 72 : return false;
165 : }
166 :
167 :
168 : /*
169 : * Confirm escaping doesn't read past the end of an allocation. Consider the
170 : * result of malloc(4096), in the absence of freelist entries satisfying the
171 : * allocation. On OpenBSD, reading one byte past the end of that object
172 : * yields SIGSEGV.
173 : *
174 : * Run this test before the program's other tests, so freelists are minimal.
175 : * len=4096 didn't SIGSEGV, likely due to free() calls in libpq. len=8192
176 : * did. Use 128 KiB, to somewhat insulate the outcome from distant new free()
177 : * calls and libc changes.
178 : */
179 : static void
180 2 : test_gb18030_page_multiple(pe_test_config *tc)
181 : {
182 : PQExpBuffer testname;
183 2 : size_t input_len = 0x20000;
184 : char *input;
185 :
186 : /* prepare input */
187 2 : input = pg_malloc(input_len);
188 2 : memset(input, '-', input_len - 1);
189 2 : input[input_len - 1] = 0xfe;
190 :
191 : /* name to describe the test */
192 2 : testname = createPQExpBuffer();
193 2 : appendPQExpBuffer(testname, ">repeat(%c, %zu)", input[0], input_len - 1);
194 2 : escapify(testname, input + input_len - 1, 1);
195 2 : appendPQExpBuffer(testname, "< - GB18030 - PQescapeLiteral");
196 :
197 : /* test itself */
198 2 : PQsetClientEncoding(tc->conn, "GB18030");
199 2 : report_result(tc, PQescapeLiteral(tc->conn, input, input_len) == NULL,
200 2 : testname->data, "",
201 : "input validity vs escape success", "ok");
202 :
203 2 : destroyPQExpBuffer(testname);
204 2 : pg_free(input);
205 2 : }
206 :
207 : /*
208 : * Confirm json parsing doesn't read past the end of an allocation. This
209 : * exercises wchar.c infrastructure like the true "escape" tests do, but this
210 : * isn't an "escape" test.
211 : */
212 : static void
213 2 : test_gb18030_json(pe_test_config *tc)
214 : {
215 : PQExpBuffer raw_buf;
216 : PQExpBuffer testname;
217 2 : const char input[] = "{\"\\u\xFE";
218 2 : size_t input_len = sizeof(input) - 1;
219 : JsonLexContext *lex;
220 2 : JsonSemAction sem = {0}; /* no callbacks */
221 : JsonParseErrorType json_error;
222 :
223 : /* prepare input like test_one_vector_escape() does */
224 2 : raw_buf = createPQExpBuffer();
225 2 : appendBinaryPQExpBuffer(raw_buf, input, input_len);
226 2 : appendPQExpBufferStr(raw_buf, NEVER_ACCESS_STR);
227 : VALGRIND_MAKE_MEM_NOACCESS(&raw_buf->data[input_len],
228 : raw_buf->len - input_len);
229 :
230 : /* name to describe the test */
231 2 : testname = createPQExpBuffer();
232 2 : appendPQExpBuffer(testname, ">");
233 2 : escapify(testname, input, input_len);
234 2 : appendPQExpBuffer(testname, "< - GB18030 - pg_parse_json");
235 :
236 : /* test itself */
237 2 : lex = makeJsonLexContextCstringLen(NULL, raw_buf->data, input_len,
238 : PG_GB18030, false);
239 2 : json_error = pg_parse_json(lex, &sem);
240 2 : report_result(tc, json_error == JSON_UNICODE_ESCAPE_FORMAT,
241 2 : testname->data, "",
242 2 : "diagnosed", json_errdetail(json_error, lex));
243 :
244 2 : freeJsonLexContext(lex);
245 2 : destroyPQExpBuffer(testname);
246 2 : destroyPQExpBuffer(raw_buf);
247 2 : }
248 :
249 :
250 : static bool
251 130 : escape_literal(PGconn *conn, PQExpBuffer target,
252 : const char *unescaped, size_t unescaped_len,
253 : PQExpBuffer escape_err)
254 : {
255 : char *escaped;
256 :
257 130 : escaped = PQescapeLiteral(conn, unescaped, unescaped_len);
258 130 : if (!escaped)
259 : {
260 82 : appendPQExpBufferStr(escape_err, PQerrorMessage(conn));
261 82 : escape_err->data[escape_err->len - 1] = 0;
262 82 : escape_err->len--;
263 82 : return false;
264 : }
265 : else
266 : {
267 48 : appendPQExpBufferStr(target, escaped);
268 48 : PQfreemem(escaped);
269 48 : return true;
270 : }
271 : }
272 :
273 : static bool
274 130 : escape_identifier(PGconn *conn, PQExpBuffer target,
275 : const char *unescaped, size_t unescaped_len,
276 : PQExpBuffer escape_err)
277 : {
278 : char *escaped;
279 :
280 130 : escaped = PQescapeIdentifier(conn, unescaped, unescaped_len);
281 130 : if (!escaped)
282 : {
283 82 : appendPQExpBufferStr(escape_err, PQerrorMessage(conn));
284 82 : escape_err->data[escape_err->len - 1] = 0;
285 82 : escape_err->len--;
286 82 : return false;
287 : }
288 : else
289 : {
290 48 : appendPQExpBufferStr(target, escaped);
291 48 : PQfreemem(escaped);
292 48 : return true;
293 : }
294 : }
295 :
296 : static bool
297 130 : escape_string_conn(PGconn *conn, PQExpBuffer target,
298 : const char *unescaped, size_t unescaped_len,
299 : PQExpBuffer escape_err)
300 : {
301 : int error;
302 : size_t sz;
303 :
304 130 : appendPQExpBufferChar(target, '\'');
305 130 : enlargePQExpBuffer(target, unescaped_len * 2 + 1);
306 130 : sz = PQescapeStringConn(conn, target->data + target->len,
307 : unescaped, unescaped_len,
308 : &error);
309 :
310 130 : target->len += sz;
311 130 : appendPQExpBufferChar(target, '\'');
312 :
313 130 : if (error)
314 : {
315 82 : appendPQExpBufferStr(escape_err, PQerrorMessage(conn));
316 82 : escape_err->data[escape_err->len - 1] = 0;
317 82 : escape_err->len--;
318 82 : return false;
319 : }
320 : else
321 : {
322 48 : return true;
323 : }
324 : }
325 :
326 : static bool
327 130 : escape_string(PGconn *conn, PQExpBuffer target,
328 : const char *unescaped, size_t unescaped_len,
329 : PQExpBuffer escape_err)
330 : {
331 : size_t sz;
332 :
333 130 : appendPQExpBufferChar(target, '\'');
334 130 : enlargePQExpBuffer(target, unescaped_len * 2 + 1);
335 130 : sz = PQescapeString(target->data + target->len,
336 : unescaped, unescaped_len);
337 130 : target->len += sz;
338 130 : appendPQExpBufferChar(target, '\'');
339 :
340 :
341 130 : return true;
342 : }
343 :
344 : /*
345 : * Escape via s/'/''/. Non-core drivers invariably wrap libpq or use this
346 : * method. It suffices iff the input passes encoding validation, so it's
347 : * marked as supports_only_valid.
348 : */
349 : static bool
350 20 : escape_replace(PGconn *conn, PQExpBuffer target,
351 : const char *unescaped, size_t unescaped_len,
352 : PQExpBuffer escape_err)
353 : {
354 20 : const char *s = unescaped;
355 :
356 20 : appendPQExpBufferChar(target, '\'');
357 :
358 50 : for (int i = 0; i < unescaped_len; i++)
359 : {
360 30 : char c = *s;
361 :
362 30 : if (c == '\'')
363 : {
364 8 : appendPQExpBufferStr(target, "''");
365 : }
366 : else
367 22 : appendPQExpBufferChar(target, c);
368 30 : s++;
369 : }
370 20 : appendPQExpBufferChar(target, '\'');
371 :
372 20 : return true;
373 : }
374 :
375 : static bool
376 130 : escape_append_literal(PGconn *conn, PQExpBuffer target,
377 : const char *unescaped, size_t unescaped_len,
378 : PQExpBuffer escape_err)
379 : {
380 130 : appendStringLiteral(target, unescaped, PQclientEncoding(conn), 1);
381 :
382 130 : return true;
383 : }
384 :
385 : static bool
386 130 : escape_fmt_id(PGconn *conn, PQExpBuffer target,
387 : const char *unescaped, size_t unescaped_len,
388 : PQExpBuffer escape_err)
389 : {
390 130 : setFmtEncoding(PQclientEncoding(conn));
391 130 : appendPQExpBufferStr(target, fmtId(unescaped));
392 :
393 130 : return true;
394 : }
395 :
396 : static pe_test_escape_func pe_test_escape_funcs[] =
397 : {
398 : {
399 : .name = "PQescapeLiteral",
400 : .reports_errors = true,
401 : .supports_input_length = true,
402 : .escape = escape_literal,
403 : },
404 : {
405 : .name = "PQescapeIdentifier",
406 : .reports_errors = true,
407 : .supports_input_length = true,
408 : .escape = escape_identifier
409 : },
410 : {
411 : .name = "PQescapeStringConn",
412 : .reports_errors = true,
413 : .supports_input_length = true,
414 : .escape = escape_string_conn
415 : },
416 : {
417 : .name = "PQescapeString",
418 : .reports_errors = false,
419 : .supports_input_length = true,
420 : .escape = escape_string
421 : },
422 : {
423 : .name = "replace",
424 : .reports_errors = false,
425 : .supports_only_valid = true,
426 : .supports_only_ascii_overlap = true,
427 : .supports_input_length = true,
428 : .escape = escape_replace
429 : },
430 : {
431 : .name = "appendStringLiteral",
432 : .reports_errors = false,
433 : .escape = escape_append_literal
434 : },
435 : {
436 : .name = "fmtId",
437 : .reports_errors = false,
438 : .escape = escape_fmt_id
439 : },
440 : };
441 :
442 :
443 : #define TV(enc, string) {.client_encoding = (enc), .escape=string, .escape_len=sizeof(string) - 1, }
444 : #define TV_LEN(enc, string, len) {.client_encoding = (enc), .escape=string, .escape_len=len, }
445 : static pe_test_vector pe_test_vectors[] =
446 : {
447 : /* expected to work sanity checks */
448 : TV("UTF-8", "1"),
449 : TV("UTF-8", "'"),
450 : TV("UTF-8", "\""),
451 :
452 : TV("UTF-8", "\'"),
453 : TV("UTF-8", "\""),
454 :
455 : TV("UTF-8", "\\"),
456 :
457 : TV("UTF-8", "\\'"),
458 : TV("UTF-8", "\\\""),
459 :
460 : /* trailing multi-byte character, paddable in available space */
461 : TV("UTF-8", "1\xC0"),
462 : TV("UTF-8", "1\xE0 "),
463 : TV("UTF-8", "1\xF0 "),
464 : TV("UTF-8", "1\xF0 "),
465 : TV("UTF-8", "1\xF0 "),
466 :
467 : /* trailing multi-byte character, not enough space to pad */
468 : TV("UTF-8", "1\xE0"),
469 : TV("UTF-8", "1\xF0"),
470 : TV("UTF-8", "\xF0"),
471 :
472 : /* try to smuggle in something in invalid characters */
473 : TV("UTF-8", "1\xE0'"),
474 : TV("UTF-8", "1\xE0\""),
475 : TV("UTF-8", "1\xF0'"),
476 : TV("UTF-8", "1\xF0\""),
477 : TV("UTF-8", "1\xF0'; "),
478 : TV("UTF-8", "1\xF0\"; "),
479 : TV("UTF-8", "1\xF0';;;;"),
480 : TV("UTF-8", "1\xF0 ';;;;"),
481 : TV("UTF-8", "1\xF0 \";;;;"),
482 : TV("UTF-8", "1\xE0'; \\l ; "),
483 : TV("UTF-8", "1\xE0\"; \\l ; "),
484 :
485 : /* null byte handling */
486 : TV("UTF-8", "some\0thing"),
487 : TV("UTF-8", "some\0"),
488 : TV("UTF-8", "some\xF0'\0"),
489 : TV("UTF-8", "some\xF0'\0'"),
490 : TV("UTF-8", "some\xF0" "ab\0'"),
491 :
492 : /* GB18030's 4 byte encoding requires a 2nd byte limited values */
493 : TV("GB18030", "\x90\x31"),
494 : TV("GB18030", "\\\x81\x5c'"),
495 : TV("GB18030", "\\\x81\x5c\""),
496 : TV("GB18030", "\\\x81\x5c\0'"),
497 :
498 : /*
499 : * \x81 indicates a 2 byte char. ' and " are not a valid second byte, but
500 : * that requires encoding verification to know. E.g. replace_string()
501 : * doesn't cope.
502 : */
503 : TV("GB18030", "\\\x81';"),
504 : TV("GB18030", "\\\x81\";"),
505 :
506 : /*
507 : * \x81 indicates a 2 byte char. \ is a valid second character.
508 : */
509 : TV("GB18030", "\\\x81\\';"),
510 : TV("GB18030", "\\\x81\\\";"),
511 : TV("GB18030", "\\\x81\0;"),
512 : TV("GB18030", "\\\x81\0'"),
513 : TV("GB18030", "\\\x81'\0"),
514 :
515 : TV("SJIS", "\xF0\x40;"),
516 :
517 : TV("SJIS", "\xF0';"),
518 : TV("SJIS", "\xF0\";"),
519 : TV("SJIS", "\xF0\0'"),
520 : TV("SJIS", "\\\xF0\\';"),
521 : TV("SJIS", "\\\xF0\\\";"),
522 :
523 : TV("gbk", "\x80';"),
524 : TV("gbk", "\x80"),
525 : TV("gbk", "\x80'"),
526 : TV("gbk", "\x80\""),
527 : TV("gbk", "\x80\\"),
528 :
529 : TV("mule_internal", "\\\x9c';\0;"),
530 :
531 : TV("sql_ascii", "1\xC0'"),
532 :
533 : /*
534 : * Testcases that are not null terminated for the specified input length.
535 : * That's interesting to verify that escape functions don't read beyond
536 : * the intended input length.
537 : *
538 : * One interesting special case is GB18030, which has the odd behaviour
539 : * needing to read beyond the first byte to determine the length of a
540 : * multi-byte character.
541 : */
542 : TV_LEN("gbk", "\x80", 1),
543 : TV_LEN("GB18030", "\x80", 1),
544 : TV_LEN("GB18030", "\x80\0", 2),
545 : TV_LEN("GB18030", "\x80\x30", 2),
546 : TV_LEN("GB18030", "\x80\x30\0", 3),
547 : TV_LEN("GB18030", "\x80\x30\x30", 3),
548 : TV_LEN("GB18030", "\x80\x30\x30\0", 4),
549 : TV_LEN("UTF-8", "\xC3\xb6 ", 1),
550 : TV_LEN("UTF-8", "\xC3\xb6 ", 2),
551 : };
552 :
553 :
554 : static const char *
555 636 : scan_res_s(PsqlScanResult res)
556 : {
557 : #define TOSTR_CASE(sym) case sym: return #sym
558 :
559 636 : switch (res)
560 : {
561 0 : TOSTR_CASE(PSCAN_SEMICOLON);
562 0 : TOSTR_CASE(PSCAN_BACKSLASH);
563 0 : TOSTR_CASE(PSCAN_INCOMPLETE);
564 636 : TOSTR_CASE(PSCAN_EOL);
565 : }
566 :
567 0 : pg_unreachable();
568 : return ""; /* silence compiler */
569 : }
570 :
571 : /*
572 : * Verify that psql parses the input as a single statement. If this property
573 : * is violated, the escape function does not effectively protect against
574 : * smuggling in a second statement.
575 : */
576 : static void
577 636 : test_psql_parse(pe_test_config *tc, PQExpBuffer testname,
578 : PQExpBuffer input_buf, PQExpBuffer details)
579 : {
580 : PsqlScanState scan_state;
581 : PsqlScanResult scan_result;
582 : PQExpBuffer query_buf;
583 636 : promptStatus_t prompt_status = PROMPT_READY;
584 636 : int matches = 0;
585 : bool test_fails;
586 : const char *resdesc;
587 :
588 636 : query_buf = createPQExpBuffer();
589 :
590 636 : scan_state = psql_scan_create(&test_scan_callbacks);
591 :
592 : /*
593 : * TODO: This hardcodes standard conforming strings, it would be useful to
594 : * test without as well.
595 : */
596 636 : psql_scan_setup(scan_state, input_buf->data, input_buf->len,
597 636 : PQclientEncoding(tc->conn), 1);
598 :
599 : do
600 : {
601 636 : resetPQExpBuffer(query_buf);
602 :
603 636 : scan_result = psql_scan(scan_state, query_buf,
604 : &prompt_status);
605 :
606 636 : appendPQExpBuffer(details,
607 : "#\t\t %d: scan_result: %s prompt: %u, query_buf: ",
608 : matches, scan_res_s(scan_result), prompt_status);
609 636 : escapify(details, query_buf->data, query_buf->len);
610 636 : appendPQExpBufferChar(details, '\n');
611 :
612 636 : matches++;
613 : }
614 636 : while (scan_result != PSCAN_INCOMPLETE && scan_result != PSCAN_EOL);
615 :
616 636 : psql_scan_destroy(scan_state);
617 636 : destroyPQExpBuffer(query_buf);
618 :
619 636 : test_fails = matches > 1 || scan_result != PSCAN_EOL;
620 :
621 636 : if (matches > 1)
622 0 : resdesc = "more than one match";
623 636 : else if (scan_result != PSCAN_EOL)
624 0 : resdesc = "unexpected end state";
625 : else
626 636 : resdesc = "ok";
627 :
628 636 : report_result(tc, !test_fails, testname->data, details->data,
629 : "psql parse",
630 636 : resdesc);
631 636 : }
632 :
633 : static void
634 910 : test_one_vector_escape(pe_test_config *tc, const pe_test_vector *tv, const pe_test_escape_func *ef)
635 : {
636 : PQExpBuffer testname;
637 : PQExpBuffer details;
638 : PQExpBuffer raw_buf;
639 : PQExpBuffer escape_buf;
640 : PQExpBuffer escape_err;
641 : size_t input_encoding_validlen;
642 : bool input_encoding_valid;
643 : size_t input_encoding0_validlen;
644 : bool input_encoding0_valid;
645 : bool escape_success;
646 : size_t escape_encoding_length;
647 : bool escape_encoding_valid;
648 :
649 910 : escape_err = createPQExpBuffer();
650 910 : testname = createPQExpBuffer();
651 910 : details = createPQExpBuffer();
652 910 : raw_buf = createPQExpBuffer();
653 910 : escape_buf = createPQExpBuffer();
654 :
655 1040 : if (ef->supports_only_ascii_overlap &&
656 130 : encoding_conflicts_ascii(PQclientEncoding(tc->conn)))
657 : {
658 58 : goto out;
659 : }
660 :
661 : /* name to describe the test */
662 852 : appendPQExpBufferChar(testname, '>');
663 852 : escapify(testname, tv->escape, tv->escape_len);
664 852 : appendPQExpBuffer(testname, "< - %s - %s",
665 : tv->client_encoding, ef->name);
666 :
667 : /* details to describe the test, to allow for debugging */
668 852 : appendPQExpBuffer(details, "#\t input: %zd bytes: ",
669 : tv->escape_len);
670 852 : escapify(details, tv->escape, tv->escape_len);
671 852 : appendPQExpBufferChar(details, '\n');
672 852 : appendPQExpBuffer(details, "#\t encoding: %s\n",
673 : tv->client_encoding);
674 :
675 :
676 : /* check encoding of input, to compare with after the test */
677 852 : input_encoding_validlen = pg_encoding_verifymbstr(PQclientEncoding(tc->conn),
678 : tv->escape,
679 852 : tv->escape_len);
680 852 : input_encoding_valid = input_encoding_validlen == tv->escape_len;
681 852 : appendPQExpBuffer(details, "#\t input encoding valid: %d\n",
682 : input_encoding_valid);
683 :
684 852 : input_encoding0_validlen = pg_encoding_verifymbstr(PQclientEncoding(tc->conn),
685 : tv->escape,
686 852 : strnlen(tv->escape, tv->escape_len));
687 852 : input_encoding0_valid = input_encoding0_validlen == strnlen(tv->escape, tv->escape_len);
688 852 : appendPQExpBuffer(details, "#\t input encoding valid till 0: %d\n",
689 : input_encoding0_valid);
690 :
691 852 : appendPQExpBuffer(details, "#\t escape func: %s\n",
692 : ef->name);
693 :
694 852 : if (!input_encoding_valid && ef->supports_only_valid
695 52 : && !tc->force_unsupported)
696 52 : goto out;
697 :
698 :
699 : /*
700 : * Put the to-be-escaped data into a buffer, so that we
701 : *
702 : * a) can mark memory beyond end of the string as inaccessible when using
703 : * valgrind
704 : *
705 : * b) can append extra data beyond the length passed to the escape
706 : * function, to verify that that data is not processed.
707 : *
708 : * TODO: Should we instead/additionally escape twice, once with unmodified
709 : * and once with appended input? That way we could compare the two.
710 : */
711 800 : appendBinaryPQExpBuffer(raw_buf, tv->escape, tv->escape_len);
712 :
713 800 : if (ef->supports_input_length)
714 : {
715 : /*
716 : * Append likely invalid string that does *not* contain a null byte
717 : * (which'd prevent some invalid accesses to later memory).
718 : */
719 540 : appendPQExpBufferStr(raw_buf, NEVER_ACCESS_STR);
720 :
721 : VALGRIND_MAKE_MEM_NOACCESS(&raw_buf->data[tv->escape_len],
722 : raw_buf->len - tv->escape_len);
723 : }
724 : else
725 : {
726 : /* append invalid string, after \0 */
727 260 : appendPQExpBufferChar(raw_buf, 0);
728 260 : appendPQExpBufferStr(raw_buf, NEVER_ACCESS_STR);
729 :
730 : VALGRIND_MAKE_MEM_NOACCESS(&raw_buf->data[tv->escape_len + 1],
731 : raw_buf->len - tv->escape_len - 1);
732 : }
733 :
734 : /* call the to-be-tested escape function */
735 800 : escape_success = ef->escape(tc->conn, escape_buf,
736 800 : raw_buf->data, tv->escape_len,
737 : escape_err);
738 800 : if (!escape_success)
739 : {
740 246 : appendPQExpBuffer(details, "#\t escape error: %s\n",
741 : escape_err->data);
742 : }
743 :
744 800 : if (escape_buf->len > 0)
745 : {
746 : bool contains_never;
747 :
748 636 : appendPQExpBuffer(details, "#\t escaped string: %zd bytes: ", escape_buf->len);
749 636 : escapify(details, escape_buf->data, escape_buf->len);
750 636 : appendPQExpBufferChar(details, '\n');
751 :
752 636 : escape_encoding_length = pg_encoding_verifymbstr(PQclientEncoding(tc->conn),
753 636 : escape_buf->data,
754 636 : escape_buf->len);
755 636 : escape_encoding_valid = escape_encoding_length == escape_buf->len;
756 :
757 636 : appendPQExpBuffer(details, "#\t escape encoding valid: %d\n",
758 : escape_encoding_valid);
759 :
760 : /*
761 : * Verify that no data beyond the end of the input is included in the
762 : * escaped string. It'd be better to use something like memmem()
763 : * here, but that's not available everywhere.
764 : */
765 636 : contains_never = strstr(escape_buf->data, NEVER_ACCESS_STR) == NULL;
766 636 : report_result(tc, contains_never, testname->data, details->data,
767 : "escaped data beyond end of input",
768 : contains_never ? "no" : "all secrets revealed");
769 : }
770 : else
771 : {
772 164 : escape_encoding_length = 0;
773 164 : escape_encoding_valid = 1;
774 : }
775 :
776 : /*
777 : * If the test reports errors, and the input was invalidly encoded,
778 : * escaping should fail. One edge-case that we accept for now is that the
779 : * input could have an embedded null byte, which the escape functions will
780 : * just treat as a shorter string. If the encoding error is after the zero
781 : * byte, the output thus won't contain it.
782 : */
783 800 : if (ef->reports_errors)
784 : {
785 390 : bool ok = true;
786 390 : const char *resdesc = "ok";
787 :
788 390 : if (escape_success)
789 : {
790 144 : if (!input_encoding0_valid)
791 : {
792 0 : ok = false;
793 0 : resdesc = "invalid input escaped successfully";
794 : }
795 144 : else if (!input_encoding_valid)
796 18 : resdesc = "invalid input escaped successfully, due to zero byte";
797 : }
798 : else
799 : {
800 246 : if (input_encoding0_valid)
801 : {
802 0 : ok = false;
803 0 : resdesc = "valid input failed to escape";
804 : }
805 246 : else if (input_encoding_valid)
806 0 : resdesc = "valid input failed to escape, due to zero byte";
807 : }
808 :
809 390 : report_result(tc, ok, testname->data, details->data,
810 : "input validity vs escape success",
811 : resdesc);
812 : }
813 :
814 : /*
815 : * If the input is invalidly encoded, the output should also be invalidly
816 : * encoded. We accept the same zero-byte edge case as above.
817 : */
818 : {
819 800 : bool ok = true;
820 800 : const char *resdesc = "ok";
821 :
822 800 : if (input_encoding0_valid && !input_encoding_valid && escape_encoding_valid)
823 : {
824 36 : resdesc = "invalid input produced valid output, due to zero byte";
825 : }
826 764 : else if (input_encoding0_valid && !escape_encoding_valid)
827 : {
828 0 : ok = false;
829 0 : resdesc = "valid input produced invalid output";
830 : }
831 764 : else if (!input_encoding0_valid &&
832 492 : (!ef->reports_errors || escape_success) &&
833 : escape_encoding_valid)
834 : {
835 0 : ok = false;
836 0 : resdesc = "invalid input produced valid output";
837 : }
838 :
839 800 : report_result(tc, ok, testname->data, details->data,
840 : "input and escaped encoding validity",
841 : resdesc);
842 : }
843 :
844 : /*
845 : * Test psql parsing whenever we get any string back, even if the escape
846 : * function returned a failure.
847 : */
848 800 : if (escape_buf->len > 0)
849 : {
850 636 : test_psql_parse(tc, testname,
851 : escape_buf, details);
852 : }
853 :
854 164 : out:
855 910 : destroyPQExpBuffer(escape_err);
856 910 : destroyPQExpBuffer(details);
857 910 : destroyPQExpBuffer(testname);
858 910 : destroyPQExpBuffer(escape_buf);
859 910 : destroyPQExpBuffer(raw_buf);
860 910 : }
861 :
862 : static void
863 130 : test_one_vector(pe_test_config *tc, const pe_test_vector *tv)
864 : {
865 130 : if (PQsetClientEncoding(tc->conn, tv->client_encoding))
866 : {
867 0 : fprintf(stderr, "failed to set encoding to %s:\n%s\n",
868 0 : tv->client_encoding, PQerrorMessage(tc->conn));
869 0 : exit(1);
870 : }
871 :
872 1040 : for (int escoff = 0; escoff < lengthof(pe_test_escape_funcs); escoff++)
873 : {
874 910 : const pe_test_escape_func *ef = &pe_test_escape_funcs[escoff];
875 :
876 910 : test_one_vector_escape(tc, tv, ef);
877 : }
878 130 : }
879 :
/*
 * Print the help text to stdout.
 *
 * If hint is not NULL, it is first printed to stderr as an error message,
 * and the program exits with status 1 after the help text was printed.
 * With a NULL hint the function returns normally.
 */
static void
usage(const char *hint)
{
	static const char help_text[] =
		"PostgreSQL escape function test\n"
		"\n"
		"Usage:\n"
		" test_escape --conninfo=CONNINFO [OPTIONS]\n"
		"\n"
		"Options:\n"
		" -h, --help show this help\n"
		" -c, --conninfo=CONNINFO connection information to use\n"
		" -v, --verbose show test details even for successes\n"
		" -q, --quiet only show failures\n"
		" -f, --force-unsupported test invalid input even if unsupported\n";

	if (hint != NULL)
		fprintf(stderr, "Error: %s\n\n", hint);

	fputs(help_text, stdout);

	if (hint != NULL)
		exit(1);
}
902 :
903 : int
904 2 : main(int argc, char *argv[])
905 : {
906 2 : pe_test_config tc = {0};
907 : int c;
908 : int option_index;
909 :
910 : static const struct option long_options[] = {
911 : {"help", no_argument, NULL, 'h'},
912 : {"conninfo", required_argument, NULL, 'c'},
913 : {"verbose", no_argument, NULL, 'v'},
914 : {"quiet", no_argument, NULL, 'q'},
915 : {"force-unsupported", no_argument, NULL, 'f'},
916 : {NULL, 0, NULL, 0},
917 : };
918 :
919 6 : while ((c = getopt_long(argc, argv, "c:fhqv", long_options, &option_index)) != -1)
920 : {
921 2 : switch (c)
922 : {
923 0 : case 'h':
924 0 : usage(NULL);
925 0 : exit(0);
926 : break;
927 2 : case 'c':
928 2 : tc.conninfo = optarg;
929 2 : break;
930 0 : case 'v':
931 0 : tc.verbosity++;
932 0 : break;
933 0 : case 'q':
934 0 : tc.verbosity--;
935 0 : break;
936 0 : case 'f':
937 0 : tc.force_unsupported = true;
938 0 : break;
939 : }
940 4 : }
941 :
942 2 : if (argc - optind >= 1)
943 0 : usage("unused option(s) specified");
944 :
945 2 : if (tc.conninfo == NULL)
946 0 : usage("--conninfo needs to be specified");
947 :
948 2 : tc.conn = PQconnectdb(tc.conninfo);
949 :
950 2 : if (!tc.conn || PQstatus(tc.conn) != CONNECTION_OK)
951 : {
952 0 : fprintf(stderr, "could not connect: %s\n",
953 0 : PQerrorMessage(tc.conn));
954 0 : exit(1);
955 : }
956 :
957 2 : test_gb18030_page_multiple(&tc);
958 2 : test_gb18030_json(&tc);
959 :
960 132 : for (int i = 0; i < lengthof(pe_test_vectors); i++)
961 : {
962 130 : test_one_vector(&tc, &pe_test_vectors[i]);
963 : }
964 :
965 2 : PQfinish(tc.conn);
966 :
967 2 : printf("# %d failures\n", tc.failure_count);
968 2 : printf("1..%d\n", tc.test_count);
969 2 : return tc.failure_count > 0;
970 : }
|