Line data Source code
1 : /*
2 : * test_escape.c Test escape functions
3 : *
4 : * Copyright (c) 2022-2026, PostgreSQL Global Development Group
5 : *
6 : * IDENTIFICATION
7 : * src/test/modules/test_escape/test_escape.c
8 : */
9 :
10 : #include "postgres_fe.h"
11 :
12 : #include <string.h>
13 : #include <stdio.h>
14 :
15 : #include "common/jsonapi.h"
16 : #include "fe_utils/psqlscan.h"
17 : #include "fe_utils/string_utils.h"
18 : #include "getopt_long.h"
19 : #include "libpq-fe.h"
20 : #include "mb/pg_wchar.h"
21 : #include "utils/memdebug.h"
22 :
23 :
24 : typedef struct pe_test_config
25 : {
26 : int verbosity;
27 : bool force_unsupported;
28 : const char *conninfo;
29 : PGconn *conn;
30 :
31 : int test_count;
32 : int failure_count;
33 : } pe_test_config;
34 :
/*
 * Marker placed behind the input handed to the code under test.  It must
 * never be read or copied into the output; the tests look for it in escaped
 * strings to detect reads past the end of the input.
 */
#define NEVER_ACCESS_STR "\xff never-to-be-touched"
36 :
37 :
38 : /*
39 : * An escape function to be tested by this test.
40 : */
41 : typedef struct pe_test_escape_func
42 : {
43 : const char *name;
44 :
45 : /*
46 : * Can the escape method report errors? If so, we validate that it does in
47 : * case of various invalid inputs.
48 : */
49 : bool reports_errors;
50 :
51 : /*
52 : * Is the escape method known to not handle invalidly encoded input? If
53 : * so, we don't run the test unless --force-unsupported is used.
54 : */
55 : bool supports_only_valid;
56 :
57 : /*
58 : * Is the escape method known to only handle encodings where no byte in a
59 : * multi-byte characters are valid ascii.
60 : */
61 : bool supports_only_ascii_overlap;
62 :
63 : /*
64 : * Does the escape function have a length input?
65 : */
66 : bool supports_input_length;
67 :
68 : bool (*escape) (PGconn *conn, PQExpBuffer target,
69 : const char *unescaped, size_t unescaped_len,
70 : PQExpBuffer escape_err);
71 : } pe_test_escape_func;
72 :
/*
 * One input that is run through every escape function.
 */
typedef struct pe_test_vector
{
	/* encoding name handed to PQsetClientEncoding() */
	const char *client_encoding;
	/* number of bytes of "escape" that make up the input */
	size_t		escape_len;
	/* the raw input bytes; may contain embedded NUL bytes */
	const char *escape;
} pe_test_vector;
82 :
83 :
84 : /*
85 : * Callback functions from flex lexer. Not currently used by the test.
86 : */
87 : static const PsqlScanCallbacks test_scan_callbacks = {
88 : NULL
89 : };
90 :
91 :
92 : /*
93 : * Print the string into buf, making characters outside of plain ascii
94 : * somewhat easier to recognize.
95 : *
96 : * The output format could stand to be improved significantly, it's not at all
97 : * unambiguous.
98 : */
99 : static void
100 1468 : escapify(PQExpBuffer buf, const char *str, size_t len)
101 : {
102 8398 : for (size_t i = 0; i < len; i++)
103 : {
104 6930 : char c = *str;
105 :
106 6930 : if (c == '\n')
107 0 : appendPQExpBufferStr(buf, "\\n");
108 6930 : else if (c == '\0')
109 166 : appendPQExpBufferStr(buf, "\\0");
110 6764 : else if (c < ' ' || c > '~')
111 1220 : appendPQExpBuffer(buf, "\\x%2x", (uint8_t) c);
112 : else
113 5544 : appendPQExpBufferChar(buf, c);
114 6930 : str++;
115 : }
116 1468 : }
117 :
118 : static void
119 1216 : report_result(pe_test_config *tc,
120 : bool success,
121 : const char *testname,
122 : const char *details,
123 : const char *subname,
124 : const char *resultdesc)
125 : {
126 1216 : int test_id = ++tc->test_count;
127 1216 : bool print_details = true;
128 1216 : bool print_result = true;
129 :
130 1216 : if (success)
131 : {
132 1216 : if (tc->verbosity <= 0)
133 1216 : print_details = false;
134 1216 : if (tc->verbosity < 0)
135 0 : print_result = false;
136 : }
137 : else
138 0 : tc->failure_count++;
139 :
140 1216 : if (print_details)
141 0 : printf("%s", details);
142 :
143 1216 : if (print_result)
144 1216 : printf("%s %d - %s: %s: %s\n",
145 : success ? "ok" : "not ok",
146 : test_id, testname,
147 : subname,
148 : resultdesc);
149 1216 : }
150 :
151 : /*
152 : * Return true for encodings in which bytes in a multi-byte character look
153 : * like valid ascii characters.
154 : */
155 : static bool
156 64 : encoding_conflicts_ascii(int encoding)
157 : {
158 : /*
159 : * We don't store this property directly anywhere, but whether an encoding
160 : * is a client-only encoding is a good proxy.
161 : */
162 64 : if (encoding > PG_ENCODING_BE_LAST)
163 29 : return true;
164 35 : return false;
165 : }
166 :
167 :
168 : /*
169 : * Confirm escaping doesn't read past the end of an allocation. Consider the
170 : * result of malloc(4096), in the absence of freelist entries satisfying the
171 : * allocation. On OpenBSD, reading one byte past the end of that object
172 : * yields SIGSEGV.
173 : *
174 : * Run this test before the program's other tests, so freelists are minimal.
175 : * len=4096 didn't SIGSEGV, likely due to free() calls in libpq. len=8192
176 : * did. Use 128 KiB, to somewhat insulate the outcome from distant new free()
177 : * calls and libc changes.
178 : */
179 : static void
180 1 : test_gb18030_page_multiple(pe_test_config *tc)
181 : {
182 : PQExpBuffer testname;
183 1 : size_t input_len = 0x20000;
184 : char *input;
185 :
186 : /* prepare input */
187 1 : input = pg_malloc(input_len);
188 1 : memset(input, '-', input_len - 1);
189 1 : input[input_len - 1] = 0xfe;
190 :
191 : /* name to describe the test */
192 1 : testname = createPQExpBuffer();
193 1 : appendPQExpBuffer(testname, ">repeat(%c, %zu)", input[0], input_len - 1);
194 1 : escapify(testname, input + input_len - 1, 1);
195 1 : appendPQExpBufferStr(testname, "< - GB18030 - PQescapeLiteral");
196 :
197 : /* test itself */
198 1 : PQsetClientEncoding(tc->conn, "GB18030");
199 1 : report_result(tc, PQescapeLiteral(tc->conn, input, input_len) == NULL,
200 1 : testname->data, "",
201 : "input validity vs escape success", "ok");
202 :
203 1 : destroyPQExpBuffer(testname);
204 1 : pg_free(input);
205 1 : }
206 :
207 : /*
208 : * Confirm json parsing doesn't read past the end of an allocation. This
209 : * exercises wchar.c infrastructure like the true "escape" tests do, but this
210 : * isn't an "escape" test.
211 : */
212 : static void
213 1 : test_gb18030_json(pe_test_config *tc)
214 : {
215 : PQExpBuffer raw_buf;
216 : PQExpBuffer testname;
217 1 : const char input[] = "{\"\\u\xFE";
218 1 : size_t input_len = sizeof(input) - 1;
219 : JsonLexContext *lex;
220 1 : JsonSemAction sem = {0}; /* no callbacks */
221 : JsonParseErrorType json_error;
222 :
223 : /* prepare input like test_one_vector_escape() does */
224 1 : raw_buf = createPQExpBuffer();
225 1 : appendBinaryPQExpBuffer(raw_buf, input, input_len);
226 1 : appendPQExpBufferStr(raw_buf, NEVER_ACCESS_STR);
227 : VALGRIND_MAKE_MEM_NOACCESS(&raw_buf->data[input_len],
228 : raw_buf->len - input_len);
229 :
230 : /* name to describe the test */
231 1 : testname = createPQExpBuffer();
232 1 : appendPQExpBufferChar(testname, '>');
233 1 : escapify(testname, input, input_len);
234 1 : appendPQExpBufferStr(testname, "< - GB18030 - pg_parse_json");
235 :
236 : /* test itself */
237 1 : lex = makeJsonLexContextCstringLen(NULL, raw_buf->data, input_len,
238 : PG_GB18030, false);
239 1 : json_error = pg_parse_json(lex, &sem);
240 1 : report_result(tc, json_error == JSON_UNICODE_ESCAPE_FORMAT,
241 1 : testname->data, "",
242 1 : "diagnosed", json_errdetail(json_error, lex));
243 :
244 1 : freeJsonLexContext(lex);
245 1 : destroyPQExpBuffer(testname);
246 1 : destroyPQExpBuffer(raw_buf);
247 1 : }
248 :
249 :
250 : static bool
251 64 : escape_literal(PGconn *conn, PQExpBuffer target,
252 : const char *unescaped, size_t unescaped_len,
253 : PQExpBuffer escape_err)
254 : {
255 : char *escaped;
256 :
257 64 : escaped = PQescapeLiteral(conn, unescaped, unescaped_len);
258 64 : if (!escaped)
259 : {
260 40 : appendPQExpBufferStr(escape_err, PQerrorMessage(conn));
261 40 : escape_err->data[escape_err->len - 1] = 0;
262 40 : escape_err->len--;
263 40 : return false;
264 : }
265 : else
266 : {
267 24 : appendPQExpBufferStr(target, escaped);
268 24 : PQfreemem(escaped);
269 24 : return true;
270 : }
271 : }
272 :
273 : static bool
274 64 : escape_identifier(PGconn *conn, PQExpBuffer target,
275 : const char *unescaped, size_t unescaped_len,
276 : PQExpBuffer escape_err)
277 : {
278 : char *escaped;
279 :
280 64 : escaped = PQescapeIdentifier(conn, unescaped, unescaped_len);
281 64 : if (!escaped)
282 : {
283 40 : appendPQExpBufferStr(escape_err, PQerrorMessage(conn));
284 40 : escape_err->data[escape_err->len - 1] = 0;
285 40 : escape_err->len--;
286 40 : return false;
287 : }
288 : else
289 : {
290 24 : appendPQExpBufferStr(target, escaped);
291 24 : PQfreemem(escaped);
292 24 : return true;
293 : }
294 : }
295 :
296 : static bool
297 64 : escape_string_conn(PGconn *conn, PQExpBuffer target,
298 : const char *unescaped, size_t unescaped_len,
299 : PQExpBuffer escape_err)
300 : {
301 : int error;
302 : size_t sz;
303 :
304 64 : appendPQExpBufferChar(target, '\'');
305 64 : enlargePQExpBuffer(target, unescaped_len * 2 + 1);
306 64 : sz = PQescapeStringConn(conn, target->data + target->len,
307 : unescaped, unescaped_len,
308 : &error);
309 :
310 64 : target->len += sz;
311 64 : appendPQExpBufferChar(target, '\'');
312 :
313 64 : if (error)
314 : {
315 40 : appendPQExpBufferStr(escape_err, PQerrorMessage(conn));
316 40 : escape_err->data[escape_err->len - 1] = 0;
317 40 : escape_err->len--;
318 40 : return false;
319 : }
320 : else
321 : {
322 24 : return true;
323 : }
324 : }
325 :
326 : static bool
327 64 : escape_string(PGconn *conn, PQExpBuffer target,
328 : const char *unescaped, size_t unescaped_len,
329 : PQExpBuffer escape_err)
330 : {
331 : size_t sz;
332 :
333 64 : appendPQExpBufferChar(target, '\'');
334 64 : enlargePQExpBuffer(target, unescaped_len * 2 + 1);
335 64 : sz = PQescapeString(target->data + target->len,
336 : unescaped, unescaped_len);
337 64 : target->len += sz;
338 64 : appendPQExpBufferChar(target, '\'');
339 :
340 :
341 64 : return true;
342 : }
343 :
344 : /*
345 : * Escape via s/'/''/. Non-core drivers invariably wrap libpq or use this
346 : * method. It suffices iff the input passes encoding validation, so it's
347 : * marked as supports_only_valid.
348 : */
349 : static bool
350 10 : escape_replace(PGconn *conn, PQExpBuffer target,
351 : const char *unescaped, size_t unescaped_len,
352 : PQExpBuffer escape_err)
353 : {
354 10 : const char *s = unescaped;
355 :
356 10 : appendPQExpBufferChar(target, '\'');
357 :
358 25 : for (int i = 0; i < unescaped_len; i++)
359 : {
360 15 : char c = *s;
361 :
362 15 : if (c == '\'')
363 : {
364 4 : appendPQExpBufferStr(target, "''");
365 : }
366 : else
367 11 : appendPQExpBufferChar(target, c);
368 15 : s++;
369 : }
370 10 : appendPQExpBufferChar(target, '\'');
371 :
372 10 : return true;
373 : }
374 :
375 : static bool
376 64 : escape_append_literal(PGconn *conn, PQExpBuffer target,
377 : const char *unescaped, size_t unescaped_len,
378 : PQExpBuffer escape_err)
379 : {
380 64 : appendStringLiteral(target, unescaped, PQclientEncoding(conn), 1);
381 :
382 64 : return true;
383 : }
384 :
385 : static bool
386 64 : escape_fmt_id(PGconn *conn, PQExpBuffer target,
387 : const char *unescaped, size_t unescaped_len,
388 : PQExpBuffer escape_err)
389 : {
390 64 : setFmtEncoding(PQclientEncoding(conn));
391 64 : appendPQExpBufferStr(target, fmtId(unescaped));
392 :
393 64 : return true;
394 : }
395 :
396 : static pe_test_escape_func pe_test_escape_funcs[] =
397 : {
398 : {
399 : .name = "PQescapeLiteral",
400 : .reports_errors = true,
401 : .supports_input_length = true,
402 : .escape = escape_literal,
403 : },
404 : {
405 : .name = "PQescapeIdentifier",
406 : .reports_errors = true,
407 : .supports_input_length = true,
408 : .escape = escape_identifier
409 : },
410 : {
411 : .name = "PQescapeStringConn",
412 : .reports_errors = true,
413 : .supports_input_length = true,
414 : .escape = escape_string_conn
415 : },
416 : {
417 : .name = "PQescapeString",
418 : .reports_errors = false,
419 : .supports_input_length = true,
420 : .escape = escape_string
421 : },
422 : {
423 : .name = "replace",
424 : .reports_errors = false,
425 : .supports_only_valid = true,
426 : .supports_only_ascii_overlap = true,
427 : .supports_input_length = true,
428 : .escape = escape_replace
429 : },
430 : {
431 : .name = "appendStringLiteral",
432 : .reports_errors = false,
433 : .escape = escape_append_literal
434 : },
435 : {
436 : .name = "fmtId",
437 : .reports_errors = false,
438 : .escape = escape_fmt_id
439 : },
440 : };
441 :
442 :
/*
 * Helpers to build pe_test_vector entries.  TV() takes the input length from
 * the string literal (sizeof minus the terminating NUL, so embedded NUL
 * bytes count).  TV_LEN() takes an explicit length, allowing an input that
 * is shorter than the literal.
 */
443 : #define TV(enc, string) {.client_encoding = (enc), .escape=string, .escape_len=sizeof(string) - 1, }
444 : #define TV_LEN(enc, string, len) {.client_encoding = (enc), .escape=string, .escape_len=len, }
/*
 * The inputs run through every escape function, in order.
 *
 * NOTE(review): several entries below look identical in this listing (e.g.
 * the repeated "1\xF0 " vectors).  Presumably they differ in the number of
 * spaces inside the literal and the listing collapsed them -- confirm
 * against the original file before editing any literal here.
 */
445 : static pe_test_vector pe_test_vectors[] =
446 : {
447 : /* expected to work sanity checks */
448 : TV("UTF-8", "1"),
449 : TV("UTF-8", "'"),
450 : TV("UTF-8", "\""),
451 :
452 : TV("UTF-8", "\'"),
453 : TV("UTF-8", "\""),
454 :
455 : TV("UTF-8", "\\"),
456 :
457 : TV("UTF-8", "\\'"),
458 : TV("UTF-8", "\\\""),
459 :
460 : /* trailing multi-byte character, paddable in available space */
461 : TV("UTF-8", "1\xC0"),
462 : TV("UTF-8", "1\xE0 "),
463 : TV("UTF-8", "1\xF0 "),
464 : TV("UTF-8", "1\xF0 "),
465 : TV("UTF-8", "1\xF0 "),
466 :
467 : /* trailing multi-byte character, not enough space to pad */
468 : TV("UTF-8", "1\xE0"),
469 : TV("UTF-8", "1\xF0"),
470 : TV("UTF-8", "\xF0"),
471 :
472 : /* try to smuggle in something in invalid characters */
473 : TV("UTF-8", "1\xE0'"),
474 : TV("UTF-8", "1\xE0\""),
475 : TV("UTF-8", "1\xF0'"),
476 : TV("UTF-8", "1\xF0\""),
477 : TV("UTF-8", "1\xF0'; "),
478 : TV("UTF-8", "1\xF0\"; "),
479 : TV("UTF-8", "1\xF0';;;;"),
480 : TV("UTF-8", "1\xF0 ';;;;"),
481 : TV("UTF-8", "1\xF0 \";;;;"),
482 : TV("UTF-8", "1\xE0'; \\l ; "),
483 : TV("UTF-8", "1\xE0\"; \\l ; "),
484 :
485 : /* null byte handling */
486 : TV("UTF-8", "some\0thing"),
487 : TV("UTF-8", "some\0"),
488 : TV("UTF-8", "some\xF0'\0"),
489 : TV("UTF-8", "some\xF0'\0'"),
490 : TV("UTF-8", "some\xF0" "ab\0'"),
491 :
492 : /* GB18030's 4 byte encoding requires the 2nd byte to be in a limited range */
493 : TV("GB18030", "\x90\x31"),
494 : TV("GB18030", "\\\x81\x5c'"),
495 : TV("GB18030", "\\\x81\x5c\""),
496 : TV("GB18030", "\\\x81\x5c\0'"),
497 :
498 : /*
499 : * \x81 indicates a 2 byte char. ' and " are not a valid second byte, but
500 : * that requires encoding verification to know. E.g. the s/'/''/ approach
501 : * of escape_replace() doesn't cope.
502 : */
503 : TV("GB18030", "\\\x81';"),
504 : TV("GB18030", "\\\x81\";"),
505 :
506 : /*
507 : * \x81 indicates a 2 byte char. \ is a valid second character.
508 : */
509 : TV("GB18030", "\\\x81\\';"),
510 : TV("GB18030", "\\\x81\\\";"),
511 : TV("GB18030", "\\\x81\0;"),
512 : TV("GB18030", "\\\x81\0'"),
513 : TV("GB18030", "\\\x81'\0"),
514 :
515 : TV("SJIS", "\xF0\x40;"),
516 :
517 : TV("SJIS", "\xF0';"),
518 : TV("SJIS", "\xF0\";"),
519 : TV("SJIS", "\xF0\0'"),
520 : TV("SJIS", "\\\xF0\\';"),
521 : TV("SJIS", "\\\xF0\\\";"),
522 :
523 : TV("gbk", "\x80';"),
524 : TV("gbk", "\x80"),
525 : TV("gbk", "\x80'"),
526 : TV("gbk", "\x80\""),
527 : TV("gbk", "\x80\\"),
528 :
529 : TV("sql_ascii", "1\xC0'"),
530 :
531 : /*
532 : * Testcases that are not null terminated for the specified input length.
533 : * That's interesting to verify that escape functions don't read beyond
534 : * the intended input length.
535 : *
536 : * One interesting special case is GB18030, which has the odd behaviour
537 : * of needing to read beyond the first byte to determine the length of a
538 : * multi-byte character.
539 : */
540 : TV_LEN("gbk", "\x80", 1),
541 : TV_LEN("GB18030", "\x80", 1),
542 : TV_LEN("GB18030", "\x80\0", 2),
543 : TV_LEN("GB18030", "\x80\x30", 2),
544 : TV_LEN("GB18030", "\x80\x30\0", 3),
545 : TV_LEN("GB18030", "\x80\x30\x30", 3),
546 : TV_LEN("GB18030", "\x80\x30\x30\0", 4),
547 : TV_LEN("UTF-8", "\xC3\xb6 ", 1),
548 : TV_LEN("UTF-8", "\xC3\xb6 ", 2),
549 : };
550 :
551 :
552 : static const char *
553 314 : scan_res_s(PsqlScanResult res)
554 : {
555 : #define TOSTR_CASE(sym) case sym: return #sym
556 :
557 314 : switch (res)
558 : {
559 0 : TOSTR_CASE(PSCAN_SEMICOLON);
560 0 : TOSTR_CASE(PSCAN_BACKSLASH);
561 0 : TOSTR_CASE(PSCAN_INCOMPLETE);
562 314 : TOSTR_CASE(PSCAN_EOL);
563 : }
564 :
565 0 : pg_unreachable();
566 : return ""; /* silence compiler */
567 : }
568 :
569 : /*
570 : * Verify that psql parses the input as a single statement. If this property
571 : * is violated, the escape function does not effectively protect against
572 : * smuggling in a second statement.
573 : */
574 : static void
575 314 : test_psql_parse(pe_test_config *tc, PQExpBuffer testname,
576 : PQExpBuffer input_buf, PQExpBuffer details)
577 : {
578 : PsqlScanState scan_state;
579 : PsqlScanResult scan_result;
580 : PQExpBuffer query_buf;
581 314 : promptStatus_t prompt_status = PROMPT_READY;
582 314 : int matches = 0;
583 : bool test_fails;
584 : const char *resdesc;
585 :
586 314 : query_buf = createPQExpBuffer();
587 :
588 314 : scan_state = psql_scan_create(&test_scan_callbacks);
589 :
590 : /*
591 : * TODO: This hardcodes standard conforming strings, it would be useful to
592 : * test without as well.
593 : */
594 314 : psql_scan_setup(scan_state, input_buf->data, input_buf->len,
595 314 : PQclientEncoding(tc->conn), 1);
596 :
597 : do
598 : {
599 314 : resetPQExpBuffer(query_buf);
600 :
601 314 : scan_result = psql_scan(scan_state, query_buf,
602 : &prompt_status);
603 :
604 314 : appendPQExpBuffer(details,
605 : "#\t\t %d: scan_result: %s prompt: %u, query_buf: ",
606 : matches, scan_res_s(scan_result), prompt_status);
607 314 : escapify(details, query_buf->data, query_buf->len);
608 314 : appendPQExpBufferChar(details, '\n');
609 :
610 314 : matches++;
611 : }
612 314 : while (scan_result != PSCAN_INCOMPLETE && scan_result != PSCAN_EOL);
613 :
614 314 : psql_scan_destroy(scan_state);
615 314 : destroyPQExpBuffer(query_buf);
616 :
617 314 : test_fails = matches > 1 || scan_result != PSCAN_EOL;
618 :
619 314 : if (matches > 1)
620 0 : resdesc = "more than one match";
621 314 : else if (scan_result != PSCAN_EOL)
622 0 : resdesc = "unexpected end state";
623 : else
624 314 : resdesc = "ok";
625 :
626 314 : report_result(tc, !test_fails, testname->data, details->data,
627 : "psql parse",
628 314 : resdesc);
629 314 : }
630 :
631 : static void
632 448 : test_one_vector_escape(pe_test_config *tc, const pe_test_vector *tv, const pe_test_escape_func *ef)
633 : {
634 : PQExpBuffer testname;
635 : PQExpBuffer details;
636 : PQExpBuffer raw_buf;
637 : PQExpBuffer escape_buf;
638 : PQExpBuffer escape_err;
639 : size_t input_encoding_validlen;
640 : bool input_encoding_valid;
641 : size_t input_encoding0_validlen;
642 : bool input_encoding0_valid;
643 : bool escape_success;
644 : size_t escape_encoding_length;
645 : bool escape_encoding_valid;
646 :
647 448 : escape_err = createPQExpBuffer();
648 448 : testname = createPQExpBuffer();
649 448 : details = createPQExpBuffer();
650 448 : raw_buf = createPQExpBuffer();
651 448 : escape_buf = createPQExpBuffer();
652 :
653 512 : if (ef->supports_only_ascii_overlap &&
654 64 : encoding_conflicts_ascii(PQclientEncoding(tc->conn)))
655 : {
656 29 : goto out;
657 : }
658 :
659 : /* name to describe the test */
660 419 : appendPQExpBufferChar(testname, '>');
661 419 : escapify(testname, tv->escape, tv->escape_len);
662 419 : appendPQExpBuffer(testname, "< - %s - %s",
663 419 : tv->client_encoding, ef->name);
664 :
665 : /* details to describe the test, to allow for debugging */
666 419 : appendPQExpBuffer(details, "#\t input: %zd bytes: ",
667 419 : tv->escape_len);
668 419 : escapify(details, tv->escape, tv->escape_len);
669 419 : appendPQExpBufferChar(details, '\n');
670 419 : appendPQExpBuffer(details, "#\t encoding: %s\n",
671 419 : tv->client_encoding);
672 :
673 :
674 : /* check encoding of input, to compare with after the test */
675 419 : input_encoding_validlen = pg_encoding_verifymbstr(PQclientEncoding(tc->conn),
676 419 : tv->escape,
677 419 : tv->escape_len);
678 419 : input_encoding_valid = input_encoding_validlen == tv->escape_len;
679 419 : appendPQExpBuffer(details, "#\t input encoding valid: %d\n",
680 : input_encoding_valid);
681 :
682 419 : input_encoding0_validlen = pg_encoding_verifymbstr(PQclientEncoding(tc->conn),
683 419 : tv->escape,
684 419 : strnlen(tv->escape, tv->escape_len));
685 419 : input_encoding0_valid = input_encoding0_validlen == strnlen(tv->escape, tv->escape_len);
686 419 : appendPQExpBuffer(details, "#\t input encoding valid till 0: %d\n",
687 : input_encoding0_valid);
688 :
689 419 : appendPQExpBuffer(details, "#\t escape func: %s\n",
690 419 : ef->name);
691 :
692 419 : if (!input_encoding_valid && ef->supports_only_valid
693 25 : && !tc->force_unsupported)
694 25 : goto out;
695 :
696 :
697 : /*
698 : * Put the to-be-escaped data into a buffer, so that we
699 : *
700 : * a) can mark memory beyond end of the string as inaccessible when using
701 : * valgrind
702 : *
703 : * b) can append extra data beyond the length passed to the escape
704 : * function, to verify that that data is not processed.
705 : *
706 : * TODO: Should we instead/additionally escape twice, once with unmodified
707 : * and once with appended input? That way we could compare the two.
708 : */
709 394 : appendBinaryPQExpBuffer(raw_buf, tv->escape, tv->escape_len);
710 :
711 394 : if (ef->supports_input_length)
712 : {
713 : /*
714 : * Append likely invalid string that does *not* contain a null byte
715 : * (which'd prevent some invalid accesses to later memory).
716 : */
717 266 : appendPQExpBufferStr(raw_buf, NEVER_ACCESS_STR);
718 :
719 : VALGRIND_MAKE_MEM_NOACCESS(&raw_buf->data[tv->escape_len],
720 : raw_buf->len - tv->escape_len);
721 : }
722 : else
723 : {
724 : /* append invalid string, after \0 */
725 128 : appendPQExpBufferChar(raw_buf, 0);
726 128 : appendPQExpBufferStr(raw_buf, NEVER_ACCESS_STR);
727 :
728 : VALGRIND_MAKE_MEM_NOACCESS(&raw_buf->data[tv->escape_len + 1],
729 : raw_buf->len - tv->escape_len - 1);
730 : }
731 :
732 : /* call the to-be-tested escape function */
733 394 : escape_success = ef->escape(tc->conn, escape_buf,
734 394 : raw_buf->data, tv->escape_len,
735 : escape_err);
736 394 : if (!escape_success)
737 : {
738 120 : appendPQExpBuffer(details, "#\t escape error: %s\n",
739 : escape_err->data);
740 : }
741 :
742 394 : if (escape_buf->len > 0)
743 : {
744 : bool contains_never;
745 :
746 314 : appendPQExpBuffer(details, "#\t escaped string: %zd bytes: ", escape_buf->len);
747 314 : escapify(details, escape_buf->data, escape_buf->len);
748 314 : appendPQExpBufferChar(details, '\n');
749 :
750 314 : escape_encoding_length = pg_encoding_verifymbstr(PQclientEncoding(tc->conn),
751 314 : escape_buf->data,
752 314 : escape_buf->len);
753 314 : escape_encoding_valid = escape_encoding_length == escape_buf->len;
754 :
755 314 : appendPQExpBuffer(details, "#\t escape encoding valid: %d\n",
756 : escape_encoding_valid);
757 :
758 : /*
759 : * Verify that no data beyond the end of the input is included in the
760 : * escaped string. It'd be better to use something like memmem()
761 : * here, but that's not available everywhere.
762 : */
763 314 : contains_never = strstr(escape_buf->data, NEVER_ACCESS_STR) == NULL;
764 314 : report_result(tc, contains_never, testname->data, details->data,
765 : "escaped data beyond end of input",
766 : contains_never ? "no" : "all secrets revealed");
767 : }
768 : else
769 : {
770 80 : escape_encoding_length = 0;
771 80 : escape_encoding_valid = 1;
772 : }
773 :
774 : /*
775 : * If the test reports errors, and the input was invalidly encoded,
776 : * escaping should fail. One edge-case that we accept for now is that the
777 : * input could have an embedded null byte, which the escape functions will
778 : * just treat as a shorter string. If the encoding error is after the zero
779 : * byte, the output thus won't contain it.
780 : */
781 394 : if (ef->reports_errors)
782 : {
783 192 : bool ok = true;
784 192 : const char *resdesc = "ok";
785 :
786 192 : if (escape_success)
787 : {
788 72 : if (!input_encoding0_valid)
789 : {
790 0 : ok = false;
791 0 : resdesc = "invalid input escaped successfully";
792 : }
793 72 : else if (!input_encoding_valid)
794 9 : resdesc = "invalid input escaped successfully, due to zero byte";
795 : }
796 : else
797 : {
798 120 : if (input_encoding0_valid)
799 : {
800 0 : ok = false;
801 0 : resdesc = "valid input failed to escape";
802 : }
803 120 : else if (input_encoding_valid)
804 0 : resdesc = "valid input failed to escape, due to zero byte";
805 : }
806 :
807 192 : report_result(tc, ok, testname->data, details->data,
808 : "input validity vs escape success",
809 : resdesc);
810 : }
811 :
812 : /*
813 : * If the input is invalidly encoded, the output should also be invalidly
814 : * encoded. We accept the same zero-byte edge case as above.
815 : */
816 : {
817 394 : bool ok = true;
818 394 : const char *resdesc = "ok";
819 :
820 394 : if (input_encoding0_valid && !input_encoding_valid && escape_encoding_valid)
821 : {
822 18 : resdesc = "invalid input produced valid output, due to zero byte";
823 : }
824 376 : else if (input_encoding0_valid && !escape_encoding_valid)
825 : {
826 0 : ok = false;
827 0 : resdesc = "valid input produced invalid output";
828 : }
829 376 : else if (!input_encoding0_valid &&
830 240 : (!ef->reports_errors || escape_success) &&
831 : escape_encoding_valid)
832 : {
833 0 : ok = false;
834 0 : resdesc = "invalid input produced valid output";
835 : }
836 :
837 394 : report_result(tc, ok, testname->data, details->data,
838 : "input and escaped encoding validity",
839 : resdesc);
840 : }
841 :
842 : /*
843 : * Test psql parsing whenever we get any string back, even if the escape
844 : * function returned a failure.
845 : */
846 394 : if (escape_buf->len > 0)
847 : {
848 314 : test_psql_parse(tc, testname,
849 : escape_buf, details);
850 : }
851 :
852 80 : out:
853 448 : destroyPQExpBuffer(escape_err);
854 448 : destroyPQExpBuffer(details);
855 448 : destroyPQExpBuffer(testname);
856 448 : destroyPQExpBuffer(escape_buf);
857 448 : destroyPQExpBuffer(raw_buf);
858 448 : }
859 :
860 : static void
861 64 : test_one_vector(pe_test_config *tc, const pe_test_vector *tv)
862 : {
863 64 : if (PQsetClientEncoding(tc->conn, tv->client_encoding))
864 : {
865 0 : fprintf(stderr, "failed to set encoding to %s:\n%s\n",
866 0 : tv->client_encoding, PQerrorMessage(tc->conn));
867 0 : exit(1);
868 : }
869 :
870 512 : for (int escoff = 0; escoff < lengthof(pe_test_escape_funcs); escoff++)
871 : {
872 448 : const pe_test_escape_func *ef = &pe_test_escape_funcs[escoff];
873 :
874 448 : test_one_vector_escape(tc, tv, ef);
875 : }
876 64 : }
877 :
/*
 * Print the help text to stdout.
 *
 * If "hint" is not NULL it is first printed to stderr as an error, and the
 * program exits with status 1 after the help text.  With a NULL hint the
 * function returns to the caller.
 */
878 : static void
879 0 : usage(const char *hint)
880 : {
881 0 : if (hint)
882 0 : fprintf(stderr, "Error: %s\n\n", hint);
883 :
884 0 : printf("PostgreSQL escape function test\n"
885 : "\n"
886 : "Usage:\n"
887 : " test_escape --conninfo=CONNINFO [OPTIONS]\n"
888 : "\n"
889 : "Options:\n"
890 : " -h, --help show this help\n"
891 : " -c, --conninfo=CONNINFO connection information to use\n"
892 : " -v, --verbose show test details even for successes\n"
893 : " -q, --quiet only show failures\n"
894 : " -f, --force-unsupported test invalid input even if unsupported\n"
895 : );
896 :
/* a hint means we were called because of an error */
897 0 : if (hint)
898 0 : exit(1);
899 0 : }
900 :
901 : int
902 1 : main(int argc, char *argv[])
903 : {
904 1 : pe_test_config tc = {0};
905 : int c;
906 : int option_index;
907 :
908 : static const struct option long_options[] = {
909 : {"help", no_argument, NULL, 'h'},
910 : {"conninfo", required_argument, NULL, 'c'},
911 : {"verbose", no_argument, NULL, 'v'},
912 : {"quiet", no_argument, NULL, 'q'},
913 : {"force-unsupported", no_argument, NULL, 'f'},
914 : {NULL, 0, NULL, 0},
915 : };
916 :
917 3 : while ((c = getopt_long(argc, argv, "c:fhqv", long_options, &option_index)) != -1)
918 : {
919 1 : switch (c)
920 : {
921 0 : case 'h':
922 0 : usage(NULL);
923 0 : exit(0);
924 : break;
925 1 : case 'c':
926 1 : tc.conninfo = optarg;
927 1 : break;
928 0 : case 'v':
929 0 : tc.verbosity++;
930 0 : break;
931 0 : case 'q':
932 0 : tc.verbosity--;
933 0 : break;
934 0 : case 'f':
935 0 : tc.force_unsupported = true;
936 0 : break;
937 : }
938 : }
939 :
940 1 : if (argc - optind >= 1)
941 0 : usage("unused option(s) specified");
942 :
943 1 : if (tc.conninfo == NULL)
944 0 : usage("--conninfo needs to be specified");
945 :
946 1 : tc.conn = PQconnectdb(tc.conninfo);
947 :
948 1 : if (!tc.conn || PQstatus(tc.conn) != CONNECTION_OK)
949 : {
950 0 : fprintf(stderr, "could not connect: %s\n",
951 0 : PQerrorMessage(tc.conn));
952 0 : exit(1);
953 : }
954 :
955 1 : test_gb18030_page_multiple(&tc);
956 1 : test_gb18030_json(&tc);
957 :
958 65 : for (int i = 0; i < lengthof(pe_test_vectors); i++)
959 : {
960 64 : test_one_vector(&tc, &pe_test_vectors[i]);
961 : }
962 :
963 1 : PQfinish(tc.conn);
964 :
965 1 : printf("# %d failures\n", tc.failure_count);
966 1 : printf("1..%d\n", tc.test_count);
967 1 : return tc.failure_count > 0;
968 : }
|