Line data Source code
1 : /*--------------------------------------------------------------------------
2 : *
3 : * test_integerset.c
4 : * Test integer set data structure.
5 : *
6 : * Copyright (c) 2019-2023, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/test/modules/test_integerset/test_integerset.c
10 : *
11 : * -------------------------------------------------------------------------
12 : */
13 : #include "postgres.h"
14 :
15 : #include "common/pg_prng.h"
16 : #include "fmgr.h"
17 : #include "lib/integerset.h"
18 : #include "miscadmin.h"
19 : #include "nodes/bitmapset.h"
20 : #include "storage/block.h"
21 : #include "storage/itemptr.h"
22 : #include "utils/memutils.h"
23 : #include "utils/timestamp.h"
24 :
25 : /*
26 : * If you enable this, the "pattern" tests will print information about
27 : * how long populating, probing, and iterating the test set takes, and
28 : * how much memory the test set consumed. That can be used as
29 : * micro-benchmark of various operations and input patterns (you might
30 : * want to increase the number of values used in each of the test, if
31 : * you do that, to reduce noise).
32 : *
33 : * The information is printed to the server's stderr, mostly because
34 : * that's where MemoryContextStats() output goes.
35 : */
36 : static const bool intset_test_stats = false;
37 :
38 2 : PG_MODULE_MAGIC;
39 :
40 4 : PG_FUNCTION_INFO_V1(test_integerset);
41 :
42 : /*
43 : * A struct to define a pattern of integers, for use with the test_pattern()
44 : * function.
45 : */
46 : typedef struct
47 : {
48 : char *test_name; /* short name of the test, for humans */
49 : char *pattern_str; /* a bit pattern */
50 : uint64 spacing; /* pattern repeats at this interval */
51 : uint64 num_values; /* number of integers to set in total */
52 : } test_spec;
53 :
54 : static const test_spec test_specs[] = {
55 : {
56 : "all ones", "1111111111",
57 : 10, 10000000
58 : },
59 : {
60 : "alternating bits", "0101010101",
61 : 10, 10000000
62 : },
63 : {
64 : "clusters of ten", "1111111111",
65 : 10000, 10000000
66 : },
67 : {
68 : "clusters of hundred",
69 : "1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111",
70 : 10000, 100000000
71 : },
72 : {
73 : "one-every-64k", "1",
74 : 65536, 10000000
75 : },
76 : {
77 : "sparse", "100000000000000000000000000000001",
78 : 10000000, 10000000
79 : },
80 : {
81 : "single values, distance > 2^32", "1",
82 : UINT64CONST(10000000000), 1000000
83 : },
84 : {
85 : "clusters, distance > 2^32", "10101010",
86 : UINT64CONST(10000000000), 10000000
87 : },
88 : {
89 : "clusters, distance > 2^60", "10101010",
90 : UINT64CONST(2000000000000000000),
91 : 23 /* can't be much higher than this, or we
92 : * overflow uint64 */
93 : }
94 : };
95 :
96 : static void test_pattern(const test_spec *spec);
97 : static void test_empty(void);
98 : static void test_single_value(uint64 value);
99 : static void check_with_filler(IntegerSet *intset, uint64 x, uint64 value, uint64 filler_min, uint64 filler_max);
100 : static void test_single_value_and_filler(uint64 value, uint64 filler_min, uint64 filler_max);
101 : static void test_huge_distances(void);
102 :
103 : /*
104 : * SQL-callable entry point to perform all tests.
105 : */
106 : Datum
107 2 : test_integerset(PG_FUNCTION_ARGS)
108 : {
109 : /* Tests for various corner cases */
110 2 : test_empty();
111 2 : test_huge_distances();
112 2 : test_single_value(0);
113 2 : test_single_value(1);
114 2 : test_single_value(PG_UINT64_MAX - 1);
115 2 : test_single_value(PG_UINT64_MAX);
116 2 : test_single_value_and_filler(0, 1000, 2000);
117 2 : test_single_value_and_filler(1, 1000, 2000);
118 2 : test_single_value_and_filler(1, 1000, 2000000);
119 2 : test_single_value_and_filler(PG_UINT64_MAX - 1, 1000, 2000);
120 2 : test_single_value_and_filler(PG_UINT64_MAX, 1000, 2000);
121 :
122 : /* Test different test patterns, with lots of entries */
123 20 : for (int i = 0; i < lengthof(test_specs); i++)
124 : {
125 18 : test_pattern(&test_specs[i]);
126 : }
127 :
128 2 : PG_RETURN_VOID();
129 : }
130 :
131 : /*
132 : * Test with a repeating pattern, defined by the 'spec'.
133 : */
134 : static void
135 18 : test_pattern(const test_spec *spec)
136 : {
137 : IntegerSet *intset;
138 : MemoryContext intset_ctx;
139 : MemoryContext old_ctx;
140 : TimestampTz starttime;
141 : TimestampTz endtime;
142 : uint64 n;
143 : uint64 last_int;
144 : int patternlen;
145 : uint64 *pattern_values;
146 : uint64 pattern_num_values;
147 :
148 18 : elog(NOTICE, "testing intset with pattern \"%s\"", spec->test_name);
149 18 : if (intset_test_stats)
150 0 : fprintf(stderr, "-----\ntesting intset with pattern \"%s\"\n", spec->test_name);
151 :
152 : /* Pre-process the pattern, creating an array of integers from it. */
153 18 : patternlen = strlen(spec->pattern_str);
154 18 : pattern_values = palloc(patternlen * sizeof(uint64));
155 18 : pattern_num_values = 0;
156 380 : for (int i = 0; i < patternlen; i++)
157 : {
158 362 : if (spec->pattern_str[i] == '1')
159 274 : pattern_values[pattern_num_values++] = i;
160 : }
161 :
162 : /*
163 : * Allocate the integer set.
164 : *
165 : * Allocate it in a separate memory context, so that we can print its
166 : * memory usage easily. (intset_create() creates a memory context of its
167 : * own, too, but we don't have direct access to it, so we cannot call
168 : * MemoryContextStats() on it directly).
169 : */
170 18 : intset_ctx = AllocSetContextCreate(CurrentMemoryContext,
171 : "intset test",
172 : ALLOCSET_SMALL_SIZES);
173 18 : MemoryContextSetIdentifier(intset_ctx, spec->test_name);
174 18 : old_ctx = MemoryContextSwitchTo(intset_ctx);
175 18 : intset = intset_create();
176 18 : MemoryContextSwitchTo(old_ctx);
177 :
178 : /*
179 : * Add values to the set.
180 : */
181 18 : starttime = GetCurrentTimestamp();
182 :
183 18 : n = 0;
184 18 : last_int = 0;
185 47000030 : while (n < spec->num_values)
186 : {
187 47000012 : uint64 x = 0;
188 :
189 369000058 : for (int i = 0; i < pattern_num_values && n < spec->num_values; i++)
190 : {
191 322000046 : x = last_int + pattern_values[i];
192 :
193 322000046 : intset_add_member(intset, x);
194 322000046 : n++;
195 : }
196 47000012 : last_int += spec->spacing;
197 : }
198 :
199 18 : endtime = GetCurrentTimestamp();
200 :
201 18 : if (intset_test_stats)
202 0 : fprintf(stderr, "added " UINT64_FORMAT " values in %d ms\n",
203 0 : spec->num_values, (int) (endtime - starttime) / 1000);
204 :
205 : /*
206 : * Print stats on the amount of memory used.
207 : *
208 : * We print the usage reported by intset_memory_usage(), as well as the
209 : * stats from the memory context. They should be in the same ballpark,
210 : * but it's hard to automate testing that, so if you're making changes to
211 : * the implementation, just observe that manually.
212 : */
213 18 : if (intset_test_stats)
214 : {
215 : uint64 mem_usage;
216 :
217 : /*
218 : * Also print memory usage as reported by intset_memory_usage(). It
219 : * should be in the same ballpark as the usage reported by
220 : * MemoryContextStats().
221 : */
222 0 : mem_usage = intset_memory_usage(intset);
223 0 : fprintf(stderr, "intset_memory_usage() reported " UINT64_FORMAT " (%0.2f bytes / integer)\n",
224 0 : mem_usage, (double) mem_usage / spec->num_values);
225 :
226 0 : MemoryContextStats(intset_ctx);
227 : }
228 :
229 : /* Check that intset_get_num_entries works */
230 18 : n = intset_num_entries(intset);
231 18 : if (n != spec->num_values)
232 0 : elog(ERROR, "intset_num_entries returned " UINT64_FORMAT ", expected " UINT64_FORMAT, n, spec->num_values);
233 :
234 : /*
235 : * Test random-access probes with intset_is_member()
236 : */
237 18 : starttime = GetCurrentTimestamp();
238 :
239 1800018 : for (n = 0; n < 100000; n++)
240 : {
241 : bool b;
242 : bool expected;
243 : uint64 x;
244 :
245 : /*
246 : * Pick next value to probe at random. We limit the probes to the
247 : * last integer that we added to the set, plus an arbitrary constant
248 : * (1000). There's no point in probing the whole 0 - 2^64 range, if
249 : * only a small part of the integer space is used. We would very
250 : * rarely hit values that are actually in the set.
251 : */
252 1800000 : x = pg_prng_uint64_range(&pg_global_prng_state, 0, last_int + 1000);
253 :
254 : /* Do we expect this value to be present in the set? */
255 1800000 : if (x >= last_int)
256 22 : expected = false;
257 : else
258 : {
259 1799978 : uint64 idx = x % spec->spacing;
260 :
261 1799978 : if (idx >= patternlen)
262 1397842 : expected = false;
263 402136 : else if (spec->pattern_str[idx] == '1')
264 301798 : expected = true;
265 : else
266 100338 : expected = false;
267 : }
268 :
269 : /* Is it present according to intset_is_member() ? */
270 1800000 : b = intset_is_member(intset, x);
271 :
272 1800000 : if (b != expected)
273 0 : elog(ERROR, "mismatch at " UINT64_FORMAT ": %d vs %d", x, b, expected);
274 : }
275 18 : endtime = GetCurrentTimestamp();
276 18 : if (intset_test_stats)
277 0 : fprintf(stderr, "probed " UINT64_FORMAT " values in %d ms\n",
278 0 : n, (int) (endtime - starttime) / 1000);
279 :
280 : /*
281 : * Test iterator
282 : */
283 18 : starttime = GetCurrentTimestamp();
284 :
285 18 : intset_begin_iterate(intset);
286 18 : n = 0;
287 18 : last_int = 0;
288 47000030 : while (n < spec->num_values)
289 : {
290 369000058 : for (int i = 0; i < pattern_num_values && n < spec->num_values; i++)
291 : {
292 322000046 : uint64 expected = last_int + pattern_values[i];
293 : uint64 x;
294 :
295 322000046 : if (!intset_iterate_next(intset, &x))
296 0 : break;
297 :
298 322000046 : if (x != expected)
299 0 : elog(ERROR, "iterate returned wrong value; got " UINT64_FORMAT ", expected " UINT64_FORMAT, x, expected);
300 322000046 : n++;
301 : }
302 47000012 : last_int += spec->spacing;
303 : }
304 18 : endtime = GetCurrentTimestamp();
305 18 : if (intset_test_stats)
306 0 : fprintf(stderr, "iterated " UINT64_FORMAT " values in %d ms\n",
307 0 : n, (int) (endtime - starttime) / 1000);
308 :
309 18 : if (n < spec->num_values)
310 0 : elog(ERROR, "iterator stopped short after " UINT64_FORMAT " entries, expected " UINT64_FORMAT, n, spec->num_values);
311 18 : if (n > spec->num_values)
312 0 : elog(ERROR, "iterator returned " UINT64_FORMAT " entries, " UINT64_FORMAT " was expected", n, spec->num_values);
313 :
314 18 : MemoryContextDelete(intset_ctx);
315 18 : }
316 :
317 : /*
318 : * Test with a set containing a single integer.
319 : */
320 : static void
321 8 : test_single_value(uint64 value)
322 : {
323 : IntegerSet *intset;
324 : uint64 x;
325 : uint64 num_entries;
326 : bool found;
327 :
328 8 : elog(NOTICE, "testing intset with single value " UINT64_FORMAT, value);
329 :
330 : /* Create the set. */
331 8 : intset = intset_create();
332 8 : intset_add_member(intset, value);
333 :
334 : /* Test intset_get_num_entries() */
335 8 : num_entries = intset_num_entries(intset);
336 8 : if (num_entries != 1)
337 0 : elog(ERROR, "intset_num_entries returned " UINT64_FORMAT ", expected 1", num_entries);
338 :
339 : /*
340 : * Test intset_is_member() at various special values, like 0 and maximum
341 : * possible 64-bit integer, as well as the value itself.
342 : */
343 8 : if (intset_is_member(intset, 0) != (value == 0))
344 0 : elog(ERROR, "intset_is_member failed for 0");
345 8 : if (intset_is_member(intset, 1) != (value == 1))
346 0 : elog(ERROR, "intset_is_member failed for 1");
347 8 : if (intset_is_member(intset, PG_UINT64_MAX) != (value == PG_UINT64_MAX))
348 0 : elog(ERROR, "intset_is_member failed for PG_UINT64_MAX");
349 8 : if (intset_is_member(intset, value) != true)
350 0 : elog(ERROR, "intset_is_member failed for the tested value");
351 :
352 : /*
353 : * Test iterator
354 : */
355 8 : intset_begin_iterate(intset);
356 8 : found = intset_iterate_next(intset, &x);
357 8 : if (!found || x != value)
358 0 : elog(ERROR, "intset_iterate_next failed for " UINT64_FORMAT, x);
359 :
360 8 : found = intset_iterate_next(intset, &x);
361 8 : if (found)
362 0 : elog(ERROR, "intset_iterate_next failed " UINT64_FORMAT, x);
363 8 : }
364 :
365 : /*
366 : * Test with an integer set that contains:
367 : *
368 : * - a given single 'value', and
369 : * - all integers between 'filler_min' and 'filler_max'.
370 : *
371 : * This exercises different codepaths than testing just with a single value,
372 : * because the implementation buffers newly-added values. If we add just a
373 : * single value to the set, we won't test the internal B-tree code at all,
374 : * just the code that deals with the buffer.
375 : */
376 : static void
377 10 : test_single_value_and_filler(uint64 value, uint64 filler_min, uint64 filler_max)
378 : {
379 : IntegerSet *intset;
380 : uint64 x;
381 : bool found;
382 : uint64 *iter_expected;
383 10 : uint64 n = 0;
384 10 : uint64 num_entries = 0;
385 : uint64 mem_usage;
386 :
387 10 : elog(NOTICE, "testing intset with value " UINT64_FORMAT ", and all between " UINT64_FORMAT " and " UINT64_FORMAT,
388 : value, filler_min, filler_max);
389 :
390 10 : intset = intset_create();
391 :
392 10 : iter_expected = palloc(sizeof(uint64) * (filler_max - filler_min + 1));
393 10 : if (value < filler_min)
394 : {
395 6 : intset_add_member(intset, value);
396 6 : iter_expected[n++] = value;
397 : }
398 :
399 4006010 : for (x = filler_min; x < filler_max; x++)
400 : {
401 4006000 : intset_add_member(intset, x);
402 4006000 : iter_expected[n++] = x;
403 : }
404 :
405 10 : if (value >= filler_max)
406 : {
407 4 : intset_add_member(intset, value);
408 4 : iter_expected[n++] = value;
409 : }
410 :
411 : /* Test intset_get_num_entries() */
412 10 : num_entries = intset_num_entries(intset);
413 10 : if (num_entries != n)
414 0 : elog(ERROR, "intset_num_entries returned " UINT64_FORMAT ", expected " UINT64_FORMAT, num_entries, n);
415 :
416 : /*
417 : * Test intset_is_member() at various spots, at and around the values that
418 : * we expect to be set, as well as 0 and the maximum possible value.
419 : */
420 10 : check_with_filler(intset, 0,
421 : value, filler_min, filler_max);
422 10 : check_with_filler(intset, 1,
423 : value, filler_min, filler_max);
424 10 : check_with_filler(intset, filler_min - 1,
425 : value, filler_min, filler_max);
426 10 : check_with_filler(intset, filler_min,
427 : value, filler_min, filler_max);
428 10 : check_with_filler(intset, filler_min + 1,
429 : value, filler_min, filler_max);
430 10 : check_with_filler(intset, value - 1,
431 : value, filler_min, filler_max);
432 10 : check_with_filler(intset, value,
433 : value, filler_min, filler_max);
434 10 : check_with_filler(intset, value + 1,
435 : value, filler_min, filler_max);
436 10 : check_with_filler(intset, filler_max - 1,
437 : value, filler_min, filler_max);
438 10 : check_with_filler(intset, filler_max,
439 : value, filler_min, filler_max);
440 10 : check_with_filler(intset, filler_max + 1,
441 : value, filler_min, filler_max);
442 10 : check_with_filler(intset, PG_UINT64_MAX - 1,
443 : value, filler_min, filler_max);
444 10 : check_with_filler(intset, PG_UINT64_MAX,
445 : value, filler_min, filler_max);
446 :
447 10 : intset_begin_iterate(intset);
448 4006020 : for (uint64 i = 0; i < n; i++)
449 : {
450 4006010 : found = intset_iterate_next(intset, &x);
451 4006010 : if (!found || x != iter_expected[i])
452 0 : elog(ERROR, "intset_iterate_next failed for " UINT64_FORMAT, x);
453 : }
454 10 : found = intset_iterate_next(intset, &x);
455 10 : if (found)
456 0 : elog(ERROR, "intset_iterate_next failed " UINT64_FORMAT, x);
457 :
458 10 : mem_usage = intset_memory_usage(intset);
459 10 : if (mem_usage < 5000 || mem_usage > 500000000)
460 0 : elog(ERROR, "intset_memory_usage() reported suspicious value: " UINT64_FORMAT, mem_usage);
461 10 : }
462 :
463 : /*
464 : * Helper function for test_single_value_and_filler.
465 : *
466 : * Calls intset_is_member() for value 'x', and checks that the result is what
467 : * we expect.
468 : */
469 : static void
470 130 : check_with_filler(IntegerSet *intset, uint64 x,
471 : uint64 value, uint64 filler_min, uint64 filler_max)
472 : {
473 : bool expected;
474 : bool actual;
475 :
476 130 : expected = (x == value || (filler_min <= x && x < filler_max));
477 :
478 130 : actual = intset_is_member(intset, x);
479 :
480 130 : if (actual != expected)
481 0 : elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, x);
482 130 : }
483 :
484 : /*
485 : * Test empty set
486 : */
487 : static void
488 2 : test_empty(void)
489 : {
490 : IntegerSet *intset;
491 : uint64 x;
492 :
493 2 : elog(NOTICE, "testing intset with empty set");
494 :
495 2 : intset = intset_create();
496 :
497 : /* Test intset_is_member() */
498 2 : if (intset_is_member(intset, 0) != false)
499 0 : elog(ERROR, "intset_is_member on empty set returned true");
500 2 : if (intset_is_member(intset, 1) != false)
501 0 : elog(ERROR, "intset_is_member on empty set returned true");
502 2 : if (intset_is_member(intset, PG_UINT64_MAX) != false)
503 0 : elog(ERROR, "intset_is_member on empty set returned true");
504 :
505 : /* Test iterator */
506 2 : intset_begin_iterate(intset);
507 2 : if (intset_iterate_next(intset, &x))
508 0 : elog(ERROR, "intset_iterate_next on empty set returned a value (" UINT64_FORMAT ")", x);
509 2 : }
510 :
511 : /*
512 : * Test with integers that are more than 2^60 apart.
513 : *
514 : * The Simple-8b encoding used by the set implementation can only encode
515 : * values up to 2^60. That makes large differences like this interesting
516 : * to test.
517 : */
518 : static void
519 2 : test_huge_distances(void)
520 : {
521 : IntegerSet *intset;
522 : uint64 values[1000];
523 2 : int num_values = 0;
524 2 : uint64 val = 0;
525 : bool found;
526 : uint64 x;
527 :
528 2 : elog(NOTICE, "testing intset with distances > 2^60 between values");
529 :
530 2 : val = 0;
531 2 : values[num_values++] = val;
532 :
533 : /* Test differences on both sides of the 2^60 boundary. */
534 2 : val += UINT64CONST(1152921504606846976) - 1; /* 2^60 - 1 */
535 2 : values[num_values++] = val;
536 :
537 2 : val += UINT64CONST(1152921504606846976) - 1; /* 2^60 - 1 */
538 2 : values[num_values++] = val;
539 :
540 2 : val += UINT64CONST(1152921504606846976); /* 2^60 */
541 2 : values[num_values++] = val;
542 :
543 2 : val += UINT64CONST(1152921504606846976); /* 2^60 */
544 2 : values[num_values++] = val;
545 :
546 2 : val += UINT64CONST(1152921504606846976); /* 2^60 */
547 2 : values[num_values++] = val;
548 :
549 2 : val += UINT64CONST(1152921504606846976) + 1; /* 2^60 + 1 */
550 2 : values[num_values++] = val;
551 :
552 2 : val += UINT64CONST(1152921504606846976) + 1; /* 2^60 + 1 */
553 2 : values[num_values++] = val;
554 :
555 2 : val += UINT64CONST(1152921504606846976) + 1; /* 2^60 + 1 */
556 2 : values[num_values++] = val;
557 :
558 2 : val += UINT64CONST(1152921504606846976) + 2; /* 2^60 + 2 */
559 2 : values[num_values++] = val;
560 :
561 2 : val += UINT64CONST(1152921504606846976) + 2; /* 2^60 + 2 */
562 2 : values[num_values++] = val;
563 :
564 2 : val += UINT64CONST(1152921504606846976); /* 2^60 */
565 2 : values[num_values++] = val;
566 :
567 : /*
568 : * We're now very close to 2^64, so can't add large values anymore. But
569 : * add more smaller values to the end, to make sure that all the above
570 : * values get flushed and packed into the tree structure.
571 : */
572 1978 : while (num_values < 1000)
573 : {
574 1976 : val += pg_prng_uint32(&pg_global_prng_state);
575 1976 : values[num_values++] = val;
576 : }
577 :
578 : /* Create an IntegerSet using these values */
579 2 : intset = intset_create();
580 2002 : for (int i = 0; i < num_values; i++)
581 2000 : intset_add_member(intset, values[i]);
582 :
583 : /*
584 : * Test intset_is_member() around each of these values
585 : */
586 2002 : for (int i = 0; i < num_values; i++)
587 : {
588 2000 : uint64 y = values[i];
589 : bool expected;
590 : bool result;
591 :
592 2000 : if (y > 0)
593 : {
594 1998 : expected = (values[i - 1] == y - 1);
595 1998 : result = intset_is_member(intset, y - 1);
596 1998 : if (result != expected)
597 0 : elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, y - 1);
598 : }
599 :
600 2000 : result = intset_is_member(intset, y);
601 2000 : if (result != true)
602 0 : elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, y);
603 :
604 2000 : expected = (i != num_values - 1) ? (values[i + 1] == y + 1) : false;
605 2000 : result = intset_is_member(intset, y + 1);
606 2000 : if (result != expected)
607 0 : elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, y + 1);
608 : }
609 :
610 : /*
611 : * Test iterator
612 : */
613 2 : intset_begin_iterate(intset);
614 2002 : for (int i = 0; i < num_values; i++)
615 : {
616 2000 : found = intset_iterate_next(intset, &x);
617 2000 : if (!found || x != values[i])
618 0 : elog(ERROR, "intset_iterate_next failed for " UINT64_FORMAT, x);
619 : }
620 2 : found = intset_iterate_next(intset, &x);
621 2 : if (found)
622 0 : elog(ERROR, "intset_iterate_next failed " UINT64_FORMAT, x);
623 2 : }
|