Line data Source code
1 : /*--------------------------------------------------------------------------
2 : *
3 : * test_bloomfilter.c
4 : * Test false positive rate of Bloom filter.
5 : *
6 : * Copyright (c) 2018-2024, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/test/modules/test_bloomfilter/test_bloomfilter.c
10 : *
11 : * -------------------------------------------------------------------------
12 : */
13 : #include "postgres.h"
14 :
15 : #include "common/pg_prng.h"
16 : #include "fmgr.h"
17 : #include "lib/bloomfilter.h"
18 : #include "miscadmin.h"
19 :
20 2 : PG_MODULE_MAGIC;
21 :
22 : /* Fits decimal representation of PG_INT64_MIN + 2 bytes: */
23 : #define MAX_ELEMENT_BYTES 21
24 : /* False positive rate WARNING threshold (1%): */
25 : #define FPOSITIVE_THRESHOLD 0.01
26 :
27 :
28 : /*
29 : * Populate an empty Bloom filter with "nelements" dummy strings.
30 : */
31 : static void
32 2 : populate_with_dummy_strings(bloom_filter *filter, int64 nelements)
33 : {
34 : char element[MAX_ELEMENT_BYTES];
35 : int64 i;
36 :
37 1677724 : for (i = 0; i < nelements; i++)
38 : {
39 1677722 : CHECK_FOR_INTERRUPTS();
40 :
41 1677722 : snprintf(element, sizeof(element), "i" INT64_FORMAT, i);
42 1677722 : bloom_add_element(filter, (unsigned char *) element, strlen(element));
43 : }
44 2 : }
45 :
46 : /*
47 : * Returns number of strings that are indicated as probably appearing in Bloom
48 : * filter that were in fact never added by populate_with_dummy_strings().
49 : * These are false positives.
50 : */
51 : static int64
52 2 : nfalsepos_for_missing_strings(bloom_filter *filter, int64 nelements)
53 : {
54 : char element[MAX_ELEMENT_BYTES];
55 2 : int64 nfalsepos = 0;
56 : int64 i;
57 :
58 1677724 : for (i = 0; i < nelements; i++)
59 : {
60 1677722 : CHECK_FOR_INTERRUPTS();
61 :
62 1677722 : snprintf(element, sizeof(element), "M" INT64_FORMAT, i);
63 1677722 : if (!bloom_lacks_element(filter, (unsigned char *) element,
64 : strlen(element)))
65 13774 : nfalsepos++;
66 : }
67 :
68 2 : return nfalsepos;
69 : }
70 :
71 : static void
72 2 : create_and_test_bloom(int power, int64 nelements, int callerseed)
73 : {
74 : int bloom_work_mem;
75 : uint64 seed;
76 : int64 nfalsepos;
77 : bloom_filter *filter;
78 :
79 2 : bloom_work_mem = (1L << power) / 8L / 1024L;
80 :
81 2 : elog(DEBUG1, "bloom_work_mem (KB): %d", bloom_work_mem);
82 :
83 : /*
84 : * Generate random seed, or use caller's. Seed should always be a
85 : * positive value less than or equal to PG_INT32_MAX, to ensure that any
86 : * random seed can be recreated through callerseed if the need arises.
87 : */
88 2 : seed = callerseed < 0 ? pg_prng_int32p(&pg_global_prng_state) : callerseed;
89 :
90 : /* Create Bloom filter, populate it, and report on false positive rate */
91 2 : filter = bloom_create(nelements, bloom_work_mem, seed);
92 2 : populate_with_dummy_strings(filter, nelements);
93 2 : nfalsepos = nfalsepos_for_missing_strings(filter, nelements);
94 :
95 2 : ereport((nfalsepos > nelements * FPOSITIVE_THRESHOLD) ? WARNING : DEBUG1,
96 : (errmsg_internal("seed: " UINT64_FORMAT " false positives: " INT64_FORMAT " (%.6f%%) bitset %.2f%% set",
97 : seed, nfalsepos, (double) nfalsepos / nelements,
98 : 100.0 * bloom_prop_bits_set(filter))));
99 :
100 2 : bloom_free(filter);
101 2 : }
102 :
103 4 : PG_FUNCTION_INFO_V1(test_bloomfilter);
104 :
105 : /*
106 : * SQL-callable entry point to perform all tests.
107 : *
108 : * If a 1% false positive threshold is not met, emits WARNINGs.
109 : *
110 : * See README for details of arguments.
111 : */
112 : Datum
113 2 : test_bloomfilter(PG_FUNCTION_ARGS)
114 : {
115 2 : int power = PG_GETARG_INT32(0);
116 2 : int64 nelements = PG_GETARG_INT64(1);
117 2 : int seed = PG_GETARG_INT32(2);
118 2 : int tests = PG_GETARG_INT32(3);
119 : int i;
120 :
121 2 : if (power < 23 || power > 32)
122 0 : elog(ERROR, "power argument must be between 23 and 32 inclusive");
123 :
124 2 : if (tests <= 0)
125 0 : elog(ERROR, "invalid number of tests: %d", tests);
126 :
127 2 : if (nelements < 0)
128 0 : elog(ERROR, "invalid number of elements: %d", tests);
129 :
130 4 : for (i = 0; i < tests; i++)
131 : {
132 2 : elog(DEBUG1, "beginning test #%d...", i + 1);
133 :
134 2 : create_and_test_bloom(power, nelements, seed);
135 : }
136 :
137 2 : PG_RETURN_VOID();
138 : }
|