Line data Source code
1 : /*
2 : * brin_bloom.c
3 : * Implementation of Bloom opclass for BRIN
4 : *
5 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
6 : * Portions Copyright (c) 1994, Regents of the University of California
7 : *
8 : *
9 : * A BRIN opclass summarizing page range into a bloom filter.
10 : *
11 : * Bloom filters allow efficient testing whether a given page range contains
12 : * a particular value. Therefore, if we summarize each page range into a small
13 : * bloom filter, we can easily (and cheaply) test whether it contains values
14 : * we get later.
15 : *
16 : * The index only supports equality operators, similarly to hash indexes.
17 : * Bloom indexes are however much smaller, and support only bitmap scans.
18 : *
19 : * Note: Don't confuse this with bloom indexes, implemented in a contrib
20 : * module. That extension implements an entirely new AM, building a bloom
21 : * filter on multiple columns in a single row. This opclass works with an
22 : * existing AM (BRIN) and builds bloom filter on a column.
23 : *
24 : *
25 : * values vs. hashes
26 : * -----------------
27 : *
28 : * The original column values are not used directly, but are first hashed
29 : * using the regular type-specific hash function, producing a uint32 hash.
30 : * And this hash value is then added to the summary - i.e. it's hashed
31 : * again and added to the bloom filter.
32 : *
33 : * This allows the code to treat all data types (byval/byref/...) the same
34 : * way, with only minimal space requirements, because we're working with
35 : * hashes and not the original values. Everything is uint32.
36 : *
37 : * Of course, this assumes the built-in hash function is reasonably good,
38 : * without too many collisions etc. But that does seem to be the case, at
39 : * least based on past experience. After all, the same hash functions are
40 : * used for hash indexes, hash partitioning and so on.
41 : *
42 : *
43 : * hashing scheme
44 : * --------------
45 : *
46 : * Bloom filters require a number of independent hash functions. There are
47 : * different schemes how to construct them - for example we might use
48 : * hash_uint32_extended with random seeds, but that seems fairly expensive.
49 : * We use a scheme requiring only two functions described in this paper:
50 : *
51 : * Less Hashing, Same Performance:Building a Better Bloom Filter
52 : * Adam Kirsch, Michael Mitzenmacher, Harvard School of Engineering and
53 : * Applied Sciences, Cambridge, Massachusetts [DOI 10.1002/rsa.20208]
54 : *
55 : * The two hash functions h1 and h2 are calculated using hard-coded seeds,
56 : * and then combined using (h1 + i * h2) to generate the hash functions.
57 : *
58 : *
59 : * sizing the bloom filter
60 : * -----------------------
61 : *
62 : * Size of a bloom filter depends on the number of distinct values we will
63 : * store in it, and the desired false positive rate. The higher the number
64 : * of distinct values and/or the lower the false positive rate, the larger
65 : * the bloom filter. On the other hand, we want to keep the index as small
66 : * as possible - that's one of the basic advantages of BRIN indexes.
67 : *
68 : * Although the number of distinct elements (in a page range) depends on
69 : * the data, we can consider it fixed. This simplifies the trade-off to
70 : * just false positive rate vs. size.
71 : *
72 : * At the page range level, false positive rate is a probability the bloom
73 : * filter matches a random value. For the whole index (with sufficiently
74 : * many page ranges) it represents the fraction of the index ranges (and
75 : * thus fraction of the table to be scanned) matching the random value.
76 : *
77 : * Furthermore, the size of the bloom filter is subject to implementation
78 : * limits - it has to fit onto a single index page (8kB by default). As
79 : * the bitmap is inherently random (when "full" about half the bits is set
80 : * to 1, randomly), compression can't help very much.
81 : *
82 : * To reduce the size of a filter (to fit to a page), we have to either
83 : * accept higher false positive rate (undesirable), or reduce the number
84 : * of distinct items to be stored in the filter. We can't alter the input
85 : * data, of course, but we may make the BRIN page ranges smaller - instead
86 : * of the default 128 pages (1MB) we may build index with 16-page ranges,
87 : * or something like that. This should reduce the number of distinct values
88 : * in the page range, making the filter smaller (with fixed false positive
89 : * rate). Even for random data sets this should help, as the number of rows
90 : * per heap page is limited (to ~290 with very narrow tables, likely ~20
91 : * in practice).
92 : *
93 : * Of course, good sizing decisions depend on having the necessary data,
94 : * i.e. number of distinct values in a page range (of a given size) and
95 : * table size (to estimate cost change due to change in false positive
96 : * rate due to having larger index vs. scanning larger indexes). We may
97 : * not have that data - for example when building an index on empty table
98 : * it's not really possible. And for some data we only have estimates for
99 : * the whole table and we can only estimate per-range values (ndistinct).
100 : *
101 : * Another challenge is that while the bloom filter is per-column, it's
102 : * the whole index tuple that has to fit into a page. And for multi-column
103 : * indexes that may include pieces we have no control over (not necessarily
104 : * bloom filters, the other columns may use other BRIN opclasses). So it's
105 : * not entirely clear how to distribute the space between those columns.
106 : *
107 : * The current logic, implemented in brin_bloom_get_ndistinct, attempts to
108 : * make some basic sizing decisions, based on the size of BRIN ranges, and
109 : * the maximum number of rows per range.
110 : *
111 : *
112 : * IDENTIFICATION
113 : * src/backend/access/brin/brin_bloom.c
114 : */
115 : #include "postgres.h"
116 :
117 : #include <limits.h>
118 : #include <math.h>
119 :
120 : #include "access/brin.h"
121 : #include "access/brin_internal.h"
122 : #include "access/brin_page.h"
123 : #include "access/brin_tuple.h"
124 : #include "access/genam.h"
125 : #include "access/htup_details.h"
126 : #include "access/reloptions.h"
127 : #include "catalog/pg_am.h"
128 : #include "catalog/pg_type.h"
129 : #include "common/hashfn.h"
130 : #include "port/pg_bitutils.h"
131 : #include "utils/fmgrprotos.h"
132 : #include "utils/rel.h"
133 :
134 : #define BloomEqualStrategyNumber 1
135 :
136 : /*
137 : * Additional SQL level support functions. We only have one, which is
138 : * used to calculate hash of the input value.
139 : *
140 : * Procedure numbers must not use values reserved for BRIN itself; see
141 : * brin_internal.h.
142 : */
143 : #define BLOOM_MAX_PROCNUMS 1 /* maximum support procs we need */
144 : #define PROCNUM_HASH 11 /* required */
145 :
146 : /*
147 : * Subtract this from procnum to obtain index in BloomOpaque arrays
148 : * (Must be equal to minimum of private procnums).
149 : */
150 : #define PROCNUM_BASE 11
151 :
152 : /*
153 : * Storage type for BRIN's reloptions.
154 : */
155 : typedef struct BloomOptions
156 : {
157 : int32 vl_len_; /* varlena header (do not touch directly!) */
158 : double nDistinctPerRange; /* number of distinct values per range */
159 : double falsePositiveRate; /* false positive for bloom filter */
160 : } BloomOptions;
161 :
162 : /*
163 : * The current min value (16) is somewhat arbitrary, but it's based
164 : * on the fact that the filter header is ~20B alone, which is about
165 : * the same as the filter bitmap for 16 distinct items with 1% false
166 : * positive rate. So by allowing lower values we'd not gain much. In
167 : * any case, the min should not be larger than MaxHeapTuplesPerPage
168 : * (~290), which is the theoretical maximum for single-page ranges.
169 : */
170 : #define BLOOM_MIN_NDISTINCT_PER_RANGE 16
171 :
172 : /*
173 : * Used to determine number of distinct items, based on the number of rows
174 : * in a page range. The 10% is somewhat similar to what estimate_num_groups
175 : * does, so we use the same factor here.
176 : */
177 : #define BLOOM_DEFAULT_NDISTINCT_PER_RANGE -0.1 /* 10% of values */
178 :
179 : /*
180 : * Allowed range and default value for the false positive range. The exact
181 : * values are somewhat arbitrary, but were chosen considering the various
182 : * parameters (size of filter vs. page size, etc.).
183 : *
184 : * The lower the false-positive rate, the more accurate the filter is, but
185 : * it also gets larger - at some point this eliminates the main advantage
186 : * of BRIN indexes, which is the tiny size. At 0.01% the index is about
187 : * 10% of the table (assuming 290 distinct values per 8kB page).
188 : *
189 : * On the other hand, as the false-positive rate increases, larger part of
190 : * the table has to be scanned due to mismatches - at 25% we're probably
191 : * close to sequential scan being cheaper.
192 : */
193 : #define BLOOM_MIN_FALSE_POSITIVE_RATE 0.0001 /* 0.01% fp rate */
194 : #define BLOOM_MAX_FALSE_POSITIVE_RATE 0.25 /* 25% fp rate */
195 : #define BLOOM_DEFAULT_FALSE_POSITIVE_RATE 0.01 /* 1% fp rate */
196 :
197 : #define BloomGetNDistinctPerRange(opts) \
198 : ((opts) && (((BloomOptions *) (opts))->nDistinctPerRange != 0) ? \
199 : (((BloomOptions *) (opts))->nDistinctPerRange) : \
200 : BLOOM_DEFAULT_NDISTINCT_PER_RANGE)
201 :
202 : #define BloomGetFalsePositiveRate(opts) \
203 : ((opts) && (((BloomOptions *) (opts))->falsePositiveRate != 0.0) ? \
204 : (((BloomOptions *) (opts))->falsePositiveRate) : \
205 : BLOOM_DEFAULT_FALSE_POSITIVE_RATE)
206 :
207 : /*
208 : * And estimate of the largest bloom we can fit onto a page. This is not
209 : * a perfect guarantee, for a couple of reasons. For example, the row may
210 : * be larger because the index has multiple columns.
211 : */
212 : #define BloomMaxFilterSize \
213 : MAXALIGN_DOWN(BLCKSZ - \
214 : (MAXALIGN(SizeOfPageHeaderData + \
215 : sizeof(ItemIdData)) + \
216 : MAXALIGN(sizeof(BrinSpecialSpace)) + \
217 : SizeOfBrinTuple))
218 :
219 : /*
220 : * Seeds used to calculate two hash functions h1 and h2, which are then used
221 : * to generate k hashes using the (h1 + i * h2) scheme.
222 : */
223 : #define BLOOM_SEED_1 0x71d924af
224 : #define BLOOM_SEED_2 0xba48b314
225 :
226 : /*
227 : * Bloom Filter
228 : *
229 : * Represents a bloom filter, built on hashes of the indexed values. That is,
230 : * we compute a uint32 hash of the value, and then store this hash into the
231 : * bloom filter (and compute additional hashes on it).
232 : *
233 : * XXX We could implement "sparse" bloom filters, keeping only the bytes that
234 : * are not entirely 0. But while indexes don't support TOAST, the varlena can
235 : * still be compressed. So this seems unnecessary, because the compression
236 : * should do the same job.
237 : *
238 : * XXX We can also watch the number of bits set in the bloom filter, and then
239 : * stop using it (and not store the bitmap, to save space) when the false
240 : * positive rate gets too high. But even if the false positive rate exceeds the
241 : * desired value, it still can eliminate some page ranges.
242 : */
243 : typedef struct BloomFilter
244 : {
245 : /* varlena header (do not touch directly!) */
246 : int32 vl_len_;
247 :
248 : /* space for various flags (unused for now) */
249 : uint16 flags;
250 :
251 : /* fields for the HASHED phase */
252 : uint8 nhashes; /* number of hash functions */
253 : uint32 nbits; /* number of bits in the bitmap (size) */
254 : uint32 nbits_set; /* number of bits set to 1 */
255 :
256 : /* data of the bloom filter */
257 : char data[FLEXIBLE_ARRAY_MEMBER];
258 : } BloomFilter;
259 :
260 : /*
261 : * bloom_filter_size
262 : * Calculate Bloom filter parameters (nbits, nbytes, nhashes).
263 : *
264 : * Given expected number of distinct values and desired false positive rate,
265 : * calculates the optimal parameters of the Bloom filter.
266 : *
267 : * The resulting parameters are returned through nbytesp (number of bytes),
268 : * nbitsp (number of bits) and nhashesp (number of hash functions). If a
269 : * pointer is NULL, the parameter is not returned.
270 : */
271 : static void
272 7932 : bloom_filter_size(int ndistinct, double false_positive_rate,
273 : int *nbytesp, int *nbitsp, int *nhashesp)
274 : {
275 : double k;
276 : int nbits,
277 : nbytes;
278 :
279 : /* sizing bloom filter: -(n * ln(p)) / (ln(2))^2 */
280 7932 : nbits = ceil(-(ndistinct * log(false_positive_rate)) / pow(log(2.0), 2));
281 :
282 : /* round m to whole bytes */
283 7932 : nbytes = ((nbits + 7) / 8);
284 7932 : nbits = nbytes * 8;
285 :
286 : /*
287 : * round(log(2.0) * m / ndistinct), but assume round() may not be
288 : * available on Windows
289 : */
290 7932 : k = log(2.0) * nbits / ndistinct;
291 7932 : k = (k - floor(k) >= 0.5) ? ceil(k) : floor(k);
292 :
293 7932 : if (nbytesp)
294 7932 : *nbytesp = nbytes;
295 :
296 7932 : if (nbitsp)
297 7932 : *nbitsp = nbits;
298 :
299 7932 : if (nhashesp)
300 7932 : *nhashesp = (int) k;
301 7932 : }
302 :
303 : /*
304 : * bloom_init
305 : * Initialize the Bloom Filter, allocate all the memory.
306 : *
307 : * The filter is initialized with optimal size for ndistinct expected values
308 : * and the requested false positive rate. The filter is stored as varlena.
309 : */
310 : static BloomFilter *
311 7932 : bloom_init(int ndistinct, double false_positive_rate)
312 : {
313 : Size len;
314 : BloomFilter *filter;
315 :
316 : int nbits; /* size of filter / number of bits */
317 : int nbytes; /* size of filter / number of bytes */
318 : int nhashes; /* number of hash functions */
319 :
320 : Assert(ndistinct > 0);
321 : Assert(false_positive_rate > 0 && false_positive_rate < 1);
322 :
323 : /* calculate bloom filter size / parameters */
324 7932 : bloom_filter_size(ndistinct, false_positive_rate,
325 : &nbytes, &nbits, &nhashes);
326 :
327 : /*
328 : * Reject filters that are obviously too large to store on a page.
329 : *
330 : * Initially the bloom filter is just zeroes and so very compressible, but
331 : * as we add values it gets more and more random, and so less and less
332 : * compressible. So initially everything fits on the page, but we might
333 : * get surprising failures later - we want to prevent that, so we reject
334 : * bloom filter that are obviously too large.
335 : *
336 : * XXX It's not uncommon to oversize the bloom filter a bit, to defend
337 : * against unexpected data anomalies (parts of table with more distinct
338 : * values per range etc.). But we still need to make sure even the
339 : * oversized filter fits on page, if such need arises.
340 : *
341 : * XXX This check is not perfect, because the index may have multiple
342 : * filters that are small individually, but too large when combined.
343 : */
344 7932 : if (nbytes > BloomMaxFilterSize)
345 0 : elog(ERROR, "the bloom filter is too large (%d > %zu)", nbytes,
346 : BloomMaxFilterSize);
347 :
348 : /*
349 : * We allocate the whole filter. Most of it is going to be 0 bits, so the
350 : * varlena is easy to compress.
351 : */
352 7932 : len = offsetof(BloomFilter, data) + nbytes;
353 :
354 7932 : filter = (BloomFilter *) palloc0(len);
355 :
356 7932 : filter->flags = 0;
357 7932 : filter->nhashes = nhashes;
358 7932 : filter->nbits = nbits;
359 :
360 7932 : SET_VARSIZE(filter, len);
361 :
362 7932 : return filter;
363 : }
364 :
365 :
366 : /*
367 : * bloom_add_value
368 : * Add value to the bloom filter.
369 : */
370 : static BloomFilter *
371 68024 : bloom_add_value(BloomFilter *filter, uint32 value, bool *updated)
372 : {
373 : int i;
374 : uint64 h1,
375 : h2;
376 :
377 : /* compute the hashes, used for the bloom filter */
378 68024 : h1 = hash_bytes_uint32_extended(value, BLOOM_SEED_1) % filter->nbits;
379 68024 : h2 = hash_bytes_uint32_extended(value, BLOOM_SEED_2) % filter->nbits;
380 :
381 : /* compute the requested number of hashes */
382 544192 : for (i = 0; i < filter->nhashes; i++)
383 : {
384 : /* h1 + h2 + f(i) */
385 476168 : uint32 h = (h1 + i * h2) % filter->nbits;
386 476168 : uint32 byte = (h / 8);
387 476168 : uint32 bit = (h % 8);
388 :
389 : /* if the bit is not set, set it and remember we did that */
390 476168 : if (!(filter->data[byte] & (0x01 << bit)))
391 : {
392 227182 : filter->data[byte] |= (0x01 << bit);
393 227182 : filter->nbits_set++;
394 227182 : if (updated)
395 227182 : *updated = true;
396 : }
397 : }
398 :
399 68024 : return filter;
400 : }
401 :
402 :
403 : /*
404 : * bloom_contains_value
405 : * Check if the bloom filter contains a particular value.
406 : */
407 : static bool
408 8208 : bloom_contains_value(BloomFilter *filter, uint32 value)
409 : {
410 : int i;
411 : uint64 h1,
412 : h2;
413 :
414 : /* calculate the two hashes */
415 8208 : h1 = hash_bytes_uint32_extended(value, BLOOM_SEED_1) % filter->nbits;
416 8208 : h2 = hash_bytes_uint32_extended(value, BLOOM_SEED_2) % filter->nbits;
417 :
418 : /* compute the requested number of hashes */
419 10434 : for (i = 0; i < filter->nhashes; i++)
420 : {
421 : /* h1 + h2 + f(i) */
422 10164 : uint32 h = (h1 + i * h2) % filter->nbits;
423 10164 : uint32 byte = (h / 8);
424 10164 : uint32 bit = (h % 8);
425 :
426 : /* if the bit is not set, the value is not there */
427 10164 : if (!(filter->data[byte] & (0x01 << bit)))
428 7938 : return false;
429 : }
430 :
431 : /* all hashes found in bloom filter */
432 270 : return true;
433 : }
434 :
435 : typedef struct BloomOpaque
436 : {
437 : /*
438 : * XXX At this point we only need a single proc (to compute the hash), but
439 : * let's keep the array just like inclusion and minmax opclasses, for
440 : * consistency. We may need additional procs in the future.
441 : */
442 : FmgrInfo extra_procinfos[BLOOM_MAX_PROCNUMS];
443 : } BloomOpaque;
444 :
445 : static FmgrInfo *bloom_get_procinfo(BrinDesc *bdesc, uint16 attno,
446 : uint16 procnum);
447 :
448 :
449 : Datum
450 4812 : brin_bloom_opcinfo(PG_FUNCTION_ARGS)
451 : {
452 : BrinOpcInfo *result;
453 :
454 : /*
455 : * opaque->strategy_procinfos is initialized lazily; here it is set to
456 : * all-uninitialized by palloc0 which sets fn_oid to InvalidOid.
457 : *
458 : * bloom indexes only store the filter as a single BYTEA column
459 : */
460 :
461 4812 : result = palloc0(MAXALIGN(SizeofBrinOpcInfo(1)) +
462 : sizeof(BloomOpaque));
463 4812 : result->oi_nstored = 1;
464 4812 : result->oi_regular_nulls = true;
465 4812 : result->oi_opaque = (BloomOpaque *)
466 4812 : MAXALIGN((char *) result + SizeofBrinOpcInfo(1));
467 4812 : result->oi_typcache[0] = lookup_type_cache(PG_BRIN_BLOOM_SUMMARYOID, 0);
468 :
469 4812 : PG_RETURN_POINTER(result);
470 : }
471 :
472 : /*
473 : * brin_bloom_get_ndistinct
474 : * Determine the ndistinct value used to size bloom filter.
475 : *
476 : * Adjust the ndistinct value based on the pagesPerRange value. First,
477 : * if it's negative, it's assumed to be relative to maximum number of
478 : * tuples in the range (assuming each page gets MaxHeapTuplesPerPage
479 : * tuples, which is likely a significant over-estimate). We also clamp
480 : * the value, not to over-size the bloom filter unnecessarily.
481 : *
482 : * XXX We can only do this when the pagesPerRange value was supplied.
483 : * If it wasn't, it has to be a read-only access to the index, in which
484 : * case we don't really care. But perhaps we should fall-back to the
485 : * default pagesPerRange value?
486 : *
487 : * XXX We might also fetch info about ndistinct estimate for the column,
488 : * and compute the expected number of distinct values in a range. But
489 : * that may be tricky due to data being sorted in various ways, so it
490 : * seems better to rely on the upper estimate.
491 : *
492 : * XXX We might also calculate a better estimate of rows per BRIN range,
493 : * instead of using MaxHeapTuplesPerPage (which probably produces values
494 : * much higher than reality).
495 : */
496 : static int
497 7932 : brin_bloom_get_ndistinct(BrinDesc *bdesc, BloomOptions *opts)
498 : {
499 : double ndistinct;
500 : double maxtuples;
501 : BlockNumber pagesPerRange;
502 :
503 7932 : pagesPerRange = BrinGetPagesPerRange(bdesc->bd_index);
504 7932 : ndistinct = BloomGetNDistinctPerRange(opts);
505 :
506 : Assert(BlockNumberIsValid(pagesPerRange));
507 :
508 7932 : maxtuples = MaxHeapTuplesPerPage * pagesPerRange;
509 :
510 : /*
511 : * Similarly to n_distinct, negative values are relative - in this case to
512 : * maximum number of tuples in the page range (maxtuples).
513 : */
514 7932 : if (ndistinct < 0)
515 7932 : ndistinct = (-ndistinct) * maxtuples;
516 :
517 : /*
518 : * Positive values are to be used directly, but we still apply a couple of
519 : * safeties to avoid using unreasonably small bloom filters.
520 : */
521 7932 : ndistinct = Max(ndistinct, BLOOM_MIN_NDISTINCT_PER_RANGE);
522 :
523 : /*
524 : * And don't use more than the maximum possible number of tuples, in the
525 : * range, which would be entirely wasteful.
526 : */
527 7932 : ndistinct = Min(ndistinct, maxtuples);
528 :
529 7932 : return (int) ndistinct;
530 : }
531 :
532 : /*
533 : * Examine the given index tuple (which contains partial status of a certain
534 : * page range) by comparing it to the given value that comes from another heap
535 : * tuple. If the new value is outside the bloom filter specified by the
536 : * existing tuple values, update the index tuple and return true. Otherwise,
537 : * return false and do not modify in this case.
538 : */
539 : Datum
540 68024 : brin_bloom_add_value(PG_FUNCTION_ARGS)
541 : {
542 68024 : BrinDesc *bdesc = (BrinDesc *) PG_GETARG_POINTER(0);
543 68024 : BrinValues *column = (BrinValues *) PG_GETARG_POINTER(1);
544 68024 : Datum newval = PG_GETARG_DATUM(2);
545 68024 : bool isnull PG_USED_FOR_ASSERTS_ONLY = PG_GETARG_BOOL(3);
546 68024 : BloomOptions *opts = (BloomOptions *) PG_GET_OPCLASS_OPTIONS();
547 68024 : Oid colloid = PG_GET_COLLATION();
548 : FmgrInfo *hashFn;
549 : uint32 hashValue;
550 68024 : bool updated = false;
551 : AttrNumber attno;
552 : BloomFilter *filter;
553 :
554 : Assert(!isnull);
555 :
556 68024 : attno = column->bv_attno;
557 :
558 : /*
559 : * If this is the first non-null value, we need to initialize the bloom
560 : * filter. Otherwise just extract the existing bloom filter from
561 : * BrinValues.
562 : */
563 68024 : if (column->bv_allnulls)
564 : {
565 15864 : filter = bloom_init(brin_bloom_get_ndistinct(bdesc, opts),
566 7932 : BloomGetFalsePositiveRate(opts));
567 7932 : column->bv_values[0] = PointerGetDatum(filter);
568 7932 : column->bv_allnulls = false;
569 7932 : updated = true;
570 : }
571 : else
572 60092 : filter = (BloomFilter *) PG_DETOAST_DATUM(column->bv_values[0]);
573 :
574 : /*
575 : * Compute the hash of the new value, using the supplied hash function,
576 : * and then add the hash value to the bloom filter.
577 : */
578 68024 : hashFn = bloom_get_procinfo(bdesc, attno, PROCNUM_HASH);
579 :
580 68024 : hashValue = DatumGetUInt32(FunctionCall1Coll(hashFn, colloid, newval));
581 :
582 68024 : filter = bloom_add_value(filter, hashValue, &updated);
583 :
584 68024 : column->bv_values[0] = PointerGetDatum(filter);
585 :
586 68024 : PG_RETURN_BOOL(updated);
587 : }
588 :
589 : /*
590 : * Given an index tuple corresponding to a certain page range and a scan key,
591 : * return whether the scan key is consistent with the index tuple's bloom
592 : * filter. Return true if so, false otherwise.
593 : */
594 : Datum
595 8208 : brin_bloom_consistent(PG_FUNCTION_ARGS)
596 : {
597 8208 : BrinDesc *bdesc = (BrinDesc *) PG_GETARG_POINTER(0);
598 8208 : BrinValues *column = (BrinValues *) PG_GETARG_POINTER(1);
599 8208 : ScanKey *keys = (ScanKey *) PG_GETARG_POINTER(2);
600 8208 : int nkeys = PG_GETARG_INT32(3);
601 8208 : Oid colloid = PG_GET_COLLATION();
602 : AttrNumber attno;
603 : Datum value;
604 : bool matches;
605 : FmgrInfo *finfo;
606 : uint32 hashValue;
607 : BloomFilter *filter;
608 : int keyno;
609 :
610 8208 : filter = (BloomFilter *) PG_DETOAST_DATUM(column->bv_values[0]);
611 :
612 : Assert(filter);
613 :
614 : /*
615 : * Assume all scan keys match. We'll be searching for a scan key
616 : * eliminating the page range (we can stop on the first such key).
617 : */
618 8208 : matches = true;
619 :
620 8478 : for (keyno = 0; keyno < nkeys; keyno++)
621 : {
622 8208 : ScanKey key = keys[keyno];
623 :
624 : /* NULL keys are handled and filtered-out in bringetbitmap */
625 : Assert(!(key->sk_flags & SK_ISNULL));
626 :
627 8208 : attno = key->sk_attno;
628 8208 : value = key->sk_argument;
629 :
630 8208 : switch (key->sk_strategy)
631 : {
632 8208 : case BloomEqualStrategyNumber:
633 :
634 : /*
635 : * We want to return the current page range if the bloom
636 : * filter seems to contain the value.
637 : */
638 8208 : finfo = bloom_get_procinfo(bdesc, attno, PROCNUM_HASH);
639 :
640 8208 : hashValue = DatumGetUInt32(FunctionCall1Coll(finfo, colloid, value));
641 8208 : matches &= bloom_contains_value(filter, hashValue);
642 :
643 8208 : break;
644 0 : default:
645 : /* shouldn't happen */
646 0 : elog(ERROR, "invalid strategy number %d", key->sk_strategy);
647 : matches = false;
648 : break;
649 : }
650 :
651 8208 : if (!matches)
652 7938 : break;
653 : }
654 :
655 8208 : PG_RETURN_BOOL(matches);
656 : }
657 :
658 : /*
659 : * Given two BrinValues, update the first of them as a union of the summary
660 : * values contained in both. The second one is untouched.
661 : *
662 : * XXX We assume the bloom filters have the same parameters for now. In the
663 : * future we should have 'can union' function, to decide if we can combine
664 : * two particular bloom filters.
665 : */
666 : Datum
667 2 : brin_bloom_union(PG_FUNCTION_ARGS)
668 : {
669 : int i;
670 : int nbytes;
671 2 : BrinValues *col_a = (BrinValues *) PG_GETARG_POINTER(1);
672 2 : BrinValues *col_b = (BrinValues *) PG_GETARG_POINTER(2);
673 : BloomFilter *filter_a;
674 : BloomFilter *filter_b;
675 :
676 : Assert(col_a->bv_attno == col_b->bv_attno);
677 : Assert(!col_a->bv_allnulls && !col_b->bv_allnulls);
678 :
679 2 : filter_a = (BloomFilter *) PG_DETOAST_DATUM(col_a->bv_values[0]);
680 2 : filter_b = (BloomFilter *) PG_DETOAST_DATUM(col_b->bv_values[0]);
681 :
682 : /* make sure the filters use the same parameters */
683 : Assert(filter_a && filter_b);
684 : Assert(filter_a->nbits == filter_b->nbits);
685 : Assert(filter_a->nhashes == filter_b->nhashes);
686 : Assert((filter_a->nbits > 0) && (filter_a->nbits % 8 == 0));
687 :
688 2 : nbytes = (filter_a->nbits) / 8;
689 :
690 : /* simply OR the bitmaps */
691 490 : for (i = 0; i < nbytes; i++)
692 488 : filter_a->data[i] |= filter_b->data[i];
693 :
694 : /* update the number of bits set in the filter */
695 2 : filter_a->nbits_set = pg_popcount((const char *) filter_a->data, nbytes);
696 :
697 : /* if we decompressed filter_a, update the summary */
698 2 : if (PointerGetDatum(filter_a) != col_a->bv_values[0])
699 : {
700 0 : pfree(DatumGetPointer(col_a->bv_values[0]));
701 0 : col_a->bv_values[0] = PointerGetDatum(filter_a);
702 : }
703 :
704 : /* also free filter_b, if it was decompressed */
705 2 : if (PointerGetDatum(filter_b) != col_b->bv_values[0])
706 0 : pfree(filter_b);
707 :
708 2 : PG_RETURN_VOID();
709 : }
710 :
711 : /*
712 : * Cache and return inclusion opclass support procedure
713 : *
714 : * Return the procedure corresponding to the given function support number
715 : * or null if it does not exist.
716 : */
717 : static FmgrInfo *
718 76232 : bloom_get_procinfo(BrinDesc *bdesc, uint16 attno, uint16 procnum)
719 : {
720 : BloomOpaque *opaque;
721 76232 : uint16 basenum = procnum - PROCNUM_BASE;
722 :
723 : /*
724 : * We cache these in the opaque struct, to avoid repetitive syscache
725 : * lookups.
726 : */
727 76232 : opaque = (BloomOpaque *) bdesc->bd_info[attno - 1]->oi_opaque;
728 :
729 76232 : if (opaque->extra_procinfos[basenum].fn_oid == InvalidOid)
730 : {
731 768 : if (RegProcedureIsValid(index_getprocid(bdesc->bd_index, attno,
732 : procnum)))
733 768 : fmgr_info_copy(&opaque->extra_procinfos[basenum],
734 : index_getprocinfo(bdesc->bd_index, attno, procnum),
735 : bdesc->bd_context);
736 : else
737 0 : ereport(ERROR,
738 : errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
739 : errmsg_internal("invalid opclass definition"),
740 : errdetail_internal("The operator class is missing support function %d for column %d.",
741 : procnum, attno));
742 : }
743 :
744 76232 : return &opaque->extra_procinfos[basenum];
745 : }
746 :
747 : Datum
748 750 : brin_bloom_options(PG_FUNCTION_ARGS)
749 : {
750 750 : local_relopts *relopts = (local_relopts *) PG_GETARG_POINTER(0);
751 :
752 750 : init_local_reloptions(relopts, sizeof(BloomOptions));
753 :
754 750 : add_local_real_reloption(relopts, "n_distinct_per_range",
755 : "number of distinct items expected in a BRIN page range",
756 : BLOOM_DEFAULT_NDISTINCT_PER_RANGE,
757 : -1.0, INT_MAX, offsetof(BloomOptions, nDistinctPerRange));
758 :
759 750 : add_local_real_reloption(relopts, "false_positive_rate",
760 : "desired false-positive rate for the bloom filters",
761 : BLOOM_DEFAULT_FALSE_POSITIVE_RATE,
762 : BLOOM_MIN_FALSE_POSITIVE_RATE,
763 : BLOOM_MAX_FALSE_POSITIVE_RATE,
764 : offsetof(BloomOptions, falsePositiveRate));
765 :
766 750 : PG_RETURN_VOID();
767 : }
768 :
769 : /*
770 : * brin_bloom_summary_in
771 : * - input routine for type brin_bloom_summary.
772 : *
773 : * brin_bloom_summary is only used internally to represent summaries
774 : * in BRIN bloom indexes, so it has no operations of its own, and we
775 : * disallow input too.
776 : */
777 : Datum
778 0 : brin_bloom_summary_in(PG_FUNCTION_ARGS)
779 : {
780 : /*
781 : * brin_bloom_summary stores the data in binary form and parsing text
782 : * input is not needed, so disallow this.
783 : */
784 0 : ereport(ERROR,
785 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
786 : errmsg("cannot accept a value of type %s", "pg_brin_bloom_summary")));
787 :
788 : PG_RETURN_VOID(); /* keep compiler quiet */
789 : }
790 :
791 :
792 : /*
793 : * brin_bloom_summary_out
794 : * - output routine for type brin_bloom_summary.
795 : *
796 : * BRIN bloom summaries are serialized into a bytea value, but we want
797 : * to output something nicer humans can understand.
798 : */
799 : Datum
800 256 : brin_bloom_summary_out(PG_FUNCTION_ARGS)
801 : {
802 : BloomFilter *filter;
803 : StringInfoData str;
804 :
805 : /* detoast the data to get value with a full 4B header */
806 256 : filter = (BloomFilter *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
807 :
808 256 : initStringInfo(&str);
809 256 : appendStringInfoChar(&str, '{');
810 :
811 256 : appendStringInfo(&str, "mode: hashed nhashes: %u nbits: %u nbits_set: %u",
812 256 : filter->nhashes, filter->nbits, filter->nbits_set);
813 :
814 256 : appendStringInfoChar(&str, '}');
815 :
816 256 : PG_RETURN_CSTRING(str.data);
817 : }
818 :
819 : /*
820 : * brin_bloom_summary_recv
821 : * - binary input routine for type brin_bloom_summary.
822 : */
823 : Datum
824 0 : brin_bloom_summary_recv(PG_FUNCTION_ARGS)
825 : {
826 0 : ereport(ERROR,
827 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
828 : errmsg("cannot accept a value of type %s", "pg_brin_bloom_summary")));
829 :
830 : PG_RETURN_VOID(); /* keep compiler quiet */
831 : }
832 :
833 : /*
834 : * brin_bloom_summary_send
835 : * - binary output routine for type brin_bloom_summary.
836 : *
837 : * BRIN bloom summaries are serialized in a bytea value (although the
838 : * type is named differently), so let's just send that.
839 : */
840 : Datum
841 0 : brin_bloom_summary_send(PG_FUNCTION_ARGS)
842 : {
843 0 : return byteasend(fcinfo);
844 : }
|