Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * blutils.c
4 : * Bloom index utilities.
5 : *
6 : * Portions Copyright (c) 2016-2025, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1990-1993, Regents of the University of California
8 : *
9 : * IDENTIFICATION
10 : * contrib/bloom/blutils.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 : #include "postgres.h"
15 :
16 : #include "access/amapi.h"
17 : #include "access/generic_xlog.h"
18 : #include "access/reloptions.h"
19 : #include "bloom.h"
20 : #include "commands/vacuum.h"
21 : #include "storage/bufmgr.h"
22 : #include "storage/indexfsm.h"
23 : #include "utils/memutils.h"
24 : #include "varatt.h"
25 :
26 : /* Signature dealing macros - note i is assumed to be of type int */
27 : #define GETWORD(x,i) ( *( (BloomSignatureWord *)(x) + ( (i) / SIGNWORDBITS ) ) )
28 : #define CLRBIT(x,i) GETWORD(x,i) &= ~( 0x01 << ( (i) % SIGNWORDBITS ) )
29 : #define SETBIT(x,i) GETWORD(x,i) |= ( 0x01 << ( (i) % SIGNWORDBITS ) )
30 : #define GETBIT(x,i) ( (GETWORD(x,i) >> ( (i) % SIGNWORDBITS )) & 0x01 )
31 :
32 194 : PG_FUNCTION_INFO_V1(blhandler);
33 :
34 : /* Kind of relation options for bloom index */
35 : static relopt_kind bl_relopt_kind;
36 :
37 : /* parse table for fillRelOptions */
38 : static relopt_parse_elt bl_relopt_tab[INDEX_MAX_KEYS + 1];
39 :
40 : static int32 myRand(void);
41 : static void mySrand(uint32 seed);
42 :
43 : /*
44 : * Module initialize function: initialize info about Bloom relation options.
45 : *
46 : * Note: keep this in sync with makeDefaultBloomOptions().
47 : */
48 : void
49 190 : _PG_init(void)
50 : {
51 : int i;
52 : char buf[16];
53 :
54 190 : bl_relopt_kind = add_reloption_kind();
55 :
56 : /* Option for length of signature */
57 190 : add_int_reloption(bl_relopt_kind, "length",
58 : "Length of signature in bits",
59 : DEFAULT_BLOOM_LENGTH, 1, MAX_BLOOM_LENGTH,
60 : AccessExclusiveLock);
61 190 : bl_relopt_tab[0].optname = "length";
62 190 : bl_relopt_tab[0].opttype = RELOPT_TYPE_INT;
63 190 : bl_relopt_tab[0].offset = offsetof(BloomOptions, bloomLength);
64 :
65 : /* Number of bits for each possible index column: col1, col2, ... */
66 6270 : for (i = 0; i < INDEX_MAX_KEYS; i++)
67 : {
68 6080 : snprintf(buf, sizeof(buf), "col%d", i + 1);
69 6080 : add_int_reloption(bl_relopt_kind, buf,
70 : "Number of bits generated for each index column",
71 : DEFAULT_BLOOM_BITS, 1, MAX_BLOOM_BITS,
72 : AccessExclusiveLock);
73 6080 : bl_relopt_tab[i + 1].optname = MemoryContextStrdup(TopMemoryContext,
74 : buf);
75 6080 : bl_relopt_tab[i + 1].opttype = RELOPT_TYPE_INT;
76 6080 : bl_relopt_tab[i + 1].offset = offsetof(BloomOptions, bitSize[0]) + sizeof(int) * i;
77 : }
78 190 : }
79 :
80 : /*
81 : * Construct a default set of Bloom options.
82 : */
83 : static BloomOptions *
84 0 : makeDefaultBloomOptions(void)
85 : {
86 : BloomOptions *opts;
87 : int i;
88 :
89 0 : opts = (BloomOptions *) palloc0(sizeof(BloomOptions));
90 : /* Convert DEFAULT_BLOOM_LENGTH from # of bits to # of words */
91 0 : opts->bloomLength = (DEFAULT_BLOOM_LENGTH + SIGNWORDBITS - 1) / SIGNWORDBITS;
92 0 : for (i = 0; i < INDEX_MAX_KEYS; i++)
93 0 : opts->bitSize[i] = DEFAULT_BLOOM_BITS;
94 0 : SET_VARSIZE(opts, sizeof(BloomOptions));
95 0 : return opts;
96 : }
97 :
98 : /*
99 : * Bloom handler function: return IndexAmRoutine with access method parameters
100 : * and callbacks.
101 : */
102 : Datum
103 236 : blhandler(PG_FUNCTION_ARGS)
104 : {
105 236 : IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
106 :
107 236 : amroutine->amstrategies = BLOOM_NSTRATEGIES;
108 236 : amroutine->amsupport = BLOOM_NPROC;
109 236 : amroutine->amoptsprocnum = BLOOM_OPTIONS_PROC;
110 236 : amroutine->amcanorder = false;
111 236 : amroutine->amcanorderbyop = false;
112 236 : amroutine->amcanbackward = false;
113 236 : amroutine->amcanunique = false;
114 236 : amroutine->amcanmulticol = true;
115 236 : amroutine->amoptionalkey = true;
116 236 : amroutine->amsearcharray = false;
117 236 : amroutine->amsearchnulls = false;
118 236 : amroutine->amstorage = false;
119 236 : amroutine->amclusterable = false;
120 236 : amroutine->ampredlocks = false;
121 236 : amroutine->amcanparallel = false;
122 236 : amroutine->amcanbuildparallel = false;
123 236 : amroutine->amcaninclude = false;
124 236 : amroutine->amusemaintenanceworkmem = false;
125 236 : amroutine->amparallelvacuumoptions =
126 : VACUUM_OPTION_PARALLEL_BULKDEL | VACUUM_OPTION_PARALLEL_CLEANUP;
127 236 : amroutine->amkeytype = InvalidOid;
128 :
129 236 : amroutine->ambuild = blbuild;
130 236 : amroutine->ambuildempty = blbuildempty;
131 236 : amroutine->aminsert = blinsert;
132 236 : amroutine->aminsertcleanup = NULL;
133 236 : amroutine->ambulkdelete = blbulkdelete;
134 236 : amroutine->amvacuumcleanup = blvacuumcleanup;
135 236 : amroutine->amcanreturn = NULL;
136 236 : amroutine->amcostestimate = blcostestimate;
137 236 : amroutine->amgettreeheight = NULL;
138 236 : amroutine->amoptions = bloptions;
139 236 : amroutine->amproperty = NULL;
140 236 : amroutine->ambuildphasename = NULL;
141 236 : amroutine->amvalidate = blvalidate;
142 236 : amroutine->amadjustmembers = NULL;
143 236 : amroutine->ambeginscan = blbeginscan;
144 236 : amroutine->amrescan = blrescan;
145 236 : amroutine->amgettuple = NULL;
146 236 : amroutine->amgetbitmap = blgetbitmap;
147 236 : amroutine->amendscan = blendscan;
148 236 : amroutine->ammarkpos = NULL;
149 236 : amroutine->amrestrpos = NULL;
150 236 : amroutine->amestimateparallelscan = NULL;
151 236 : amroutine->aminitparallelscan = NULL;
152 236 : amroutine->amparallelrescan = NULL;
153 236 : amroutine->amtranslatestrategy = NULL;
154 236 : amroutine->amtranslatecmptype = NULL;
155 :
156 236 : PG_RETURN_POINTER(amroutine);
157 : }
158 :
159 : /*
160 : * Fill BloomState structure for particular index.
161 : */
162 : void
163 208806 : initBloomState(BloomState *state, Relation index)
164 : {
165 : int i;
166 :
167 208806 : state->nColumns = index->rd_att->natts;
168 :
169 : /* Initialize hash function for each attribute */
170 626418 : for (i = 0; i < index->rd_att->natts; i++)
171 : {
172 417612 : fmgr_info_copy(&(state->hashFn[i]),
173 417612 : index_getprocinfo(index, i + 1, BLOOM_HASH_PROC),
174 : CurrentMemoryContext);
175 417612 : state->collations[i] = index->rd_indcollation[i];
176 : }
177 :
178 : /* Initialize amcache if needed with options from metapage */
179 208806 : if (!index->rd_amcache)
180 : {
181 : Buffer buffer;
182 : Page page;
183 : BloomMetaPageData *meta;
184 : BloomOptions *opts;
185 :
186 182 : opts = MemoryContextAlloc(index->rd_indexcxt, sizeof(BloomOptions));
187 :
188 182 : buffer = ReadBuffer(index, BLOOM_METAPAGE_BLKNO);
189 182 : LockBuffer(buffer, BUFFER_LOCK_SHARE);
190 :
191 182 : page = BufferGetPage(buffer);
192 :
193 182 : if (!BloomPageIsMeta(page))
194 0 : elog(ERROR, "Relation is not a bloom index");
195 182 : meta = BloomPageGetMeta(BufferGetPage(buffer));
196 :
197 182 : if (meta->magickNumber != BLOOM_MAGICK_NUMBER)
198 0 : elog(ERROR, "Relation is not a bloom index");
199 :
200 182 : *opts = meta->opts;
201 :
202 182 : UnlockReleaseBuffer(buffer);
203 :
204 182 : index->rd_amcache = opts;
205 : }
206 :
207 208806 : memcpy(&state->opts, index->rd_amcache, sizeof(state->opts));
208 208806 : state->sizeOfBloomTuple = BLOOMTUPLEHDRSZ +
209 208806 : sizeof(BloomSignatureWord) * state->opts.bloomLength;
210 208806 : }
211 :
212 : /*
213 : * Random generator copied from FreeBSD. Using own random generator here for
214 : * two reasons:
215 : *
216 : * 1) In this case random numbers are used for on-disk storage. Usage of
217 : * PostgreSQL number generator would obstruct it from all possible changes.
218 : * 2) Changing seed of PostgreSQL random generator would be undesirable side
219 : * effect.
220 : */
221 : static int32 next;
222 :
223 : static int32
224 1730866 : myRand(void)
225 : {
226 : /*----------
227 : * Compute x = (7^5 * x) mod (2^31 - 1)
228 : * without overflowing 31 bits:
229 : * (2^31 - 1) = 127773 * (7^5) + 2836
230 : * From "Random number generators: good ones are hard to find",
231 : * Park and Miller, Communications of the ACM, vol. 31, no. 10,
232 : * October 1988, p. 1195.
233 : *----------
234 : */
235 : int32 hi,
236 : lo,
237 : x;
238 :
239 : /* Must be in [1, 0x7ffffffe] range at this point. */
240 1730866 : hi = next / 127773;
241 1730866 : lo = next % 127773;
242 1730866 : x = 16807 * lo - 2836 * hi;
243 1730866 : if (x < 0)
244 350512 : x += 0x7fffffff;
245 1730866 : next = x;
246 : /* Transform to [0, 0x7ffffffd] range. */
247 1730866 : return (x - 1);
248 : }
249 :
250 : static void
251 984064 : mySrand(uint32 seed)
252 : {
253 984064 : next = seed;
254 : /* Transform to [1, 0x7ffffffe] range. */
255 984064 : next = (next % 0x7ffffffe) + 1;
256 984064 : }
257 :
258 : /*
259 : * Add bits of given value to the signature.
260 : */
261 : void
262 492032 : signValue(BloomState *state, BloomSignatureWord *sign, Datum value, int attno)
263 : {
264 : uint32 hashVal;
265 : int nBit,
266 : j;
267 :
268 : /*
269 : * init generator with "column's" number to get "hashed" seed for new
270 : * value. We don't want to map the same numbers from different columns
271 : * into the same bits!
272 : */
273 492032 : mySrand(attno);
274 :
275 : /*
276 : * Init hash sequence to map our value into bits. the same values in
277 : * different columns will be mapped into different bits because of step
278 : * above
279 : */
280 492032 : hashVal = DatumGetInt32(FunctionCall1Coll(&state->hashFn[attno], state->collations[attno], value));
281 492032 : mySrand(hashVal ^ myRand());
282 :
283 1730866 : for (j = 0; j < state->opts.bitSize[attno]; j++)
284 : {
285 : /* prevent multiple evaluation in SETBIT macro */
286 1238834 : nBit = myRand() % (state->opts.bloomLength * SIGNWORDBITS);
287 1238834 : SETBIT(sign, nBit);
288 : }
289 492032 : }
290 :
291 : /*
292 : * Make bloom tuple from values.
293 : */
294 : BloomTuple *
295 245500 : BloomFormTuple(BloomState *state, ItemPointer iptr, Datum *values, bool *isnull)
296 : {
297 : int i;
298 245500 : BloomTuple *res = (BloomTuple *) palloc0(state->sizeOfBloomTuple);
299 :
300 245500 : res->heapPtr = *iptr;
301 :
302 : /* Blooming each column */
303 736500 : for (i = 0; i < state->nColumns; i++)
304 : {
305 : /* skip nulls */
306 491000 : if (isnull[i])
307 0 : continue;
308 :
309 491000 : signValue(state, res->sign, values[i], i);
310 : }
311 :
312 245500 : return res;
313 : }
314 :
315 : /*
316 : * Add new bloom tuple to the page. Returns true if new tuple was successfully
317 : * added to the page. Returns false if it doesn't fit on the page.
318 : */
319 : bool
320 247076 : BloomPageAddItem(BloomState *state, Page page, BloomTuple *tuple)
321 : {
322 : BloomTuple *itup;
323 : BloomPageOpaque opaque;
324 : Pointer ptr;
325 :
326 : /* We shouldn't be pointed to an invalid page */
327 : Assert(!PageIsNew(page) && !BloomPageIsDeleted(page));
328 :
329 : /* Does new tuple fit on the page? */
330 247076 : if (BloomPageGetFreeSpace(state, page) < state->sizeOfBloomTuple)
331 1576 : return false;
332 :
333 : /* Copy new tuple to the end of page */
334 245500 : opaque = BloomPageGetOpaque(page);
335 245500 : itup = BloomPageGetTuple(state, page, opaque->maxoff + 1);
336 245500 : memcpy((Pointer) itup, (Pointer) tuple, state->sizeOfBloomTuple);
337 :
338 : /* Adjust maxoff and pd_lower */
339 245500 : opaque->maxoff++;
340 245500 : ptr = (Pointer) BloomPageGetTuple(state, page, opaque->maxoff + 1);
341 245500 : ((PageHeader) page)->pd_lower = ptr - page;
342 :
343 : /* Assert we didn't overrun available space */
344 : Assert(((PageHeader) page)->pd_lower <= ((PageHeader) page)->pd_upper);
345 :
346 245500 : return true;
347 : }
348 :
349 : /*
350 : * Allocate a new page (either by recycling, or by extending the index file)
351 : * The returned buffer is already pinned and exclusive-locked
352 : * Caller is responsible for initializing the page by calling BloomInitPage
353 : */
354 : Buffer
355 298 : BloomNewBuffer(Relation index)
356 : {
357 : Buffer buffer;
358 :
359 : /* First, try to get a page from FSM */
360 : for (;;)
361 0 : {
362 298 : BlockNumber blkno = GetFreeIndexPage(index);
363 :
364 298 : if (blkno == InvalidBlockNumber)
365 296 : break;
366 :
367 2 : buffer = ReadBuffer(index, blkno);
368 :
369 : /*
370 : * We have to guard against the possibility that someone else already
371 : * recycled this page; the buffer may be locked if so.
372 : */
373 2 : if (ConditionalLockBuffer(buffer))
374 : {
375 2 : Page page = BufferGetPage(buffer);
376 :
377 2 : if (PageIsNew(page))
378 0 : return buffer; /* OK to use, if never initialized */
379 :
380 2 : if (BloomPageIsDeleted(page))
381 2 : return buffer; /* OK to use */
382 :
383 0 : LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
384 : }
385 :
386 : /* Can't use it, so release buffer and try again */
387 0 : ReleaseBuffer(buffer);
388 : }
389 :
390 : /* Must extend the file */
391 296 : buffer = ExtendBufferedRel(BMR_REL(index), MAIN_FORKNUM, NULL,
392 : EB_LOCK_FIRST);
393 :
394 296 : return buffer;
395 : }
396 :
397 : /*
398 : * Initialize any page of a bloom index.
399 : */
400 : void
401 310 : BloomInitPage(Page page, uint16 flags)
402 : {
403 : BloomPageOpaque opaque;
404 :
405 310 : PageInit(page, BLCKSZ, sizeof(BloomPageOpaqueData));
406 :
407 310 : opaque = BloomPageGetOpaque(page);
408 310 : opaque->flags = flags;
409 310 : opaque->bloom_page_id = BLOOM_PAGE_ID;
410 310 : }
411 :
412 : /*
413 : * Fill in metapage for bloom index.
414 : */
415 : void
416 12 : BloomFillMetapage(Relation index, Page metaPage)
417 : {
418 : BloomOptions *opts;
419 : BloomMetaPageData *metadata;
420 :
421 : /*
422 : * Choose the index's options. If reloptions have been assigned, use
423 : * those, otherwise create default options.
424 : */
425 12 : opts = (BloomOptions *) index->rd_options;
426 12 : if (!opts)
427 0 : opts = makeDefaultBloomOptions();
428 :
429 : /*
430 : * Initialize contents of meta page, including a copy of the options,
431 : * which are now frozen for the life of the index.
432 : */
433 12 : BloomInitPage(metaPage, BLOOM_META);
434 12 : metadata = BloomPageGetMeta(metaPage);
435 12 : memset(metadata, 0, sizeof(BloomMetaPageData));
436 12 : metadata->magickNumber = BLOOM_MAGICK_NUMBER;
437 12 : metadata->opts = *opts;
438 12 : ((PageHeader) metaPage)->pd_lower += sizeof(BloomMetaPageData);
439 :
440 : /* If this fails, probably FreeBlockNumberArray size calc is wrong: */
441 : Assert(((PageHeader) metaPage)->pd_lower <= ((PageHeader) metaPage)->pd_upper);
442 12 : }
443 :
444 : /*
445 : * Initialize metapage for bloom index.
446 : */
447 : void
448 12 : BloomInitMetapage(Relation index, ForkNumber forknum)
449 : {
450 : Buffer metaBuffer;
451 : Page metaPage;
452 : GenericXLogState *state;
453 :
454 : /*
455 : * Make a new page; since it is first page it should be associated with
456 : * block number 0 (BLOOM_METAPAGE_BLKNO). No need to hold the extension
457 : * lock because there cannot be concurrent inserters yet.
458 : */
459 12 : metaBuffer = ReadBufferExtended(index, forknum, P_NEW, RBM_NORMAL, NULL);
460 12 : LockBuffer(metaBuffer, BUFFER_LOCK_EXCLUSIVE);
461 : Assert(BufferGetBlockNumber(metaBuffer) == BLOOM_METAPAGE_BLKNO);
462 :
463 : /* Initialize contents of meta page */
464 12 : state = GenericXLogStart(index);
465 12 : metaPage = GenericXLogRegisterBuffer(state, metaBuffer,
466 : GENERIC_XLOG_FULL_IMAGE);
467 12 : BloomFillMetapage(index, metaPage);
468 12 : GenericXLogFinish(state);
469 :
470 12 : UnlockReleaseBuffer(metaBuffer);
471 12 : }
472 :
473 : /*
474 : * Parse reloptions for bloom index, producing a BloomOptions struct.
475 : */
476 : bytea *
477 266 : bloptions(Datum reloptions, bool validate)
478 : {
479 : BloomOptions *rdopts;
480 :
481 : /* Parse the user-given reloptions */
482 266 : rdopts = (BloomOptions *) build_reloptions(reloptions, validate,
483 : bl_relopt_kind,
484 : sizeof(BloomOptions),
485 : bl_relopt_tab,
486 : lengthof(bl_relopt_tab));
487 :
488 : /* Convert signature length from # of bits to # to words, rounding up */
489 262 : if (rdopts)
490 262 : rdopts->bloomLength = (rdopts->bloomLength + SIGNWORDBITS - 1) / SIGNWORDBITS;
491 :
492 262 : return (bytea *) rdopts;
493 : }
|