Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * blutils.c
4 : * Bloom index utilities.
5 : *
6 : * Portions Copyright (c) 2016-2025, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1990-1993, Regents of the University of California
8 : *
9 : * IDENTIFICATION
10 : * contrib/bloom/blutils.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 : #include "postgres.h"
15 :
16 : #include "access/amapi.h"
17 : #include "access/generic_xlog.h"
18 : #include "access/reloptions.h"
19 : #include "bloom.h"
20 : #include "commands/vacuum.h"
21 : #include "storage/bufmgr.h"
22 : #include "storage/indexfsm.h"
23 : #include "utils/memutils.h"
24 : #include "varatt.h"
25 :
26 : /* Signature dealing macros - note i is assumed to be of type int */
27 : #define GETWORD(x,i) ( *( (BloomSignatureWord *)(x) + ( (i) / SIGNWORDBITS ) ) )
28 : #define CLRBIT(x,i) GETWORD(x,i) &= ~( 0x01 << ( (i) % SIGNWORDBITS ) )
29 : #define SETBIT(x,i) GETWORD(x,i) |= ( 0x01 << ( (i) % SIGNWORDBITS ) )
30 : #define GETBIT(x,i) ( (GETWORD(x,i) >> ( (i) % SIGNWORDBITS )) & 0x01 )
31 :
32 194 : PG_FUNCTION_INFO_V1(blhandler);
33 :
34 : /* Kind of relation options for bloom index */
35 : static relopt_kind bl_relopt_kind;
36 :
37 : /* parse table for fillRelOptions */
38 : static relopt_parse_elt bl_relopt_tab[INDEX_MAX_KEYS + 1];
39 :
40 : static int32 myRand(void);
41 : static void mySrand(uint32 seed);
42 :
43 : /*
44 : * Module initialize function: initialize info about Bloom relation options.
45 : *
46 : * Note: keep this in sync with makeDefaultBloomOptions().
47 : */
48 : void
49 190 : _PG_init(void)
50 : {
51 : int i;
52 : char buf[16];
53 :
54 190 : bl_relopt_kind = add_reloption_kind();
55 :
56 : /* Option for length of signature */
57 190 : add_int_reloption(bl_relopt_kind, "length",
58 : "Length of signature in bits",
59 : DEFAULT_BLOOM_LENGTH, 1, MAX_BLOOM_LENGTH,
60 : AccessExclusiveLock);
61 190 : bl_relopt_tab[0].optname = "length";
62 190 : bl_relopt_tab[0].opttype = RELOPT_TYPE_INT;
63 190 : bl_relopt_tab[0].offset = offsetof(BloomOptions, bloomLength);
64 :
65 : /* Number of bits for each possible index column: col1, col2, ... */
66 6270 : for (i = 0; i < INDEX_MAX_KEYS; i++)
67 : {
68 6080 : snprintf(buf, sizeof(buf), "col%d", i + 1);
69 6080 : add_int_reloption(bl_relopt_kind, buf,
70 : "Number of bits generated for each index column",
71 : DEFAULT_BLOOM_BITS, 1, MAX_BLOOM_BITS,
72 : AccessExclusiveLock);
73 6080 : bl_relopt_tab[i + 1].optname = MemoryContextStrdup(TopMemoryContext,
74 : buf);
75 6080 : bl_relopt_tab[i + 1].opttype = RELOPT_TYPE_INT;
76 6080 : bl_relopt_tab[i + 1].offset = offsetof(BloomOptions, bitSize[0]) + sizeof(int) * i;
77 : }
78 190 : }
79 :
80 : /*
81 : * Construct a default set of Bloom options.
82 : */
83 : static BloomOptions *
84 0 : makeDefaultBloomOptions(void)
85 : {
86 : BloomOptions *opts;
87 : int i;
88 :
89 0 : opts = (BloomOptions *) palloc0(sizeof(BloomOptions));
90 : /* Convert DEFAULT_BLOOM_LENGTH from # of bits to # of words */
91 0 : opts->bloomLength = (DEFAULT_BLOOM_LENGTH + SIGNWORDBITS - 1) / SIGNWORDBITS;
92 0 : for (i = 0; i < INDEX_MAX_KEYS; i++)
93 0 : opts->bitSize[i] = DEFAULT_BLOOM_BITS;
94 0 : SET_VARSIZE(opts, sizeof(BloomOptions));
95 0 : return opts;
96 : }
97 :
98 : /*
99 : * Bloom handler function: return IndexAmRoutine with access method parameters
100 : * and callbacks.
101 : */
102 : Datum
103 252 : blhandler(PG_FUNCTION_ARGS)
104 : {
105 252 : IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
106 :
107 252 : amroutine->amstrategies = BLOOM_NSTRATEGIES;
108 252 : amroutine->amsupport = BLOOM_NPROC;
109 252 : amroutine->amoptsprocnum = BLOOM_OPTIONS_PROC;
110 252 : amroutine->amcanorder = false;
111 252 : amroutine->amcanorderbyop = false;
112 252 : amroutine->amcanhash = false;
113 252 : amroutine->amconsistentequality = false;
114 252 : amroutine->amconsistentordering = false;
115 252 : amroutine->amcanbackward = false;
116 252 : amroutine->amcanunique = false;
117 252 : amroutine->amcanmulticol = true;
118 252 : amroutine->amoptionalkey = true;
119 252 : amroutine->amsearcharray = false;
120 252 : amroutine->amsearchnulls = false;
121 252 : amroutine->amstorage = false;
122 252 : amroutine->amclusterable = false;
123 252 : amroutine->ampredlocks = false;
124 252 : amroutine->amcanparallel = false;
125 252 : amroutine->amcanbuildparallel = false;
126 252 : amroutine->amcaninclude = false;
127 252 : amroutine->amusemaintenanceworkmem = false;
128 252 : amroutine->amparallelvacuumoptions =
129 : VACUUM_OPTION_PARALLEL_BULKDEL | VACUUM_OPTION_PARALLEL_CLEANUP;
130 252 : amroutine->amkeytype = InvalidOid;
131 :
132 252 : amroutine->ambuild = blbuild;
133 252 : amroutine->ambuildempty = blbuildempty;
134 252 : amroutine->aminsert = blinsert;
135 252 : amroutine->aminsertcleanup = NULL;
136 252 : amroutine->ambulkdelete = blbulkdelete;
137 252 : amroutine->amvacuumcleanup = blvacuumcleanup;
138 252 : amroutine->amcanreturn = NULL;
139 252 : amroutine->amcostestimate = blcostestimate;
140 252 : amroutine->amgettreeheight = NULL;
141 252 : amroutine->amoptions = bloptions;
142 252 : amroutine->amproperty = NULL;
143 252 : amroutine->ambuildphasename = NULL;
144 252 : amroutine->amvalidate = blvalidate;
145 252 : amroutine->amadjustmembers = NULL;
146 252 : amroutine->ambeginscan = blbeginscan;
147 252 : amroutine->amrescan = blrescan;
148 252 : amroutine->amgettuple = NULL;
149 252 : amroutine->amgetbitmap = blgetbitmap;
150 252 : amroutine->amendscan = blendscan;
151 252 : amroutine->ammarkpos = NULL;
152 252 : amroutine->amrestrpos = NULL;
153 252 : amroutine->amestimateparallelscan = NULL;
154 252 : amroutine->aminitparallelscan = NULL;
155 252 : amroutine->amparallelrescan = NULL;
156 252 : amroutine->amtranslatestrategy = NULL;
157 252 : amroutine->amtranslatecmptype = NULL;
158 :
159 252 : PG_RETURN_POINTER(amroutine);
160 : }
161 :
162 : /*
163 : * Fill BloomState structure for particular index.
164 : */
165 : void
166 208806 : initBloomState(BloomState *state, Relation index)
167 : {
168 : int i;
169 :
170 208806 : state->nColumns = index->rd_att->natts;
171 :
172 : /* Initialize hash function for each attribute */
173 626418 : for (i = 0; i < index->rd_att->natts; i++)
174 : {
175 417612 : fmgr_info_copy(&(state->hashFn[i]),
176 417612 : index_getprocinfo(index, i + 1, BLOOM_HASH_PROC),
177 : CurrentMemoryContext);
178 417612 : state->collations[i] = index->rd_indcollation[i];
179 : }
180 :
181 : /* Initialize amcache if needed with options from metapage */
182 208806 : if (!index->rd_amcache)
183 : {
184 : Buffer buffer;
185 : Page page;
186 : BloomMetaPageData *meta;
187 : BloomOptions *opts;
188 :
189 182 : opts = MemoryContextAlloc(index->rd_indexcxt, sizeof(BloomOptions));
190 :
191 182 : buffer = ReadBuffer(index, BLOOM_METAPAGE_BLKNO);
192 182 : LockBuffer(buffer, BUFFER_LOCK_SHARE);
193 :
194 182 : page = BufferGetPage(buffer);
195 :
196 182 : if (!BloomPageIsMeta(page))
197 0 : elog(ERROR, "Relation is not a bloom index");
198 182 : meta = BloomPageGetMeta(BufferGetPage(buffer));
199 :
200 182 : if (meta->magickNumber != BLOOM_MAGICK_NUMBER)
201 0 : elog(ERROR, "Relation is not a bloom index");
202 :
203 182 : *opts = meta->opts;
204 :
205 182 : UnlockReleaseBuffer(buffer);
206 :
207 182 : index->rd_amcache = opts;
208 : }
209 :
210 208806 : memcpy(&state->opts, index->rd_amcache, sizeof(state->opts));
211 208806 : state->sizeOfBloomTuple = BLOOMTUPLEHDRSZ +
212 208806 : sizeof(BloomSignatureWord) * state->opts.bloomLength;
213 208806 : }
214 :
215 : /*
216 : * Random generator copied from FreeBSD. Using own random generator here for
217 : * two reasons:
218 : *
219 : * 1) In this case random numbers are used for on-disk storage. Usage of
220 : * PostgreSQL number generator would obstruct it from all possible changes.
221 : * 2) Changing seed of PostgreSQL random generator would be undesirable side
222 : * effect.
223 : */
224 : static int32 next;
225 :
226 : static int32
227 1730866 : myRand(void)
228 : {
229 : /*----------
230 : * Compute x = (7^5 * x) mod (2^31 - 1)
231 : * without overflowing 31 bits:
232 : * (2^31 - 1) = 127773 * (7^5) + 2836
233 : * From "Random number generators: good ones are hard to find",
234 : * Park and Miller, Communications of the ACM, vol. 31, no. 10,
235 : * October 1988, p. 1195.
236 : *----------
237 : */
238 : int32 hi,
239 : lo,
240 : x;
241 :
242 : /* Must be in [1, 0x7ffffffe] range at this point. */
243 1730866 : hi = next / 127773;
244 1730866 : lo = next % 127773;
245 1730866 : x = 16807 * lo - 2836 * hi;
246 1730866 : if (x < 0)
247 350512 : x += 0x7fffffff;
248 1730866 : next = x;
249 : /* Transform to [0, 0x7ffffffd] range. */
250 1730866 : return (x - 1);
251 : }
252 :
253 : static void
254 984064 : mySrand(uint32 seed)
255 : {
256 984064 : next = seed;
257 : /* Transform to [1, 0x7ffffffe] range. */
258 984064 : next = (next % 0x7ffffffe) + 1;
259 984064 : }
260 :
261 : /*
262 : * Add bits of given value to the signature.
263 : */
264 : void
265 492032 : signValue(BloomState *state, BloomSignatureWord *sign, Datum value, int attno)
266 : {
267 : uint32 hashVal;
268 : int nBit,
269 : j;
270 :
271 : /*
272 : * init generator with "column's" number to get "hashed" seed for new
273 : * value. We don't want to map the same numbers from different columns
274 : * into the same bits!
275 : */
276 492032 : mySrand(attno);
277 :
278 : /*
279 : * Init hash sequence to map our value into bits. the same values in
280 : * different columns will be mapped into different bits because of step
281 : * above
282 : */
283 492032 : hashVal = DatumGetInt32(FunctionCall1Coll(&state->hashFn[attno], state->collations[attno], value));
284 492032 : mySrand(hashVal ^ myRand());
285 :
286 1730866 : for (j = 0; j < state->opts.bitSize[attno]; j++)
287 : {
288 : /* prevent multiple evaluation in SETBIT macro */
289 1238834 : nBit = myRand() % (state->opts.bloomLength * SIGNWORDBITS);
290 1238834 : SETBIT(sign, nBit);
291 : }
292 492032 : }
293 :
294 : /*
295 : * Make bloom tuple from values.
296 : */
297 : BloomTuple *
298 245500 : BloomFormTuple(BloomState *state, ItemPointer iptr, Datum *values, bool *isnull)
299 : {
300 : int i;
301 245500 : BloomTuple *res = (BloomTuple *) palloc0(state->sizeOfBloomTuple);
302 :
303 245500 : res->heapPtr = *iptr;
304 :
305 : /* Blooming each column */
306 736500 : for (i = 0; i < state->nColumns; i++)
307 : {
308 : /* skip nulls */
309 491000 : if (isnull[i])
310 0 : continue;
311 :
312 491000 : signValue(state, res->sign, values[i], i);
313 : }
314 :
315 245500 : return res;
316 : }
317 :
318 : /*
319 : * Add new bloom tuple to the page. Returns true if new tuple was successfully
320 : * added to the page. Returns false if it doesn't fit on the page.
321 : */
322 : bool
323 247076 : BloomPageAddItem(BloomState *state, Page page, BloomTuple *tuple)
324 : {
325 : BloomTuple *itup;
326 : BloomPageOpaque opaque;
327 : Pointer ptr;
328 :
329 : /* We shouldn't be pointed to an invalid page */
330 : Assert(!PageIsNew(page) && !BloomPageIsDeleted(page));
331 :
332 : /* Does new tuple fit on the page? */
333 247076 : if (BloomPageGetFreeSpace(state, page) < state->sizeOfBloomTuple)
334 1576 : return false;
335 :
336 : /* Copy new tuple to the end of page */
337 245500 : opaque = BloomPageGetOpaque(page);
338 245500 : itup = BloomPageGetTuple(state, page, opaque->maxoff + 1);
339 245500 : memcpy((Pointer) itup, (Pointer) tuple, state->sizeOfBloomTuple);
340 :
341 : /* Adjust maxoff and pd_lower */
342 245500 : opaque->maxoff++;
343 245500 : ptr = (Pointer) BloomPageGetTuple(state, page, opaque->maxoff + 1);
344 245500 : ((PageHeader) page)->pd_lower = ptr - page;
345 :
346 : /* Assert we didn't overrun available space */
347 : Assert(((PageHeader) page)->pd_lower <= ((PageHeader) page)->pd_upper);
348 :
349 245500 : return true;
350 : }
351 :
352 : /*
353 : * Allocate a new page (either by recycling, or by extending the index file)
354 : * The returned buffer is already pinned and exclusive-locked
355 : * Caller is responsible for initializing the page by calling BloomInitPage
356 : */
357 : Buffer
358 298 : BloomNewBuffer(Relation index)
359 : {
360 : Buffer buffer;
361 :
362 : /* First, try to get a page from FSM */
363 : for (;;)
364 0 : {
365 298 : BlockNumber blkno = GetFreeIndexPage(index);
366 :
367 298 : if (blkno == InvalidBlockNumber)
368 296 : break;
369 :
370 2 : buffer = ReadBuffer(index, blkno);
371 :
372 : /*
373 : * We have to guard against the possibility that someone else already
374 : * recycled this page; the buffer may be locked if so.
375 : */
376 2 : if (ConditionalLockBuffer(buffer))
377 : {
378 2 : Page page = BufferGetPage(buffer);
379 :
380 2 : if (PageIsNew(page))
381 0 : return buffer; /* OK to use, if never initialized */
382 :
383 2 : if (BloomPageIsDeleted(page))
384 2 : return buffer; /* OK to use */
385 :
386 0 : LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
387 : }
388 :
389 : /* Can't use it, so release buffer and try again */
390 0 : ReleaseBuffer(buffer);
391 : }
392 :
393 : /* Must extend the file */
394 296 : buffer = ExtendBufferedRel(BMR_REL(index), MAIN_FORKNUM, NULL,
395 : EB_LOCK_FIRST);
396 :
397 296 : return buffer;
398 : }
399 :
400 : /*
401 : * Initialize any page of a bloom index.
402 : */
403 : void
404 310 : BloomInitPage(Page page, uint16 flags)
405 : {
406 : BloomPageOpaque opaque;
407 :
408 310 : PageInit(page, BLCKSZ, sizeof(BloomPageOpaqueData));
409 :
410 310 : opaque = BloomPageGetOpaque(page);
411 310 : opaque->flags = flags;
412 310 : opaque->bloom_page_id = BLOOM_PAGE_ID;
413 310 : }
414 :
415 : /*
416 : * Fill in metapage for bloom index.
417 : */
418 : void
419 12 : BloomFillMetapage(Relation index, Page metaPage)
420 : {
421 : BloomOptions *opts;
422 : BloomMetaPageData *metadata;
423 :
424 : /*
425 : * Choose the index's options. If reloptions have been assigned, use
426 : * those, otherwise create default options.
427 : */
428 12 : opts = (BloomOptions *) index->rd_options;
429 12 : if (!opts)
430 0 : opts = makeDefaultBloomOptions();
431 :
432 : /*
433 : * Initialize contents of meta page, including a copy of the options,
434 : * which are now frozen for the life of the index.
435 : */
436 12 : BloomInitPage(metaPage, BLOOM_META);
437 12 : metadata = BloomPageGetMeta(metaPage);
438 12 : memset(metadata, 0, sizeof(BloomMetaPageData));
439 12 : metadata->magickNumber = BLOOM_MAGICK_NUMBER;
440 12 : metadata->opts = *opts;
441 12 : ((PageHeader) metaPage)->pd_lower += sizeof(BloomMetaPageData);
442 :
443 : /* If this fails, probably FreeBlockNumberArray size calc is wrong: */
444 : Assert(((PageHeader) metaPage)->pd_lower <= ((PageHeader) metaPage)->pd_upper);
445 12 : }
446 :
447 : /*
448 : * Initialize metapage for bloom index.
449 : */
450 : void
451 12 : BloomInitMetapage(Relation index, ForkNumber forknum)
452 : {
453 : Buffer metaBuffer;
454 : Page metaPage;
455 : GenericXLogState *state;
456 :
457 : /*
458 : * Make a new page; since it is first page it should be associated with
459 : * block number 0 (BLOOM_METAPAGE_BLKNO). No need to hold the extension
460 : * lock because there cannot be concurrent inserters yet.
461 : */
462 12 : metaBuffer = ReadBufferExtended(index, forknum, P_NEW, RBM_NORMAL, NULL);
463 12 : LockBuffer(metaBuffer, BUFFER_LOCK_EXCLUSIVE);
464 : Assert(BufferGetBlockNumber(metaBuffer) == BLOOM_METAPAGE_BLKNO);
465 :
466 : /* Initialize contents of meta page */
467 12 : state = GenericXLogStart(index);
468 12 : metaPage = GenericXLogRegisterBuffer(state, metaBuffer,
469 : GENERIC_XLOG_FULL_IMAGE);
470 12 : BloomFillMetapage(index, metaPage);
471 12 : GenericXLogFinish(state);
472 :
473 12 : UnlockReleaseBuffer(metaBuffer);
474 12 : }
475 :
476 : /*
477 : * Parse reloptions for bloom index, producing a BloomOptions struct.
478 : */
479 : bytea *
480 266 : bloptions(Datum reloptions, bool validate)
481 : {
482 : BloomOptions *rdopts;
483 :
484 : /* Parse the user-given reloptions */
485 266 : rdopts = (BloomOptions *) build_reloptions(reloptions, validate,
486 : bl_relopt_kind,
487 : sizeof(BloomOptions),
488 : bl_relopt_tab,
489 : lengthof(bl_relopt_tab));
490 :
491 : /* Convert signature length from # of bits to # to words, rounding up */
492 262 : if (rdopts)
493 262 : rdopts->bloomLength = (rdopts->bloomLength + SIGNWORDBITS - 1) / SIGNWORDBITS;
494 :
495 262 : return (bytea *) rdopts;
496 : }
|