Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * blutils.c
4 : * Bloom index utilities.
5 : *
6 : * Portions Copyright (c) 2016-2026, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1990-1993, Regents of the University of California
8 : *
9 : * IDENTIFICATION
10 : * contrib/bloom/blutils.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 : #include "postgres.h"
15 :
16 : #include "access/amapi.h"
17 : #include "access/generic_xlog.h"
18 : #include "access/reloptions.h"
19 : #include "bloom.h"
20 : #include "commands/vacuum.h"
21 : #include "storage/bufmgr.h"
22 : #include "storage/indexfsm.h"
23 : #include "utils/memutils.h"
24 : #include "varatt.h"
25 :
26 : /* Signature dealing macros - note i is assumed to be of type int */
27 : #define GETWORD(x,i) ( *( (BloomSignatureWord *)(x) + ( (i) / SIGNWORDBITS ) ) )
28 : #define CLRBIT(x,i) GETWORD(x,i) &= ~( 0x01 << ( (i) % SIGNWORDBITS ) )
29 : #define SETBIT(x,i) GETWORD(x,i) |= ( 0x01 << ( (i) % SIGNWORDBITS ) )
30 : #define GETBIT(x,i) ( (GETWORD(x,i) >> ( (i) % SIGNWORDBITS )) & 0x01 )
31 :
32 258 : PG_FUNCTION_INFO_V1(blhandler);
33 :
34 : /* Kind of relation options for bloom index */
35 : static relopt_kind bl_relopt_kind;
36 :
37 : /* parse table for fillRelOptions */
38 : static relopt_parse_elt bl_relopt_tab[INDEX_MAX_KEYS + 1];
39 :
40 : static int32 myRand(void);
41 : static void mySrand(uint32 seed);
42 :
43 : /*
44 : * Module initialize function: initialize info about Bloom relation options.
45 : *
46 : * Note: keep this in sync with makeDefaultBloomOptions().
47 : */
48 : void
49 254 : _PG_init(void)
50 : {
51 : int i;
52 : char buf[16];
53 :
54 254 : bl_relopt_kind = add_reloption_kind();
55 :
56 : /* Option for length of signature */
57 254 : add_int_reloption(bl_relopt_kind, "length",
58 : "Length of signature in bits",
59 : DEFAULT_BLOOM_LENGTH, 1, MAX_BLOOM_LENGTH,
60 : AccessExclusiveLock);
61 254 : bl_relopt_tab[0].optname = "length";
62 254 : bl_relopt_tab[0].opttype = RELOPT_TYPE_INT;
63 254 : bl_relopt_tab[0].offset = offsetof(BloomOptions, bloomLength);
64 :
65 : /* Number of bits for each possible index column: col1, col2, ... */
66 8382 : for (i = 0; i < INDEX_MAX_KEYS; i++)
67 : {
68 8128 : snprintf(buf, sizeof(buf), "col%d", i + 1);
69 8128 : add_int_reloption(bl_relopt_kind, buf,
70 : "Number of bits generated for each index column",
71 : DEFAULT_BLOOM_BITS, 1, MAX_BLOOM_BITS,
72 : AccessExclusiveLock);
73 8128 : bl_relopt_tab[i + 1].optname = MemoryContextStrdup(TopMemoryContext,
74 : buf);
75 8128 : bl_relopt_tab[i + 1].opttype = RELOPT_TYPE_INT;
76 8128 : bl_relopt_tab[i + 1].offset = offsetof(BloomOptions, bitSize[0]) + sizeof(int) * i;
77 : }
78 254 : }
79 :
80 : /*
81 : * Construct a default set of Bloom options.
82 : */
83 : static BloomOptions *
84 0 : makeDefaultBloomOptions(void)
85 : {
86 : BloomOptions *opts;
87 : int i;
88 :
89 0 : opts = palloc0_object(BloomOptions);
90 : /* Convert DEFAULT_BLOOM_LENGTH from # of bits to # of words */
91 0 : opts->bloomLength = (DEFAULT_BLOOM_LENGTH + SIGNWORDBITS - 1) / SIGNWORDBITS;
92 0 : for (i = 0; i < INDEX_MAX_KEYS; i++)
93 0 : opts->bitSize[i] = DEFAULT_BLOOM_BITS;
94 0 : SET_VARSIZE(opts, sizeof(BloomOptions));
95 0 : return opts;
96 : }
97 :
98 : /*
99 : * Bloom handler function: return IndexAmRoutine with access method parameters
100 : * and callbacks.
101 : */
102 : Datum
103 1456 : blhandler(PG_FUNCTION_ARGS)
104 : {
105 : static const IndexAmRoutine amroutine = {
106 : .type = T_IndexAmRoutine,
107 : .amstrategies = BLOOM_NSTRATEGIES,
108 : .amsupport = BLOOM_NPROC,
109 : .amoptsprocnum = BLOOM_OPTIONS_PROC,
110 : .amcanorder = false,
111 : .amcanorderbyop = false,
112 : .amcanhash = false,
113 : .amconsistentequality = false,
114 : .amconsistentordering = false,
115 : .amcanbackward = false,
116 : .amcanunique = false,
117 : .amcanmulticol = true,
118 : .amoptionalkey = true,
119 : .amsearcharray = false,
120 : .amsearchnulls = false,
121 : .amstorage = false,
122 : .amclusterable = false,
123 : .ampredlocks = false,
124 : .amcanparallel = false,
125 : .amcanbuildparallel = false,
126 : .amcaninclude = false,
127 : .amusemaintenanceworkmem = false,
128 : .amparallelvacuumoptions =
129 : VACUUM_OPTION_PARALLEL_BULKDEL | VACUUM_OPTION_PARALLEL_CLEANUP,
130 : .amkeytype = InvalidOid,
131 :
132 : .ambuild = blbuild,
133 : .ambuildempty = blbuildempty,
134 : .aminsert = blinsert,
135 : .aminsertcleanup = NULL,
136 : .ambulkdelete = blbulkdelete,
137 : .amvacuumcleanup = blvacuumcleanup,
138 : .amcanreturn = NULL,
139 : .amcostestimate = blcostestimate,
140 : .amgettreeheight = NULL,
141 : .amoptions = bloptions,
142 : .amproperty = NULL,
143 : .ambuildphasename = NULL,
144 : .amvalidate = blvalidate,
145 : .amadjustmembers = NULL,
146 : .ambeginscan = blbeginscan,
147 : .amrescan = blrescan,
148 : .amgettuple = NULL,
149 : .amgetbitmap = blgetbitmap,
150 : .amendscan = blendscan,
151 : .ammarkpos = NULL,
152 : .amrestrpos = NULL,
153 : .amestimateparallelscan = NULL,
154 : .aminitparallelscan = NULL,
155 : .amparallelrescan = NULL,
156 : .amtranslatestrategy = NULL,
157 : .amtranslatecmptype = NULL,
158 : };
159 :
160 1456 : PG_RETURN_POINTER(&amroutine);
161 : }
162 :
163 : /*
164 : * Fill BloomState structure for particular index.
165 : */
166 : void
167 208806 : initBloomState(BloomState *state, Relation index)
168 : {
169 : int i;
170 :
171 208806 : state->nColumns = index->rd_att->natts;
172 :
173 : /* Initialize hash function for each attribute */
174 626418 : for (i = 0; i < index->rd_att->natts; i++)
175 : {
176 417612 : fmgr_info_copy(&(state->hashFn[i]),
177 417612 : index_getprocinfo(index, i + 1, BLOOM_HASH_PROC),
178 : CurrentMemoryContext);
179 417612 : state->collations[i] = index->rd_indcollation[i];
180 : }
181 :
182 : /* Initialize amcache if needed with options from metapage */
183 208806 : if (!index->rd_amcache)
184 : {
185 : Buffer buffer;
186 : Page page;
187 : BloomMetaPageData *meta;
188 : BloomOptions *opts;
189 :
190 182 : opts = MemoryContextAlloc(index->rd_indexcxt, sizeof(BloomOptions));
191 :
192 182 : buffer = ReadBuffer(index, BLOOM_METAPAGE_BLKNO);
193 182 : LockBuffer(buffer, BUFFER_LOCK_SHARE);
194 :
195 182 : page = BufferGetPage(buffer);
196 :
197 182 : if (!BloomPageIsMeta(page))
198 0 : elog(ERROR, "Relation is not a bloom index");
199 182 : meta = BloomPageGetMeta(BufferGetPage(buffer));
200 :
201 182 : if (meta->magickNumber != BLOOM_MAGICK_NUMBER)
202 0 : elog(ERROR, "Relation is not a bloom index");
203 :
204 182 : *opts = meta->opts;
205 :
206 182 : UnlockReleaseBuffer(buffer);
207 :
208 182 : index->rd_amcache = opts;
209 : }
210 :
211 208806 : memcpy(&state->opts, index->rd_amcache, sizeof(state->opts));
212 208806 : state->sizeOfBloomTuple = BLOOMTUPLEHDRSZ +
213 208806 : sizeof(BloomSignatureWord) * state->opts.bloomLength;
214 208806 : }
215 :
216 : /*
217 : * Random generator copied from FreeBSD. Using own random generator here for
218 : * two reasons:
219 : *
220 : * 1) In this case random numbers are used for on-disk storage. Usage of
221 : * PostgreSQL number generator would obstruct it from all possible changes.
222 : * 2) Changing seed of PostgreSQL random generator would be undesirable side
223 : * effect.
224 : */
225 : static int32 next;
226 :
227 : static int32
228 1730866 : myRand(void)
229 : {
230 : /*----------
231 : * Compute x = (7^5 * x) mod (2^31 - 1)
232 : * without overflowing 31 bits:
233 : * (2^31 - 1) = 127773 * (7^5) + 2836
234 : * From "Random number generators: good ones are hard to find",
235 : * Park and Miller, Communications of the ACM, vol. 31, no. 10,
236 : * October 1988, p. 1195.
237 : *----------
238 : */
239 : int32 hi,
240 : lo,
241 : x;
242 :
243 : /* Must be in [1, 0x7ffffffe] range at this point. */
244 1730866 : hi = next / 127773;
245 1730866 : lo = next % 127773;
246 1730866 : x = 16807 * lo - 2836 * hi;
247 1730866 : if (x < 0)
248 350512 : x += 0x7fffffff;
249 1730866 : next = x;
250 : /* Transform to [0, 0x7ffffffd] range. */
251 1730866 : return (x - 1);
252 : }
253 :
254 : static void
255 984064 : mySrand(uint32 seed)
256 : {
257 984064 : next = seed;
258 : /* Transform to [1, 0x7ffffffe] range. */
259 984064 : next = (next % 0x7ffffffe) + 1;
260 984064 : }
261 :
262 : /*
263 : * Add bits of given value to the signature.
264 : */
265 : void
266 492032 : signValue(BloomState *state, BloomSignatureWord *sign, Datum value, int attno)
267 : {
268 : uint32 hashVal;
269 : int nBit,
270 : j;
271 :
272 : /*
273 : * init generator with "column's" number to get "hashed" seed for new
274 : * value. We don't want to map the same numbers from different columns
275 : * into the same bits!
276 : */
277 492032 : mySrand(attno);
278 :
279 : /*
280 : * Init hash sequence to map our value into bits. the same values in
281 : * different columns will be mapped into different bits because of step
282 : * above
283 : */
284 492032 : hashVal = DatumGetInt32(FunctionCall1Coll(&state->hashFn[attno], state->collations[attno], value));
285 492032 : mySrand(hashVal ^ myRand());
286 :
287 1730866 : for (j = 0; j < state->opts.bitSize[attno]; j++)
288 : {
289 : /* prevent multiple evaluation in SETBIT macro */
290 1238834 : nBit = myRand() % (state->opts.bloomLength * SIGNWORDBITS);
291 1238834 : SETBIT(sign, nBit);
292 : }
293 492032 : }
294 :
295 : /*
296 : * Make bloom tuple from values.
297 : */
298 : BloomTuple *
299 245500 : BloomFormTuple(BloomState *state, ItemPointer iptr, Datum *values, bool *isnull)
300 : {
301 : int i;
302 245500 : BloomTuple *res = (BloomTuple *) palloc0(state->sizeOfBloomTuple);
303 :
304 245500 : res->heapPtr = *iptr;
305 :
306 : /* Blooming each column */
307 736500 : for (i = 0; i < state->nColumns; i++)
308 : {
309 : /* skip nulls */
310 491000 : if (isnull[i])
311 0 : continue;
312 :
313 491000 : signValue(state, res->sign, values[i], i);
314 : }
315 :
316 245500 : return res;
317 : }
318 :
319 : /*
320 : * Add new bloom tuple to the page. Returns true if new tuple was successfully
321 : * added to the page. Returns false if it doesn't fit on the page.
322 : */
323 : bool
324 247076 : BloomPageAddItem(BloomState *state, Page page, BloomTuple *tuple)
325 : {
326 : BloomTuple *itup;
327 : BloomPageOpaque opaque;
328 : char *ptr;
329 :
330 : /* We shouldn't be pointed to an invalid page */
331 : Assert(!PageIsNew(page) && !BloomPageIsDeleted(page));
332 :
333 : /* Does new tuple fit on the page? */
334 247076 : if (BloomPageGetFreeSpace(state, page) < state->sizeOfBloomTuple)
335 1576 : return false;
336 :
337 : /* Copy new tuple to the end of page */
338 245500 : opaque = BloomPageGetOpaque(page);
339 245500 : itup = BloomPageGetTuple(state, page, opaque->maxoff + 1);
340 245500 : memcpy(itup, tuple, state->sizeOfBloomTuple);
341 :
342 : /* Adjust maxoff and pd_lower */
343 245500 : opaque->maxoff++;
344 245500 : ptr = (char *) BloomPageGetTuple(state, page, opaque->maxoff + 1);
345 245500 : ((PageHeader) page)->pd_lower = ptr - page;
346 :
347 : /* Assert we didn't overrun available space */
348 : Assert(((PageHeader) page)->pd_lower <= ((PageHeader) page)->pd_upper);
349 :
350 245500 : return true;
351 : }
352 :
353 : /*
354 : * Allocate a new page (either by recycling, or by extending the index file)
355 : * The returned buffer is already pinned and exclusive-locked
356 : * Caller is responsible for initializing the page by calling BloomInitPage
357 : */
358 : Buffer
359 298 : BloomNewBuffer(Relation index)
360 : {
361 : Buffer buffer;
362 :
363 : /* First, try to get a page from FSM */
364 : for (;;)
365 0 : {
366 298 : BlockNumber blkno = GetFreeIndexPage(index);
367 :
368 298 : if (blkno == InvalidBlockNumber)
369 296 : break;
370 :
371 2 : buffer = ReadBuffer(index, blkno);
372 :
373 : /*
374 : * We have to guard against the possibility that someone else already
375 : * recycled this page; the buffer may be locked if so.
376 : */
377 2 : if (ConditionalLockBuffer(buffer))
378 : {
379 2 : Page page = BufferGetPage(buffer);
380 :
381 2 : if (PageIsNew(page))
382 0 : return buffer; /* OK to use, if never initialized */
383 :
384 2 : if (BloomPageIsDeleted(page))
385 2 : return buffer; /* OK to use */
386 :
387 0 : LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
388 : }
389 :
390 : /* Can't use it, so release buffer and try again */
391 0 : ReleaseBuffer(buffer);
392 : }
393 :
394 : /* Must extend the file */
395 296 : buffer = ExtendBufferedRel(BMR_REL(index), MAIN_FORKNUM, NULL,
396 : EB_LOCK_FIRST);
397 :
398 296 : return buffer;
399 : }
400 :
401 : /*
402 : * Initialize any page of a bloom index.
403 : */
404 : void
405 310 : BloomInitPage(Page page, uint16 flags)
406 : {
407 : BloomPageOpaque opaque;
408 :
409 310 : PageInit(page, BLCKSZ, sizeof(BloomPageOpaqueData));
410 :
411 310 : opaque = BloomPageGetOpaque(page);
412 310 : opaque->flags = flags;
413 310 : opaque->bloom_page_id = BLOOM_PAGE_ID;
414 310 : }
415 :
416 : /*
417 : * Fill in metapage for bloom index.
418 : */
419 : void
420 12 : BloomFillMetapage(Relation index, Page metaPage)
421 : {
422 : BloomOptions *opts;
423 : BloomMetaPageData *metadata;
424 :
425 : /*
426 : * Choose the index's options. If reloptions have been assigned, use
427 : * those, otherwise create default options.
428 : */
429 12 : opts = (BloomOptions *) index->rd_options;
430 12 : if (!opts)
431 0 : opts = makeDefaultBloomOptions();
432 :
433 : /*
434 : * Initialize contents of meta page, including a copy of the options,
435 : * which are now frozen for the life of the index.
436 : */
437 12 : BloomInitPage(metaPage, BLOOM_META);
438 12 : metadata = BloomPageGetMeta(metaPage);
439 12 : memset(metadata, 0, sizeof(BloomMetaPageData));
440 12 : metadata->magickNumber = BLOOM_MAGICK_NUMBER;
441 12 : metadata->opts = *opts;
442 12 : ((PageHeader) metaPage)->pd_lower += sizeof(BloomMetaPageData);
443 :
444 : /* If this fails, probably FreeBlockNumberArray size calc is wrong: */
445 : Assert(((PageHeader) metaPage)->pd_lower <= ((PageHeader) metaPage)->pd_upper);
446 12 : }
447 :
448 : /*
449 : * Initialize metapage for bloom index.
450 : */
451 : void
452 12 : BloomInitMetapage(Relation index, ForkNumber forknum)
453 : {
454 : Buffer metaBuffer;
455 : Page metaPage;
456 : GenericXLogState *state;
457 :
458 : /*
459 : * Make a new page; since it is first page it should be associated with
460 : * block number 0 (BLOOM_METAPAGE_BLKNO). No need to hold the extension
461 : * lock because there cannot be concurrent inserters yet.
462 : */
463 12 : metaBuffer = ReadBufferExtended(index, forknum, P_NEW, RBM_NORMAL, NULL);
464 12 : LockBuffer(metaBuffer, BUFFER_LOCK_EXCLUSIVE);
465 : Assert(BufferGetBlockNumber(metaBuffer) == BLOOM_METAPAGE_BLKNO);
466 :
467 : /* Initialize contents of meta page */
468 12 : state = GenericXLogStart(index);
469 12 : metaPage = GenericXLogRegisterBuffer(state, metaBuffer,
470 : GENERIC_XLOG_FULL_IMAGE);
471 12 : BloomFillMetapage(index, metaPage);
472 12 : GenericXLogFinish(state);
473 :
474 12 : UnlockReleaseBuffer(metaBuffer);
475 12 : }
476 :
477 : /*
478 : * Parse reloptions for bloom index, producing a BloomOptions struct.
479 : */
480 : bytea *
481 266 : bloptions(Datum reloptions, bool validate)
482 : {
483 : BloomOptions *rdopts;
484 :
485 : /* Parse the user-given reloptions */
486 266 : rdopts = (BloomOptions *) build_reloptions(reloptions, validate,
487 : bl_relopt_kind,
488 : sizeof(BloomOptions),
489 : bl_relopt_tab,
490 : lengthof(bl_relopt_tab));
491 :
492 : /* Convert signature length from # of bits to # to words, rounding up */
493 262 : if (rdopts)
494 262 : rdopts->bloomLength = (rdopts->bloomLength + SIGNWORDBITS - 1) / SIGNWORDBITS;
495 :
496 262 : return (bytea *) rdopts;
497 : }
|