Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * blutils.c
4 : * Bloom index utilities.
5 : *
6 : * Portions Copyright (c) 2016-2024, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1990-1993, Regents of the University of California
8 : *
9 : * IDENTIFICATION
10 : * contrib/bloom/blutils.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 : #include "postgres.h"
15 :
16 : #include "access/amapi.h"
17 : #include "access/generic_xlog.h"
18 : #include "access/reloptions.h"
19 : #include "bloom.h"
20 : #include "commands/vacuum.h"
21 : #include "storage/bufmgr.h"
22 : #include "storage/indexfsm.h"
23 : #include "utils/memutils.h"
24 : #include "varatt.h"
25 :
26 : /* Signature dealing macros - note i is assumed to be of type int */
27 : #define GETWORD(x,i) ( *( (BloomSignatureWord *)(x) + ( (i) / SIGNWORDBITS ) ) )
28 : #define CLRBIT(x,i) GETWORD(x,i) &= ~( 0x01 << ( (i) % SIGNWORDBITS ) )
29 : #define SETBIT(x,i) GETWORD(x,i) |= ( 0x01 << ( (i) % SIGNWORDBITS ) )
30 : #define GETBIT(x,i) ( (GETWORD(x,i) >> ( (i) % SIGNWORDBITS )) & 0x01 )
31 :
32 194 : PG_FUNCTION_INFO_V1(blhandler);
33 :
34 : /* Kind of relation options for bloom index */
35 : static relopt_kind bl_relopt_kind;
36 :
37 : /* parse table for fillRelOptions */
38 : static relopt_parse_elt bl_relopt_tab[INDEX_MAX_KEYS + 1];
39 :
40 : static int32 myRand(void);
41 : static void mySrand(uint32 seed);
42 :
43 : /*
44 : * Module initialize function: initialize info about Bloom relation options.
45 : *
46 : * Note: keep this in sync with makeDefaultBloomOptions().
47 : */
48 : void
49 190 : _PG_init(void)
50 : {
51 : int i;
52 : char buf[16];
53 :
54 190 : bl_relopt_kind = add_reloption_kind();
55 :
56 : /* Option for length of signature */
57 190 : add_int_reloption(bl_relopt_kind, "length",
58 : "Length of signature in bits",
59 : DEFAULT_BLOOM_LENGTH, 1, MAX_BLOOM_LENGTH,
60 : AccessExclusiveLock);
61 190 : bl_relopt_tab[0].optname = "length";
62 190 : bl_relopt_tab[0].opttype = RELOPT_TYPE_INT;
63 190 : bl_relopt_tab[0].offset = offsetof(BloomOptions, bloomLength);
64 :
65 : /* Number of bits for each possible index column: col1, col2, ... */
66 6270 : for (i = 0; i < INDEX_MAX_KEYS; i++)
67 : {
68 6080 : snprintf(buf, sizeof(buf), "col%d", i + 1);
69 6080 : add_int_reloption(bl_relopt_kind, buf,
70 : "Number of bits generated for each index column",
71 : DEFAULT_BLOOM_BITS, 1, MAX_BLOOM_BITS,
72 : AccessExclusiveLock);
73 6080 : bl_relopt_tab[i + 1].optname = MemoryContextStrdup(TopMemoryContext,
74 : buf);
75 6080 : bl_relopt_tab[i + 1].opttype = RELOPT_TYPE_INT;
76 6080 : bl_relopt_tab[i + 1].offset = offsetof(BloomOptions, bitSize[0]) + sizeof(int) * i;
77 : }
78 190 : }
79 :
80 : /*
81 : * Construct a default set of Bloom options.
82 : */
83 : static BloomOptions *
84 0 : makeDefaultBloomOptions(void)
85 : {
86 : BloomOptions *opts;
87 : int i;
88 :
89 0 : opts = (BloomOptions *) palloc0(sizeof(BloomOptions));
90 : /* Convert DEFAULT_BLOOM_LENGTH from # of bits to # of words */
91 0 : opts->bloomLength = (DEFAULT_BLOOM_LENGTH + SIGNWORDBITS - 1) / SIGNWORDBITS;
92 0 : for (i = 0; i < INDEX_MAX_KEYS; i++)
93 0 : opts->bitSize[i] = DEFAULT_BLOOM_BITS;
94 0 : SET_VARSIZE(opts, sizeof(BloomOptions));
95 0 : return opts;
96 : }
97 :
98 : /*
99 : * Bloom handler function: return IndexAmRoutine with access method parameters
100 : * and callbacks.
101 : */
102 : Datum
103 236 : blhandler(PG_FUNCTION_ARGS)
104 : {
105 236 : IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
106 :
107 236 : amroutine->amstrategies = BLOOM_NSTRATEGIES;
108 236 : amroutine->amsupport = BLOOM_NPROC;
109 236 : amroutine->amoptsprocnum = BLOOM_OPTIONS_PROC;
110 236 : amroutine->amcanorder = false;
111 236 : amroutine->amcanorderbyop = false;
112 236 : amroutine->amcanbackward = false;
113 236 : amroutine->amcanunique = false;
114 236 : amroutine->amcanmulticol = true;
115 236 : amroutine->amoptionalkey = true;
116 236 : amroutine->amsearcharray = false;
117 236 : amroutine->amsearchnulls = false;
118 236 : amroutine->amstorage = false;
119 236 : amroutine->amclusterable = false;
120 236 : amroutine->ampredlocks = false;
121 236 : amroutine->amcanparallel = false;
122 236 : amroutine->amcanbuildparallel = false;
123 236 : amroutine->amcaninclude = false;
124 236 : amroutine->amusemaintenanceworkmem = false;
125 236 : amroutine->amparallelvacuumoptions =
126 : VACUUM_OPTION_PARALLEL_BULKDEL | VACUUM_OPTION_PARALLEL_CLEANUP;
127 236 : amroutine->amkeytype = InvalidOid;
128 :
129 236 : amroutine->ambuild = blbuild;
130 236 : amroutine->ambuildempty = blbuildempty;
131 236 : amroutine->aminsert = blinsert;
132 236 : amroutine->aminsertcleanup = NULL;
133 236 : amroutine->ambulkdelete = blbulkdelete;
134 236 : amroutine->amvacuumcleanup = blvacuumcleanup;
135 236 : amroutine->amcanreturn = NULL;
136 236 : amroutine->amcostestimate = blcostestimate;
137 236 : amroutine->amgettreeheight = NULL;
138 236 : amroutine->amoptions = bloptions;
139 236 : amroutine->amproperty = NULL;
140 236 : amroutine->ambuildphasename = NULL;
141 236 : amroutine->amvalidate = blvalidate;
142 236 : amroutine->amadjustmembers = NULL;
143 236 : amroutine->ambeginscan = blbeginscan;
144 236 : amroutine->amrescan = blrescan;
145 236 : amroutine->amgettuple = NULL;
146 236 : amroutine->amgetbitmap = blgetbitmap;
147 236 : amroutine->amendscan = blendscan;
148 236 : amroutine->ammarkpos = NULL;
149 236 : amroutine->amrestrpos = NULL;
150 236 : amroutine->amestimateparallelscan = NULL;
151 236 : amroutine->aminitparallelscan = NULL;
152 236 : amroutine->amparallelrescan = NULL;
153 :
154 236 : PG_RETURN_POINTER(amroutine);
155 : }
156 :
157 : /*
158 : * Fill BloomState structure for particular index.
159 : */
160 : void
161 208806 : initBloomState(BloomState *state, Relation index)
162 : {
163 : int i;
164 :
165 208806 : state->nColumns = index->rd_att->natts;
166 :
167 : /* Initialize hash function for each attribute */
168 626418 : for (i = 0; i < index->rd_att->natts; i++)
169 : {
170 417612 : fmgr_info_copy(&(state->hashFn[i]),
171 417612 : index_getprocinfo(index, i + 1, BLOOM_HASH_PROC),
172 : CurrentMemoryContext);
173 417612 : state->collations[i] = index->rd_indcollation[i];
174 : }
175 :
176 : /* Initialize amcache if needed with options from metapage */
177 208806 : if (!index->rd_amcache)
178 : {
179 : Buffer buffer;
180 : Page page;
181 : BloomMetaPageData *meta;
182 : BloomOptions *opts;
183 :
184 182 : opts = MemoryContextAlloc(index->rd_indexcxt, sizeof(BloomOptions));
185 :
186 182 : buffer = ReadBuffer(index, BLOOM_METAPAGE_BLKNO);
187 182 : LockBuffer(buffer, BUFFER_LOCK_SHARE);
188 :
189 182 : page = BufferGetPage(buffer);
190 :
191 182 : if (!BloomPageIsMeta(page))
192 0 : elog(ERROR, "Relation is not a bloom index");
193 182 : meta = BloomPageGetMeta(BufferGetPage(buffer));
194 :
195 182 : if (meta->magickNumber != BLOOM_MAGICK_NUMBER)
196 0 : elog(ERROR, "Relation is not a bloom index");
197 :
198 182 : *opts = meta->opts;
199 :
200 182 : UnlockReleaseBuffer(buffer);
201 :
202 182 : index->rd_amcache = opts;
203 : }
204 :
205 208806 : memcpy(&state->opts, index->rd_amcache, sizeof(state->opts));
206 208806 : state->sizeOfBloomTuple = BLOOMTUPLEHDRSZ +
207 208806 : sizeof(BloomSignatureWord) * state->opts.bloomLength;
208 208806 : }
209 :
210 : /*
211 : * Random generator copied from FreeBSD. Using own random generator here for
212 : * two reasons:
213 : *
214 : * 1) In this case random numbers are used for on-disk storage. Usage of
215 : * PostgreSQL number generator would obstruct it from all possible changes.
216 : * 2) Changing seed of PostgreSQL random generator would be undesirable side
217 : * effect.
218 : */
219 : static int32 next;
220 :
221 : static int32
222 1730866 : myRand(void)
223 : {
224 : /*----------
225 : * Compute x = (7^5 * x) mod (2^31 - 1)
226 : * without overflowing 31 bits:
227 : * (2^31 - 1) = 127773 * (7^5) + 2836
228 : * From "Random number generators: good ones are hard to find",
229 : * Park and Miller, Communications of the ACM, vol. 31, no. 10,
230 : * October 1988, p. 1195.
231 : *----------
232 : */
233 : int32 hi,
234 : lo,
235 : x;
236 :
237 : /* Must be in [1, 0x7ffffffe] range at this point. */
238 1730866 : hi = next / 127773;
239 1730866 : lo = next % 127773;
240 1730866 : x = 16807 * lo - 2836 * hi;
241 1730866 : if (x < 0)
242 350512 : x += 0x7fffffff;
243 1730866 : next = x;
244 : /* Transform to [0, 0x7ffffffd] range. */
245 1730866 : return (x - 1);
246 : }
247 :
248 : static void
249 984064 : mySrand(uint32 seed)
250 : {
251 984064 : next = seed;
252 : /* Transform to [1, 0x7ffffffe] range. */
253 984064 : next = (next % 0x7ffffffe) + 1;
254 984064 : }
255 :
256 : /*
257 : * Add bits of given value to the signature.
258 : */
259 : void
260 492032 : signValue(BloomState *state, BloomSignatureWord *sign, Datum value, int attno)
261 : {
262 : uint32 hashVal;
263 : int nBit,
264 : j;
265 :
266 : /*
267 : * init generator with "column's" number to get "hashed" seed for new
268 : * value. We don't want to map the same numbers from different columns
269 : * into the same bits!
270 : */
271 492032 : mySrand(attno);
272 :
273 : /*
274 : * Init hash sequence to map our value into bits. the same values in
275 : * different columns will be mapped into different bits because of step
276 : * above
277 : */
278 492032 : hashVal = DatumGetInt32(FunctionCall1Coll(&state->hashFn[attno], state->collations[attno], value));
279 492032 : mySrand(hashVal ^ myRand());
280 :
281 1730866 : for (j = 0; j < state->opts.bitSize[attno]; j++)
282 : {
283 : /* prevent multiple evaluation in SETBIT macro */
284 1238834 : nBit = myRand() % (state->opts.bloomLength * SIGNWORDBITS);
285 1238834 : SETBIT(sign, nBit);
286 : }
287 492032 : }
288 :
289 : /*
290 : * Make bloom tuple from values.
291 : */
292 : BloomTuple *
293 245500 : BloomFormTuple(BloomState *state, ItemPointer iptr, Datum *values, bool *isnull)
294 : {
295 : int i;
296 245500 : BloomTuple *res = (BloomTuple *) palloc0(state->sizeOfBloomTuple);
297 :
298 245500 : res->heapPtr = *iptr;
299 :
300 : /* Blooming each column */
301 736500 : for (i = 0; i < state->nColumns; i++)
302 : {
303 : /* skip nulls */
304 491000 : if (isnull[i])
305 0 : continue;
306 :
307 491000 : signValue(state, res->sign, values[i], i);
308 : }
309 :
310 245500 : return res;
311 : }
312 :
313 : /*
314 : * Add new bloom tuple to the page. Returns true if new tuple was successfully
315 : * added to the page. Returns false if it doesn't fit on the page.
316 : */
317 : bool
318 247076 : BloomPageAddItem(BloomState *state, Page page, BloomTuple *tuple)
319 : {
320 : BloomTuple *itup;
321 : BloomPageOpaque opaque;
322 : Pointer ptr;
323 :
324 : /* We shouldn't be pointed to an invalid page */
325 : Assert(!PageIsNew(page) && !BloomPageIsDeleted(page));
326 :
327 : /* Does new tuple fit on the page? */
328 247076 : if (BloomPageGetFreeSpace(state, page) < state->sizeOfBloomTuple)
329 1576 : return false;
330 :
331 : /* Copy new tuple to the end of page */
332 245500 : opaque = BloomPageGetOpaque(page);
333 245500 : itup = BloomPageGetTuple(state, page, opaque->maxoff + 1);
334 245500 : memcpy((Pointer) itup, (Pointer) tuple, state->sizeOfBloomTuple);
335 :
336 : /* Adjust maxoff and pd_lower */
337 245500 : opaque->maxoff++;
338 245500 : ptr = (Pointer) BloomPageGetTuple(state, page, opaque->maxoff + 1);
339 245500 : ((PageHeader) page)->pd_lower = ptr - page;
340 :
341 : /* Assert we didn't overrun available space */
342 : Assert(((PageHeader) page)->pd_lower <= ((PageHeader) page)->pd_upper);
343 :
344 245500 : return true;
345 : }
346 :
347 : /*
348 : * Allocate a new page (either by recycling, or by extending the index file)
349 : * The returned buffer is already pinned and exclusive-locked
350 : * Caller is responsible for initializing the page by calling BloomInitPage
351 : */
352 : Buffer
353 298 : BloomNewBuffer(Relation index)
354 : {
355 : Buffer buffer;
356 :
357 : /* First, try to get a page from FSM */
358 : for (;;)
359 0 : {
360 298 : BlockNumber blkno = GetFreeIndexPage(index);
361 :
362 298 : if (blkno == InvalidBlockNumber)
363 296 : break;
364 :
365 2 : buffer = ReadBuffer(index, blkno);
366 :
367 : /*
368 : * We have to guard against the possibility that someone else already
369 : * recycled this page; the buffer may be locked if so.
370 : */
371 2 : if (ConditionalLockBuffer(buffer))
372 : {
373 2 : Page page = BufferGetPage(buffer);
374 :
375 2 : if (PageIsNew(page))
376 0 : return buffer; /* OK to use, if never initialized */
377 :
378 2 : if (BloomPageIsDeleted(page))
379 2 : return buffer; /* OK to use */
380 :
381 0 : LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
382 : }
383 :
384 : /* Can't use it, so release buffer and try again */
385 0 : ReleaseBuffer(buffer);
386 : }
387 :
388 : /* Must extend the file */
389 296 : buffer = ExtendBufferedRel(BMR_REL(index), MAIN_FORKNUM, NULL,
390 : EB_LOCK_FIRST);
391 :
392 296 : return buffer;
393 : }
394 :
395 : /*
396 : * Initialize any page of a bloom index.
397 : */
398 : void
399 310 : BloomInitPage(Page page, uint16 flags)
400 : {
401 : BloomPageOpaque opaque;
402 :
403 310 : PageInit(page, BLCKSZ, sizeof(BloomPageOpaqueData));
404 :
405 310 : opaque = BloomPageGetOpaque(page);
406 310 : opaque->flags = flags;
407 310 : opaque->bloom_page_id = BLOOM_PAGE_ID;
408 310 : }
409 :
410 : /*
411 : * Fill in metapage for bloom index.
412 : */
413 : void
414 12 : BloomFillMetapage(Relation index, Page metaPage)
415 : {
416 : BloomOptions *opts;
417 : BloomMetaPageData *metadata;
418 :
419 : /*
420 : * Choose the index's options. If reloptions have been assigned, use
421 : * those, otherwise create default options.
422 : */
423 12 : opts = (BloomOptions *) index->rd_options;
424 12 : if (!opts)
425 0 : opts = makeDefaultBloomOptions();
426 :
427 : /*
428 : * Initialize contents of meta page, including a copy of the options,
429 : * which are now frozen for the life of the index.
430 : */
431 12 : BloomInitPage(metaPage, BLOOM_META);
432 12 : metadata = BloomPageGetMeta(metaPage);
433 12 : memset(metadata, 0, sizeof(BloomMetaPageData));
434 12 : metadata->magickNumber = BLOOM_MAGICK_NUMBER;
435 12 : metadata->opts = *opts;
436 12 : ((PageHeader) metaPage)->pd_lower += sizeof(BloomMetaPageData);
437 :
438 : /* If this fails, probably FreeBlockNumberArray size calc is wrong: */
439 : Assert(((PageHeader) metaPage)->pd_lower <= ((PageHeader) metaPage)->pd_upper);
440 12 : }
441 :
442 : /*
443 : * Initialize metapage for bloom index.
444 : */
445 : void
446 12 : BloomInitMetapage(Relation index, ForkNumber forknum)
447 : {
448 : Buffer metaBuffer;
449 : Page metaPage;
450 : GenericXLogState *state;
451 :
452 : /*
453 : * Make a new page; since it is first page it should be associated with
454 : * block number 0 (BLOOM_METAPAGE_BLKNO). No need to hold the extension
455 : * lock because there cannot be concurrent inserters yet.
456 : */
457 12 : metaBuffer = ReadBufferExtended(index, forknum, P_NEW, RBM_NORMAL, NULL);
458 12 : LockBuffer(metaBuffer, BUFFER_LOCK_EXCLUSIVE);
459 : Assert(BufferGetBlockNumber(metaBuffer) == BLOOM_METAPAGE_BLKNO);
460 :
461 : /* Initialize contents of meta page */
462 12 : state = GenericXLogStart(index);
463 12 : metaPage = GenericXLogRegisterBuffer(state, metaBuffer,
464 : GENERIC_XLOG_FULL_IMAGE);
465 12 : BloomFillMetapage(index, metaPage);
466 12 : GenericXLogFinish(state);
467 :
468 12 : UnlockReleaseBuffer(metaBuffer);
469 12 : }
470 :
471 : /*
472 : * Parse reloptions for bloom index, producing a BloomOptions struct.
473 : */
474 : bytea *
475 266 : bloptions(Datum reloptions, bool validate)
476 : {
477 : BloomOptions *rdopts;
478 :
479 : /* Parse the user-given reloptions */
480 266 : rdopts = (BloomOptions *) build_reloptions(reloptions, validate,
481 : bl_relopt_kind,
482 : sizeof(BloomOptions),
483 : bl_relopt_tab,
484 : lengthof(bl_relopt_tab));
485 :
486 : /* Convert signature length from # of bits to # to words, rounding up */
487 262 : if (rdopts)
488 262 : rdopts->bloomLength = (rdopts->bloomLength + SIGNWORDBITS - 1) / SIGNWORDBITS;
489 :
490 262 : return (bytea *) rdopts;
491 : }
|