Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * bitmapset.c
4 : * PostgreSQL generic bitmap set package
5 : *
6 : * A bitmap set can represent any set of nonnegative integers, although
7 : * it is mainly intended for sets where the maximum value is not large,
8 : * say at most a few hundred. By convention, we always represent a set with
9 : * the minimum possible number of words, i.e, there are never any trailing
10 : * zero words. Enforcing this requires that an empty set is represented as
11 : * NULL. Because an empty Bitmapset is represented as NULL, a non-NULL
12 : * Bitmapset always has at least 1 Bitmapword. We can exploit this fact to
13 : * speed up various loops over the Bitmapset's words array by using "do while"
14 : * loops instead of "for" loops. This means the code does not waste time
15 : * checking the loop condition before the first iteration. For Bitmapsets
16 : * containing only a single word (likely the majority of them) this halves the
17 : * number of loop condition checks.
18 : *
19 : * Callers must ensure that the set returned by functions in this file which
20 : * adjust the members of an existing set is assigned to all pointers pointing
21 : * to that existing set. No guarantees are made that we'll ever modify the
22 : * existing set in-place and return it.
23 : *
24 : * To help find bugs caused by callers failing to record the return value of
25 : * the function which manipulates an existing set, we support building with
26 : * REALLOCATE_BITMAPSETS. This results in the set being reallocated each time
27 : * the set is altered and the existing being pfreed. This is useful as if any
28 : * references still exist to the old set, we're more likely to notice as
29 : * any users of the old set will be accessing pfree'd memory. This option is
30 : * only intended to be used for debugging.
31 : *
32 : * Copyright (c) 2003-2025, PostgreSQL Global Development Group
33 : *
34 : * IDENTIFICATION
35 : * src/backend/nodes/bitmapset.c
36 : *
37 : *-------------------------------------------------------------------------
38 : */
39 : #include "postgres.h"
40 :
41 : #include "common/hashfn.h"
42 : #include "nodes/bitmapset.h"
43 : #include "nodes/pg_list.h"
44 : #include "port/pg_bitutils.h"
45 :
46 :
47 : #define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD)
48 : #define BITNUM(x) ((x) % BITS_PER_BITMAPWORD)
49 :
50 : #define BITMAPSET_SIZE(nwords) \
51 : (offsetof(Bitmapset, words) + (nwords) * sizeof(bitmapword))
52 :
53 : /*----------
54 : * This is a well-known cute trick for isolating the rightmost one-bit
55 : * in a word. It assumes two's complement arithmetic. Consider any
56 : * nonzero value, and focus attention on the rightmost one. The value is
57 : * then something like
58 : * xxxxxx10000
59 : * where x's are unspecified bits. The two's complement negative is formed
60 : * by inverting all the bits and adding one. Inversion gives
61 : * yyyyyy01111
62 : * where each y is the inverse of the corresponding x. Incrementing gives
63 : * yyyyyy10000
64 : * and then ANDing with the original value gives
65 : * 00000010000
66 : * This works for all cases except original value = zero, where of course
67 : * we get zero.
68 : *----------
69 : */
70 : #define RIGHTMOST_ONE(x) ((signedbitmapword) (x) & -((signedbitmapword) (x)))
71 :
72 : #define HAS_MULTIPLE_ONES(x) ((bitmapword) RIGHTMOST_ONE(x) != (x))
73 :
74 : #ifdef USE_ASSERT_CHECKING
75 : /*
76 : * bms_is_valid_set - for cassert builds to check for valid sets
77 : */
78 : static bool
79 : bms_is_valid_set(const Bitmapset *a)
80 : {
81 : /* NULL is the correct representation of an empty set */
82 : if (a == NULL)
83 : return true;
84 :
85 : /* check the node tag is set correctly. pfree'd pointer, maybe? */
86 : if (!IsA(a, Bitmapset))
87 : return false;
88 :
89 : /* trailing zero words are not allowed */
90 : if (a->words[a->nwords - 1] == 0)
91 : return false;
92 :
93 : return true;
94 : }
95 : #endif
96 :
97 : #ifdef REALLOCATE_BITMAPSETS
98 : /*
99 : * bms_copy_and_free
100 : * Only required in REALLOCATE_BITMAPSETS builds. Provide a simple way
101 : * to return a freshly allocated set and pfree the original.
102 : *
103 : * Note: callers which accept multiple sets must be careful when calling this
104 : * function to clone one parameter as other parameters may point to the same
105 : * set. A good option is to call this just before returning the resulting
106 : * set.
107 : */
108 : static Bitmapset *
109 : bms_copy_and_free(Bitmapset *a)
110 : {
111 : Bitmapset *c = bms_copy(a);
112 :
113 : bms_free(a);
114 : return c;
115 : }
116 : #endif
117 :
118 : /*
119 : * bms_copy - make a palloc'd copy of a bitmapset
120 : */
121 : Bitmapset *
122 54912500 : bms_copy(const Bitmapset *a)
123 : {
124 : Bitmapset *result;
125 : size_t size;
126 :
127 : Assert(bms_is_valid_set(a));
128 :
129 54912500 : if (a == NULL)
130 35834440 : return NULL;
131 :
132 19078060 : size = BITMAPSET_SIZE(a->nwords);
133 19078060 : result = (Bitmapset *) palloc(size);
134 19078060 : memcpy(result, a, size);
135 19078060 : return result;
136 : }
137 :
138 : /*
139 : * bms_equal - are two bitmapsets equal? or both NULL?
140 : */
141 : bool
142 18499708 : bms_equal(const Bitmapset *a, const Bitmapset *b)
143 : {
144 : int i;
145 :
146 : Assert(bms_is_valid_set(a));
147 : Assert(bms_is_valid_set(b));
148 :
149 : /* Handle cases where either input is NULL */
150 18499708 : if (a == NULL)
151 : {
152 12587852 : if (b == NULL)
153 12523568 : return true;
154 64284 : return false;
155 : }
156 5911856 : else if (b == NULL)
157 23076 : return false;
158 :
159 : /* can't be equal if the word counts don't match */
160 5888780 : if (a->nwords != b->nwords)
161 4 : return false;
162 :
163 : /* check each word matches */
164 5888776 : i = 0;
165 : do
166 : {
167 5888980 : if (a->words[i] != b->words[i])
168 2945352 : return false;
169 2943628 : } while (++i < a->nwords);
170 :
171 2943424 : return true;
172 : }
173 :
174 : /*
175 : * bms_compare - qsort-style comparator for bitmapsets
176 : *
177 : * This guarantees to report values as equal iff bms_equal would say they are
178 : * equal. Otherwise, the highest-numbered bit that is set in one value but
179 : * not the other determines the result. (This rule means that, for example,
180 : * {6} is greater than {5}, which seems plausible.)
181 : */
182 : int
183 25098 : bms_compare(const Bitmapset *a, const Bitmapset *b)
184 : {
185 : int i;
186 :
187 : Assert(bms_is_valid_set(a));
188 : Assert(bms_is_valid_set(b));
189 :
190 : /* Handle cases where either input is NULL */
191 25098 : if (a == NULL)
192 8 : return (b == NULL) ? 0 : -1;
193 25090 : else if (b == NULL)
194 4 : return +1;
195 :
196 : /* the set with the most words must be greater */
197 25086 : if (a->nwords != b->nwords)
198 2 : return (a->nwords > b->nwords) ? +1 : -1;
199 :
200 25084 : i = a->nwords - 1;
201 : do
202 : {
203 25084 : bitmapword aw = a->words[i];
204 25084 : bitmapword bw = b->words[i];
205 :
206 25084 : if (aw != bw)
207 25082 : return (aw > bw) ? +1 : -1;
208 2 : } while (--i >= 0);
209 2 : return 0;
210 : }
211 :
212 : /*
213 : * bms_make_singleton - build a bitmapset containing a single member
214 : */
215 : Bitmapset *
216 15739648 : bms_make_singleton(int x)
217 : {
218 : Bitmapset *result;
219 : int wordnum,
220 : bitnum;
221 :
222 15739648 : if (x < 0)
223 2 : elog(ERROR, "negative bitmapset member not allowed");
224 15739646 : wordnum = WORDNUM(x);
225 15739646 : bitnum = BITNUM(x);
226 15739646 : result = (Bitmapset *) palloc0(BITMAPSET_SIZE(wordnum + 1));
227 15739646 : result->type = T_Bitmapset;
228 15739646 : result->nwords = wordnum + 1;
229 15739646 : result->words[wordnum] = ((bitmapword) 1 << bitnum);
230 15739646 : return result;
231 : }
232 :
233 : /*
234 : * bms_free - free a bitmapset
235 : *
236 : * Same as pfree except for allowing NULL input
237 : */
238 : void
239 21452482 : bms_free(Bitmapset *a)
240 : {
241 21452482 : if (a)
242 9488358 : pfree(a);
243 21452482 : }
244 :
245 :
246 : /*
247 : * bms_union - create and return a new set containing all members from both
248 : * input sets. Both inputs are left unmodified.
249 : */
250 : Bitmapset *
251 8551930 : bms_union(const Bitmapset *a, const Bitmapset *b)
252 : {
253 : Bitmapset *result;
254 : const Bitmapset *other;
255 : int otherlen;
256 : int i;
257 :
258 : Assert(bms_is_valid_set(a));
259 : Assert(bms_is_valid_set(b));
260 :
261 : /* Handle cases where either input is NULL */
262 8551930 : if (a == NULL)
263 5466092 : return bms_copy(b);
264 3085838 : if (b == NULL)
265 1316832 : return bms_copy(a);
266 : /* Identify shorter and longer input; copy the longer one */
267 1769006 : if (a->nwords <= b->nwords)
268 : {
269 1769004 : result = bms_copy(b);
270 1769004 : other = a;
271 : }
272 : else
273 : {
274 2 : result = bms_copy(a);
275 2 : other = b;
276 : }
277 : /* And union the shorter input into the result */
278 1769006 : otherlen = other->nwords;
279 1769006 : i = 0;
280 : do
281 : {
282 1771564 : result->words[i] |= other->words[i];
283 1771564 : } while (++i < otherlen);
284 1769006 : return result;
285 : }
286 :
287 : /*
288 : * bms_intersect - create and return a new set containing members which both
289 : * input sets have in common. Both inputs are left unmodified.
290 : */
291 : Bitmapset *
292 1315872 : bms_intersect(const Bitmapset *a, const Bitmapset *b)
293 : {
294 : Bitmapset *result;
295 : const Bitmapset *other;
296 : int lastnonzero;
297 : int resultlen;
298 : int i;
299 :
300 : Assert(bms_is_valid_set(a));
301 : Assert(bms_is_valid_set(b));
302 :
303 : /* Handle cases where either input is NULL */
304 1315872 : if (a == NULL || b == NULL)
305 220700 : return NULL;
306 :
307 : /* Identify shorter and longer input; copy the shorter one */
308 1095172 : if (a->nwords <= b->nwords)
309 : {
310 1095170 : result = bms_copy(a);
311 1095170 : other = b;
312 : }
313 : else
314 : {
315 2 : result = bms_copy(b);
316 2 : other = a;
317 : }
318 : /* And intersect the longer input with the result */
319 1095172 : resultlen = result->nwords;
320 1095172 : lastnonzero = -1;
321 1095172 : i = 0;
322 : do
323 : {
324 1097730 : result->words[i] &= other->words[i];
325 :
326 1097730 : if (result->words[i] != 0)
327 1086886 : lastnonzero = i;
328 1097730 : } while (++i < resultlen);
329 : /* If we computed an empty result, we must return NULL */
330 1095172 : if (lastnonzero == -1)
331 : {
332 8560 : pfree(result);
333 8560 : return NULL;
334 : }
335 :
336 : /* get rid of trailing zero words */
337 1086612 : result->nwords = lastnonzero + 1;
338 1086612 : return result;
339 : }
340 :
341 : /*
342 : * bms_difference - create and return a new set containing all the members of
343 : * 'a' without the members of 'b'.
344 : */
345 : Bitmapset *
346 2261316 : bms_difference(const Bitmapset *a, const Bitmapset *b)
347 : {
348 : Bitmapset *result;
349 : int i;
350 :
351 : Assert(bms_is_valid_set(a));
352 : Assert(bms_is_valid_set(b));
353 :
354 : /* Handle cases where either input is NULL */
355 2261316 : if (a == NULL)
356 609550 : return NULL;
357 1651766 : if (b == NULL)
358 1195156 : return bms_copy(a);
359 :
360 : /*
361 : * In Postgres' usage, an empty result is a very common case, so it's
362 : * worth optimizing for that by testing bms_nonempty_difference(). This
363 : * saves us a palloc/pfree cycle compared to checking after-the-fact.
364 : */
365 456610 : if (!bms_nonempty_difference(a, b))
366 136896 : return NULL;
367 :
368 : /* Copy the left input */
369 319714 : result = bms_copy(a);
370 :
371 : /* And remove b's bits from result */
372 319714 : if (result->nwords > b->nwords)
373 : {
374 : /*
375 : * We'll never need to remove trailing zero words when 'a' has more
376 : * words than 'b' as the additional words must be non-zero.
377 : */
378 4 : i = 0;
379 : do
380 : {
381 4 : result->words[i] &= ~b->words[i];
382 4 : } while (++i < b->nwords);
383 : }
384 : else
385 : {
386 319710 : int lastnonzero = -1;
387 :
388 : /* we may need to remove trailing zero words from the result. */
389 319710 : i = 0;
390 : do
391 : {
392 319712 : result->words[i] &= ~b->words[i];
393 :
394 : /* remember the last non-zero word */
395 319712 : if (result->words[i] != 0)
396 319710 : lastnonzero = i;
397 319712 : } while (++i < result->nwords);
398 :
399 : /* trim off trailing zero words */
400 319710 : result->nwords = lastnonzero + 1;
401 : }
402 : Assert(result->nwords != 0);
403 :
404 : /* Need not check for empty result, since we handled that case above */
405 319714 : return result;
406 : }
407 :
408 : /*
409 : * bms_is_subset - is A a subset of B?
410 : */
411 : bool
412 21207636 : bms_is_subset(const Bitmapset *a, const Bitmapset *b)
413 : {
414 : int i;
415 :
416 : Assert(bms_is_valid_set(a));
417 : Assert(bms_is_valid_set(b));
418 :
419 : /* Handle cases where either input is NULL */
420 21207636 : if (a == NULL)
421 1950572 : return true; /* empty set is a subset of anything */
422 19257064 : if (b == NULL)
423 418700 : return false;
424 :
425 : /* 'a' can't be a subset of 'b' if it contains more words */
426 18838364 : if (a->nwords > b->nwords)
427 4 : return false;
428 :
429 : /* Check all 'a' members are set in 'b' */
430 18838360 : i = 0;
431 : do
432 : {
433 18838360 : if ((a->words[i] & ~b->words[i]) != 0)
434 7269304 : return false;
435 11569056 : } while (++i < a->nwords);
436 11569056 : return true;
437 : }
438 :
439 : /*
440 : * bms_subset_compare - compare A and B for equality/subset relationships
441 : *
442 : * This is more efficient than testing bms_is_subset in both directions.
443 : */
444 : BMS_Comparison
445 2707544 : bms_subset_compare(const Bitmapset *a, const Bitmapset *b)
446 : {
447 : BMS_Comparison result;
448 : int shortlen;
449 : int i;
450 :
451 : Assert(bms_is_valid_set(a));
452 : Assert(bms_is_valid_set(b));
453 :
454 : /* Handle cases where either input is NULL */
455 2707544 : if (a == NULL)
456 : {
457 2291754 : if (b == NULL)
458 2221246 : return BMS_EQUAL;
459 70508 : return BMS_SUBSET1;
460 : }
461 415790 : if (b == NULL)
462 198352 : return BMS_SUBSET2;
463 :
464 : /* Check common words */
465 217438 : result = BMS_EQUAL; /* status so far */
466 217438 : shortlen = Min(a->nwords, b->nwords);
467 217438 : i = 0;
468 : do
469 : {
470 217444 : bitmapword aword = a->words[i];
471 217444 : bitmapword bword = b->words[i];
472 :
473 217444 : if ((aword & ~bword) != 0)
474 : {
475 : /* a is not a subset of b */
476 58384 : if (result == BMS_SUBSET1)
477 4 : return BMS_DIFFERENT;
478 58380 : result = BMS_SUBSET2;
479 : }
480 217440 : if ((bword & ~aword) != 0)
481 : {
482 : /* b is not a subset of a */
483 58274 : if (result == BMS_SUBSET2)
484 54502 : return BMS_DIFFERENT;
485 3772 : result = BMS_SUBSET1;
486 : }
487 162938 : } while (++i < shortlen);
488 : /* Check extra words */
489 162932 : if (a->nwords > b->nwords)
490 : {
491 : /* if a has more words then a is not a subset of b */
492 4 : if (result == BMS_SUBSET1)
493 2 : return BMS_DIFFERENT;
494 2 : return BMS_SUBSET2;
495 : }
496 162928 : else if (a->nwords < b->nwords)
497 : {
498 : /* if b has more words then b is not a subset of a */
499 8 : if (result == BMS_SUBSET2)
500 4 : return BMS_DIFFERENT;
501 4 : return BMS_SUBSET1;
502 : }
503 162920 : return result;
504 : }
505 :
506 : /*
507 : * bms_is_member - is X a member of A?
508 : */
509 : bool
510 16628294 : bms_is_member(int x, const Bitmapset *a)
511 : {
512 : int wordnum,
513 : bitnum;
514 :
515 : Assert(bms_is_valid_set(a));
516 :
517 : /* XXX better to just return false for x<0 ? */
518 16628294 : if (x < 0)
519 2 : elog(ERROR, "negative bitmapset member not allowed");
520 16628292 : if (a == NULL)
521 9210104 : return false;
522 :
523 7418188 : wordnum = WORDNUM(x);
524 7418188 : bitnum = BITNUM(x);
525 7418188 : if (wordnum >= a->nwords)
526 394 : return false;
527 7417794 : if ((a->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0)
528 5408632 : return true;
529 2009162 : return false;
530 : }
531 :
532 : /*
533 : * bms_member_index
534 : * determine 0-based index of member x in the bitmap
535 : *
536 : * Returns (-1) when x is not a member.
537 : */
538 : int
539 5318 : bms_member_index(Bitmapset *a, int x)
540 : {
541 : int i;
542 : int bitnum;
543 : int wordnum;
544 5318 : int result = 0;
545 : bitmapword mask;
546 :
547 : Assert(bms_is_valid_set(a));
548 :
549 : /* return -1 if not a member of the bitmap */
550 5318 : if (!bms_is_member(x, a))
551 4 : return -1;
552 :
553 5314 : wordnum = WORDNUM(x);
554 5314 : bitnum = BITNUM(x);
555 :
556 : /* count bits in preceding words */
557 5328 : for (i = 0; i < wordnum; i++)
558 : {
559 14 : bitmapword w = a->words[i];
560 :
561 : /* No need to count the bits in a zero word */
562 14 : if (w != 0)
563 6 : result += bmw_popcount(w);
564 : }
565 :
566 : /*
567 : * Now add bits of the last word, but only those before the item. We can
568 : * do that by applying a mask and then using popcount again. To get
569 : * 0-based index, we want to count only preceding bits, not the item
570 : * itself, so we subtract 1.
571 : */
572 5314 : mask = ((bitmapword) 1 << bitnum) - 1;
573 5314 : result += bmw_popcount(a->words[wordnum] & mask);
574 :
575 5314 : return result;
576 : }
577 :
578 : /*
579 : * bms_overlap - do sets overlap (ie, have a nonempty intersection)?
580 : */
581 : bool
582 14487178 : bms_overlap(const Bitmapset *a, const Bitmapset *b)
583 : {
584 : int shortlen;
585 : int i;
586 :
587 : Assert(bms_is_valid_set(a));
588 : Assert(bms_is_valid_set(b));
589 :
590 : /* Handle cases where either input is NULL */
591 14487178 : if (a == NULL || b == NULL)
592 6150646 : return false;
593 : /* Check words in common */
594 8336532 : shortlen = Min(a->nwords, b->nwords);
595 8336532 : i = 0;
596 : do
597 : {
598 8336532 : if ((a->words[i] & b->words[i]) != 0)
599 5357512 : return true;
600 2979020 : } while (++i < shortlen);
601 2979020 : return false;
602 : }
603 :
604 : /*
605 : * bms_overlap_list - does a set overlap an integer list?
606 : */
607 : bool
608 1604 : bms_overlap_list(const Bitmapset *a, const List *b)
609 : {
610 : ListCell *lc;
611 : int wordnum,
612 : bitnum;
613 :
614 : Assert(bms_is_valid_set(a));
615 :
616 1604 : if (a == NULL || b == NIL)
617 1452 : return false;
618 :
619 284 : foreach(lc, b)
620 : {
621 222 : int x = lfirst_int(lc);
622 :
623 222 : if (x < 0)
624 4 : elog(ERROR, "negative bitmapset member not allowed");
625 218 : wordnum = WORDNUM(x);
626 218 : bitnum = BITNUM(x);
627 218 : if (wordnum < a->nwords)
628 218 : if ((a->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0)
629 86 : return true;
630 : }
631 :
632 62 : return false;
633 : }
634 :
635 : /*
636 : * bms_nonempty_difference - do sets have a nonempty difference?
637 : *
638 : * i.e., are any members set in 'a' that are not also set in 'b'.
639 : */
640 : bool
641 3027056 : bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b)
642 : {
643 : int i;
644 :
645 : Assert(bms_is_valid_set(a));
646 : Assert(bms_is_valid_set(b));
647 :
648 : /* Handle cases where either input is NULL */
649 3027056 : if (a == NULL)
650 5350 : return false;
651 3021706 : if (b == NULL)
652 2 : return true;
653 : /* if 'a' has more words then it must contain additional members */
654 3021704 : if (a->nwords > b->nwords)
655 6 : return true;
656 : /* Check all 'a' members are set in 'b' */
657 3021698 : i = 0;
658 : do
659 : {
660 3021698 : if ((a->words[i] & ~b->words[i]) != 0)
661 1740164 : return true;
662 1281534 : } while (++i < a->nwords);
663 1281534 : return false;
664 : }
665 :
666 : /*
667 : * bms_singleton_member - return the sole integer member of set
668 : *
669 : * Raises error if |a| is not 1.
670 : */
671 : int
672 21814 : bms_singleton_member(const Bitmapset *a)
673 : {
674 21814 : int result = -1;
675 : int nwords;
676 : int wordnum;
677 :
678 : Assert(bms_is_valid_set(a));
679 :
680 21814 : if (a == NULL)
681 2 : elog(ERROR, "bitmapset is empty");
682 :
683 21812 : nwords = a->nwords;
684 21812 : wordnum = 0;
685 : do
686 : {
687 21812 : bitmapword w = a->words[wordnum];
688 :
689 21812 : if (w != 0)
690 : {
691 21812 : if (result >= 0 || HAS_MULTIPLE_ONES(w))
692 2 : elog(ERROR, "bitmapset has multiple members");
693 21810 : result = wordnum * BITS_PER_BITMAPWORD;
694 21810 : result += bmw_rightmost_one_pos(w);
695 : }
696 21810 : } while (++wordnum < nwords);
697 :
698 : /* we don't expect non-NULL sets to be empty */
699 : Assert(result >= 0);
700 21810 : return result;
701 : }
702 :
703 : /*
704 : * bms_get_singleton_member
705 : *
706 : * Test whether the given set is a singleton.
707 : * If so, set *member to the value of its sole member, and return true.
708 : * If not, return false, without changing *member.
709 : *
710 : * This is more convenient and faster than calling bms_membership() and then
711 : * bms_singleton_member(), if we don't care about distinguishing empty sets
712 : * from multiple-member sets.
713 : */
714 : bool
715 2580022 : bms_get_singleton_member(const Bitmapset *a, int *member)
716 : {
717 2580022 : int result = -1;
718 : int nwords;
719 : int wordnum;
720 :
721 : Assert(bms_is_valid_set(a));
722 :
723 2580022 : if (a == NULL)
724 4 : return false;
725 :
726 2580018 : nwords = a->nwords;
727 2580018 : wordnum = 0;
728 : do
729 : {
730 2580030 : bitmapword w = a->words[wordnum];
731 :
732 2580030 : if (w != 0)
733 : {
734 2580018 : if (result >= 0 || HAS_MULTIPLE_ONES(w))
735 462450 : return false;
736 2117568 : result = wordnum * BITS_PER_BITMAPWORD;
737 2117568 : result += bmw_rightmost_one_pos(w);
738 : }
739 2117580 : } while (++wordnum < nwords);
740 :
741 : /* we don't expect non-NULL sets to be empty */
742 : Assert(result >= 0);
743 2117568 : *member = result;
744 2117568 : return true;
745 : }
746 :
747 : /*
748 : * bms_num_members - count members of set
749 : */
750 : int
751 1950154 : bms_num_members(const Bitmapset *a)
752 : {
753 1950154 : int result = 0;
754 : int nwords;
755 : int wordnum;
756 :
757 : Assert(bms_is_valid_set(a));
758 :
759 1950154 : if (a == NULL)
760 210724 : return 0;
761 :
762 1739430 : nwords = a->nwords;
763 1739430 : wordnum = 0;
764 : do
765 : {
766 1739432 : bitmapword w = a->words[wordnum];
767 :
768 : /* No need to count the bits in a zero word */
769 1739432 : if (w != 0)
770 1739432 : result += bmw_popcount(w);
771 1739432 : } while (++wordnum < nwords);
772 1739430 : return result;
773 : }
774 :
775 : /*
776 : * bms_membership - does a set have zero, one, or multiple members?
777 : *
778 : * This is faster than making an exact count with bms_num_members().
779 : */
780 : BMS_Membership
781 1336800 : bms_membership(const Bitmapset *a)
782 : {
783 1336800 : BMS_Membership result = BMS_EMPTY_SET;
784 : int nwords;
785 : int wordnum;
786 :
787 : Assert(bms_is_valid_set(a));
788 :
789 1336800 : if (a == NULL)
790 384 : return BMS_EMPTY_SET;
791 :
792 1336416 : nwords = a->nwords;
793 1336416 : wordnum = 0;
794 : do
795 : {
796 1336416 : bitmapword w = a->words[wordnum];
797 :
798 1336416 : if (w != 0)
799 : {
800 1336416 : if (result != BMS_EMPTY_SET || HAS_MULTIPLE_ONES(w))
801 474458 : return BMS_MULTIPLE;
802 861958 : result = BMS_SINGLETON;
803 : }
804 861958 : } while (++wordnum < nwords);
805 861958 : return result;
806 : }
807 :
808 :
809 : /*
810 : * bms_add_member - add a specified member to set
811 : *
812 : * 'a' is recycled when possible.
813 : */
814 : Bitmapset *
815 22174786 : bms_add_member(Bitmapset *a, int x)
816 : {
817 : int wordnum,
818 : bitnum;
819 :
820 : Assert(bms_is_valid_set(a));
821 :
822 22174786 : if (x < 0)
823 4 : elog(ERROR, "negative bitmapset member not allowed");
824 22174782 : if (a == NULL)
825 11536058 : return bms_make_singleton(x);
826 :
827 10638724 : wordnum = WORDNUM(x);
828 10638724 : bitnum = BITNUM(x);
829 :
830 : /* enlarge the set if necessary */
831 10638724 : if (wordnum >= a->nwords)
832 : {
833 486 : int oldnwords = a->nwords;
834 : int i;
835 :
836 486 : a = (Bitmapset *) repalloc(a, BITMAPSET_SIZE(wordnum + 1));
837 486 : a->nwords = wordnum + 1;
838 : /* zero out the enlarged portion */
839 486 : i = oldnwords;
840 : do
841 : {
842 30044 : a->words[i] = 0;
843 30044 : } while (++i < a->nwords);
844 : }
845 :
846 10638724 : a->words[wordnum] |= ((bitmapword) 1 << bitnum);
847 :
848 : #ifdef REALLOCATE_BITMAPSETS
849 :
850 : /*
851 : * There's no guarantee that the repalloc returned a new pointer, so copy
852 : * and free unconditionally here.
853 : */
854 : a = bms_copy_and_free(a);
855 : #endif
856 :
857 10638724 : return a;
858 : }
859 :
860 : /*
861 : * bms_del_member - remove a specified member from set
862 : *
863 : * No error if x is not currently a member of set
864 : *
865 : * 'a' is recycled when possible.
866 : */
867 : Bitmapset *
868 1757884 : bms_del_member(Bitmapset *a, int x)
869 : {
870 : int wordnum,
871 : bitnum;
872 :
873 : Assert(bms_is_valid_set(a));
874 :
875 1757884 : if (x < 0)
876 2 : elog(ERROR, "negative bitmapset member not allowed");
877 1757882 : if (a == NULL)
878 640248 : return NULL;
879 :
880 1117634 : wordnum = WORDNUM(x);
881 1117634 : bitnum = BITNUM(x);
882 :
883 : #ifdef REALLOCATE_BITMAPSETS
884 : a = bms_copy_and_free(a);
885 : #endif
886 :
887 : /* member can't exist. Return 'a' unmodified */
888 1117634 : if (unlikely(wordnum >= a->nwords))
889 0 : return a;
890 :
891 1117634 : a->words[wordnum] &= ~((bitmapword) 1 << bitnum);
892 :
893 : /* when last word becomes empty, trim off all trailing empty words */
894 1117634 : if (a->words[wordnum] == 0 && wordnum == a->nwords - 1)
895 : {
896 : /* find the last non-empty word and make that the new final word */
897 610474 : for (int i = wordnum - 1; i >= 0; i--)
898 : {
899 56186 : if (a->words[i] != 0)
900 : {
901 152 : a->nwords = i + 1;
902 152 : return a;
903 : }
904 : }
905 :
906 : /* the set is now empty */
907 554288 : pfree(a);
908 554288 : return NULL;
909 : }
910 563194 : return a;
911 : }
912 :
913 : /*
914 : * bms_add_members - like bms_union, but left input is recycled when possible
915 : */
916 : Bitmapset *
917 15047576 : bms_add_members(Bitmapset *a, const Bitmapset *b)
918 : {
919 : Bitmapset *result;
920 : const Bitmapset *other;
921 : int otherlen;
922 : int i;
923 :
924 : Assert(bms_is_valid_set(a));
925 : Assert(bms_is_valid_set(b));
926 :
927 : /* Handle cases where either input is NULL */
928 15047576 : if (a == NULL)
929 7857742 : return bms_copy(b);
930 7189834 : if (b == NULL)
931 : {
932 : #ifdef REALLOCATE_BITMAPSETS
933 : a = bms_copy_and_free(a);
934 : #endif
935 :
936 4629228 : return a;
937 : }
938 : /* Identify shorter and longer input; copy the longer one if needed */
939 2560606 : if (a->nwords < b->nwords)
940 : {
941 2 : result = bms_copy(b);
942 2 : other = a;
943 : }
944 : else
945 : {
946 2560604 : result = a;
947 2560604 : other = b;
948 : }
949 : /* And union the shorter input into the result */
950 2560606 : otherlen = other->nwords;
951 2560606 : i = 0;
952 : do
953 : {
954 2560606 : result->words[i] |= other->words[i];
955 2560606 : } while (++i < otherlen);
956 2560606 : if (result != a)
957 2 : pfree(a);
958 : #ifdef REALLOCATE_BITMAPSETS
959 : else
960 : result = bms_copy_and_free(result);
961 : #endif
962 :
963 2560606 : return result;
964 : }
965 :
966 : /*
967 : * bms_replace_members
968 : * Remove all existing members from 'a' and repopulate the set with members
969 : * from 'b', recycling 'a', when possible.
970 : */
971 : Bitmapset *
972 13494 : bms_replace_members(Bitmapset *a, const Bitmapset *b)
973 : {
974 : int i;
975 :
976 : Assert(bms_is_valid_set(a));
977 : Assert(bms_is_valid_set(b));
978 :
979 13494 : if (a == NULL)
980 2 : return bms_copy(b);
981 13492 : if (b == NULL)
982 : {
983 2 : pfree(a);
984 2 : return NULL;
985 : }
986 :
987 13490 : if (a->nwords < b->nwords)
988 2 : a = (Bitmapset *) repalloc(a, BITMAPSET_SIZE(b->nwords));
989 :
990 13490 : i = 0;
991 : do
992 : {
993 13508 : a->words[i] = b->words[i];
994 13508 : } while (++i < b->nwords);
995 :
996 13490 : a->nwords = b->nwords;
997 :
998 : #ifdef REALLOCATE_BITMAPSETS
999 :
1000 : /*
1001 : * There's no guarantee that the repalloc returned a new pointer, so copy
1002 : * and free unconditionally here.
1003 : */
1004 : a = bms_copy_and_free(a);
1005 : #endif
1006 :
1007 13490 : return a;
1008 : }
1009 :
1010 : /*
1011 : * bms_add_range
1012 : * Add members in the range of 'lower' to 'upper' to the set.
1013 : *
1014 : * Note this could also be done by calling bms_add_member in a loop, however,
1015 : * using this function will be faster when the range is large as we work at
1016 : * the bitmapword level rather than at bit level.
1017 : */
1018 : Bitmapset *
1019 65154 : bms_add_range(Bitmapset *a, int lower, int upper)
1020 : {
1021 : int lwordnum,
1022 : lbitnum,
1023 : uwordnum,
1024 : ushiftbits,
1025 : wordnum;
1026 :
1027 : Assert(bms_is_valid_set(a));
1028 :
1029 : /* do nothing if nothing is called for, without further checking */
1030 65154 : if (upper < lower)
1031 : {
1032 : #ifdef REALLOCATE_BITMAPSETS
1033 : a = bms_copy_and_free(a);
1034 : #endif
1035 :
1036 30 : return a;
1037 : }
1038 :
1039 65124 : if (lower < 0)
1040 2 : elog(ERROR, "negative bitmapset member not allowed");
1041 65122 : uwordnum = WORDNUM(upper);
1042 :
1043 65122 : if (a == NULL)
1044 : {
1045 45118 : a = (Bitmapset *) palloc0(BITMAPSET_SIZE(uwordnum + 1));
1046 45118 : a->type = T_Bitmapset;
1047 45118 : a->nwords = uwordnum + 1;
1048 : }
1049 20004 : else if (uwordnum >= a->nwords)
1050 : {
1051 6 : int oldnwords = a->nwords;
1052 : int i;
1053 :
1054 : /* ensure we have enough words to store the upper bit */
1055 6 : a = (Bitmapset *) repalloc(a, BITMAPSET_SIZE(uwordnum + 1));
1056 6 : a->nwords = uwordnum + 1;
1057 : /* zero out the enlarged portion */
1058 6 : i = oldnwords;
1059 : do
1060 : {
1061 10 : a->words[i] = 0;
1062 10 : } while (++i < a->nwords);
1063 : }
1064 :
1065 65122 : wordnum = lwordnum = WORDNUM(lower);
1066 :
1067 65122 : lbitnum = BITNUM(lower);
1068 65122 : ushiftbits = BITS_PER_BITMAPWORD - (BITNUM(upper) + 1);
1069 :
1070 : /*
1071 : * Special case when lwordnum is the same as uwordnum we must perform the
1072 : * upper and lower masking on the word.
1073 : */
1074 65122 : if (lwordnum == uwordnum)
1075 : {
1076 63290 : a->words[lwordnum] |= ~(bitmapword) (((bitmapword) 1 << lbitnum) - 1)
1077 63290 : & (~(bitmapword) 0) >> ushiftbits;
1078 : }
1079 : else
1080 : {
1081 : /* turn on lbitnum and all bits left of it */
1082 1832 : a->words[wordnum++] |= ~(bitmapword) (((bitmapword) 1 << lbitnum) - 1);
1083 :
1084 : /* turn on all bits for any intermediate words */
1085 1868 : while (wordnum < uwordnum)
1086 36 : a->words[wordnum++] = ~(bitmapword) 0;
1087 :
1088 : /* turn on upper's bit and all bits right of it. */
1089 1832 : a->words[uwordnum] |= (~(bitmapword) 0) >> ushiftbits;
1090 : }
1091 :
1092 : #ifdef REALLOCATE_BITMAPSETS
1093 :
1094 : /*
1095 : * There's no guarantee that the repalloc returned a new pointer, so copy
1096 : * and free unconditionally here.
1097 : */
1098 : a = bms_copy_and_free(a);
1099 : #endif
1100 :
1101 65122 : return a;
1102 : }
1103 :
1104 : /*
1105 : * bms_int_members - like bms_intersect, but left input is recycled when
1106 : * possible
1107 : */
1108 : Bitmapset *
1109 711848 : bms_int_members(Bitmapset *a, const Bitmapset *b)
1110 : {
1111 : int lastnonzero;
1112 : int shortlen;
1113 : int i;
1114 :
1115 : Assert(bms_is_valid_set(a));
1116 : Assert(bms_is_valid_set(b));
1117 :
1118 : /* Handle cases where either input is NULL */
1119 711848 : if (a == NULL)
1120 28096 : return NULL;
1121 683752 : if (b == NULL)
1122 : {
1123 5096 : pfree(a);
1124 5096 : return NULL;
1125 : }
1126 :
1127 : /* Intersect b into a; we need never copy */
1128 678656 : shortlen = Min(a->nwords, b->nwords);
1129 678656 : lastnonzero = -1;
1130 678656 : i = 0;
1131 : do
1132 : {
1133 678658 : a->words[i] &= b->words[i];
1134 :
1135 678658 : if (a->words[i] != 0)
1136 561268 : lastnonzero = i;
1137 678658 : } while (++i < shortlen);
1138 :
1139 : /* If we computed an empty result, we must return NULL */
1140 678656 : if (lastnonzero == -1)
1141 : {
1142 117390 : pfree(a);
1143 117390 : return NULL;
1144 : }
1145 :
1146 : /* get rid of trailing zero words */
1147 561266 : a->nwords = lastnonzero + 1;
1148 :
1149 : #ifdef REALLOCATE_BITMAPSETS
1150 : a = bms_copy_and_free(a);
1151 : #endif
1152 :
1153 561266 : return a;
1154 : }
1155 :
1156 : /*
1157 : * bms_del_members - delete members in 'a' that are set in 'b'. 'a' is
1158 : * recycled when possible.
1159 : */
1160 : Bitmapset *
1161 2271384 : bms_del_members(Bitmapset *a, const Bitmapset *b)
1162 : {
1163 : int i;
1164 :
1165 : Assert(bms_is_valid_set(a));
1166 : Assert(bms_is_valid_set(b));
1167 :
1168 : /* Handle cases where either input is NULL */
1169 2271384 : if (a == NULL)
1170 969564 : return NULL;
1171 1301820 : if (b == NULL)
1172 : {
1173 : #ifdef REALLOCATE_BITMAPSETS
1174 : a = bms_copy_and_free(a);
1175 : #endif
1176 :
1177 202828 : return a;
1178 : }
1179 :
1180 : /* Remove b's bits from a; we need never copy */
1181 1098992 : if (a->nwords > b->nwords)
1182 : {
1183 : /*
1184 : * We'll never need to remove trailing zero words when 'a' has more
1185 : * words than 'b'.
1186 : */
1187 2 : i = 0;
1188 : do
1189 : {
1190 2 : a->words[i] &= ~b->words[i];
1191 2 : } while (++i < b->nwords);
1192 : }
1193 : else
1194 : {
1195 1098990 : int lastnonzero = -1;
1196 :
1197 : /* we may need to remove trailing zero words from the result. */
1198 1098990 : i = 0;
1199 : do
1200 : {
1201 1099006 : a->words[i] &= ~b->words[i];
1202 :
1203 : /* remember the last non-zero word */
1204 1099006 : if (a->words[i] != 0)
1205 236878 : lastnonzero = i;
1206 1099006 : } while (++i < a->nwords);
1207 :
1208 : /* check if 'a' has become empty */
1209 1098990 : if (lastnonzero == -1)
1210 : {
1211 862116 : pfree(a);
1212 862116 : return NULL;
1213 : }
1214 :
1215 : /* trim off any trailing zero words */
1216 236874 : a->nwords = lastnonzero + 1;
1217 : }
1218 :
1219 : #ifdef REALLOCATE_BITMAPSETS
1220 : a = bms_copy_and_free(a);
1221 : #endif
1222 :
1223 236876 : return a;
1224 : }
1225 :
1226 : /*
1227 : * bms_join - like bms_union, but *either* input *may* be recycled
1228 : */
1229 : Bitmapset *
1230 1779732 : bms_join(Bitmapset *a, Bitmapset *b)
1231 : {
1232 : Bitmapset *result;
1233 : Bitmapset *other;
1234 : int otherlen;
1235 : int i;
1236 :
1237 : Assert(bms_is_valid_set(a));
1238 : Assert(bms_is_valid_set(b));
1239 :
1240 : /* Handle cases where either input is NULL */
1241 1779732 : if (a == NULL)
1242 : {
1243 : #ifdef REALLOCATE_BITMAPSETS
1244 : b = bms_copy_and_free(b);
1245 : #endif
1246 :
1247 734420 : return b;
1248 : }
1249 1045312 : if (b == NULL)
1250 : {
1251 : #ifdef REALLOCATE_BITMAPSETS
1252 : a = bms_copy_and_free(a);
1253 : #endif
1254 :
1255 190702 : return a;
1256 : }
1257 :
1258 : /* Identify shorter and longer input; use longer one as result */
1259 854610 : if (a->nwords < b->nwords)
1260 : {
1261 4 : result = b;
1262 4 : other = a;
1263 : }
1264 : else
1265 : {
1266 854606 : result = a;
1267 854606 : other = b;
1268 : }
1269 : /* And union the shorter input into the result */
1270 854610 : otherlen = other->nwords;
1271 854610 : i = 0;
1272 : do
1273 : {
1274 854610 : result->words[i] |= other->words[i];
1275 854610 : } while (++i < otherlen);
1276 854610 : if (other != result) /* pure paranoia */
1277 854610 : pfree(other);
1278 :
1279 : #ifdef REALLOCATE_BITMAPSETS
1280 : result = bms_copy_and_free(result);
1281 : #endif
1282 :
1283 854610 : return result;
1284 : }
1285 :
1286 : /*
1287 : * bms_next_member - find next member of a set
1288 : *
1289 : * Returns smallest member greater than "prevbit", or -2 if there is none.
1290 : * "prevbit" must NOT be less than -1, or the behavior is unpredictable.
1291 : *
1292 : * This is intended as support for iterating through the members of a set.
1293 : * The typical pattern is
1294 : *
1295 : * x = -1;
1296 : * while ((x = bms_next_member(inputset, x)) >= 0)
1297 : * process member x;
1298 : *
1299 : * Notice that when there are no more members, we return -2, not -1 as you
1300 : * might expect. The rationale for that is to allow distinguishing the
1301 : * loop-not-started state (x == -1) from the loop-completed state (x == -2).
1302 : * It makes no difference in simple loop usage, but complex iteration logic
1303 : * might need such an ability.
1304 : */
1305 : int
1306 23356848 : bms_next_member(const Bitmapset *a, int prevbit)
1307 : {
1308 : int nwords;
1309 : int wordnum;
1310 : bitmapword mask;
1311 :
1312 : Assert(bms_is_valid_set(a));
1313 :
1314 23356848 : if (a == NULL)
1315 5515460 : return -2;
1316 17841388 : nwords = a->nwords;
1317 17841388 : prevbit++;
1318 17841388 : mask = (~(bitmapword) 0) << BITNUM(prevbit);
1319 23508320 : for (wordnum = WORDNUM(prevbit); wordnum < nwords; wordnum++)
1320 : {
1321 17844080 : bitmapword w = a->words[wordnum];
1322 :
1323 : /* ignore bits before prevbit */
1324 17844080 : w &= mask;
1325 :
1326 17844080 : if (w != 0)
1327 : {
1328 : int result;
1329 :
1330 12177148 : result = wordnum * BITS_PER_BITMAPWORD;
1331 12177148 : result += bmw_rightmost_one_pos(w);
1332 12177148 : return result;
1333 : }
1334 :
1335 : /* in subsequent words, consider all bits */
1336 5666932 : mask = (~(bitmapword) 0);
1337 : }
1338 5664240 : return -2;
1339 : }
1340 :
1341 : /*
1342 : * bms_prev_member - find prev member of a set
1343 : *
1344 : * Returns largest member less than "prevbit", or -2 if there is none.
1345 : * "prevbit" must NOT be more than one above the highest possible bit that can
1346 : * be set in the Bitmapset at its current size.
1347 : *
1348 : * To ease finding the highest set bit for the initial loop, the special
1349 : * prevbit value of -1 can be passed to have the function find the highest
1350 : * valued member in the set.
1351 : *
1352 : * This is intended as support for iterating through the members of a set in
1353 : * reverse. The typical pattern is
1354 : *
1355 : * x = -1;
1356 : * while ((x = bms_prev_member(inputset, x)) >= 0)
1357 : * process member x;
1358 : *
1359 : * Notice that when there are no more members, we return -2, not -1 as you
1360 : * might expect. The rationale for that is to allow distinguishing the
1361 : * loop-not-started state (x == -1) from the loop-completed state (x == -2).
1362 : * It makes no difference in simple loop usage, but complex iteration logic
1363 : * might need such an ability.
1364 : */
1365 :
1366 : int
1367 30 : bms_prev_member(const Bitmapset *a, int prevbit)
1368 : {
1369 : int wordnum;
1370 : int ushiftbits;
1371 : bitmapword mask;
1372 :
1373 : Assert(bms_is_valid_set(a));
1374 :
1375 : /*
1376 : * If set is NULL or if there are no more bits to the right then we've
1377 : * nothing to do.
1378 : */
1379 30 : if (a == NULL || prevbit == 0)
1380 4 : return -2;
1381 :
1382 : /* Validate callers didn't give us something out of range */
1383 : Assert(prevbit <= a->nwords * BITS_PER_BITMAPWORD);
1384 : Assert(prevbit >= -1);
1385 :
1386 : /* transform -1 to the highest possible bit we could have set */
1387 26 : if (prevbit == -1)
1388 2 : prevbit = a->nwords * BITS_PER_BITMAPWORD - 1;
1389 : else
1390 24 : prevbit--;
1391 :
1392 26 : ushiftbits = BITS_PER_BITMAPWORD - (BITNUM(prevbit) + 1);
1393 26 : mask = (~(bitmapword) 0) >> ushiftbits;
1394 34 : for (wordnum = WORDNUM(prevbit); wordnum >= 0; wordnum--)
1395 : {
1396 26 : bitmapword w = a->words[wordnum];
1397 :
1398 : /* mask out bits left of prevbit */
1399 26 : w &= mask;
1400 :
1401 26 : if (w != 0)
1402 : {
1403 : int result;
1404 :
1405 18 : result = wordnum * BITS_PER_BITMAPWORD;
1406 18 : result += bmw_leftmost_one_pos(w);
1407 18 : return result;
1408 : }
1409 :
1410 : /* in subsequent words, consider all bits */
1411 8 : mask = (~(bitmapword) 0);
1412 : }
1413 8 : return -2;
1414 : }
1415 :
1416 : /*
1417 : * bms_hash_value - compute a hash key for a Bitmapset
1418 : */
1419 : uint32
1420 7126 : bms_hash_value(const Bitmapset *a)
1421 : {
1422 : Assert(bms_is_valid_set(a));
1423 :
1424 7126 : if (a == NULL)
1425 8 : return 0; /* All empty sets hash to 0 */
1426 7118 : return DatumGetUInt32(hash_any((const unsigned char *) a->words,
1427 7118 : a->nwords * sizeof(bitmapword)));
1428 : }
1429 :
1430 : /*
1431 : * bitmap_hash - hash function for keys that are (pointers to) Bitmapsets
1432 : *
1433 : * Note: don't forget to specify bitmap_match as the match function!
1434 : */
1435 : uint32
1436 7112 : bitmap_hash(const void *key, Size keysize)
1437 : {
1438 : Assert(keysize == sizeof(Bitmapset *));
1439 7112 : return bms_hash_value(*((const Bitmapset *const *) key));
1440 : }
1441 :
1442 : /*
1443 : * bitmap_match - match function to use with bitmap_hash
1444 : */
1445 : int
1446 4182 : bitmap_match(const void *key1, const void *key2, Size keysize)
1447 : {
1448 : Assert(keysize == sizeof(Bitmapset *));
1449 4182 : return !bms_equal(*((const Bitmapset *const *) key1),
1450 : *((const Bitmapset *const *) key2));
1451 : }
|