Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * bitmapset.c
4 : * PostgreSQL generic bitmap set package
5 : *
6 : * A bitmap set can represent any set of nonnegative integers, although
7 : * it is mainly intended for sets where the maximum value is not large,
8 : * say at most a few hundred. By convention, we always represent a set with
9 : * the minimum possible number of words, i.e, there are never any trailing
10 : * zero words. Enforcing this requires that an empty set is represented as
11 : * NULL. Because an empty Bitmapset is represented as NULL, a non-NULL
12 : * Bitmapset always has at least 1 Bitmapword. We can exploit this fact to
13 : * speed up various loops over the Bitmapset's words array by using "do while"
14 : * loops instead of "for" loops. This means the code does not waste time
15 : * checking the loop condition before the first iteration. For Bitmapsets
16 : * containing only a single word (likely the majority of them) this halves the
17 : * number of loop condition checks.
18 : *
19 : * Callers must ensure that the set returned by functions in this file which
20 : * adjust the members of an existing set is assigned to all pointers pointing
21 : * to that existing set. No guarantees are made that we'll ever modify the
22 : * existing set in-place and return it.
23 : *
24 : * To help find bugs caused by callers failing to record the return value of
25 : * the function which manipulates an existing set, we support building with
26 : * REALLOCATE_BITMAPSETS. This results in the set being reallocated each time
27 : * the set is altered and the existing being pfreed. This is useful as if any
28 : * references still exist to the old set, we're more likely to notice as
29 : * any users of the old set will be accessing pfree'd memory. This option is
30 : * only intended to be used for debugging.
31 : *
32 : * Copyright (c) 2003-2026, PostgreSQL Global Development Group
33 : *
34 : * IDENTIFICATION
35 : * src/backend/nodes/bitmapset.c
36 : *
37 : *-------------------------------------------------------------------------
38 : */
39 : #include "postgres.h"
40 :
41 : #include "common/hashfn.h"
42 : #include "nodes/bitmapset.h"
43 : #include "nodes/pg_list.h"
44 : #include "port/pg_bitutils.h"
45 :
46 :
47 : #define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD)
48 : #define BITNUM(x) ((x) % BITS_PER_BITMAPWORD)
49 :
50 : #define BITMAPSET_SIZE(nwords) \
51 : (offsetof(Bitmapset, words) + (nwords) * sizeof(bitmapword))
52 :
53 : /*----------
54 : * This is a well-known cute trick for isolating the rightmost one-bit
55 : * in a word. It assumes two's complement arithmetic. Consider any
56 : * nonzero value, and focus attention on the rightmost one. The value is
57 : * then something like
58 : * xxxxxx10000
59 : * where x's are unspecified bits. The two's complement negative is formed
60 : * by inverting all the bits and adding one. Inversion gives
61 : * yyyyyy01111
62 : * where each y is the inverse of the corresponding x. Incrementing gives
63 : * yyyyyy10000
64 : * and then ANDing with the original value gives
65 : * 00000010000
66 : * This works for all cases except original value = zero, where of course
67 : * we get zero.
68 : *----------
69 : */
70 : #define RIGHTMOST_ONE(x) ((signedbitmapword) (x) & -((signedbitmapword) (x)))
71 :
72 : #define HAS_MULTIPLE_ONES(x) ((bitmapword) RIGHTMOST_ONE(x) != (x))
73 :
74 : #ifdef USE_ASSERT_CHECKING
75 : /*
76 : * bms_is_valid_set - for cassert builds to check for valid sets
77 : */
78 : static bool
79 : bms_is_valid_set(const Bitmapset *a)
80 : {
81 : /* NULL is the correct representation of an empty set */
82 : if (a == NULL)
83 : return true;
84 :
85 : /* check the node tag is set correctly. pfree'd pointer, maybe? */
86 : if (!IsA(a, Bitmapset))
87 : return false;
88 :
89 : /* trailing zero words are not allowed */
90 : if (a->words[a->nwords - 1] == 0)
91 : return false;
92 :
93 : return true;
94 : }
95 : #endif
96 :
97 : #ifdef REALLOCATE_BITMAPSETS
98 : /*
99 : * bms_copy_and_free
100 : * Only required in REALLOCATE_BITMAPSETS builds. Provide a simple way
101 : * to return a freshly allocated set and pfree the original.
102 : *
103 : * Note: callers which accept multiple sets must be careful when calling this
104 : * function to clone one parameter as other parameters may point to the same
105 : * set. A good option is to call this just before returning the resulting
106 : * set.
107 : */
108 : static Bitmapset *
109 : bms_copy_and_free(Bitmapset *a)
110 : {
111 : Bitmapset *c = bms_copy(a);
112 :
113 : bms_free(a);
114 : return c;
115 : }
116 : #endif
117 :
118 : /*
119 : * bms_copy - make a palloc'd copy of a bitmapset
120 : */
121 : Bitmapset *
122 41933912 : bms_copy(const Bitmapset *a)
123 : {
124 : Bitmapset *result;
125 : size_t size;
126 :
127 : Assert(bms_is_valid_set(a));
128 :
129 41933912 : if (a == NULL)
130 27046342 : return NULL;
131 :
132 14887570 : size = BITMAPSET_SIZE(a->nwords);
133 14887570 : result = (Bitmapset *) palloc(size);
134 14887570 : memcpy(result, a, size);
135 14887570 : return result;
136 : }
137 :
138 : /*
139 : * bms_equal - are two bitmapsets equal? or both NULL?
140 : */
141 : bool
142 19378402 : bms_equal(const Bitmapset *a, const Bitmapset *b)
143 : {
144 : int i;
145 :
146 : Assert(bms_is_valid_set(a));
147 : Assert(bms_is_valid_set(b));
148 :
149 : /* Handle cases where either input is NULL */
150 19378402 : if (a == NULL)
151 : {
152 13602611 : if (b == NULL)
153 13551633 : return true;
154 50978 : return false;
155 : }
156 5775791 : else if (b == NULL)
157 17728 : return false;
158 :
159 : /* can't be equal if the word counts don't match */
160 5758063 : if (a->nwords != b->nwords)
161 1160 : return false;
162 :
163 : /* check each word matches */
164 5756903 : i = 0;
165 : do
166 : {
167 5757355 : if (a->words[i] != b->words[i])
168 2580684 : return false;
169 3176671 : } while (++i < a->nwords);
170 :
171 3176219 : return true;
172 : }
173 :
174 : /*
175 : * bms_compare - qsort-style comparator for bitmapsets
176 : *
177 : * This guarantees to report values as equal iff bms_equal would say they are
178 : * equal. Otherwise, the highest-numbered bit that is set in one value but
179 : * not the other determines the result. (This rule means that, for example,
180 : * {6} is greater than {5}, which seems plausible.)
181 : */
182 : int
183 22592 : bms_compare(const Bitmapset *a, const Bitmapset *b)
184 : {
185 : int i;
186 :
187 : Assert(bms_is_valid_set(a));
188 : Assert(bms_is_valid_set(b));
189 :
190 : /* Handle cases where either input is NULL */
191 22592 : if (a == NULL)
192 4 : return (b == NULL) ? 0 : -1;
193 22588 : else if (b == NULL)
194 2 : return +1;
195 :
196 : /* the set with the most words must be greater */
197 22586 : if (a->nwords != b->nwords)
198 11 : return (a->nwords > b->nwords) ? +1 : -1;
199 :
200 22575 : i = a->nwords - 1;
201 : do
202 : {
203 22575 : bitmapword aw = a->words[i];
204 22575 : bitmapword bw = b->words[i];
205 :
206 22575 : if (aw != bw)
207 22574 : return (aw > bw) ? +1 : -1;
208 1 : } while (--i >= 0);
209 1 : return 0;
210 : }
211 :
212 : /*
213 : * bms_make_singleton - build a bitmapset containing a single member
214 : */
215 : Bitmapset *
216 14437360 : bms_make_singleton(int x)
217 : {
218 : Bitmapset *result;
219 : int wordnum,
220 : bitnum;
221 :
222 14437360 : if (x < 0)
223 1 : elog(ERROR, "negative bitmapset member not allowed");
224 14437359 : wordnum = WORDNUM(x);
225 14437359 : bitnum = BITNUM(x);
226 14437359 : result = (Bitmapset *) palloc0(BITMAPSET_SIZE(wordnum + 1));
227 14437359 : result->type = T_Bitmapset;
228 14437359 : result->nwords = wordnum + 1;
229 14437359 : result->words[wordnum] = ((bitmapword) 1 << bitnum);
230 14437359 : return result;
231 : }
232 :
233 : /*
234 : * bms_free - free a bitmapset
235 : *
236 : * Same as pfree except for allowing NULL input
237 : */
238 : void
239 15122490 : bms_free(Bitmapset *a)
240 : {
241 15122490 : if (a)
242 6771102 : pfree(a);
243 15122490 : }
244 :
245 :
246 : /*
247 : * bms_union - create and return a new set containing all members from both
248 : * input sets. Both inputs are left unmodified.
249 : */
250 : Bitmapset *
251 6768089 : bms_union(const Bitmapset *a, const Bitmapset *b)
252 : {
253 : Bitmapset *result;
254 : const Bitmapset *other;
255 : int otherlen;
256 : int i;
257 :
258 : Assert(bms_is_valid_set(a));
259 : Assert(bms_is_valid_set(b));
260 :
261 : /* Handle cases where either input is NULL */
262 6768089 : if (a == NULL)
263 4180059 : return bms_copy(b);
264 2588030 : if (b == NULL)
265 1020247 : return bms_copy(a);
266 : /* Identify shorter and longer input; copy the longer one */
267 1567783 : if (a->nwords <= b->nwords)
268 : {
269 1567781 : result = bms_copy(b);
270 1567781 : other = a;
271 : }
272 : else
273 : {
274 2 : result = bms_copy(a);
275 2 : other = b;
276 : }
277 : /* And union the shorter input into the result */
278 1567783 : otherlen = other->nwords;
279 1567783 : i = 0;
280 : do
281 : {
282 1569061 : result->words[i] |= other->words[i];
283 1569061 : } while (++i < otherlen);
284 1567783 : return result;
285 : }
286 :
287 : /*
288 : * bms_intersect - create and return a new set containing members which both
289 : * input sets have in common. Both inputs are left unmodified.
290 : */
291 : Bitmapset *
292 931100 : bms_intersect(const Bitmapset *a, const Bitmapset *b)
293 : {
294 : Bitmapset *result;
295 : const Bitmapset *other;
296 : int lastnonzero;
297 : int resultlen;
298 : int i;
299 :
300 : Assert(bms_is_valid_set(a));
301 : Assert(bms_is_valid_set(b));
302 :
303 : /* Handle cases where either input is NULL */
304 931100 : if (a == NULL || b == NULL)
305 160782 : return NULL;
306 :
307 : /* Identify shorter and longer input; copy the shorter one */
308 770318 : if (a->nwords <= b->nwords)
309 : {
310 770316 : result = bms_copy(a);
311 770316 : other = b;
312 : }
313 : else
314 : {
315 2 : result = bms_copy(b);
316 2 : other = a;
317 : }
318 : /* And intersect the longer input with the result */
319 770318 : resultlen = result->nwords;
320 770318 : lastnonzero = -1;
321 770318 : i = 0;
322 : do
323 : {
324 771596 : result->words[i] &= other->words[i];
325 :
326 771596 : if (result->words[i] != 0)
327 763597 : lastnonzero = i;
328 771596 : } while (++i < resultlen);
329 : /* If we computed an empty result, we must return NULL */
330 770318 : if (lastnonzero == -1)
331 : {
332 6864 : pfree(result);
333 6864 : return NULL;
334 : }
335 :
336 : /* get rid of trailing zero words */
337 763454 : result->nwords = lastnonzero + 1;
338 763454 : return result;
339 : }
340 :
341 : /*
342 : * bms_difference - create and return a new set containing all the members of
343 : * 'a' without the members of 'b'.
344 : */
345 : Bitmapset *
346 1787525 : bms_difference(const Bitmapset *a, const Bitmapset *b)
347 : {
348 : Bitmapset *result;
349 : int i;
350 :
351 : Assert(bms_is_valid_set(a));
352 : Assert(bms_is_valid_set(b));
353 :
354 : /* Handle cases where either input is NULL */
355 1787525 : if (a == NULL)
356 465026 : return NULL;
357 1322499 : if (b == NULL)
358 975699 : return bms_copy(a);
359 :
360 : /*
361 : * In Postgres' usage, an empty result is a very common case, so it's
362 : * worth optimizing for that by testing bms_nonempty_difference(). This
363 : * saves us a palloc/pfree cycle compared to checking after-the-fact.
364 : */
365 346800 : if (!bms_nonempty_difference(a, b))
366 109622 : return NULL;
367 :
368 : /* Copy the left input */
369 237178 : result = bms_copy(a);
370 :
371 : /* And remove b's bits from result */
372 237178 : if (result->nwords > b->nwords)
373 : {
374 : /*
375 : * We'll never need to remove trailing zero words when 'a' has more
376 : * words than 'b' as the additional words must be non-zero.
377 : */
378 2 : i = 0;
379 : do
380 : {
381 2 : result->words[i] &= ~b->words[i];
382 2 : } while (++i < b->nwords);
383 : }
384 : else
385 : {
386 237176 : int lastnonzero = -1;
387 :
388 : /* we may need to remove trailing zero words from the result. */
389 237176 : i = 0;
390 : do
391 : {
392 237177 : result->words[i] &= ~b->words[i];
393 :
394 : /* remember the last non-zero word */
395 237177 : if (result->words[i] != 0)
396 237176 : lastnonzero = i;
397 237177 : } while (++i < result->nwords);
398 :
399 : /* trim off trailing zero words */
400 237176 : result->nwords = lastnonzero + 1;
401 : }
402 : Assert(result->nwords != 0);
403 :
404 : /* Need not check for empty result, since we handled that case above */
405 237178 : return result;
406 : }
407 :
408 : /*
409 : * bms_is_subset - is A a subset of B?
410 : */
411 : bool
412 16742734 : bms_is_subset(const Bitmapset *a, const Bitmapset *b)
413 : {
414 : int i;
415 :
416 : Assert(bms_is_valid_set(a));
417 : Assert(bms_is_valid_set(b));
418 :
419 : /* Handle cases where either input is NULL */
420 16742734 : if (a == NULL)
421 1509244 : return true; /* empty set is a subset of anything */
422 15233490 : if (b == NULL)
423 313309 : return false;
424 :
425 : /* 'a' can't be a subset of 'b' if it contains more words */
426 14920181 : if (a->nwords > b->nwords)
427 2 : return false;
428 :
429 : /* Check all 'a' members are set in 'b' */
430 14920179 : i = 0;
431 : do
432 : {
433 14920179 : if ((a->words[i] & ~b->words[i]) != 0)
434 5683825 : return false;
435 9236354 : } while (++i < a->nwords);
436 9236354 : return true;
437 : }
438 :
439 : /*
440 : * bms_subset_compare - compare A and B for equality/subset relationships
441 : *
442 : * This is more efficient than testing bms_is_subset in both directions.
443 : */
444 : BMS_Comparison
445 2133763 : bms_subset_compare(const Bitmapset *a, const Bitmapset *b)
446 : {
447 : BMS_Comparison result;
448 : int shortlen;
449 : int i;
450 :
451 : Assert(bms_is_valid_set(a));
452 : Assert(bms_is_valid_set(b));
453 :
454 : /* Handle cases where either input is NULL */
455 2133763 : if (a == NULL)
456 : {
457 1779921 : if (b == NULL)
458 1726649 : return BMS_EQUAL;
459 53272 : return BMS_SUBSET1;
460 : }
461 353842 : if (b == NULL)
462 174595 : return BMS_SUBSET2;
463 :
464 : /* Check common words */
465 179247 : result = BMS_EQUAL; /* status so far */
466 179247 : shortlen = Min(a->nwords, b->nwords);
467 179247 : i = 0;
468 : do
469 : {
470 179250 : bitmapword aword = a->words[i];
471 179250 : bitmapword bword = b->words[i];
472 :
473 179250 : if ((aword & ~bword) != 0)
474 : {
475 : /* a is not a subset of b */
476 48251 : if (result == BMS_SUBSET1)
477 2 : return BMS_DIFFERENT;
478 48249 : result = BMS_SUBSET2;
479 : }
480 179248 : if ((bword & ~aword) != 0)
481 : {
482 : /* b is not a subset of a */
483 49965 : if (result == BMS_SUBSET2)
484 44903 : return BMS_DIFFERENT;
485 5062 : result = BMS_SUBSET1;
486 : }
487 134345 : } while (++i < shortlen);
488 : /* Check extra words */
489 134342 : if (a->nwords > b->nwords)
490 : {
491 : /* if a has more words then a is not a subset of b */
492 2 : if (result == BMS_SUBSET1)
493 1 : return BMS_DIFFERENT;
494 1 : return BMS_SUBSET2;
495 : }
496 134340 : else if (a->nwords < b->nwords)
497 : {
498 : /* if b has more words then b is not a subset of a */
499 4 : if (result == BMS_SUBSET2)
500 2 : return BMS_DIFFERENT;
501 2 : return BMS_SUBSET1;
502 : }
503 134336 : return result;
504 : }
505 :
506 : /*
507 : * bms_is_member - is X a member of A?
508 : */
509 : bool
510 13180229 : bms_is_member(int x, const Bitmapset *a)
511 : {
512 : int wordnum,
513 : bitnum;
514 :
515 : Assert(bms_is_valid_set(a));
516 :
517 : /* XXX better to just return false for x<0 ? */
518 13180229 : if (x < 0)
519 1 : elog(ERROR, "negative bitmapset member not allowed");
520 13180228 : if (a == NULL)
521 6509516 : return false;
522 :
523 6670712 : wordnum = WORDNUM(x);
524 6670712 : bitnum = BITNUM(x);
525 6670712 : if (wordnum >= a->nwords)
526 212 : return false;
527 6670500 : if ((a->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0)
528 5258349 : return true;
529 1412151 : return false;
530 : }
531 :
532 : /*
533 : * bms_member_index
534 : * determine 0-based index of member x in the bitmap
535 : *
536 : * Returns (-1) when x is not a member.
537 : */
538 : int
539 4427 : bms_member_index(Bitmapset *a, int x)
540 : {
541 : int bitnum;
542 : int wordnum;
543 4427 : int result = 0;
544 : bitmapword mask;
545 :
546 : Assert(bms_is_valid_set(a));
547 :
548 : /* return -1 if not a member of the bitmap */
549 4427 : if (!bms_is_member(x, a))
550 2 : return -1;
551 :
552 4425 : wordnum = WORDNUM(x);
553 4425 : bitnum = BITNUM(x);
554 :
555 : /* count bits in preceding words */
556 4425 : result += pg_popcount((const char *) a->words,
557 : wordnum * sizeof(bitmapword));
558 :
559 : /*
560 : * Now add bits of the last word, but only those before the item. We can
561 : * do that by applying a mask and then using popcount again. To get
562 : * 0-based index, we want to count only preceding bits, not the item
563 : * itself, so we subtract 1.
564 : */
565 4425 : mask = ((bitmapword) 1 << bitnum) - 1;
566 4425 : result += bmw_popcount(a->words[wordnum] & mask);
567 :
568 4425 : return result;
569 : }
570 :
571 : /*
572 : * bms_overlap - do sets overlap (ie, have a nonempty intersection)?
573 : */
574 : bool
575 11042282 : bms_overlap(const Bitmapset *a, const Bitmapset *b)
576 : {
577 : int shortlen;
578 : int i;
579 :
580 : Assert(bms_is_valid_set(a));
581 : Assert(bms_is_valid_set(b));
582 :
583 : /* Handle cases where either input is NULL */
584 11042282 : if (a == NULL || b == NULL)
585 4494979 : return false;
586 : /* Check words in common */
587 6547303 : shortlen = Min(a->nwords, b->nwords);
588 6547303 : i = 0;
589 : do
590 : {
591 6547303 : if ((a->words[i] & b->words[i]) != 0)
592 4184210 : return true;
593 2363093 : } while (++i < shortlen);
594 2363093 : return false;
595 : }
596 :
597 : /*
598 : * bms_overlap_list - does a set overlap an integer list?
599 : */
600 : bool
601 1429 : bms_overlap_list(const Bitmapset *a, const List *b)
602 : {
603 : ListCell *lc;
604 : int wordnum,
605 : bitnum;
606 :
607 : Assert(bms_is_valid_set(a));
608 :
609 1429 : if (a == NULL || b == NIL)
610 1312 : return false;
611 :
612 219 : foreach(lc, b)
613 : {
614 170 : int x = lfirst_int(lc);
615 :
616 170 : if (x < 0)
617 2 : elog(ERROR, "negative bitmapset member not allowed");
618 168 : wordnum = WORDNUM(x);
619 168 : bitnum = BITNUM(x);
620 168 : if (wordnum < a->nwords)
621 168 : if ((a->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0)
622 66 : return true;
623 : }
624 :
625 49 : return false;
626 : }
627 :
628 : /*
629 : * bms_nonempty_difference - do sets have a nonempty difference?
630 : *
631 : * i.e., are any members set in 'a' that are not also set in 'b'.
632 : */
633 : bool
634 2329762 : bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b)
635 : {
636 : int i;
637 :
638 : Assert(bms_is_valid_set(a));
639 : Assert(bms_is_valid_set(b));
640 :
641 : /* Handle cases where either input is NULL */
642 2329762 : if (a == NULL)
643 4349 : return false;
644 2325413 : if (b == NULL)
645 1 : return true;
646 : /* if 'a' has more words then it must contain additional members */
647 2325412 : if (a->nwords > b->nwords)
648 3 : return true;
649 : /* Check all 'a' members are set in 'b' */
650 2325409 : i = 0;
651 : do
652 : {
653 2325409 : if ((a->words[i] & ~b->words[i]) != 0)
654 1344381 : return true;
655 981028 : } while (++i < a->nwords);
656 981028 : return false;
657 : }
658 :
659 : /*
660 : * bms_singleton_member - return the sole integer member of set
661 : *
662 : * Raises error if |a| is not 1.
663 : */
664 : int
665 17645 : bms_singleton_member(const Bitmapset *a)
666 : {
667 17645 : int result = -1;
668 : int nwords;
669 : int wordnum;
670 :
671 : Assert(bms_is_valid_set(a));
672 :
673 17645 : if (a == NULL)
674 1 : elog(ERROR, "bitmapset is empty");
675 :
676 17644 : nwords = a->nwords;
677 17644 : wordnum = 0;
678 : do
679 : {
680 17644 : bitmapword w = a->words[wordnum];
681 :
682 17644 : if (w != 0)
683 : {
684 17644 : if (result >= 0 || HAS_MULTIPLE_ONES(w))
685 1 : elog(ERROR, "bitmapset has multiple members");
686 17643 : result = wordnum * BITS_PER_BITMAPWORD;
687 17643 : result += bmw_rightmost_one_pos(w);
688 : }
689 17643 : } while (++wordnum < nwords);
690 :
691 : /* we don't expect non-NULL sets to be empty */
692 : Assert(result >= 0);
693 17643 : return result;
694 : }
695 :
696 : /*
697 : * bms_get_singleton_member
698 : *
699 : * Test whether the given set is a singleton.
700 : * If so, set *member to the value of its sole member, and return true.
701 : * If not, return false, without changing *member.
702 : *
703 : * This is more convenient and faster than calling bms_membership() and then
704 : * bms_singleton_member(), if we don't care about distinguishing empty sets
705 : * from multiple-member sets.
706 : */
707 : bool
708 2135072 : bms_get_singleton_member(const Bitmapset *a, int *member)
709 : {
710 2135072 : int result = -1;
711 : int nwords;
712 : int wordnum;
713 :
714 : Assert(bms_is_valid_set(a));
715 :
716 2135072 : if (a == NULL)
717 2 : return false;
718 :
719 2135070 : nwords = a->nwords;
720 2135070 : wordnum = 0;
721 : do
722 : {
723 2135076 : bitmapword w = a->words[wordnum];
724 :
725 2135076 : if (w != 0)
726 : {
727 2135070 : if (result >= 0 || HAS_MULTIPLE_ONES(w))
728 398329 : return false;
729 1736741 : result = wordnum * BITS_PER_BITMAPWORD;
730 1736741 : result += bmw_rightmost_one_pos(w);
731 : }
732 1736747 : } while (++wordnum < nwords);
733 :
734 : /* we don't expect non-NULL sets to be empty */
735 : Assert(result >= 0);
736 1736741 : *member = result;
737 1736741 : return true;
738 : }
739 :
740 : /*
741 : * bms_num_members - count members of set
742 : */
743 : int
744 1465320 : bms_num_members(const Bitmapset *a)
745 : {
746 : Assert(bms_is_valid_set(a));
747 :
748 1465320 : if (a == NULL)
749 146166 : return 0;
750 :
751 : /* fast-path for common case */
752 1319154 : if (a->nwords == 1)
753 1319088 : return bmw_popcount(a->words[0]);
754 :
755 66 : return pg_popcount((const char *) a->words,
756 66 : a->nwords * sizeof(bitmapword));
757 : }
758 :
759 : /*
760 : * bms_membership - does a set have zero, one, or multiple members?
761 : *
762 : * This is faster than making an exact count with bms_num_members().
763 : */
764 : BMS_Membership
765 1152896 : bms_membership(const Bitmapset *a)
766 : {
767 1152896 : BMS_Membership result = BMS_EMPTY_SET;
768 : int nwords;
769 : int wordnum;
770 :
771 : Assert(bms_is_valid_set(a));
772 :
773 1152896 : if (a == NULL)
774 403 : return BMS_EMPTY_SET;
775 :
776 1152493 : nwords = a->nwords;
777 1152493 : wordnum = 0;
778 : do
779 : {
780 1152533 : bitmapword w = a->words[wordnum];
781 :
782 1152533 : if (w != 0)
783 : {
784 1152493 : if (result != BMS_EMPTY_SET || HAS_MULTIPLE_ONES(w))
785 393776 : return BMS_MULTIPLE;
786 758717 : result = BMS_SINGLETON;
787 : }
788 758757 : } while (++wordnum < nwords);
789 758717 : return result;
790 : }
791 :
792 :
793 : /*
794 : * bms_add_member - add a specified member to set
795 : *
796 : * 'a' is recycled when possible.
797 : */
798 : Bitmapset *
799 20635778 : bms_add_member(Bitmapset *a, int x)
800 : {
801 : int wordnum,
802 : bitnum;
803 :
804 : Assert(bms_is_valid_set(a));
805 :
806 20635778 : if (x < 0)
807 2 : elog(ERROR, "negative bitmapset member not allowed");
808 20635776 : if (a == NULL)
809 11095723 : return bms_make_singleton(x);
810 :
811 9540053 : wordnum = WORDNUM(x);
812 9540053 : bitnum = BITNUM(x);
813 :
814 : /* enlarge the set if necessary */
815 9540053 : if (wordnum >= a->nwords)
816 : {
817 297 : int oldnwords = a->nwords;
818 : int i;
819 :
820 297 : a = (Bitmapset *) repalloc(a, BITMAPSET_SIZE(wordnum + 1));
821 297 : a->nwords = wordnum + 1;
822 : /* zero out the enlarged portion */
823 297 : i = oldnwords;
824 : do
825 : {
826 5754 : a->words[i] = 0;
827 5754 : } while (++i < a->nwords);
828 : }
829 :
830 9540053 : a->words[wordnum] |= ((bitmapword) 1 << bitnum);
831 :
832 : #ifdef REALLOCATE_BITMAPSETS
833 :
834 : /*
835 : * There's no guarantee that the repalloc returned a new pointer, so copy
836 : * and free unconditionally here.
837 : */
838 : a = bms_copy_and_free(a);
839 : #endif
840 :
841 9540053 : return a;
842 : }
843 :
844 : /*
845 : * bms_del_member - remove a specified member from set
846 : *
847 : * No error if x is not currently a member of set
848 : *
849 : * 'a' is recycled when possible.
850 : */
851 : Bitmapset *
852 1414991 : bms_del_member(Bitmapset *a, int x)
853 : {
854 : int wordnum,
855 : bitnum;
856 :
857 : Assert(bms_is_valid_set(a));
858 :
859 1414991 : if (x < 0)
860 1 : elog(ERROR, "negative bitmapset member not allowed");
861 1414990 : if (a == NULL)
862 505318 : return NULL;
863 :
864 909672 : wordnum = WORDNUM(x);
865 909672 : bitnum = BITNUM(x);
866 :
867 : #ifdef REALLOCATE_BITMAPSETS
868 : a = bms_copy_and_free(a);
869 : #endif
870 :
871 : /* member can't exist. Return 'a' unmodified */
872 909672 : if (unlikely(wordnum >= a->nwords))
873 0 : return a;
874 :
875 909672 : a->words[wordnum] &= ~((bitmapword) 1 << bitnum);
876 :
877 : /* when last word becomes empty, trim off all trailing empty words */
878 909672 : if (a->words[wordnum] == 0 && wordnum == a->nwords - 1)
879 : {
880 : /* find the last non-empty word and make that the new final word */
881 447091 : for (int i = wordnum - 1; i >= 0; i--)
882 : {
883 3958 : if (a->words[i] != 0)
884 : {
885 59 : a->nwords = i + 1;
886 59 : return a;
887 : }
888 : }
889 :
890 : /* the set is now empty */
891 443133 : pfree(a);
892 443133 : return NULL;
893 : }
894 466480 : return a;
895 : }
896 :
897 : /*
898 : * bms_add_members - like bms_union, but left input is recycled when possible
899 : */
900 : Bitmapset *
901 11657836 : bms_add_members(Bitmapset *a, const Bitmapset *b)
902 : {
903 : Bitmapset *result;
904 : const Bitmapset *other;
905 : int otherlen;
906 : int i;
907 :
908 : Assert(bms_is_valid_set(a));
909 : Assert(bms_is_valid_set(b));
910 :
911 : /* Handle cases where either input is NULL */
912 11657836 : if (a == NULL)
913 6116705 : return bms_copy(b);
914 5541131 : if (b == NULL)
915 : {
916 : #ifdef REALLOCATE_BITMAPSETS
917 : a = bms_copy_and_free(a);
918 : #endif
919 :
920 3551724 : return a;
921 : }
922 : /* Identify shorter and longer input; copy the longer one if needed */
923 1989407 : if (a->nwords < b->nwords)
924 : {
925 6 : result = bms_copy(b);
926 6 : other = a;
927 : }
928 : else
929 : {
930 1989401 : result = a;
931 1989401 : other = b;
932 : }
933 : /* And union the shorter input into the result */
934 1989407 : otherlen = other->nwords;
935 1989407 : i = 0;
936 : do
937 : {
938 1989478 : result->words[i] |= other->words[i];
939 1989478 : } while (++i < otherlen);
940 1989407 : if (result != a)
941 6 : pfree(a);
942 : #ifdef REALLOCATE_BITMAPSETS
943 : else
944 : result = bms_copy_and_free(result);
945 : #endif
946 :
947 1989407 : return result;
948 : }
949 :
950 : /*
951 : * bms_replace_members
952 : * Remove all existing members from 'a' and repopulate the set with members
953 : * from 'b', recycling 'a', when possible.
954 : */
955 : Bitmapset *
956 11241 : bms_replace_members(Bitmapset *a, const Bitmapset *b)
957 : {
958 : int i;
959 :
960 : Assert(bms_is_valid_set(a));
961 : Assert(bms_is_valid_set(b));
962 :
963 11241 : if (a == NULL)
964 1 : return bms_copy(b);
965 11240 : if (b == NULL)
966 : {
967 1 : pfree(a);
968 1 : return NULL;
969 : }
970 :
971 11239 : if (a->nwords < b->nwords)
972 1 : a = (Bitmapset *) repalloc(a, BITMAPSET_SIZE(b->nwords));
973 :
974 11239 : i = 0;
975 : do
976 : {
977 11248 : a->words[i] = b->words[i];
978 11248 : } while (++i < b->nwords);
979 :
980 11239 : a->nwords = b->nwords;
981 :
982 : #ifdef REALLOCATE_BITMAPSETS
983 :
984 : /*
985 : * There's no guarantee that the repalloc returned a new pointer, so copy
986 : * and free unconditionally here.
987 : */
988 : a = bms_copy_and_free(a);
989 : #endif
990 :
991 11239 : return a;
992 : }
993 :
994 : /*
995 : * bms_add_range
996 : * Add members in the range of 'lower' to 'upper' to the set.
997 : *
998 : * Note this could also be done by calling bms_add_member in a loop, however,
999 : * using this function will be faster when the range is large as we work at
1000 : * the bitmapword level rather than at bit level.
1001 : */
1002 : Bitmapset *
1003 44645 : bms_add_range(Bitmapset *a, int lower, int upper)
1004 : {
1005 : int lwordnum,
1006 : lbitnum,
1007 : uwordnum,
1008 : ushiftbits,
1009 : wordnum;
1010 :
1011 : Assert(bms_is_valid_set(a));
1012 :
1013 : /* do nothing if nothing is called for, without further checking */
1014 44645 : if (upper < lower)
1015 : {
1016 : #ifdef REALLOCATE_BITMAPSETS
1017 : a = bms_copy_and_free(a);
1018 : #endif
1019 :
1020 23 : return a;
1021 : }
1022 :
1023 44622 : if (lower < 0)
1024 1 : elog(ERROR, "negative bitmapset member not allowed");
1025 44621 : uwordnum = WORDNUM(upper);
1026 :
1027 44621 : if (a == NULL)
1028 : {
1029 34591 : a = (Bitmapset *) palloc0(BITMAPSET_SIZE(uwordnum + 1));
1030 34591 : a->type = T_Bitmapset;
1031 34591 : a->nwords = uwordnum + 1;
1032 : }
1033 10030 : else if (uwordnum >= a->nwords)
1034 : {
1035 3 : int oldnwords = a->nwords;
1036 : int i;
1037 :
1038 : /* ensure we have enough words to store the upper bit */
1039 3 : a = (Bitmapset *) repalloc(a, BITMAPSET_SIZE(uwordnum + 1));
1040 3 : a->nwords = uwordnum + 1;
1041 : /* zero out the enlarged portion */
1042 3 : i = oldnwords;
1043 : do
1044 : {
1045 5 : a->words[i] = 0;
1046 5 : } while (++i < a->nwords);
1047 : }
1048 :
1049 44621 : wordnum = lwordnum = WORDNUM(lower);
1050 :
1051 44621 : lbitnum = BITNUM(lower);
1052 44621 : ushiftbits = BITS_PER_BITMAPWORD - (BITNUM(upper) + 1);
1053 :
1054 : /*
1055 : * Special case when lwordnum is the same as uwordnum we must perform the
1056 : * upper and lower masking on the word.
1057 : */
1058 44621 : if (lwordnum == uwordnum)
1059 : {
1060 43688 : a->words[lwordnum] |= ~(bitmapword) (((bitmapword) 1 << lbitnum) - 1)
1061 43688 : & (~(bitmapword) 0) >> ushiftbits;
1062 : }
1063 : else
1064 : {
1065 : /* turn on lbitnum and all bits left of it */
1066 933 : a->words[wordnum++] |= ~(bitmapword) (((bitmapword) 1 << lbitnum) - 1);
1067 :
1068 : /* turn on all bits for any intermediate words */
1069 951 : while (wordnum < uwordnum)
1070 18 : a->words[wordnum++] = ~(bitmapword) 0;
1071 :
1072 : /* turn on upper's bit and all bits right of it. */
1073 933 : a->words[uwordnum] |= (~(bitmapword) 0) >> ushiftbits;
1074 : }
1075 :
1076 : #ifdef REALLOCATE_BITMAPSETS
1077 :
1078 : /*
1079 : * There's no guarantee that the repalloc returned a new pointer, so copy
1080 : * and free unconditionally here.
1081 : */
1082 : a = bms_copy_and_free(a);
1083 : #endif
1084 :
1085 44621 : return a;
1086 : }
1087 :
1088 : /*
1089 : * bms_int_members - like bms_intersect, but left input is recycled when
1090 : * possible
1091 : */
1092 : Bitmapset *
1093 621020 : bms_int_members(Bitmapset *a, const Bitmapset *b)
1094 : {
1095 : int lastnonzero;
1096 : int shortlen;
1097 : int i;
1098 :
1099 : Assert(bms_is_valid_set(a));
1100 : Assert(bms_is_valid_set(b));
1101 :
1102 : /* Handle cases where either input is NULL */
1103 621020 : if (a == NULL)
1104 20373 : return NULL;
1105 600647 : if (b == NULL)
1106 : {
1107 3810 : pfree(a);
1108 3810 : return NULL;
1109 : }
1110 :
1111 : /* Intersect b into a; we need never copy */
1112 596837 : shortlen = Min(a->nwords, b->nwords);
1113 596837 : lastnonzero = -1;
1114 596837 : i = 0;
1115 : do
1116 : {
1117 596838 : a->words[i] &= b->words[i];
1118 :
1119 596838 : if (a->words[i] != 0)
1120 502173 : lastnonzero = i;
1121 596838 : } while (++i < shortlen);
1122 :
1123 : /* If we computed an empty result, we must return NULL */
1124 596837 : if (lastnonzero == -1)
1125 : {
1126 94665 : pfree(a);
1127 94665 : return NULL;
1128 : }
1129 :
1130 : /* get rid of trailing zero words */
1131 502172 : a->nwords = lastnonzero + 1;
1132 :
1133 : #ifdef REALLOCATE_BITMAPSETS
1134 : a = bms_copy_and_free(a);
1135 : #endif
1136 :
1137 502172 : return a;
1138 : }
1139 :
1140 : /*
1141 : * bms_del_members - delete members in 'a' that are set in 'b'. 'a' is
1142 : * recycled when possible.
1143 : */
1144 : Bitmapset *
1145 1811035 : bms_del_members(Bitmapset *a, const Bitmapset *b)
1146 : {
1147 : int i;
1148 :
1149 : Assert(bms_is_valid_set(a));
1150 : Assert(bms_is_valid_set(b));
1151 :
1152 : /* Handle cases where either input is NULL */
1153 1811035 : if (a == NULL)
1154 790797 : return NULL;
1155 1020238 : if (b == NULL)
1156 : {
1157 : #ifdef REALLOCATE_BITMAPSETS
1158 : a = bms_copy_and_free(a);
1159 : #endif
1160 :
1161 161650 : return a;
1162 : }
1163 :
1164 : /* Remove b's bits from a; we need never copy */
1165 858588 : if (a->nwords > b->nwords)
1166 : {
1167 : /*
1168 : * We'll never need to remove trailing zero words when 'a' has more
1169 : * words than 'b'.
1170 : */
1171 1 : i = 0;
1172 : do
1173 : {
1174 1 : a->words[i] &= ~b->words[i];
1175 1 : } while (++i < b->nwords);
1176 : }
1177 : else
1178 : {
1179 858587 : int lastnonzero = -1;
1180 :
1181 : /* we may need to remove trailing zero words from the result. */
1182 858587 : i = 0;
1183 : do
1184 : {
1185 858595 : a->words[i] &= ~b->words[i];
1186 :
1187 : /* remember the last non-zero word */
1188 858595 : if (a->words[i] != 0)
1189 193121 : lastnonzero = i;
1190 858595 : } while (++i < a->nwords);
1191 :
1192 : /* check if 'a' has become empty */
1193 858587 : if (lastnonzero == -1)
1194 : {
1195 665468 : pfree(a);
1196 665468 : return NULL;
1197 : }
1198 :
1199 : /* trim off any trailing zero words */
1200 193119 : a->nwords = lastnonzero + 1;
1201 : }
1202 :
1203 : #ifdef REALLOCATE_BITMAPSETS
1204 : a = bms_copy_and_free(a);
1205 : #endif
1206 :
1207 193120 : return a;
1208 : }
1209 :
1210 : /*
1211 : * bms_join - like bms_union, but *either* input *may* be recycled
1212 : */
1213 : Bitmapset *
1214 1306990 : bms_join(Bitmapset *a, Bitmapset *b)
1215 : {
1216 : Bitmapset *result;
1217 : Bitmapset *other;
1218 : int otherlen;
1219 : int i;
1220 :
1221 : Assert(bms_is_valid_set(a));
1222 : Assert(bms_is_valid_set(b));
1223 :
1224 : /* Handle cases where either input is NULL */
1225 1306990 : if (a == NULL)
1226 : {
1227 : #ifdef REALLOCATE_BITMAPSETS
1228 : b = bms_copy_and_free(b);
1229 : #endif
1230 :
1231 522694 : return b;
1232 : }
1233 784296 : if (b == NULL)
1234 : {
1235 : #ifdef REALLOCATE_BITMAPSETS
1236 : a = bms_copy_and_free(a);
1237 : #endif
1238 :
1239 146130 : return a;
1240 : }
1241 :
1242 : /* Identify shorter and longer input; use longer one as result */
1243 638166 : if (a->nwords < b->nwords)
1244 : {
1245 2 : result = b;
1246 2 : other = a;
1247 : }
1248 : else
1249 : {
1250 638164 : result = a;
1251 638164 : other = b;
1252 : }
1253 : /* And union the shorter input into the result */
1254 638166 : otherlen = other->nwords;
1255 638166 : i = 0;
1256 : do
1257 : {
1258 638166 : result->words[i] |= other->words[i];
1259 638166 : } while (++i < otherlen);
1260 638166 : if (other != result) /* pure paranoia */
1261 638166 : pfree(other);
1262 :
1263 : #ifdef REALLOCATE_BITMAPSETS
1264 : result = bms_copy_and_free(result);
1265 : #endif
1266 :
1267 638166 : return result;
1268 : }
1269 :
1270 : /*
1271 : * bms_next_member - find next member of a set
1272 : *
1273 : * Returns smallest member greater than "prevbit", or -2 if there is none.
1274 : * "prevbit" must NOT be less than -1, or the behavior is unpredictable.
1275 : *
1276 : * This is intended as support for iterating through the members of a set.
1277 : * The typical pattern is
1278 : *
1279 : * x = -1;
1280 : * while ((x = bms_next_member(inputset, x)) >= 0)
1281 : * process member x;
1282 : *
1283 : * Notice that when there are no more members, we return -2, not -1 as you
1284 : * might expect. The rationale for that is to allow distinguishing the
1285 : * loop-not-started state (x == -1) from the loop-completed state (x == -2).
1286 : * It makes no difference in simple loop usage, but complex iteration logic
1287 : * might need such an ability.
1288 : */
1289 : int
1290 36259742 : bms_next_member(const Bitmapset *a, int prevbit)
1291 : {
1292 : int nwords;
1293 : bitmapword mask;
1294 :
1295 : Assert(bms_is_valid_set(a));
1296 :
1297 36259742 : if (a == NULL)
1298 14669015 : return -2;
1299 21590727 : nwords = a->nwords;
1300 21590727 : prevbit++;
1301 21590727 : mask = (~(bitmapword) 0) << BITNUM(prevbit);
1302 28899603 : for (int wordnum = WORDNUM(prevbit); wordnum < nwords; wordnum++)
1303 : {
1304 21592465 : bitmapword w = a->words[wordnum];
1305 :
1306 : /* ignore bits before prevbit */
1307 21592465 : w &= mask;
1308 :
1309 21592465 : if (w != 0)
1310 : {
1311 : int result;
1312 :
1313 14283589 : result = wordnum * BITS_PER_BITMAPWORD;
1314 14283589 : result += bmw_rightmost_one_pos(w);
1315 14283589 : return result;
1316 : }
1317 :
1318 : /* in subsequent words, consider all bits */
1319 7308876 : mask = (~(bitmapword) 0);
1320 : }
1321 7307138 : return -2;
1322 : }
1323 :
1324 : /*
1325 : * bms_prev_member - find prev member of a set
1326 : *
1327 : * Returns largest member less than "prevbit", or -2 if there is none.
1328 : * "prevbit" must NOT be more than one above the highest possible bit that can
1329 : * be set in the Bitmapset at its current size.
1330 : *
1331 : * To ease finding the highest set bit for the initial loop, the special
1332 : * prevbit value of -1 can be passed to have the function find the highest
1333 : * valued member in the set.
1334 : *
1335 : * This is intended as support for iterating through the members of a set in
1336 : * reverse. The typical pattern is
1337 : *
1338 : * x = -1;
1339 : * while ((x = bms_prev_member(inputset, x)) >= 0)
1340 : * process member x;
1341 : *
1342 : * Notice that when there are no more members, we return -2, not -1 as you
1343 : * might expect. The rationale for that is to allow distinguishing the
1344 : * loop-not-started state (x == -1) from the loop-completed state (x == -2).
1345 : * It makes no difference in simple loop usage, but complex iteration logic
1346 : * might need such an ability.
1347 : */
1348 :
1349 : int
1350 18 : bms_prev_member(const Bitmapset *a, int prevbit)
1351 : {
1352 : int ushiftbits;
1353 : bitmapword mask;
1354 :
1355 : Assert(bms_is_valid_set(a));
1356 :
1357 : /*
1358 : * If set is NULL or if there are no more bits to the right then we've
1359 : * nothing to do.
1360 : */
1361 18 : if (a == NULL || prevbit == 0)
1362 2 : return -2;
1363 :
1364 : /* Validate callers didn't give us something out of range */
1365 : Assert(prevbit <= a->nwords * BITS_PER_BITMAPWORD);
1366 : Assert(prevbit >= -1);
1367 :
1368 : /* transform -1 to the highest possible bit we could have set */
1369 16 : if (prevbit == -1)
1370 1 : prevbit = a->nwords * BITS_PER_BITMAPWORD - 1;
1371 : else
1372 15 : prevbit--;
1373 :
1374 16 : ushiftbits = BITS_PER_BITMAPWORD - (BITNUM(prevbit) + 1);
1375 16 : mask = (~(bitmapword) 0) >> ushiftbits;
1376 21 : for (int wordnum = WORDNUM(prevbit); wordnum >= 0; wordnum--)
1377 : {
1378 16 : bitmapword w = a->words[wordnum];
1379 :
1380 : /* mask out bits left of prevbit */
1381 16 : w &= mask;
1382 :
1383 16 : if (w != 0)
1384 : {
1385 : int result;
1386 :
1387 11 : result = wordnum * BITS_PER_BITMAPWORD;
1388 11 : result += bmw_leftmost_one_pos(w);
1389 11 : return result;
1390 : }
1391 :
1392 : /* in subsequent words, consider all bits */
1393 5 : mask = (~(bitmapword) 0);
1394 : }
1395 5 : return -2;
1396 : }
1397 :
1398 : /*
1399 : * bms_hash_value - compute a hash key for a Bitmapset
1400 : */
1401 : uint32
1402 5121 : bms_hash_value(const Bitmapset *a)
1403 : {
1404 : Assert(bms_is_valid_set(a));
1405 :
1406 5121 : if (a == NULL)
1407 4 : return 0; /* All empty sets hash to 0 */
1408 5117 : return DatumGetUInt32(hash_any((const unsigned char *) a->words,
1409 5117 : a->nwords * sizeof(bitmapword)));
1410 : }
1411 :
1412 : /*
1413 : * bitmap_hash - hash function for keys that are (pointers to) Bitmapsets
1414 : *
1415 : * Note: don't forget to specify bitmap_match as the match function!
1416 : */
1417 : uint32
1418 5114 : bitmap_hash(const void *key, Size keysize)
1419 : {
1420 : Assert(keysize == sizeof(Bitmapset *));
1421 5114 : return bms_hash_value(*((const Bitmapset *const *) key));
1422 : }
1423 :
1424 : /*
1425 : * bitmap_match - match function to use with bitmap_hash
1426 : */
1427 : int
1428 2885 : bitmap_match(const void *key1, const void *key2, Size keysize)
1429 : {
1430 : Assert(keysize == sizeof(Bitmapset *));
1431 2885 : return !bms_equal(*((const Bitmapset *const *) key1),
1432 : *((const Bitmapset *const *) key2));
1433 : }
|