Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * bitmapset.c
4 : * PostgreSQL generic bitmap set package
5 : *
6 : * A bitmap set can represent any set of nonnegative integers, although
7 : * it is mainly intended for sets where the maximum value is not large,
8 : * say at most a few hundred. By convention, we always represent a set with
9 : * the minimum possible number of words, i.e, there are never any trailing
10 : * zero words. Enforcing this requires that an empty set is represented as
11 : * NULL. Because an empty Bitmapset is represented as NULL, a non-NULL
12 : * Bitmapset always has at least 1 Bitmapword. We can exploit this fact to
13 : * speed up various loops over the Bitmapset's words array by using "do while"
14 : * loops instead of "for" loops. This means the code does not waste time
15 : * checking the loop condition before the first iteration. For Bitmapsets
16 : * containing only a single word (likely the majority of them) this halves the
17 : * number of loop condition checks.
18 : *
19 : * Callers must ensure that the set returned by functions in this file which
20 : * adjust the members of an existing set is assigned to all pointers pointing
21 : * to that existing set. No guarantees are made that we'll ever modify the
22 : * existing set in-place and return it.
23 : *
24 : * To help find bugs caused by callers failing to record the return value of
25 : * the function which manipulates an existing set, we support building with
26 : * REALLOCATE_BITMAPSETS. This results in the set being reallocated each time
27 : * the set is altered and the existing being pfreed. This is useful as if any
28 : * references still exist to the old set, we're more likely to notice as
29 : * any users of the old set will be accessing pfree'd memory. This option is
30 : * only intended to be used for debugging.
31 : *
32 : * Copyright (c) 2003-2025, PostgreSQL Global Development Group
33 : *
34 : * IDENTIFICATION
35 : * src/backend/nodes/bitmapset.c
36 : *
37 : *-------------------------------------------------------------------------
38 : */
39 : #include "postgres.h"
40 :
41 : #include "common/hashfn.h"
42 : #include "nodes/bitmapset.h"
43 : #include "nodes/pg_list.h"
44 : #include "port/pg_bitutils.h"
45 :
46 :
47 : #define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD)
48 : #define BITNUM(x) ((x) % BITS_PER_BITMAPWORD)
49 :
50 : #define BITMAPSET_SIZE(nwords) \
51 : (offsetof(Bitmapset, words) + (nwords) * sizeof(bitmapword))
52 :
53 : /*----------
54 : * This is a well-known cute trick for isolating the rightmost one-bit
55 : * in a word. It assumes two's complement arithmetic. Consider any
56 : * nonzero value, and focus attention on the rightmost one. The value is
57 : * then something like
58 : * xxxxxx10000
59 : * where x's are unspecified bits. The two's complement negative is formed
60 : * by inverting all the bits and adding one. Inversion gives
61 : * yyyyyy01111
62 : * where each y is the inverse of the corresponding x. Incrementing gives
63 : * yyyyyy10000
64 : * and then ANDing with the original value gives
65 : * 00000010000
66 : * This works for all cases except original value = zero, where of course
67 : * we get zero.
68 : *----------
69 : */
70 : #define RIGHTMOST_ONE(x) ((signedbitmapword) (x) & -((signedbitmapword) (x)))
71 :
72 : #define HAS_MULTIPLE_ONES(x) ((bitmapword) RIGHTMOST_ONE(x) != (x))
73 :
74 : #ifdef USE_ASSERT_CHECKING
75 : /*
76 : * bms_is_valid_set - for cassert builds to check for valid sets
77 : */
78 : static bool
79 : bms_is_valid_set(const Bitmapset *a)
80 : {
81 : /* NULL is the correct representation of an empty set */
82 : if (a == NULL)
83 : return true;
84 :
85 : /* check the node tag is set correctly. pfree'd pointer, maybe? */
86 : if (!IsA(a, Bitmapset))
87 : return false;
88 :
89 : /* trailing zero words are not allowed */
90 : if (a->words[a->nwords - 1] == 0)
91 : return false;
92 :
93 : return true;
94 : }
95 : #endif
96 :
97 : #ifdef REALLOCATE_BITMAPSETS
98 : /*
99 : * bms_copy_and_free
100 : * Only required in REALLOCATE_BITMAPSETS builds. Provide a simple way
101 : * to return a freshly allocated set and pfree the original.
102 : *
103 : * Note: callers which accept multiple sets must be careful when calling this
104 : * function to clone one parameter as other parameters may point to the same
105 : * set. A good option is to call this just before returning the resulting
106 : * set.
107 : */
108 : static Bitmapset *
109 : bms_copy_and_free(Bitmapset *a)
110 : {
111 : Bitmapset *c = bms_copy(a);
112 :
113 : bms_free(a);
114 : return c;
115 : }
116 : #endif
117 :
118 : /*
119 : * bms_copy - make a palloc'd copy of a bitmapset
120 : */
121 : Bitmapset *
122 55040754 : bms_copy(const Bitmapset *a)
123 : {
124 : Bitmapset *result;
125 : size_t size;
126 :
127 : Assert(bms_is_valid_set(a));
128 :
129 55040754 : if (a == NULL)
130 35905010 : return NULL;
131 :
132 19135744 : size = BITMAPSET_SIZE(a->nwords);
133 19135744 : result = (Bitmapset *) palloc(size);
134 19135744 : memcpy(result, a, size);
135 19135744 : return result;
136 : }
137 :
138 : /*
139 : * bms_equal - are two bitmapsets equal? or both NULL?
140 : */
141 : bool
142 18545966 : bms_equal(const Bitmapset *a, const Bitmapset *b)
143 : {
144 : int i;
145 :
146 : Assert(bms_is_valid_set(a));
147 : Assert(bms_is_valid_set(b));
148 :
149 : /* Handle cases where either input is NULL */
150 18545966 : if (a == NULL)
151 : {
152 12620442 : if (b == NULL)
153 12555770 : return true;
154 64672 : return false;
155 : }
156 5925524 : else if (b == NULL)
157 24466 : return false;
158 :
159 : /* can't be equal if the word counts don't match */
160 5901058 : if (a->nwords != b->nwords)
161 4 : return false;
162 :
163 : /* check each word matches */
164 5901054 : i = 0;
165 : do
166 : {
167 5901258 : if (a->words[i] != b->words[i])
168 2949360 : return false;
169 2951898 : } while (++i < a->nwords);
170 :
171 2951694 : return true;
172 : }
173 :
174 : /*
175 : * bms_compare - qsort-style comparator for bitmapsets
176 : *
177 : * This guarantees to report values as equal iff bms_equal would say they are
178 : * equal. Otherwise, the highest-numbered bit that is set in one value but
179 : * not the other determines the result. (This rule means that, for example,
180 : * {6} is greater than {5}, which seems plausible.)
181 : */
182 : int
183 25080 : bms_compare(const Bitmapset *a, const Bitmapset *b)
184 : {
185 : int i;
186 :
187 : Assert(bms_is_valid_set(a));
188 : Assert(bms_is_valid_set(b));
189 :
190 : /* Handle cases where either input is NULL */
191 25080 : if (a == NULL)
192 8 : return (b == NULL) ? 0 : -1;
193 25072 : else if (b == NULL)
194 4 : return +1;
195 :
196 : /* the set with the most words must be greater */
197 25068 : if (a->nwords != b->nwords)
198 2 : return (a->nwords > b->nwords) ? +1 : -1;
199 :
200 25066 : i = a->nwords - 1;
201 : do
202 : {
203 25066 : bitmapword aw = a->words[i];
204 25066 : bitmapword bw = b->words[i];
205 :
206 25066 : if (aw != bw)
207 25064 : return (aw > bw) ? +1 : -1;
208 2 : } while (--i >= 0);
209 2 : return 0;
210 : }
211 :
212 : /*
213 : * bms_make_singleton - build a bitmapset containing a single member
214 : */
215 : Bitmapset *
216 15786112 : bms_make_singleton(int x)
217 : {
218 : Bitmapset *result;
219 : int wordnum,
220 : bitnum;
221 :
222 15786112 : if (x < 0)
223 2 : elog(ERROR, "negative bitmapset member not allowed");
224 15786110 : wordnum = WORDNUM(x);
225 15786110 : bitnum = BITNUM(x);
226 15786110 : result = (Bitmapset *) palloc0(BITMAPSET_SIZE(wordnum + 1));
227 15786110 : result->type = T_Bitmapset;
228 15786110 : result->nwords = wordnum + 1;
229 15786110 : result->words[wordnum] = ((bitmapword) 1 << bitnum);
230 15786110 : return result;
231 : }
232 :
233 : /*
234 : * bms_free - free a bitmapset
235 : *
236 : * Same as pfree except for allowing NULL input
237 : */
238 : void
239 21603694 : bms_free(Bitmapset *a)
240 : {
241 21603694 : if (a)
242 9509024 : pfree(a);
243 21603694 : }
244 :
245 :
246 : /*
247 : * bms_union - create and return a new set containing all members from both
248 : * input sets. Both inputs are left unmodified.
249 : */
250 : Bitmapset *
251 8566640 : bms_union(const Bitmapset *a, const Bitmapset *b)
252 : {
253 : Bitmapset *result;
254 : const Bitmapset *other;
255 : int otherlen;
256 : int i;
257 :
258 : Assert(bms_is_valid_set(a));
259 : Assert(bms_is_valid_set(b));
260 :
261 : /* Handle cases where either input is NULL */
262 8566640 : if (a == NULL)
263 5471558 : return bms_copy(b);
264 3095082 : if (b == NULL)
265 1318924 : return bms_copy(a);
266 : /* Identify shorter and longer input; copy the longer one */
267 1776158 : if (a->nwords <= b->nwords)
268 : {
269 1776156 : result = bms_copy(b);
270 1776156 : other = a;
271 : }
272 : else
273 : {
274 2 : result = bms_copy(a);
275 2 : other = b;
276 : }
277 : /* And union the shorter input into the result */
278 1776158 : otherlen = other->nwords;
279 1776158 : i = 0;
280 : do
281 : {
282 1778716 : result->words[i] |= other->words[i];
283 1778716 : } while (++i < otherlen);
284 1776158 : return result;
285 : }
286 :
287 : /*
288 : * bms_intersect - create and return a new set containing members which both
289 : * input sets have in common. Both inputs are left unmodified.
290 : */
291 : Bitmapset *
292 1329792 : bms_intersect(const Bitmapset *a, const Bitmapset *b)
293 : {
294 : Bitmapset *result;
295 : const Bitmapset *other;
296 : int lastnonzero;
297 : int resultlen;
298 : int i;
299 :
300 : Assert(bms_is_valid_set(a));
301 : Assert(bms_is_valid_set(b));
302 :
303 : /* Handle cases where either input is NULL */
304 1329792 : if (a == NULL || b == NULL)
305 220830 : return NULL;
306 :
307 : /* Identify shorter and longer input; copy the shorter one */
308 1108962 : if (a->nwords <= b->nwords)
309 : {
310 1108960 : result = bms_copy(a);
311 1108960 : other = b;
312 : }
313 : else
314 : {
315 2 : result = bms_copy(b);
316 2 : other = a;
317 : }
318 : /* And intersect the longer input with the result */
319 1108962 : resultlen = result->nwords;
320 1108962 : lastnonzero = -1;
321 1108962 : i = 0;
322 : do
323 : {
324 1111520 : result->words[i] &= other->words[i];
325 :
326 1111520 : if (result->words[i] != 0)
327 1100686 : lastnonzero = i;
328 1111520 : } while (++i < resultlen);
329 : /* If we computed an empty result, we must return NULL */
330 1108962 : if (lastnonzero == -1)
331 : {
332 8558 : pfree(result);
333 8558 : return NULL;
334 : }
335 :
336 : /* get rid of trailing zero words */
337 1100404 : result->nwords = lastnonzero + 1;
338 1100404 : return result;
339 : }
340 :
341 : /*
342 : * bms_difference - create and return a new set containing all the members of
343 : * 'a' without the members of 'b'.
344 : */
345 : Bitmapset *
346 2264794 : bms_difference(const Bitmapset *a, const Bitmapset *b)
347 : {
348 : Bitmapset *result;
349 : int i;
350 :
351 : Assert(bms_is_valid_set(a));
352 : Assert(bms_is_valid_set(b));
353 :
354 : /* Handle cases where either input is NULL */
355 2264794 : if (a == NULL)
356 611686 : return NULL;
357 1653108 : if (b == NULL)
358 1195302 : return bms_copy(a);
359 :
360 : /*
361 : * In Postgres' usage, an empty result is a very common case, so it's
362 : * worth optimizing for that by testing bms_nonempty_difference(). This
363 : * saves us a palloc/pfree cycle compared to checking after-the-fact.
364 : */
365 457806 : if (!bms_nonempty_difference(a, b))
366 136866 : return NULL;
367 :
368 : /* Copy the left input */
369 320940 : result = bms_copy(a);
370 :
371 : /* And remove b's bits from result */
372 320940 : if (result->nwords > b->nwords)
373 : {
374 : /*
375 : * We'll never need to remove trailing zero words when 'a' has more
376 : * words than 'b' as the additional words must be non-zero.
377 : */
378 4 : i = 0;
379 : do
380 : {
381 4 : result->words[i] &= ~b->words[i];
382 4 : } while (++i < b->nwords);
383 : }
384 : else
385 : {
386 320936 : int lastnonzero = -1;
387 :
388 : /* we may need to remove trailing zero words from the result. */
389 320936 : i = 0;
390 : do
391 : {
392 320938 : result->words[i] &= ~b->words[i];
393 :
394 : /* remember the last non-zero word */
395 320938 : if (result->words[i] != 0)
396 320936 : lastnonzero = i;
397 320938 : } while (++i < result->nwords);
398 :
399 : /* trim off trailing zero words */
400 320936 : result->nwords = lastnonzero + 1;
401 : }
402 : Assert(result->nwords != 0);
403 :
404 : /* Need not check for empty result, since we handled that case above */
405 320940 : return result;
406 : }
407 :
408 : /*
409 : * bms_is_subset - is A a subset of B?
410 : */
411 : bool
412 21270054 : bms_is_subset(const Bitmapset *a, const Bitmapset *b)
413 : {
414 : int i;
415 :
416 : Assert(bms_is_valid_set(a));
417 : Assert(bms_is_valid_set(b));
418 :
419 : /* Handle cases where either input is NULL */
420 21270054 : if (a == NULL)
421 1956198 : return true; /* empty set is a subset of anything */
422 19313856 : if (b == NULL)
423 420268 : return false;
424 :
425 : /* 'a' can't be a subset of 'b' if it contains more words */
426 18893588 : if (a->nwords > b->nwords)
427 4 : return false;
428 :
429 : /* Check all 'a' members are set in 'b' */
430 18893584 : i = 0;
431 : do
432 : {
433 18893584 : if ((a->words[i] & ~b->words[i]) != 0)
434 7288172 : return false;
435 11605412 : } while (++i < a->nwords);
436 11605412 : return true;
437 : }
438 :
439 : /*
440 : * bms_subset_compare - compare A and B for equality/subset relationships
441 : *
442 : * This is more efficient than testing bms_is_subset in both directions.
443 : */
444 : BMS_Comparison
445 2719250 : bms_subset_compare(const Bitmapset *a, const Bitmapset *b)
446 : {
447 : BMS_Comparison result;
448 : int shortlen;
449 : int i;
450 :
451 : Assert(bms_is_valid_set(a));
452 : Assert(bms_is_valid_set(b));
453 :
454 : /* Handle cases where either input is NULL */
455 2719250 : if (a == NULL)
456 : {
457 2299846 : if (b == NULL)
458 2229556 : return BMS_EQUAL;
459 70290 : return BMS_SUBSET1;
460 : }
461 419404 : if (b == NULL)
462 201000 : return BMS_SUBSET2;
463 :
464 : /* Check common words */
465 218404 : result = BMS_EQUAL; /* status so far */
466 218404 : shortlen = Min(a->nwords, b->nwords);
467 218404 : i = 0;
468 : do
469 : {
470 218410 : bitmapword aword = a->words[i];
471 218410 : bitmapword bword = b->words[i];
472 :
473 218410 : if ((aword & ~bword) != 0)
474 : {
475 : /* a is not a subset of b */
476 58692 : if (result == BMS_SUBSET1)
477 4 : return BMS_DIFFERENT;
478 58688 : result = BMS_SUBSET2;
479 : }
480 218406 : if ((bword & ~aword) != 0)
481 : {
482 : /* b is not a subset of a */
483 59176 : if (result == BMS_SUBSET2)
484 54786 : return BMS_DIFFERENT;
485 4390 : result = BMS_SUBSET1;
486 : }
487 163620 : } while (++i < shortlen);
488 : /* Check extra words */
489 163614 : if (a->nwords > b->nwords)
490 : {
491 : /* if a has more words then a is not a subset of b */
492 4 : if (result == BMS_SUBSET1)
493 2 : return BMS_DIFFERENT;
494 2 : return BMS_SUBSET2;
495 : }
496 163610 : else if (a->nwords < b->nwords)
497 : {
498 : /* if b has more words then b is not a subset of a */
499 8 : if (result == BMS_SUBSET2)
500 4 : return BMS_DIFFERENT;
501 4 : return BMS_SUBSET1;
502 : }
503 163602 : return result;
504 : }
505 :
506 : /*
507 : * bms_is_member - is X a member of A?
508 : */
509 : bool
510 16230508 : bms_is_member(int x, const Bitmapset *a)
511 : {
512 : int wordnum,
513 : bitnum;
514 :
515 : Assert(bms_is_valid_set(a));
516 :
517 : /* XXX better to just return false for x<0 ? */
518 16230508 : if (x < 0)
519 2 : elog(ERROR, "negative bitmapset member not allowed");
520 16230506 : if (a == NULL)
521 9233996 : return false;
522 :
523 6996510 : wordnum = WORDNUM(x);
524 6996510 : bitnum = BITNUM(x);
525 6996510 : if (wordnum >= a->nwords)
526 780 : return false;
527 6995730 : if ((a->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0)
528 4980278 : return true;
529 2015452 : return false;
530 : }
531 :
532 : /*
533 : * bms_member_index
534 : * determine 0-based index of member x in the bitmap
535 : *
536 : * Returns (-1) when x is not a member.
537 : */
538 : int
539 5318 : bms_member_index(Bitmapset *a, int x)
540 : {
541 : int bitnum;
542 : int wordnum;
543 5318 : int result = 0;
544 : bitmapword mask;
545 :
546 : Assert(bms_is_valid_set(a));
547 :
548 : /* return -1 if not a member of the bitmap */
549 5318 : if (!bms_is_member(x, a))
550 4 : return -1;
551 :
552 5314 : wordnum = WORDNUM(x);
553 5314 : bitnum = BITNUM(x);
554 :
555 : /* count bits in preceding words */
556 5328 : for (int i = 0; i < wordnum; i++)
557 : {
558 14 : bitmapword w = a->words[i];
559 :
560 : /* No need to count the bits in a zero word */
561 14 : if (w != 0)
562 6 : result += bmw_popcount(w);
563 : }
564 :
565 : /*
566 : * Now add bits of the last word, but only those before the item. We can
567 : * do that by applying a mask and then using popcount again. To get
568 : * 0-based index, we want to count only preceding bits, not the item
569 : * itself, so we subtract 1.
570 : */
571 5314 : mask = ((bitmapword) 1 << bitnum) - 1;
572 5314 : result += bmw_popcount(a->words[wordnum] & mask);
573 :
574 5314 : return result;
575 : }
576 :
577 : /*
578 : * bms_overlap - do sets overlap (ie, have a nonempty intersection)?
579 : */
580 : bool
581 14528002 : bms_overlap(const Bitmapset *a, const Bitmapset *b)
582 : {
583 : int shortlen;
584 : int i;
585 :
586 : Assert(bms_is_valid_set(a));
587 : Assert(bms_is_valid_set(b));
588 :
589 : /* Handle cases where either input is NULL */
590 14528002 : if (a == NULL || b == NULL)
591 6165298 : return false;
592 : /* Check words in common */
593 8362704 : shortlen = Min(a->nwords, b->nwords);
594 8362704 : i = 0;
595 : do
596 : {
597 8362704 : if ((a->words[i] & b->words[i]) != 0)
598 5377078 : return true;
599 2985626 : } while (++i < shortlen);
600 2985626 : return false;
601 : }
602 :
603 : /*
604 : * bms_overlap_list - does a set overlap an integer list?
605 : */
606 : bool
607 1604 : bms_overlap_list(const Bitmapset *a, const List *b)
608 : {
609 : ListCell *lc;
610 : int wordnum,
611 : bitnum;
612 :
613 : Assert(bms_is_valid_set(a));
614 :
615 1604 : if (a == NULL || b == NIL)
616 1452 : return false;
617 :
618 284 : foreach(lc, b)
619 : {
620 222 : int x = lfirst_int(lc);
621 :
622 222 : if (x < 0)
623 4 : elog(ERROR, "negative bitmapset member not allowed");
624 218 : wordnum = WORDNUM(x);
625 218 : bitnum = BITNUM(x);
626 218 : if (wordnum < a->nwords)
627 218 : if ((a->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0)
628 86 : return true;
629 : }
630 :
631 62 : return false;
632 : }
633 :
634 : /*
635 : * bms_nonempty_difference - do sets have a nonempty difference?
636 : *
637 : * i.e., are any members set in 'a' that are not also set in 'b'.
638 : */
639 : bool
640 3045516 : bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b)
641 : {
642 : int i;
643 :
644 : Assert(bms_is_valid_set(a));
645 : Assert(bms_is_valid_set(b));
646 :
647 : /* Handle cases where either input is NULL */
648 3045516 : if (a == NULL)
649 5372 : return false;
650 3040144 : if (b == NULL)
651 2 : return true;
652 : /* if 'a' has more words then it must contain additional members */
653 3040142 : if (a->nwords > b->nwords)
654 6 : return true;
655 : /* Check all 'a' members are set in 'b' */
656 3040136 : i = 0;
657 : do
658 : {
659 3040136 : if ((a->words[i] & ~b->words[i]) != 0)
660 1744182 : return true;
661 1295954 : } while (++i < a->nwords);
662 1295954 : return false;
663 : }
664 :
665 : /*
666 : * bms_singleton_member - return the sole integer member of set
667 : *
668 : * Raises error if |a| is not 1.
669 : */
670 : int
671 21816 : bms_singleton_member(const Bitmapset *a)
672 : {
673 21816 : int result = -1;
674 : int nwords;
675 : int wordnum;
676 :
677 : Assert(bms_is_valid_set(a));
678 :
679 21816 : if (a == NULL)
680 2 : elog(ERROR, "bitmapset is empty");
681 :
682 21814 : nwords = a->nwords;
683 21814 : wordnum = 0;
684 : do
685 : {
686 21814 : bitmapword w = a->words[wordnum];
687 :
688 21814 : if (w != 0)
689 : {
690 21814 : if (result >= 0 || HAS_MULTIPLE_ONES(w))
691 2 : elog(ERROR, "bitmapset has multiple members");
692 21812 : result = wordnum * BITS_PER_BITMAPWORD;
693 21812 : result += bmw_rightmost_one_pos(w);
694 : }
695 21812 : } while (++wordnum < nwords);
696 :
697 : /* we don't expect non-NULL sets to be empty */
698 : Assert(result >= 0);
699 21812 : return result;
700 : }
701 :
702 : /*
703 : * bms_get_singleton_member
704 : *
705 : * Test whether the given set is a singleton.
706 : * If so, set *member to the value of its sole member, and return true.
707 : * If not, return false, without changing *member.
708 : *
709 : * This is more convenient and faster than calling bms_membership() and then
710 : * bms_singleton_member(), if we don't care about distinguishing empty sets
711 : * from multiple-member sets.
712 : */
713 : bool
714 2585456 : bms_get_singleton_member(const Bitmapset *a, int *member)
715 : {
716 2585456 : int result = -1;
717 : int nwords;
718 : int wordnum;
719 :
720 : Assert(bms_is_valid_set(a));
721 :
722 2585456 : if (a == NULL)
723 4 : return false;
724 :
725 2585452 : nwords = a->nwords;
726 2585452 : wordnum = 0;
727 : do
728 : {
729 2585464 : bitmapword w = a->words[wordnum];
730 :
731 2585464 : if (w != 0)
732 : {
733 2585452 : if (result >= 0 || HAS_MULTIPLE_ONES(w))
734 463256 : return false;
735 2122196 : result = wordnum * BITS_PER_BITMAPWORD;
736 2122196 : result += bmw_rightmost_one_pos(w);
737 : }
738 2122208 : } while (++wordnum < nwords);
739 :
740 : /* we don't expect non-NULL sets to be empty */
741 : Assert(result >= 0);
742 2122196 : *member = result;
743 2122196 : return true;
744 : }
745 :
746 : /*
747 : * bms_num_members - count members of set
748 : */
749 : int
750 1951402 : bms_num_members(const Bitmapset *a)
751 : {
752 1951402 : int result = 0;
753 : int nwords;
754 : int wordnum;
755 :
756 : Assert(bms_is_valid_set(a));
757 :
758 1951402 : if (a == NULL)
759 210806 : return 0;
760 :
761 1740596 : nwords = a->nwords;
762 1740596 : wordnum = 0;
763 : do
764 : {
765 1740598 : bitmapword w = a->words[wordnum];
766 :
767 : /* No need to count the bits in a zero word */
768 1740598 : if (w != 0)
769 1740598 : result += bmw_popcount(w);
770 1740598 : } while (++wordnum < nwords);
771 1740596 : return result;
772 : }
773 :
774 : /*
775 : * bms_membership - does a set have zero, one, or multiple members?
776 : *
777 : * This is faster than making an exact count with bms_num_members().
778 : */
779 : BMS_Membership
780 1340062 : bms_membership(const Bitmapset *a)
781 : {
782 1340062 : BMS_Membership result = BMS_EMPTY_SET;
783 : int nwords;
784 : int wordnum;
785 :
786 : Assert(bms_is_valid_set(a));
787 :
788 1340062 : if (a == NULL)
789 486 : return BMS_EMPTY_SET;
790 :
791 1339576 : nwords = a->nwords;
792 1339576 : wordnum = 0;
793 : do
794 : {
795 1339576 : bitmapword w = a->words[wordnum];
796 :
797 1339576 : if (w != 0)
798 : {
799 1339576 : if (result != BMS_EMPTY_SET || HAS_MULTIPLE_ONES(w))
800 476382 : return BMS_MULTIPLE;
801 863194 : result = BMS_SINGLETON;
802 : }
803 863194 : } while (++wordnum < nwords);
804 863194 : return result;
805 : }
806 :
807 :
808 : /*
809 : * bms_add_member - add a specified member to set
810 : *
811 : * 'a' is recycled when possible.
812 : */
813 : Bitmapset *
814 22198950 : bms_add_member(Bitmapset *a, int x)
815 : {
816 : int wordnum,
817 : bitnum;
818 :
819 : Assert(bms_is_valid_set(a));
820 :
821 22198950 : if (x < 0)
822 4 : elog(ERROR, "negative bitmapset member not allowed");
823 22198946 : if (a == NULL)
824 11556898 : return bms_make_singleton(x);
825 :
826 10642048 : wordnum = WORDNUM(x);
827 10642048 : bitnum = BITNUM(x);
828 :
829 : /* enlarge the set if necessary */
830 10642048 : if (wordnum >= a->nwords)
831 : {
832 872 : int oldnwords = a->nwords;
833 : int i;
834 :
835 872 : a = (Bitmapset *) repalloc(a, BITMAPSET_SIZE(wordnum + 1));
836 872 : a->nwords = wordnum + 1;
837 : /* zero out the enlarged portion */
838 872 : i = oldnwords;
839 : do
840 : {
841 111690 : a->words[i] = 0;
842 111690 : } while (++i < a->nwords);
843 : }
844 :
845 10642048 : a->words[wordnum] |= ((bitmapword) 1 << bitnum);
846 :
847 : #ifdef REALLOCATE_BITMAPSETS
848 :
849 : /*
850 : * There's no guarantee that the repalloc returned a new pointer, so copy
851 : * and free unconditionally here.
852 : */
853 : a = bms_copy_and_free(a);
854 : #endif
855 :
856 10642048 : return a;
857 : }
858 :
859 : /*
860 : * bms_del_member - remove a specified member from set
861 : *
862 : * No error if x is not currently a member of set
863 : *
864 : * 'a' is recycled when possible.
865 : */
866 : Bitmapset *
867 1758490 : bms_del_member(Bitmapset *a, int x)
868 : {
869 : int wordnum,
870 : bitnum;
871 :
872 : Assert(bms_is_valid_set(a));
873 :
874 1758490 : if (x < 0)
875 2 : elog(ERROR, "negative bitmapset member not allowed");
876 1758488 : if (a == NULL)
877 639548 : return NULL;
878 :
879 1118940 : wordnum = WORDNUM(x);
880 1118940 : bitnum = BITNUM(x);
881 :
882 : #ifdef REALLOCATE_BITMAPSETS
883 : a = bms_copy_and_free(a);
884 : #endif
885 :
886 : /* member can't exist. Return 'a' unmodified */
887 1118940 : if (unlikely(wordnum >= a->nwords))
888 0 : return a;
889 :
890 1118940 : a->words[wordnum] &= ~((bitmapword) 1 << bitnum);
891 :
892 : /* when last word becomes empty, trim off all trailing empty words */
893 1118940 : if (a->words[wordnum] == 0 && wordnum == a->nwords - 1)
894 : {
895 : /* find the last non-empty word and make that the new final word */
896 794526 : for (int i = wordnum - 1; i >= 0; i--)
897 : {
898 239758 : if (a->words[i] != 0)
899 : {
900 552 : a->nwords = i + 1;
901 552 : return a;
902 : }
903 : }
904 :
905 : /* the set is now empty */
906 554768 : pfree(a);
907 554768 : return NULL;
908 : }
909 563620 : return a;
910 : }
911 :
912 : /*
913 : * bms_add_members - like bms_union, but left input is recycled when possible
914 : */
915 : Bitmapset *
916 15071026 : bms_add_members(Bitmapset *a, const Bitmapset *b)
917 : {
918 : Bitmapset *result;
919 : const Bitmapset *other;
920 : int otherlen;
921 : int i;
922 :
923 : Assert(bms_is_valid_set(a));
924 : Assert(bms_is_valid_set(b));
925 :
926 : /* Handle cases where either input is NULL */
927 15071026 : if (a == NULL)
928 7866438 : return bms_copy(b);
929 7204588 : if (b == NULL)
930 : {
931 : #ifdef REALLOCATE_BITMAPSETS
932 : a = bms_copy_and_free(a);
933 : #endif
934 :
935 4638088 : return a;
936 : }
937 : /* Identify shorter and longer input; copy the longer one if needed */
938 2566500 : if (a->nwords < b->nwords)
939 : {
940 2 : result = bms_copy(b);
941 2 : other = a;
942 : }
943 : else
944 : {
945 2566498 : result = a;
946 2566498 : other = b;
947 : }
948 : /* And union the shorter input into the result */
949 2566500 : otherlen = other->nwords;
950 2566500 : i = 0;
951 : do
952 : {
953 2566500 : result->words[i] |= other->words[i];
954 2566500 : } while (++i < otherlen);
955 2566500 : if (result != a)
956 2 : pfree(a);
957 : #ifdef REALLOCATE_BITMAPSETS
958 : else
959 : result = bms_copy_and_free(result);
960 : #endif
961 :
962 2566500 : return result;
963 : }
964 :
965 : /*
966 : * bms_replace_members
967 : * Remove all existing members from 'a' and repopulate the set with members
968 : * from 'b', recycling 'a', when possible.
969 : */
970 : Bitmapset *
971 13494 : bms_replace_members(Bitmapset *a, const Bitmapset *b)
972 : {
973 : int i;
974 :
975 : Assert(bms_is_valid_set(a));
976 : Assert(bms_is_valid_set(b));
977 :
978 13494 : if (a == NULL)
979 2 : return bms_copy(b);
980 13492 : if (b == NULL)
981 : {
982 2 : pfree(a);
983 2 : return NULL;
984 : }
985 :
986 13490 : if (a->nwords < b->nwords)
987 2 : a = (Bitmapset *) repalloc(a, BITMAPSET_SIZE(b->nwords));
988 :
989 13490 : i = 0;
990 : do
991 : {
992 13508 : a->words[i] = b->words[i];
993 13508 : } while (++i < b->nwords);
994 :
995 13490 : a->nwords = b->nwords;
996 :
997 : #ifdef REALLOCATE_BITMAPSETS
998 :
999 : /*
1000 : * There's no guarantee that the repalloc returned a new pointer, so copy
1001 : * and free unconditionally here.
1002 : */
1003 : a = bms_copy_and_free(a);
1004 : #endif
1005 :
1006 13490 : return a;
1007 : }
1008 :
1009 : /*
1010 : * bms_add_range
1011 : * Add members in the range of 'lower' to 'upper' to the set.
1012 : *
1013 : * Note this could also be done by calling bms_add_member in a loop, however,
1014 : * using this function will be faster when the range is large as we work at
1015 : * the bitmapword level rather than at bit level.
1016 : */
1017 : Bitmapset *
1018 65220 : bms_add_range(Bitmapset *a, int lower, int upper)
1019 : {
1020 : int lwordnum,
1021 : lbitnum,
1022 : uwordnum,
1023 : ushiftbits,
1024 : wordnum;
1025 :
1026 : Assert(bms_is_valid_set(a));
1027 :
1028 : /* do nothing if nothing is called for, without further checking */
1029 65220 : if (upper < lower)
1030 : {
1031 : #ifdef REALLOCATE_BITMAPSETS
1032 : a = bms_copy_and_free(a);
1033 : #endif
1034 :
1035 30 : return a;
1036 : }
1037 :
1038 65190 : if (lower < 0)
1039 2 : elog(ERROR, "negative bitmapset member not allowed");
1040 65188 : uwordnum = WORDNUM(upper);
1041 :
1042 65188 : if (a == NULL)
1043 : {
1044 45142 : a = (Bitmapset *) palloc0(BITMAPSET_SIZE(uwordnum + 1));
1045 45142 : a->type = T_Bitmapset;
1046 45142 : a->nwords = uwordnum + 1;
1047 : }
1048 20046 : else if (uwordnum >= a->nwords)
1049 : {
1050 4 : int oldnwords = a->nwords;
1051 : int i;
1052 :
1053 : /* ensure we have enough words to store the upper bit */
1054 4 : a = (Bitmapset *) repalloc(a, BITMAPSET_SIZE(uwordnum + 1));
1055 4 : a->nwords = uwordnum + 1;
1056 : /* zero out the enlarged portion */
1057 4 : i = oldnwords;
1058 : do
1059 : {
1060 8 : a->words[i] = 0;
1061 8 : } while (++i < a->nwords);
1062 : }
1063 :
1064 65188 : wordnum = lwordnum = WORDNUM(lower);
1065 :
1066 65188 : lbitnum = BITNUM(lower);
1067 65188 : ushiftbits = BITS_PER_BITMAPWORD - (BITNUM(upper) + 1);
1068 :
1069 : /*
1070 : * Special case when lwordnum is the same as uwordnum we must perform the
1071 : * upper and lower masking on the word.
1072 : */
1073 65188 : if (lwordnum == uwordnum)
1074 : {
1075 63278 : a->words[lwordnum] |= ~(bitmapword) (((bitmapword) 1 << lbitnum) - 1)
1076 63278 : & (~(bitmapword) 0) >> ushiftbits;
1077 : }
1078 : else
1079 : {
1080 : /* turn on lbitnum and all bits left of it */
1081 1910 : a->words[wordnum++] |= ~(bitmapword) (((bitmapword) 1 << lbitnum) - 1);
1082 :
1083 : /* turn on all bits for any intermediate words */
1084 1946 : while (wordnum < uwordnum)
1085 36 : a->words[wordnum++] = ~(bitmapword) 0;
1086 :
1087 : /* turn on upper's bit and all bits right of it. */
1088 1910 : a->words[uwordnum] |= (~(bitmapword) 0) >> ushiftbits;
1089 : }
1090 :
1091 : #ifdef REALLOCATE_BITMAPSETS
1092 :
1093 : /*
1094 : * There's no guarantee that the repalloc returned a new pointer, so copy
1095 : * and free unconditionally here.
1096 : */
1097 : a = bms_copy_and_free(a);
1098 : #endif
1099 :
1100 65188 : return a;
1101 : }
1102 :
1103 : /*
1104 : * bms_int_members - like bms_intersect, but left input is recycled when
1105 : * possible
1106 : */
1107 : Bitmapset *
1108 714140 : bms_int_members(Bitmapset *a, const Bitmapset *b)
1109 : {
1110 : int lastnonzero;
1111 : int shortlen;
1112 : int i;
1113 :
1114 : Assert(bms_is_valid_set(a));
1115 : Assert(bms_is_valid_set(b));
1116 :
1117 : /* Handle cases where either input is NULL */
1118 714140 : if (a == NULL)
1119 27806 : return NULL;
1120 686334 : if (b == NULL)
1121 : {
1122 5228 : pfree(a);
1123 5228 : return NULL;
1124 : }
1125 :
1126 : /* Intersect b into a; we need never copy */
1127 681106 : shortlen = Min(a->nwords, b->nwords);
1128 681106 : lastnonzero = -1;
1129 681106 : i = 0;
1130 : do
1131 : {
1132 681108 : a->words[i] &= b->words[i];
1133 :
1134 681108 : if (a->words[i] != 0)
1135 563430 : lastnonzero = i;
1136 681108 : } while (++i < shortlen);
1137 :
1138 : /* If we computed an empty result, we must return NULL */
1139 681106 : if (lastnonzero == -1)
1140 : {
1141 117678 : pfree(a);
1142 117678 : return NULL;
1143 : }
1144 :
1145 : /* get rid of trailing zero words */
1146 563428 : a->nwords = lastnonzero + 1;
1147 :
1148 : #ifdef REALLOCATE_BITMAPSETS
1149 : a = bms_copy_and_free(a);
1150 : #endif
1151 :
1152 563428 : return a;
1153 : }
1154 :
1155 : /*
1156 : * bms_del_members - delete members in 'a' that are set in 'b'. 'a' is
1157 : * recycled when possible.
1158 : */
1159 : Bitmapset *
1160 2273616 : bms_del_members(Bitmapset *a, const Bitmapset *b)
1161 : {
1162 : int i;
1163 :
1164 : Assert(bms_is_valid_set(a));
1165 : Assert(bms_is_valid_set(b));
1166 :
1167 : /* Handle cases where either input is NULL */
1168 2273616 : if (a == NULL)
1169 969084 : return NULL;
1170 1304532 : if (b == NULL)
1171 : {
1172 : #ifdef REALLOCATE_BITMAPSETS
1173 : a = bms_copy_and_free(a);
1174 : #endif
1175 :
1176 203048 : return a;
1177 : }
1178 :
1179 : /* Remove b's bits from a; we need never copy */
1180 1101484 : if (a->nwords > b->nwords)
1181 : {
1182 : /*
1183 : * We'll never need to remove trailing zero words when 'a' has more
1184 : * words than 'b'.
1185 : */
1186 2 : i = 0;
1187 : do
1188 : {
1189 2 : a->words[i] &= ~b->words[i];
1190 2 : } while (++i < b->nwords);
1191 : }
1192 : else
1193 : {
1194 1101482 : int lastnonzero = -1;
1195 :
1196 : /* we may need to remove trailing zero words from the result. */
1197 1101482 : i = 0;
1198 : do
1199 : {
1200 1101498 : a->words[i] &= ~b->words[i];
1201 :
1202 : /* remember the last non-zero word */
1203 1101498 : if (a->words[i] != 0)
1204 238284 : lastnonzero = i;
1205 1101498 : } while (++i < a->nwords);
1206 :
1207 : /* check if 'a' has become empty */
1208 1101482 : if (lastnonzero == -1)
1209 : {
1210 863202 : pfree(a);
1211 863202 : return NULL;
1212 : }
1213 :
1214 : /* trim off any trailing zero words */
1215 238280 : a->nwords = lastnonzero + 1;
1216 : }
1217 :
1218 : #ifdef REALLOCATE_BITMAPSETS
1219 : a = bms_copy_and_free(a);
1220 : #endif
1221 :
1222 238282 : return a;
1223 : }
1224 :
1225 : /*
1226 : * bms_join - like bms_union, but *either* input *may* be recycled
1227 : */
1228 : Bitmapset *
1229 1795332 : bms_join(Bitmapset *a, Bitmapset *b)
1230 : {
1231 : Bitmapset *result;
1232 : Bitmapset *other;
1233 : int otherlen;
1234 : int i;
1235 :
1236 : Assert(bms_is_valid_set(a));
1237 : Assert(bms_is_valid_set(b));
1238 :
1239 : /* Handle cases where either input is NULL */
1240 1795332 : if (a == NULL)
1241 : {
1242 : #ifdef REALLOCATE_BITMAPSETS
1243 : b = bms_copy_and_free(b);
1244 : #endif
1245 :
1246 736222 : return b;
1247 : }
1248 1059110 : if (b == NULL)
1249 : {
1250 : #ifdef REALLOCATE_BITMAPSETS
1251 : a = bms_copy_and_free(a);
1252 : #endif
1253 :
1254 191160 : return a;
1255 : }
1256 :
1257 : /* Identify shorter and longer input; use longer one as result */
1258 867950 : if (a->nwords < b->nwords)
1259 : {
1260 4 : result = b;
1261 4 : other = a;
1262 : }
1263 : else
1264 : {
1265 867946 : result = a;
1266 867946 : other = b;
1267 : }
1268 : /* And union the shorter input into the result */
1269 867950 : otherlen = other->nwords;
1270 867950 : i = 0;
1271 : do
1272 : {
1273 867950 : result->words[i] |= other->words[i];
1274 867950 : } while (++i < otherlen);
1275 867950 : if (other != result) /* pure paranoia */
1276 867950 : pfree(other);
1277 :
1278 : #ifdef REALLOCATE_BITMAPSETS
1279 : result = bms_copy_and_free(result);
1280 : #endif
1281 :
1282 867950 : return result;
1283 : }
1284 :
1285 : /*
1286 : * bms_next_member - find next member of a set
1287 : *
1288 : * Returns smallest member greater than "prevbit", or -2 if there is none.
1289 : * "prevbit" must NOT be less than -1, or the behavior is unpredictable.
1290 : *
1291 : * This is intended as support for iterating through the members of a set.
1292 : * The typical pattern is
1293 : *
1294 : * x = -1;
1295 : * while ((x = bms_next_member(inputset, x)) >= 0)
1296 : * process member x;
1297 : *
1298 : * Notice that when there are no more members, we return -2, not -1 as you
1299 : * might expect. The rationale for that is to allow distinguishing the
1300 : * loop-not-started state (x == -1) from the loop-completed state (x == -2).
1301 : * It makes no difference in simple loop usage, but complex iteration logic
1302 : * might need such an ability.
1303 : */
1304 : int
1305 23388830 : bms_next_member(const Bitmapset *a, int prevbit)
1306 : {
1307 : int nwords;
1308 : bitmapword mask;
1309 :
1310 : Assert(bms_is_valid_set(a));
1311 :
1312 23388830 : if (a == NULL)
1313 5528912 : return -2;
1314 17859918 : nwords = a->nwords;
1315 17859918 : prevbit++;
1316 17859918 : mask = (~(bitmapword) 0) << BITNUM(prevbit);
1317 23532238 : for (int wordnum = WORDNUM(prevbit); wordnum < nwords; wordnum++)
1318 : {
1319 17862620 : bitmapword w = a->words[wordnum];
1320 :
1321 : /* ignore bits before prevbit */
1322 17862620 : w &= mask;
1323 :
1324 17862620 : if (w != 0)
1325 : {
1326 : int result;
1327 :
1328 12190300 : result = wordnum * BITS_PER_BITMAPWORD;
1329 12190300 : result += bmw_rightmost_one_pos(w);
1330 12190300 : return result;
1331 : }
1332 :
1333 : /* in subsequent words, consider all bits */
1334 5672320 : mask = (~(bitmapword) 0);
1335 : }
1336 5669618 : return -2;
1337 : }
1338 :
1339 : /*
1340 : * bms_prev_member - find prev member of a set
1341 : *
1342 : * Returns largest member less than "prevbit", or -2 if there is none.
1343 : * "prevbit" must NOT be more than one above the highest possible bit that can
1344 : * be set in the Bitmapset at its current size.
1345 : *
1346 : * To ease finding the highest set bit for the initial loop, the special
1347 : * prevbit value of -1 can be passed to have the function find the highest
1348 : * valued member in the set.
1349 : *
1350 : * This is intended as support for iterating through the members of a set in
1351 : * reverse. The typical pattern is
1352 : *
1353 : * x = -1;
1354 : * while ((x = bms_prev_member(inputset, x)) >= 0)
1355 : * process member x;
1356 : *
1357 : * Notice that when there are no more members, we return -2, not -1 as you
1358 : * might expect. The rationale for that is to allow distinguishing the
1359 : * loop-not-started state (x == -1) from the loop-completed state (x == -2).
1360 : * It makes no difference in simple loop usage, but complex iteration logic
1361 : * might need such an ability.
1362 : */
1363 :
1364 : int
1365 30 : bms_prev_member(const Bitmapset *a, int prevbit)
1366 : {
1367 : int ushiftbits;
1368 : bitmapword mask;
1369 :
1370 : Assert(bms_is_valid_set(a));
1371 :
1372 : /*
1373 : * If set is NULL or if there are no more bits to the right then we've
1374 : * nothing to do.
1375 : */
1376 30 : if (a == NULL || prevbit == 0)
1377 4 : return -2;
1378 :
1379 : /* Validate callers didn't give us something out of range */
1380 : Assert(prevbit <= a->nwords * BITS_PER_BITMAPWORD);
1381 : Assert(prevbit >= -1);
1382 :
1383 : /* transform -1 to the highest possible bit we could have set */
1384 26 : if (prevbit == -1)
1385 2 : prevbit = a->nwords * BITS_PER_BITMAPWORD - 1;
1386 : else
1387 24 : prevbit--;
1388 :
1389 26 : ushiftbits = BITS_PER_BITMAPWORD - (BITNUM(prevbit) + 1);
1390 26 : mask = (~(bitmapword) 0) >> ushiftbits;
1391 34 : for (int wordnum = WORDNUM(prevbit); wordnum >= 0; wordnum--)
1392 : {
1393 26 : bitmapword w = a->words[wordnum];
1394 :
1395 : /* mask out bits left of prevbit */
1396 26 : w &= mask;
1397 :
1398 26 : if (w != 0)
1399 : {
1400 : int result;
1401 :
1402 18 : result = wordnum * BITS_PER_BITMAPWORD;
1403 18 : result += bmw_leftmost_one_pos(w);
1404 18 : return result;
1405 : }
1406 :
1407 : /* in subsequent words, consider all bits */
1408 8 : mask = (~(bitmapword) 0);
1409 : }
1410 8 : return -2;
1411 : }
1412 :
1413 : /*
1414 : * bms_hash_value - compute a hash key for a Bitmapset
1415 : */
1416 : uint32
1417 7126 : bms_hash_value(const Bitmapset *a)
1418 : {
1419 : Assert(bms_is_valid_set(a));
1420 :
1421 7126 : if (a == NULL)
1422 8 : return 0; /* All empty sets hash to 0 */
1423 7118 : return DatumGetUInt32(hash_any((const unsigned char *) a->words,
1424 7118 : a->nwords * sizeof(bitmapword)));
1425 : }
1426 :
1427 : /*
1428 : * bitmap_hash - hash function for keys that are (pointers to) Bitmapsets
1429 : *
1430 : * Note: don't forget to specify bitmap_match as the match function!
1431 : */
1432 : uint32
1433 7112 : bitmap_hash(const void *key, Size keysize)
1434 : {
1435 : Assert(keysize == sizeof(Bitmapset *));
1436 7112 : return bms_hash_value(*((const Bitmapset *const *) key));
1437 : }
1438 :
1439 : /*
1440 : * bitmap_match - match function to use with bitmap_hash
1441 : */
1442 : int
1443 4182 : bitmap_match(const void *key1, const void *key2, Size keysize)
1444 : {
1445 : Assert(keysize == sizeof(Bitmapset *));
1446 4182 : return !bms_equal(*((const Bitmapset *const *) key1),
1447 : *((const Bitmapset *const *) key2));
1448 : }
|