Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * bitmapset.c
4 : * PostgreSQL generic bitmap set package
5 : *
6 : * A bitmap set can represent any set of nonnegative integers, although
7 : * it is mainly intended for sets where the maximum value is not large,
8 : * say at most a few hundred. By convention, we always represent a set with
9 : * the minimum possible number of words, i.e, there are never any trailing
10 : * zero words. Enforcing this requires that an empty set is represented as
11 : * NULL. Because an empty Bitmapset is represented as NULL, a non-NULL
12 : * Bitmapset always has at least 1 Bitmapword. We can exploit this fact to
13 : * speed up various loops over the Bitmapset's words array by using "do while"
14 : * loops instead of "for" loops. This means the code does not waste time
15 : * checking the loop condition before the first iteration. For Bitmapsets
16 : * containing only a single word (likely the majority of them) this halves the
17 : * number of loop condition checks.
18 : *
19 : *
20 : * Copyright (c) 2003-2023, PostgreSQL Global Development Group
21 : *
22 : * IDENTIFICATION
23 : * src/backend/nodes/bitmapset.c
24 : *
25 : *-------------------------------------------------------------------------
26 : */
27 : #include "postgres.h"
28 :
29 : #include "common/hashfn.h"
30 : #include "nodes/bitmapset.h"
31 : #include "nodes/pg_list.h"
32 : #include "port/pg_bitutils.h"
33 :
34 :
35 : #define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD)
36 : #define BITNUM(x) ((x) % BITS_PER_BITMAPWORD)
37 :
38 : #define BITMAPSET_SIZE(nwords) \
39 : (offsetof(Bitmapset, words) + (nwords) * sizeof(bitmapword))
40 :
41 : /*----------
42 : * This is a well-known cute trick for isolating the rightmost one-bit
43 : * in a word. It assumes two's complement arithmetic. Consider any
44 : * nonzero value, and focus attention on the rightmost one. The value is
45 : * then something like
46 : * xxxxxx10000
47 : * where x's are unspecified bits. The two's complement negative is formed
48 : * by inverting all the bits and adding one. Inversion gives
49 : * yyyyyy01111
50 : * where each y is the inverse of the corresponding x. Incrementing gives
51 : * yyyyyy10000
52 : * and then ANDing with the original value gives
53 : * 00000010000
54 : * This works for all cases except original value = zero, where of course
55 : * we get zero.
56 : *----------
57 : */
58 : #define RIGHTMOST_ONE(x) ((signedbitmapword) (x) & -((signedbitmapword) (x)))
59 :
60 : #define HAS_MULTIPLE_ONES(x) ((bitmapword) RIGHTMOST_ONE(x) != (x))
61 :
62 : /* Select appropriate bit-twiddling functions for bitmap word size */
63 : #if BITS_PER_BITMAPWORD == 32
64 : #define bmw_leftmost_one_pos(w) pg_leftmost_one_pos32(w)
65 : #define bmw_rightmost_one_pos(w) pg_rightmost_one_pos32(w)
66 : #define bmw_popcount(w) pg_popcount32(w)
67 : #elif BITS_PER_BITMAPWORD == 64
68 : #define bmw_leftmost_one_pos(w) pg_leftmost_one_pos64(w)
69 : #define bmw_rightmost_one_pos(w) pg_rightmost_one_pos64(w)
70 : #define bmw_popcount(w) pg_popcount64(w)
71 : #else
72 : #error "invalid BITS_PER_BITMAPWORD"
73 : #endif
74 :
75 :
76 : /*
77 : * bms_copy - make a palloc'd copy of a bitmapset
78 : */
79 : Bitmapset *
80 38455036 : bms_copy(const Bitmapset *a)
81 : {
82 : Bitmapset *result;
83 : size_t size;
84 :
85 38455036 : if (a == NULL)
86 24995858 : return NULL;
87 13459178 : size = BITMAPSET_SIZE(a->nwords);
88 13459178 : result = (Bitmapset *) palloc(size);
89 13459178 : memcpy(result, a, size);
90 13459178 : return result;
91 : }
92 :
93 : /*
94 : * bms_equal - are two bitmapsets equal? or both NULL?
95 : */
96 : bool
97 18886934 : bms_equal(const Bitmapset *a, const Bitmapset *b)
98 : {
99 : int i;
100 :
101 : Assert(a == NULL || a->words[a->nwords - 1] != 0);
102 : Assert(b == NULL || b->words[b->nwords - 1] != 0);
103 :
104 : /* Handle cases where either input is NULL */
105 18886934 : if (a == NULL)
106 : {
107 12931588 : if (b == NULL)
108 12861288 : return true;
109 70300 : return false;
110 : }
111 5955346 : else if (b == NULL)
112 20738 : return false;
113 :
114 : /* can't be equal if the word counts don't match */
115 5934608 : if (a->nwords != b->nwords)
116 0 : return false;
117 :
118 : /* check each word matches */
119 5934608 : i = 0;
120 : do
121 : {
122 5935016 : if (a->words[i] != b->words[i])
123 2228432 : return false;
124 3706584 : } while (++i < a->nwords);
125 :
126 3706176 : return true;
127 : }
128 :
129 : /*
130 : * bms_compare - qsort-style comparator for bitmapsets
131 : *
132 : * This guarantees to report values as equal iff bms_equal would say they are
133 : * equal. Otherwise, the highest-numbered bit that is set in one value but
134 : * not the other determines the result. (This rule means that, for example,
135 : * {6} is greater than {5}, which seems plausible.)
136 : */
137 : int
138 19958 : bms_compare(const Bitmapset *a, const Bitmapset *b)
139 : {
140 : int i;
141 :
142 : Assert(a == NULL || a->words[a->nwords - 1] != 0);
143 : Assert(b == NULL || b->words[b->nwords - 1] != 0);
144 :
145 : /* Handle cases where either input is NULL */
146 19958 : if (a == NULL)
147 0 : return (b == NULL) ? 0 : -1;
148 19958 : else if (b == NULL)
149 0 : return +1;
150 :
151 : /* the set with the most words must be greater */
152 19958 : if (a->nwords != b->nwords)
153 0 : return (a->nwords > b->nwords) ? +1 : -1;
154 :
155 19958 : i = a->nwords - 1;
156 : do
157 : {
158 19958 : bitmapword aw = a->words[i];
159 19958 : bitmapword bw = b->words[i];
160 :
161 19958 : if (aw != bw)
162 19958 : return (aw > bw) ? +1 : -1;
163 0 : } while (--i >= 0);
164 0 : return 0;
165 : }
166 :
167 : /*
168 : * bms_make_singleton - build a bitmapset containing a single member
169 : */
170 : Bitmapset *
171 11693138 : bms_make_singleton(int x)
172 : {
173 : Bitmapset *result;
174 : int wordnum,
175 : bitnum;
176 :
177 11693138 : if (x < 0)
178 0 : elog(ERROR, "negative bitmapset member not allowed");
179 11693138 : wordnum = WORDNUM(x);
180 11693138 : bitnum = BITNUM(x);
181 11693138 : result = (Bitmapset *) palloc0(BITMAPSET_SIZE(wordnum + 1));
182 11693138 : result->type = T_Bitmapset;
183 11693138 : result->nwords = wordnum + 1;
184 11693138 : result->words[wordnum] = ((bitmapword) 1 << bitnum);
185 11693138 : return result;
186 : }
187 :
188 : /*
189 : * bms_free - free a bitmapset
190 : *
191 : * Same as pfree except for allowing NULL input
192 : */
193 : void
194 14917576 : bms_free(Bitmapset *a)
195 : {
196 14917576 : if (a)
197 6509996 : pfree(a);
198 14917576 : }
199 :
200 :
201 : /*
202 : * These operations all make a freshly palloc'd result,
203 : * leaving their inputs untouched
204 : */
205 :
206 :
207 : /*
208 : * bms_union - set union
209 : */
210 : Bitmapset *
211 6353136 : bms_union(const Bitmapset *a, const Bitmapset *b)
212 : {
213 : Bitmapset *result;
214 : const Bitmapset *other;
215 : int otherlen;
216 : int i;
217 :
218 : /* Handle cases where either input is NULL */
219 6353136 : if (a == NULL)
220 4056130 : return bms_copy(b);
221 2297006 : if (b == NULL)
222 1030252 : return bms_copy(a);
223 : /* Identify shorter and longer input; copy the longer one */
224 1266754 : if (a->nwords <= b->nwords)
225 : {
226 1266754 : result = bms_copy(b);
227 1266754 : other = a;
228 : }
229 : else
230 : {
231 0 : result = bms_copy(a);
232 0 : other = b;
233 : }
234 : /* And union the shorter input into the result */
235 1266754 : otherlen = other->nwords;
236 1266754 : i = 0;
237 : do
238 : {
239 1266754 : result->words[i] |= other->words[i];
240 1266754 : } while (++i < otherlen);
241 1266754 : return result;
242 : }
243 :
244 : /*
245 : * bms_intersect - set intersection
246 : */
247 : Bitmapset *
248 836152 : bms_intersect(const Bitmapset *a, const Bitmapset *b)
249 : {
250 : Bitmapset *result;
251 : const Bitmapset *other;
252 : int lastnonzero;
253 : int resultlen;
254 : int i;
255 :
256 : /* Handle cases where either input is NULL */
257 836152 : if (a == NULL || b == NULL)
258 170870 : return NULL;
259 : /* Identify shorter and longer input; copy the shorter one */
260 665282 : if (a->nwords <= b->nwords)
261 : {
262 665282 : result = bms_copy(a);
263 665282 : other = b;
264 : }
265 : else
266 : {
267 0 : result = bms_copy(b);
268 0 : other = a;
269 : }
270 : /* And intersect the longer input with the result */
271 665282 : resultlen = result->nwords;
272 665282 : lastnonzero = -1;
273 665282 : i = 0;
274 : do
275 : {
276 665282 : result->words[i] &= other->words[i];
277 :
278 665282 : if (result->words[i] != 0)
279 657788 : lastnonzero = i;
280 665282 : } while (++i < resultlen);
281 : /* If we computed an empty result, we must return NULL */
282 665282 : if (lastnonzero == -1)
283 : {
284 7494 : pfree(result);
285 7494 : return NULL;
286 : }
287 :
288 : /* get rid of trailing zero words */
289 657788 : result->nwords = lastnonzero + 1;
290 657788 : return result;
291 : }
292 :
293 : /*
294 : * bms_difference - set difference (ie, A without members of B)
295 : */
296 : Bitmapset *
297 677042 : bms_difference(const Bitmapset *a, const Bitmapset *b)
298 : {
299 : Bitmapset *result;
300 : int i;
301 :
302 : Assert(a == NULL || a->words[a->nwords - 1] != 0);
303 : Assert(b == NULL || b->words[b->nwords - 1] != 0);
304 :
305 : /* Handle cases where either input is NULL */
306 677042 : if (a == NULL)
307 10878 : return NULL;
308 666164 : if (b == NULL)
309 429136 : return bms_copy(a);
310 :
311 : /*
312 : * In Postgres' usage, an empty result is a very common case, so it's
313 : * worth optimizing for that by testing bms_nonempty_difference(). This
314 : * saves us a palloc/pfree cycle compared to checking after-the-fact.
315 : */
316 237028 : if (!bms_nonempty_difference(a, b))
317 88444 : return NULL;
318 :
319 : /* Copy the left input */
320 148584 : result = bms_copy(a);
321 :
322 : /* And remove b's bits from result */
323 148584 : if (result->nwords > b->nwords)
324 : {
325 : /*
326 : * We'll never need to remove trailing zero words when 'a' has more
327 : * words than 'b' as the additional words must be non-zero.
328 : */
329 0 : i = 0;
330 : do
331 : {
332 0 : result->words[i] &= ~b->words[i];
333 0 : } while (++i < b->nwords);
334 : }
335 : else
336 : {
337 148584 : int lastnonzero = -1;
338 :
339 : /* we may need to remove trailing zero words from the result. */
340 148584 : i = 0;
341 : do
342 : {
343 148584 : result->words[i] &= ~b->words[i];
344 :
345 : /* remember the last non-zero word */
346 148584 : if (result->words[i] != 0)
347 148584 : lastnonzero = i;
348 148584 : } while (++i < result->nwords);
349 :
350 : /* trim off trailing zero words */
351 148584 : result->nwords = lastnonzero + 1;
352 : }
353 : Assert(result->nwords != 0);
354 :
355 : /* Need not check for empty result, since we handled that case above */
356 148584 : return result;
357 : }
358 :
359 : /*
360 : * bms_is_subset - is A a subset of B?
361 : */
362 : bool
363 14780514 : bms_is_subset(const Bitmapset *a, const Bitmapset *b)
364 : {
365 : int i;
366 :
367 : Assert(a == NULL || a->words[a->nwords - 1] != 0);
368 : Assert(b == NULL || b->words[b->nwords - 1] != 0);
369 :
370 : /* Handle cases where either input is NULL */
371 14780514 : if (a == NULL)
372 1503664 : return true; /* empty set is a subset of anything */
373 13276850 : if (b == NULL)
374 306606 : return false;
375 :
376 : /* 'a' can't be a subset of 'b' if it contains more words */
377 12970244 : if (a->nwords > b->nwords)
378 0 : return false;
379 :
380 : /* Check all 'a' members are set in 'b' */
381 12970244 : i = 0;
382 : do
383 : {
384 12970244 : if ((a->words[i] & ~b->words[i]) != 0)
385 5136612 : return false;
386 7833632 : } while (++i < a->nwords);
387 7833632 : return true;
388 : }
389 :
390 : /*
391 : * bms_subset_compare - compare A and B for equality/subset relationships
392 : *
393 : * This is more efficient than testing bms_is_subset in both directions.
394 : */
395 : BMS_Comparison
396 1806240 : bms_subset_compare(const Bitmapset *a, const Bitmapset *b)
397 : {
398 : BMS_Comparison result;
399 : int shortlen;
400 : int i;
401 :
402 : Assert(a == NULL || a->words[a->nwords - 1] != 0);
403 : Assert(b == NULL || b->words[b->nwords - 1] != 0);
404 :
405 : /* Handle cases where either input is NULL */
406 1806240 : if (a == NULL)
407 : {
408 1526522 : if (b == NULL)
409 1461294 : return BMS_EQUAL;
410 65228 : return BMS_SUBSET1;
411 : }
412 279718 : if (b == NULL)
413 134252 : return BMS_SUBSET2;
414 : /* Check common words */
415 145466 : result = BMS_EQUAL; /* status so far */
416 145466 : shortlen = Min(a->nwords, b->nwords);
417 145466 : i = 0;
418 : do
419 : {
420 145466 : bitmapword aword = a->words[i];
421 145466 : bitmapword bword = b->words[i];
422 :
423 145466 : if ((aword & ~bword) != 0)
424 : {
425 : /* a is not a subset of b */
426 32844 : if (result == BMS_SUBSET1)
427 0 : return BMS_DIFFERENT;
428 32844 : result = BMS_SUBSET2;
429 : }
430 145466 : if ((bword & ~aword) != 0)
431 : {
432 : /* b is not a subset of a */
433 33018 : if (result == BMS_SUBSET2)
434 30592 : return BMS_DIFFERENT;
435 2426 : result = BMS_SUBSET1;
436 : }
437 114874 : } while (++i < shortlen);
438 : /* Check extra words */
439 114874 : if (a->nwords > b->nwords)
440 : {
441 : /* if a has more words then a is not a subset of b */
442 0 : if (result == BMS_SUBSET1)
443 0 : return BMS_DIFFERENT;
444 0 : return BMS_SUBSET2;
445 : }
446 114874 : else if (a->nwords < b->nwords)
447 : {
448 : /* if b has more words then b is not a subset of a */
449 0 : if (result == BMS_SUBSET2)
450 0 : return BMS_DIFFERENT;
451 0 : return BMS_SUBSET1;
452 : }
453 114874 : return result;
454 : }
455 :
456 : /*
457 : * bms_is_member - is X a member of A?
458 : */
459 : bool
460 11175306 : bms_is_member(int x, const Bitmapset *a)
461 : {
462 : int wordnum,
463 : bitnum;
464 :
465 : /* XXX better to just return false for x<0 ? */
466 11175306 : if (x < 0)
467 0 : elog(ERROR, "negative bitmapset member not allowed");
468 11175306 : if (a == NULL)
469 6655766 : return false;
470 4519540 : wordnum = WORDNUM(x);
471 4519540 : bitnum = BITNUM(x);
472 4519540 : if (wordnum >= a->nwords)
473 226 : return false;
474 4519314 : if ((a->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0)
475 3046196 : return true;
476 1473118 : return false;
477 : }
478 :
479 : /*
480 : * bms_member_index
481 : * determine 0-based index of member x in the bitmap
482 : *
483 : * Returns (-1) when x is not a member.
484 : */
485 : int
486 4110 : bms_member_index(Bitmapset *a, int x)
487 : {
488 : int i;
489 : int bitnum;
490 : int wordnum;
491 4110 : int result = 0;
492 : bitmapword mask;
493 :
494 : /* return -1 if not a member of the bitmap */
495 4110 : if (!bms_is_member(x, a))
496 0 : return -1;
497 :
498 4110 : wordnum = WORDNUM(x);
499 4110 : bitnum = BITNUM(x);
500 :
501 : /* count bits in preceding words */
502 4110 : for (i = 0; i < wordnum; i++)
503 : {
504 0 : bitmapword w = a->words[i];
505 :
506 : /* No need to count the bits in a zero word */
507 0 : if (w != 0)
508 0 : result += bmw_popcount(w);
509 : }
510 :
511 : /*
512 : * Now add bits of the last word, but only those before the item. We can
513 : * do that by applying a mask and then using popcount again. To get
514 : * 0-based index, we want to count only preceding bits, not the item
515 : * itself, so we subtract 1.
516 : */
517 4110 : mask = ((bitmapword) 1 << bitnum) - 1;
518 4110 : result += bmw_popcount(a->words[wordnum] & mask);
519 :
520 4110 : return result;
521 : }
522 :
523 : /*
524 : * bms_overlap - do sets overlap (ie, have a nonempty intersection)?
525 : */
526 : bool
527 9819294 : bms_overlap(const Bitmapset *a, const Bitmapset *b)
528 : {
529 : int shortlen;
530 : int i;
531 :
532 : /* Handle cases where either input is NULL */
533 9819294 : if (a == NULL || b == NULL)
534 3786136 : return false;
535 : /* Check words in common */
536 6033158 : shortlen = Min(a->nwords, b->nwords);
537 6033158 : i = 0;
538 : do
539 : {
540 6033158 : if ((a->words[i] & b->words[i]) != 0)
541 3889952 : return true;
542 2143206 : } while (++i < shortlen);
543 2143206 : return false;
544 : }
545 :
546 : /*
547 : * bms_overlap_list - does a set overlap an integer list?
548 : */
549 : bool
550 1306 : bms_overlap_list(const Bitmapset *a, const List *b)
551 : {
552 : ListCell *lc;
553 : int wordnum,
554 : bitnum;
555 :
556 1306 : if (a == NULL || b == NIL)
557 1168 : return false;
558 :
559 258 : foreach(lc, b)
560 : {
561 198 : int x = lfirst_int(lc);
562 :
563 198 : if (x < 0)
564 0 : elog(ERROR, "negative bitmapset member not allowed");
565 198 : wordnum = WORDNUM(x);
566 198 : bitnum = BITNUM(x);
567 198 : if (wordnum < a->nwords)
568 198 : if ((a->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0)
569 78 : return true;
570 : }
571 :
572 60 : return false;
573 : }
574 :
575 : /*
576 : * bms_nonempty_difference - do sets have a nonempty difference?
577 : *
578 : * i.e., are any members set in 'a' that are not also set in 'b'.
579 : */
580 : bool
581 1877928 : bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b)
582 : {
583 : int i;
584 :
585 : Assert(a == NULL || a->words[a->nwords - 1] != 0);
586 : Assert(b == NULL || b->words[b->nwords - 1] != 0);
587 :
588 : /* Handle cases where either input is NULL */
589 1877928 : if (a == NULL)
590 72 : return false;
591 1877856 : if (b == NULL)
592 0 : return true;
593 : /* if 'a' has more words then it must contain additional members */
594 1877856 : if (a->nwords > b->nwords)
595 0 : return true;
596 : /* Check all 'a' members are set in 'b' */
597 1877856 : i = 0;
598 : do
599 : {
600 1877856 : if ((a->words[i] & ~b->words[i]) != 0)
601 1085204 : return true;
602 792652 : } while (++i < a->nwords);
603 792652 : return false;
604 : }
605 :
606 : /*
607 : * bms_singleton_member - return the sole integer member of set
608 : *
609 : * Raises error if |a| is not 1.
610 : */
611 : int
612 14822 : bms_singleton_member(const Bitmapset *a)
613 : {
614 14822 : int result = -1;
615 : int nwords;
616 : int wordnum;
617 :
618 14822 : if (a == NULL)
619 0 : elog(ERROR, "bitmapset is empty");
620 14822 : nwords = a->nwords;
621 14822 : wordnum = 0;
622 : do
623 : {
624 14822 : bitmapword w = a->words[wordnum];
625 :
626 14822 : if (w != 0)
627 : {
628 14822 : if (result >= 0 || HAS_MULTIPLE_ONES(w))
629 0 : elog(ERROR, "bitmapset has multiple members");
630 14822 : result = wordnum * BITS_PER_BITMAPWORD;
631 14822 : result += bmw_rightmost_one_pos(w);
632 : }
633 14822 : } while (++wordnum < nwords);
634 :
635 : /* we don't expect non-NULL sets to be empty */
636 : Assert(result >= 0);
637 14822 : return result;
638 : }
639 :
640 : /*
641 : * bms_get_singleton_member
642 : *
643 : * Test whether the given set is a singleton.
644 : * If so, set *member to the value of its sole member, and return true.
645 : * If not, return false, without changing *member.
646 : *
647 : * This is more convenient and faster than calling bms_membership() and then
648 : * bms_singleton_member(), if we don't care about distinguishing empty sets
649 : * from multiple-member sets.
650 : */
651 : bool
652 1745826 : bms_get_singleton_member(const Bitmapset *a, int *member)
653 : {
654 1745826 : int result = -1;
655 : int nwords;
656 : int wordnum;
657 :
658 1745826 : if (a == NULL)
659 0 : return false;
660 1745826 : nwords = a->nwords;
661 1745826 : wordnum = 0;
662 : do
663 : {
664 1745826 : bitmapword w = a->words[wordnum];
665 :
666 1745826 : if (w != 0)
667 : {
668 1745826 : if (result >= 0 || HAS_MULTIPLE_ONES(w))
669 298014 : return false;
670 1447812 : result = wordnum * BITS_PER_BITMAPWORD;
671 1447812 : result += bmw_rightmost_one_pos(w);
672 : }
673 1447812 : } while (++wordnum < nwords);
674 :
675 : /* we don't expect non-NULL sets to be empty */
676 : Assert(result >= 0);
677 1447812 : *member = result;
678 1447812 : return true;
679 : }
680 :
681 : /*
682 : * bms_num_members - count members of set
683 : */
684 : int
685 1517208 : bms_num_members(const Bitmapset *a)
686 : {
687 1517208 : int result = 0;
688 : int nwords;
689 : int wordnum;
690 :
691 1517208 : if (a == NULL)
692 140756 : return 0;
693 1376452 : nwords = a->nwords;
694 1376452 : wordnum = 0;
695 : do
696 : {
697 1376452 : bitmapword w = a->words[wordnum];
698 :
699 : /* No need to count the bits in a zero word */
700 1376452 : if (w != 0)
701 1376452 : result += bmw_popcount(w);
702 1376452 : } while (++wordnum < nwords);
703 1376452 : return result;
704 : }
705 :
706 : /*
707 : * bms_membership - does a set have zero, one, or multiple members?
708 : *
709 : * This is faster than making an exact count with bms_num_members().
710 : */
711 : BMS_Membership
712 936542 : bms_membership(const Bitmapset *a)
713 : {
714 936542 : BMS_Membership result = BMS_EMPTY_SET;
715 : int nwords;
716 : int wordnum;
717 :
718 936542 : if (a == NULL)
719 464 : return BMS_EMPTY_SET;
720 936078 : nwords = a->nwords;
721 936078 : wordnum = 0;
722 : do
723 : {
724 936078 : bitmapword w = a->words[wordnum];
725 :
726 936078 : if (w != 0)
727 : {
728 936078 : if (result != BMS_EMPTY_SET || HAS_MULTIPLE_ONES(w))
729 297032 : return BMS_MULTIPLE;
730 639046 : result = BMS_SINGLETON;
731 : }
732 639046 : } while (++wordnum < nwords);
733 639046 : return result;
734 : }
735 :
736 :
737 : /*
738 : * These operations all "recycle" their non-const inputs, ie, either
739 : * return the modified input or pfree it if it can't hold the result.
740 : *
741 : * These should generally be used in the style
742 : *
743 : * foo = bms_add_member(foo, x);
744 : */
745 :
746 :
747 : /*
748 : * bms_add_member - add a specified member to set
749 : *
750 : * Input set is modified or recycled!
751 : */
752 : Bitmapset *
753 16525988 : bms_add_member(Bitmapset *a, int x)
754 : {
755 : int wordnum,
756 : bitnum;
757 :
758 16525988 : if (x < 0)
759 0 : elog(ERROR, "negative bitmapset member not allowed");
760 16525988 : if (a == NULL)
761 9903480 : return bms_make_singleton(x);
762 6622508 : wordnum = WORDNUM(x);
763 6622508 : bitnum = BITNUM(x);
764 :
765 : /* enlarge the set if necessary */
766 6622508 : if (wordnum >= a->nwords)
767 : {
768 204 : int oldnwords = a->nwords;
769 : int i;
770 :
771 204 : a = (Bitmapset *) repalloc(a, BITMAPSET_SIZE(wordnum + 1));
772 204 : a->nwords = wordnum + 1;
773 : /* zero out the enlarged portion */
774 204 : i = oldnwords;
775 : do
776 : {
777 876 : a->words[i] = 0;
778 876 : } while (++i < a->nwords);
779 : }
780 :
781 6622508 : a->words[wordnum] |= ((bitmapword) 1 << bitnum);
782 6622508 : return a;
783 : }
784 :
785 : /*
786 : * bms_del_member - remove a specified member from set
787 : *
788 : * No error if x is not currently a member of set
789 : *
790 : * Input set is modified in-place!
791 : */
792 : Bitmapset *
793 2029236 : bms_del_member(Bitmapset *a, int x)
794 : {
795 : int wordnum,
796 : bitnum;
797 :
798 2029236 : if (x < 0)
799 0 : elog(ERROR, "negative bitmapset member not allowed");
800 2029236 : if (a == NULL)
801 1048860 : return NULL;
802 980376 : wordnum = WORDNUM(x);
803 980376 : bitnum = BITNUM(x);
804 :
805 : /* member can't exist. Return 'a' unmodified */
806 980376 : if (unlikely(wordnum >= a->nwords))
807 0 : return a;
808 :
809 980376 : a->words[wordnum] &= ~((bitmapword) 1 << bitnum);
810 :
811 : /* when last word becomes empty, trim off all trailing empty words */
812 980376 : if (a->words[wordnum] == 0 && wordnum == a->nwords - 1)
813 : {
814 : /* find the last non-empty word and make that the new final word */
815 456288 : for (int i = wordnum - 1; i >= 0; i--)
816 : {
817 0 : if (a->words[i] != 0)
818 : {
819 0 : a->nwords = i + 1;
820 0 : return a;
821 : }
822 : }
823 :
824 : /* the set is now empty */
825 456288 : pfree(a);
826 456288 : return NULL;
827 : }
828 524088 : return a;
829 : }
830 :
831 : /*
832 : * bms_add_members - like bms_union, but left input is recycled
833 : */
834 : Bitmapset *
835 11134240 : bms_add_members(Bitmapset *a, const Bitmapset *b)
836 : {
837 : Bitmapset *result;
838 : const Bitmapset *other;
839 : int otherlen;
840 : int i;
841 :
842 : /* Handle cases where either input is NULL */
843 11134240 : if (a == NULL)
844 5854040 : return bms_copy(b);
845 5280200 : if (b == NULL)
846 3340534 : return a;
847 : /* Identify shorter and longer input; copy the longer one if needed */
848 1939666 : if (a->nwords < b->nwords)
849 : {
850 0 : result = bms_copy(b);
851 0 : other = a;
852 : }
853 : else
854 : {
855 1939666 : result = a;
856 1939666 : other = b;
857 : }
858 : /* And union the shorter input into the result */
859 1939666 : otherlen = other->nwords;
860 1939666 : i = 0;
861 : do
862 : {
863 1939666 : result->words[i] |= other->words[i];
864 1939666 : } while (++i < otherlen);
865 1939666 : if (result != a)
866 0 : pfree(a);
867 1939666 : return result;
868 : }
869 :
870 : /*
871 : * bms_add_range
872 : * Add members in the range of 'lower' to 'upper' to the set.
873 : *
874 : * Note this could also be done by calling bms_add_member in a loop, however,
875 : * using this function will be faster when the range is large as we work at
876 : * the bitmapword level rather than at bit level.
877 : */
878 : Bitmapset *
879 25378 : bms_add_range(Bitmapset *a, int lower, int upper)
880 : {
881 : int lwordnum,
882 : lbitnum,
883 : uwordnum,
884 : ushiftbits,
885 : wordnum;
886 :
887 : /* do nothing if nothing is called for, without further checking */
888 25378 : if (upper < lower)
889 28 : return a;
890 :
891 25350 : if (lower < 0)
892 0 : elog(ERROR, "negative bitmapset member not allowed");
893 25350 : uwordnum = WORDNUM(upper);
894 :
895 25350 : if (a == NULL)
896 : {
897 25350 : a = (Bitmapset *) palloc0(BITMAPSET_SIZE(uwordnum + 1));
898 25350 : a->type = T_Bitmapset;
899 25350 : a->nwords = uwordnum + 1;
900 : }
901 0 : else if (uwordnum >= a->nwords)
902 : {
903 0 : int oldnwords = a->nwords;
904 : int i;
905 :
906 : /* ensure we have enough words to store the upper bit */
907 0 : a = (Bitmapset *) repalloc(a, BITMAPSET_SIZE(uwordnum + 1));
908 0 : a->nwords = uwordnum + 1;
909 : /* zero out the enlarged portion */
910 0 : i = oldnwords;
911 : do
912 : {
913 0 : a->words[i] = 0;
914 0 : } while (++i < a->nwords);
915 : }
916 :
917 25350 : wordnum = lwordnum = WORDNUM(lower);
918 :
919 25350 : lbitnum = BITNUM(lower);
920 25350 : ushiftbits = BITS_PER_BITMAPWORD - (BITNUM(upper) + 1);
921 :
922 : /*
923 : * Special case when lwordnum is the same as uwordnum we must perform the
924 : * upper and lower masking on the word.
925 : */
926 25350 : if (lwordnum == uwordnum)
927 : {
928 25350 : a->words[lwordnum] |= ~(bitmapword) (((bitmapword) 1 << lbitnum) - 1)
929 25350 : & (~(bitmapword) 0) >> ushiftbits;
930 : }
931 : else
932 : {
933 : /* turn on lbitnum and all bits left of it */
934 0 : a->words[wordnum++] |= ~(bitmapword) (((bitmapword) 1 << lbitnum) - 1);
935 :
936 : /* turn on all bits for any intermediate words */
937 0 : while (wordnum < uwordnum)
938 0 : a->words[wordnum++] = ~(bitmapword) 0;
939 :
940 : /* turn on upper's bit and all bits right of it. */
941 0 : a->words[uwordnum] |= (~(bitmapword) 0) >> ushiftbits;
942 : }
943 :
944 25350 : return a;
945 : }
946 :
947 : /*
948 : * bms_int_members - like bms_intersect, but left input is recycled
949 : */
950 : Bitmapset *
951 486090 : bms_int_members(Bitmapset *a, const Bitmapset *b)
952 : {
953 : int lastnonzero;
954 : int shortlen;
955 : int i;
956 :
957 : /* Handle cases where either input is NULL */
958 486090 : if (a == NULL)
959 25158 : return NULL;
960 460932 : if (b == NULL)
961 : {
962 3818 : pfree(a);
963 3818 : return NULL;
964 : }
965 : /* Intersect b into a; we need never copy */
966 457114 : shortlen = Min(a->nwords, b->nwords);
967 457114 : lastnonzero = -1;
968 457114 : i = 0;
969 : do
970 : {
971 457114 : a->words[i] &= b->words[i];
972 :
973 457114 : if (a->words[i] != 0)
974 364724 : lastnonzero = i;
975 457114 : } while (++i < shortlen);
976 :
977 : /* If we computed an empty result, we must return NULL */
978 457114 : if (lastnonzero == -1)
979 : {
980 92390 : pfree(a);
981 92390 : return NULL;
982 : }
983 :
984 : /* get rid of trailing zero words */
985 364724 : a->nwords = lastnonzero + 1;
986 364724 : return a;
987 : }
988 :
989 : /*
990 : * bms_del_members - like bms_difference, but left input is recycled
991 : */
992 : Bitmapset *
993 1619744 : bms_del_members(Bitmapset *a, const Bitmapset *b)
994 : {
995 : int i;
996 :
997 : Assert(a == NULL || a->words[a->nwords - 1] != 0);
998 : Assert(b == NULL || b->words[b->nwords - 1] != 0);
999 :
1000 : /* Handle cases where either input is NULL */
1001 1619744 : if (a == NULL)
1002 714950 : return NULL;
1003 904794 : if (b == NULL)
1004 164040 : return a;
1005 : /* Remove b's bits from a; we need never copy */
1006 740754 : if (a->nwords > b->nwords)
1007 : {
1008 : /*
1009 : * We'll never need to remove trailing zero words when 'a' has more
1010 : * words than 'b'.
1011 : */
1012 0 : i = 0;
1013 : do
1014 : {
1015 0 : a->words[i] &= ~b->words[i];
1016 0 : } while (++i < b->nwords);
1017 : }
1018 : else
1019 : {
1020 740754 : int lastnonzero = -1;
1021 :
1022 : /* we may need to remove trailing zero words from the result. */
1023 740754 : i = 0;
1024 : do
1025 : {
1026 740754 : a->words[i] &= ~b->words[i];
1027 :
1028 : /* remember the last non-zero word */
1029 740754 : if (a->words[i] != 0)
1030 138794 : lastnonzero = i;
1031 740754 : } while (++i < a->nwords);
1032 :
1033 : /* check if 'a' has become empty */
1034 740754 : if (lastnonzero == -1)
1035 : {
1036 601960 : pfree(a);
1037 601960 : return NULL;
1038 : }
1039 :
1040 : /* trim off any trailing zero words */
1041 138794 : a->nwords = lastnonzero + 1;
1042 : }
1043 :
1044 138794 : return a;
1045 : }
1046 :
1047 : /*
1048 : * bms_join - like bms_union, but *both* inputs are recycled
1049 : */
1050 : Bitmapset *
1051 1196850 : bms_join(Bitmapset *a, Bitmapset *b)
1052 : {
1053 : Bitmapset *result;
1054 : Bitmapset *other;
1055 : int otherlen;
1056 : int i;
1057 :
1058 : /* Handle cases where either input is NULL */
1059 1196850 : if (a == NULL)
1060 537392 : return b;
1061 659458 : if (b == NULL)
1062 135970 : return a;
1063 : /* Identify shorter and longer input; use longer one as result */
1064 523488 : if (a->nwords < b->nwords)
1065 : {
1066 0 : result = b;
1067 0 : other = a;
1068 : }
1069 : else
1070 : {
1071 523488 : result = a;
1072 523488 : other = b;
1073 : }
1074 : /* And union the shorter input into the result */
1075 523488 : otherlen = other->nwords;
1076 523488 : i = 0;
1077 : do
1078 : {
1079 523488 : result->words[i] |= other->words[i];
1080 523488 : } while (++i < otherlen);
1081 523488 : if (other != result) /* pure paranoia */
1082 523488 : pfree(other);
1083 523488 : return result;
1084 : }
1085 :
1086 : /*
1087 : * bms_next_member - find next member of a set
1088 : *
1089 : * Returns smallest member greater than "prevbit", or -2 if there is none.
1090 : * "prevbit" must NOT be less than -1, or the behavior is unpredictable.
1091 : *
1092 : * This is intended as support for iterating through the members of a set.
1093 : * The typical pattern is
1094 : *
1095 : * x = -1;
1096 : * while ((x = bms_next_member(inputset, x)) >= 0)
1097 : * process member x;
1098 : *
1099 : * Notice that when there are no more members, we return -2, not -1 as you
1100 : * might expect. The rationale for that is to allow distinguishing the
1101 : * loop-not-started state (x == -1) from the loop-completed state (x == -2).
1102 : * It makes no difference in simple loop usage, but complex iteration logic
1103 : * might need such an ability.
1104 : */
1105 : int
1106 31377926 : bms_next_member(const Bitmapset *a, int prevbit)
1107 : {
1108 : int nwords;
1109 : int wordnum;
1110 : bitmapword mask;
1111 :
1112 31377926 : if (a == NULL)
1113 13161692 : return -2;
1114 18216234 : nwords = a->nwords;
1115 18216234 : prevbit++;
1116 18216234 : mask = (~(bitmapword) 0) << BITNUM(prevbit);
1117 24250830 : for (wordnum = WORDNUM(prevbit); wordnum < nwords; wordnum++)
1118 : {
1119 18216642 : bitmapword w = a->words[wordnum];
1120 :
1121 : /* ignore bits before prevbit */
1122 18216642 : w &= mask;
1123 :
1124 18216642 : if (w != 0)
1125 : {
1126 : int result;
1127 :
1128 12182046 : result = wordnum * BITS_PER_BITMAPWORD;
1129 12182046 : result += bmw_rightmost_one_pos(w);
1130 12182046 : return result;
1131 : }
1132 :
1133 : /* in subsequent words, consider all bits */
1134 6034596 : mask = (~(bitmapword) 0);
1135 : }
1136 6034188 : return -2;
1137 : }
1138 :
1139 : /*
1140 : * bms_prev_member - find prev member of a set
1141 : *
1142 : * Returns largest member less than "prevbit", or -2 if there is none.
1143 : * "prevbit" must NOT be more than one above the highest possible bit that can
1144 : * be set at the Bitmapset at its current size.
1145 : *
1146 : * To ease finding the highest set bit for the initial loop, the special
1147 : * prevbit value of -1 can be passed to have the function find the highest
1148 : * valued member in the set.
1149 : *
1150 : * This is intended as support for iterating through the members of a set in
1151 : * reverse. The typical pattern is
1152 : *
1153 : * x = -1;
1154 : * while ((x = bms_prev_member(inputset, x)) >= 0)
1155 : * process member x;
1156 : *
1157 : * Notice that when there are no more members, we return -2, not -1 as you
1158 : * might expect. The rationale for that is to allow distinguishing the
1159 : * loop-not-started state (x == -1) from the loop-completed state (x == -2).
1160 : * It makes no difference in simple loop usage, but complex iteration logic
1161 : * might need such an ability.
1162 : */
1163 :
1164 : int
1165 18 : bms_prev_member(const Bitmapset *a, int prevbit)
1166 : {
1167 : int wordnum;
1168 : int ushiftbits;
1169 : bitmapword mask;
1170 :
1171 : /*
1172 : * If set is NULL or if there are no more bits to the right then we've
1173 : * nothing to do.
1174 : */
1175 18 : if (a == NULL || prevbit == 0)
1176 0 : return -2;
1177 :
1178 : /* transform -1 to the highest possible bit we could have set */
1179 18 : if (prevbit == -1)
1180 0 : prevbit = a->nwords * BITS_PER_BITMAPWORD - 1;
1181 : else
1182 18 : prevbit--;
1183 :
1184 18 : ushiftbits = BITS_PER_BITMAPWORD - (BITNUM(prevbit) + 1);
1185 18 : mask = (~(bitmapword) 0) >> ushiftbits;
1186 24 : for (wordnum = WORDNUM(prevbit); wordnum >= 0; wordnum--)
1187 : {
1188 18 : bitmapword w = a->words[wordnum];
1189 :
1190 : /* mask out bits left of prevbit */
1191 18 : w &= mask;
1192 :
1193 18 : if (w != 0)
1194 : {
1195 : int result;
1196 :
1197 12 : result = wordnum * BITS_PER_BITMAPWORD;
1198 12 : result += bmw_leftmost_one_pos(w);
1199 12 : return result;
1200 : }
1201 :
1202 : /* in subsequent words, consider all bits */
1203 6 : mask = (~(bitmapword) 0);
1204 : }
1205 6 : return -2;
1206 : }
1207 :
1208 : /*
1209 : * bms_hash_value - compute a hash key for a Bitmapset
1210 : */
1211 : uint32
1212 5298 : bms_hash_value(const Bitmapset *a)
1213 : {
1214 5298 : if (a == NULL)
1215 0 : return 0; /* All empty sets hash to 0 */
1216 5298 : return DatumGetUInt32(hash_any((const unsigned char *) a->words,
1217 5298 : a->nwords * sizeof(bitmapword)));
1218 : }
1219 :
1220 : /*
1221 : * bitmap_hash - hash function for keys that are (pointers to) Bitmapsets
1222 : *
1223 : * Note: don't forget to specify bitmap_match as the match function!
1224 : */
1225 : uint32
1226 5298 : bitmap_hash(const void *key, Size keysize)
1227 : {
1228 : Assert(keysize == sizeof(Bitmapset *));
1229 5298 : return bms_hash_value(*((const Bitmapset *const *) key));
1230 : }
1231 :
1232 : /*
1233 : * bitmap_match - match function to use with bitmap_hash
1234 : */
1235 : int
1236 3204 : bitmap_match(const void *key1, const void *key2, Size keysize)
1237 : {
1238 : Assert(keysize == sizeof(Bitmapset *));
1239 3204 : return !bms_equal(*((const Bitmapset *const *) key1),
1240 : *((const Bitmapset *const *) key2));
1241 : }
|