Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * rangetypes_gist.c
4 : * GiST support for range types.
5 : *
6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/utils/adt/rangetypes_gist.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/gist.h"
18 : #include "access/stratnum.h"
19 : #include "utils/datum.h"
20 : #include "utils/float.h"
21 : #include "utils/fmgrprotos.h"
22 : #include "utils/multirangetypes.h"
23 : #include "utils/rangetypes.h"
24 :
25 : /*
26 : * Range class properties used to segregate different classes of ranges in
27 : * GiST. Each unique combination of properties is a class. CLS_EMPTY cannot
28 : * be combined with anything else.
29 : */
30 : #define CLS_NORMAL 0 /* Ordinary finite range (no bits set) */
31 : #define CLS_LOWER_INF 1 /* Lower bound is infinity */
32 : #define CLS_UPPER_INF 2 /* Upper bound is infinity */
33 : #define CLS_CONTAIN_EMPTY 4 /* Contains underlying empty ranges */
34 : #define CLS_EMPTY 8 /* Special class for empty ranges */
35 :
36 : #define CLS_COUNT 9 /* # of classes; includes all combinations of
37 : * properties. CLS_EMPTY doesn't combine with
38 : * anything else, so it's only 2^3 + 1. */
39 :
40 : /*
41 : * Minimum accepted ratio of split for items of the same class. If the items
42 : * are of different classes, we will separate along those lines regardless of
43 : * the ratio.
44 : */
45 : #define LIMIT_RATIO 0.3
46 :
47 : /* Constants for fixed penalty values */
48 : #define INFINITE_BOUND_PENALTY 2.0
49 : #define CONTAIN_EMPTY_PENALTY 1.0
50 : #define DEFAULT_SUBTYPE_DIFF_PENALTY 1.0
51 :
52 : /*
53 : * Per-item data for range_gist_single_sorting_split.
54 : */
55 : typedef struct
56 : {
57 : int index;
58 : RangeBound bound;
59 : } SingleBoundSortItem;
60 :
61 : /* place on left or right side of split? */
62 : typedef enum
63 : {
64 : SPLIT_LEFT = 0, /* makes initialization to SPLIT_LEFT easier */
65 : SPLIT_RIGHT,
66 : } SplitLR;
67 :
68 : /*
69 : * Context for range_gist_consider_split.
70 : */
71 : typedef struct
72 : {
73 : TypeCacheEntry *typcache; /* typcache for range type */
74 : bool has_subtype_diff; /* does it have subtype_diff? */
75 : int entries_count; /* total number of entries being split */
76 :
77 : /* Information about currently selected split follows */
78 :
79 : bool first; /* true if no split was selected yet */
80 :
81 : RangeBound *left_upper; /* upper bound of left interval */
82 : RangeBound *right_lower; /* lower bound of right interval */
83 :
84 : float4 ratio; /* split ratio */
85 : float4 overlap; /* overlap between left and right predicate */
86 : int common_left; /* # common entries destined for each side */
87 : int common_right;
88 : } ConsiderSplitContext;
89 :
90 : /*
91 : * Bounds extracted from a non-empty range, for use in
92 : * range_gist_double_sorting_split.
93 : */
94 : typedef struct
95 : {
96 : RangeBound lower;
97 : RangeBound upper;
98 : } NonEmptyRange;
99 :
100 : /*
101 : * Represents information about an entry that can be placed in either group
102 : * without affecting overlap over selected axis ("common entry").
103 : */
104 : typedef struct
105 : {
106 : /* Index of entry in the initial array */
107 : int index;
108 : /* Delta between closeness of range to each of the two groups */
109 : double delta;
110 : } CommonEntry;
111 :
112 : /* Helper macros to place an entry in the left or right group during split */
113 : /* Note direct access to variables v, typcache, left_range, right_range */
114 : #define PLACE_LEFT(range, off) \
115 : do { \
116 : if (v->spl_nleft > 0) \
117 : left_range = range_super_union(typcache, left_range, range); \
118 : else \
119 : left_range = (range); \
120 : v->spl_left[v->spl_nleft++] = (off); \
121 : } while(0)
122 :
123 : #define PLACE_RIGHT(range, off) \
124 : do { \
125 : if (v->spl_nright > 0) \
126 : right_range = range_super_union(typcache, right_range, range); \
127 : else \
128 : right_range = (range); \
129 : v->spl_right[v->spl_nright++] = (off); \
130 : } while(0)
131 :
132 : /* Copy a RangeType datum (hardwires typbyval and typlen for ranges...) */
133 : #define rangeCopy(r) \
134 : ((RangeType *) DatumGetPointer(datumCopy(PointerGetDatum(r), \
135 : false, -1)))
136 :
137 : static RangeType *range_super_union(TypeCacheEntry *typcache, RangeType *r1,
138 : RangeType *r2);
139 : static bool range_gist_consistent_int_range(TypeCacheEntry *typcache,
140 : StrategyNumber strategy,
141 : const RangeType *key,
142 : const RangeType *query);
143 : static bool range_gist_consistent_int_multirange(TypeCacheEntry *typcache,
144 : StrategyNumber strategy,
145 : const RangeType *key,
146 : const MultirangeType *query);
147 : static bool range_gist_consistent_int_element(TypeCacheEntry *typcache,
148 : StrategyNumber strategy,
149 : const RangeType *key,
150 : Datum query);
151 : static bool range_gist_consistent_leaf_range(TypeCacheEntry *typcache,
152 : StrategyNumber strategy,
153 : const RangeType *key,
154 : const RangeType *query);
155 : static bool range_gist_consistent_leaf_multirange(TypeCacheEntry *typcache,
156 : StrategyNumber strategy,
157 : const RangeType *key,
158 : const MultirangeType *query);
159 : static bool range_gist_consistent_leaf_element(TypeCacheEntry *typcache,
160 : StrategyNumber strategy,
161 : const RangeType *key,
162 : Datum query);
163 : static void range_gist_fallback_split(TypeCacheEntry *typcache,
164 : GistEntryVector *entryvec,
165 : GIST_SPLITVEC *v);
166 : static void range_gist_class_split(TypeCacheEntry *typcache,
167 : GistEntryVector *entryvec,
168 : GIST_SPLITVEC *v,
169 : SplitLR *classes_groups);
170 : static void range_gist_single_sorting_split(TypeCacheEntry *typcache,
171 : GistEntryVector *entryvec,
172 : GIST_SPLITVEC *v,
173 : bool use_upper_bound);
174 : static void range_gist_double_sorting_split(TypeCacheEntry *typcache,
175 : GistEntryVector *entryvec,
176 : GIST_SPLITVEC *v);
177 : static void range_gist_consider_split(ConsiderSplitContext *context,
178 : RangeBound *right_lower, int min_left_count,
179 : RangeBound *left_upper, int max_left_count);
180 : static int get_gist_range_class(RangeType *range);
181 : static int single_bound_cmp(const void *a, const void *b, void *arg);
182 : static int interval_cmp_lower(const void *a, const void *b, void *arg);
183 : static int interval_cmp_upper(const void *a, const void *b, void *arg);
184 : static int common_entry_cmp(const void *i1, const void *i2);
185 : static float8 call_subtype_diff(TypeCacheEntry *typcache,
186 : Datum val1, Datum val2);
187 :
188 :
189 : /* GiST query consistency check */
190 : Datum
191 652818 : range_gist_consistent(PG_FUNCTION_ARGS)
192 : {
193 652818 : GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
194 652818 : Datum query = PG_GETARG_DATUM(1);
195 652818 : StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
196 : bool result;
197 652818 : Oid subtype = PG_GETARG_OID(3);
198 652818 : bool *recheck = (bool *) PG_GETARG_POINTER(4);
199 652818 : RangeType *key = DatumGetRangeTypeP(entry->key);
200 : TypeCacheEntry *typcache;
201 :
202 : /* All operators served by this function are exact */
203 652818 : *recheck = false;
204 :
205 652818 : typcache = range_get_typcache(fcinfo, RangeTypeGetOid(key));
206 :
207 : /*
208 : * Perform consistent checking using function corresponding to key type
209 : * (leaf or internal) and query subtype (range, multirange, or element).
210 : * Note that invalid subtype means that query type matches key type
211 : * (range).
212 : */
213 652818 : if (GIST_LEAF(entry))
214 : {
215 644898 : if (!OidIsValid(subtype) || subtype == ANYRANGEOID)
216 330030 : result = range_gist_consistent_leaf_range(typcache, strategy, key,
217 330030 : DatumGetRangeTypeP(query));
218 314868 : else if (subtype == ANYMULTIRANGEOID)
219 306854 : result = range_gist_consistent_leaf_multirange(typcache, strategy, key,
220 306854 : DatumGetMultirangeTypeP(query));
221 : else
222 8014 : result = range_gist_consistent_leaf_element(typcache, strategy,
223 : key, query);
224 : }
225 : else
226 : {
227 7920 : if (!OidIsValid(subtype) || subtype == ANYRANGEOID)
228 3960 : result = range_gist_consistent_int_range(typcache, strategy, key,
229 3960 : DatumGetRangeTypeP(query));
230 3960 : else if (subtype == ANYMULTIRANGEOID)
231 3564 : result = range_gist_consistent_int_multirange(typcache, strategy, key,
232 3564 : DatumGetMultirangeTypeP(query));
233 : else
234 396 : result = range_gist_consistent_int_element(typcache, strategy,
235 : key, query);
236 : }
237 652818 : PG_RETURN_BOOL(result);
238 : }
239 :
240 : /*
241 : * GiST compress method for multiranges: multirange is approximated as union
242 : * range with no gaps.
243 : */
244 : Datum
245 40466 : multirange_gist_compress(PG_FUNCTION_ARGS)
246 : {
247 40466 : GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
248 :
249 40466 : if (entry->leafkey)
250 : {
251 23196 : MultirangeType *mr = DatumGetMultirangeTypeP(entry->key);
252 : RangeType *r;
253 : TypeCacheEntry *typcache;
254 23196 : GISTENTRY *retval = palloc(sizeof(GISTENTRY));
255 :
256 23196 : typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr));
257 23196 : r = multirange_get_union_range(typcache->rngtype, mr);
258 :
259 23196 : gistentryinit(*retval, RangeTypePGetDatum(r),
260 : entry->rel, entry->page, entry->offset, false);
261 :
262 23196 : PG_RETURN_POINTER(retval);
263 : }
264 :
265 17270 : PG_RETURN_POINTER(entry);
266 : }
267 :
268 : /* GiST query consistency check for multiranges */
269 : Datum
270 272462 : multirange_gist_consistent(PG_FUNCTION_ARGS)
271 : {
272 272462 : GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
273 272462 : Datum query = PG_GETARG_DATUM(1);
274 272462 : StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
275 : bool result;
276 272462 : Oid subtype = PG_GETARG_OID(3);
277 272462 : bool *recheck = (bool *) PG_GETARG_POINTER(4);
278 272462 : RangeType *key = DatumGetRangeTypeP(entry->key);
279 : TypeCacheEntry *typcache;
280 :
281 : /*
282 : * All operators served by this function are inexact because multirange is
283 : * approximated by union range with no gaps.
284 : */
285 272462 : *recheck = true;
286 :
287 272462 : typcache = range_get_typcache(fcinfo, RangeTypeGetOid(key));
288 :
289 : /*
290 : * Perform consistent checking using function corresponding to key type
291 : * (leaf or internal) and query subtype (range, multirange, or element).
292 : * Note that invalid subtype means that query type matches key type
293 : * (multirange).
294 : */
295 272462 : if (GIST_LEAF(entry))
296 : {
297 266230 : if (!OidIsValid(subtype) || subtype == ANYMULTIRANGEOID)
298 147410 : result = range_gist_consistent_leaf_multirange(typcache, strategy, key,
299 147410 : DatumGetMultirangeTypeP(query));
300 118820 : else if (subtype == ANYRANGEOID)
301 116396 : result = range_gist_consistent_leaf_range(typcache, strategy, key,
302 116396 : DatumGetRangeTypeP(query));
303 : else
304 2424 : result = range_gist_consistent_leaf_element(typcache, strategy,
305 : key, query);
306 : }
307 : else
308 : {
309 6232 : if (!OidIsValid(subtype) || subtype == ANYMULTIRANGEOID)
310 3280 : result = range_gist_consistent_int_multirange(typcache, strategy, key,
311 3280 : DatumGetMultirangeTypeP(query));
312 2952 : else if (subtype == ANYRANGEOID)
313 2788 : result = range_gist_consistent_int_range(typcache, strategy, key,
314 2788 : DatumGetRangeTypeP(query));
315 : else
316 164 : result = range_gist_consistent_int_element(typcache, strategy,
317 : key, query);
318 : }
319 272462 : PG_RETURN_BOOL(result);
320 : }
321 :
322 : /* form union range */
323 : Datum
324 91884 : range_gist_union(PG_FUNCTION_ARGS)
325 : {
326 91884 : GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
327 91884 : GISTENTRY *ent = entryvec->vector;
328 : RangeType *result_range;
329 : TypeCacheEntry *typcache;
330 : int i;
331 :
332 91884 : result_range = DatumGetRangeTypeP(ent[0].key);
333 :
334 91884 : typcache = range_get_typcache(fcinfo, RangeTypeGetOid(result_range));
335 :
336 208428 : for (i = 1; i < entryvec->n; i++)
337 : {
338 116544 : result_range = range_super_union(typcache, result_range,
339 116544 : DatumGetRangeTypeP(ent[i].key));
340 : }
341 :
342 91884 : PG_RETURN_RANGE_P(result_range);
343 : }
344 :
345 : /*
346 : * We store ranges as ranges in GiST indexes, so we do not need
347 : * compress, decompress, or fetch functions. Note this implies a limit
348 : * on the size of range values that can be indexed.
349 : */
350 :
351 : /*
352 : * GiST page split penalty function.
353 : *
354 : * The penalty function has the following goals (in order from most to least
355 : * important):
356 : * - Keep normal ranges separate
357 : * - Avoid broadening the class of the original predicate
358 : * - Avoid broadening (as determined by subtype_diff) the original predicate
359 : * - Favor adding ranges to narrower original predicates
360 : */
361 : Datum
362 1250864 : range_gist_penalty(PG_FUNCTION_ARGS)
363 : {
364 1250864 : GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0);
365 1250864 : GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
366 1250864 : float *penalty = (float *) PG_GETARG_POINTER(2);
367 1250864 : RangeType *orig = DatumGetRangeTypeP(origentry->key);
368 1250864 : RangeType *new = DatumGetRangeTypeP(newentry->key);
369 : TypeCacheEntry *typcache;
370 : bool has_subtype_diff;
371 : RangeBound orig_lower,
372 : new_lower,
373 : orig_upper,
374 : new_upper;
375 : bool orig_empty,
376 : new_empty;
377 :
378 1250864 : if (RangeTypeGetOid(orig) != RangeTypeGetOid(new))
379 0 : elog(ERROR, "range types do not match");
380 :
381 1250864 : typcache = range_get_typcache(fcinfo, RangeTypeGetOid(orig));
382 :
383 1250864 : has_subtype_diff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
384 :
385 1250864 : range_deserialize(typcache, orig, &orig_lower, &orig_upper, &orig_empty);
386 1250864 : range_deserialize(typcache, new, &new_lower, &new_upper, &new_empty);
387 :
388 : /*
389 : * Distinct branches for handling distinct classes of ranges. Note that
390 : * penalty values only need to be commensurate within the same class of
391 : * new range.
392 : */
393 1250864 : if (new_empty)
394 : {
395 : /* Handle insertion of empty range */
396 255904 : if (orig_empty)
397 : {
398 : /*
399 : * The best case is to insert it to empty original range.
400 : * Insertion here means no broadening of original range. Also
401 : * original range is the most narrow.
402 : */
403 16638 : *penalty = 0.0;
404 : }
405 239266 : else if (RangeIsOrContainsEmpty(orig))
406 : {
407 : /*
408 : * The second case is to insert empty range into range which
409 : * contains at least one underlying empty range. There is still
410 : * no broadening of original range, but original range is not as
411 : * narrow as possible.
412 : */
413 3414 : *penalty = CONTAIN_EMPTY_PENALTY;
414 : }
415 235852 : else if (orig_lower.infinite && orig_upper.infinite)
416 : {
417 : /*
418 : * Original range requires broadening. (-inf; +inf) is most far
419 : * from normal range in this case.
420 : */
421 0 : *penalty = 2 * CONTAIN_EMPTY_PENALTY;
422 : }
423 235852 : else if (orig_lower.infinite || orig_upper.infinite)
424 : {
425 : /*
426 : * (-inf, x) or (x, +inf) original ranges are closer to normal
427 : * ranges, so it's worse to mix it with empty ranges.
428 : */
429 0 : *penalty = 3 * CONTAIN_EMPTY_PENALTY;
430 : }
431 : else
432 : {
433 : /*
434 : * The least preferred case is broadening of normal range.
435 : */
436 235852 : *penalty = 4 * CONTAIN_EMPTY_PENALTY;
437 : }
438 : }
439 994960 : else if (new_lower.infinite && new_upper.infinite)
440 : {
441 : /* Handle insertion of (-inf, +inf) range */
442 0 : if (orig_lower.infinite && orig_upper.infinite)
443 : {
444 : /*
445 : * Best case is inserting to (-inf, +inf) original range.
446 : */
447 0 : *penalty = 0.0;
448 : }
449 0 : else if (orig_lower.infinite || orig_upper.infinite)
450 : {
451 : /*
452 : * When original range is (-inf, x) or (x, +inf) it requires
453 : * broadening of original range (extension of one bound to
454 : * infinity).
455 : */
456 0 : *penalty = INFINITE_BOUND_PENALTY;
457 : }
458 : else
459 : {
460 : /*
461 : * Insertion to normal original range is least preferred.
462 : */
463 0 : *penalty = 2 * INFINITE_BOUND_PENALTY;
464 : }
465 :
466 0 : if (RangeIsOrContainsEmpty(orig))
467 : {
468 : /*
469 : * Original range is narrower when it doesn't contain empty
470 : * ranges. Add additional penalty otherwise.
471 : */
472 0 : *penalty += CONTAIN_EMPTY_PENALTY;
473 : }
474 : }
475 994960 : else if (new_lower.infinite)
476 : {
477 : /* Handle insertion of (-inf, x) range */
478 42168 : if (!orig_empty && orig_lower.infinite)
479 : {
480 1782 : if (orig_upper.infinite)
481 : {
482 : /*
483 : * (-inf, +inf) range won't be extended by insertion of (-inf,
484 : * x) range. It's a less desirable case than insertion to
485 : * (-inf, y) original range without extension, because in that
486 : * case original range is narrower. But we can't express that
487 : * in single float value.
488 : */
489 0 : *penalty = 0.0;
490 : }
491 : else
492 : {
493 1782 : if (range_cmp_bounds(typcache, &new_upper, &orig_upper) > 0)
494 : {
495 : /*
496 : * Get extension of original range using subtype_diff. Use
497 : * constant if subtype_diff unavailable.
498 : */
499 1310 : if (has_subtype_diff)
500 1310 : *penalty = call_subtype_diff(typcache,
501 : new_upper.val,
502 : orig_upper.val);
503 : else
504 0 : *penalty = DEFAULT_SUBTYPE_DIFF_PENALTY;
505 : }
506 : else
507 : {
508 : /* No extension of original range */
509 472 : *penalty = 0.0;
510 : }
511 : }
512 : }
513 : else
514 : {
515 : /*
516 : * If lower bound of original range is not -inf, then extension of
517 : * it is infinity.
518 : */
519 40386 : *penalty = get_float4_infinity();
520 : }
521 : }
522 952792 : else if (new_upper.infinite)
523 : {
524 : /* Handle insertion of (x, +inf) range */
525 33528 : if (!orig_empty && orig_upper.infinite)
526 : {
527 1782 : if (orig_lower.infinite)
528 : {
529 : /*
530 : * (-inf, +inf) range won't be extended by insertion of (x,
531 : * +inf) range. It's a less desirable case than insertion to
532 : * (y, +inf) original range without extension, because in that
533 : * case original range is narrower. But we can't express that
534 : * in single float value.
535 : */
536 792 : *penalty = 0.0;
537 : }
538 : else
539 : {
540 990 : if (range_cmp_bounds(typcache, &new_lower, &orig_lower) < 0)
541 : {
542 : /*
543 : * Get extension of original range using subtype_diff. Use
544 : * constant if subtype_diff unavailable.
545 : */
546 0 : if (has_subtype_diff)
547 0 : *penalty = call_subtype_diff(typcache,
548 : orig_lower.val,
549 : new_lower.val);
550 : else
551 0 : *penalty = DEFAULT_SUBTYPE_DIFF_PENALTY;
552 : }
553 : else
554 : {
555 : /* No extension of original range */
556 990 : *penalty = 0.0;
557 : }
558 : }
559 : }
560 : else
561 : {
562 : /*
563 : * If upper bound of original range is not +inf, then extension of
564 : * it is infinity.
565 : */
566 31746 : *penalty = get_float4_infinity();
567 : }
568 : }
569 : else
570 : {
571 : /* Handle insertion of normal (non-empty, non-infinite) range */
572 919264 : if (orig_empty || orig_lower.infinite || orig_upper.infinite)
573 : {
574 : /*
575 : * Avoid mixing normal ranges with infinite and empty ranges.
576 : */
577 68438 : *penalty = get_float4_infinity();
578 : }
579 : else
580 : {
581 : /*
582 : * Calculate extension of original range by calling subtype_diff.
583 : * Use constant if subtype_diff unavailable.
584 : */
585 850826 : float8 diff = 0.0;
586 :
587 850826 : if (range_cmp_bounds(typcache, &new_lower, &orig_lower) < 0)
588 : {
589 280380 : if (has_subtype_diff)
590 280380 : diff += call_subtype_diff(typcache,
591 : orig_lower.val,
592 : new_lower.val);
593 : else
594 0 : diff += DEFAULT_SUBTYPE_DIFF_PENALTY;
595 : }
596 850826 : if (range_cmp_bounds(typcache, &new_upper, &orig_upper) > 0)
597 : {
598 705062 : if (has_subtype_diff)
599 705062 : diff += call_subtype_diff(typcache,
600 : new_upper.val,
601 : orig_upper.val);
602 : else
603 0 : diff += DEFAULT_SUBTYPE_DIFF_PENALTY;
604 : }
605 850826 : *penalty = diff;
606 : }
607 : }
608 :
609 1250864 : PG_RETURN_POINTER(penalty);
610 : }
611 :
612 : /*
613 : * The GiST PickSplit method for ranges
614 : *
615 : * Primarily, we try to segregate ranges of different classes. If splitting
616 : * ranges of the same class, use the appropriate split method for that class.
617 : */
618 : Datum
619 542 : range_gist_picksplit(PG_FUNCTION_ARGS)
620 : {
621 542 : GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
622 542 : GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
623 : TypeCacheEntry *typcache;
624 : OffsetNumber i;
625 : RangeType *pred_left;
626 : int nbytes;
627 : OffsetNumber maxoff;
628 : int count_in_classes[CLS_COUNT];
629 : int j;
630 542 : int non_empty_classes_count = 0;
631 542 : int biggest_class = -1;
632 542 : int biggest_class_count = 0;
633 : int total_count;
634 :
635 : /* use first item to look up range type's info */
636 542 : pred_left = DatumGetRangeTypeP(entryvec->vector[FirstOffsetNumber].key);
637 542 : typcache = range_get_typcache(fcinfo, RangeTypeGetOid(pred_left));
638 :
639 542 : maxoff = entryvec->n - 1;
640 542 : nbytes = (maxoff + 1) * sizeof(OffsetNumber);
641 542 : v->spl_left = (OffsetNumber *) palloc(nbytes);
642 542 : v->spl_right = (OffsetNumber *) palloc(nbytes);
643 :
644 : /*
645 : * Get count distribution of range classes.
646 : */
647 542 : memset(count_in_classes, 0, sizeof(count_in_classes));
648 153598 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
649 : {
650 153056 : RangeType *range = DatumGetRangeTypeP(entryvec->vector[i].key);
651 :
652 153056 : count_in_classes[get_gist_range_class(range)]++;
653 : }
654 :
655 : /*
656 : * Count non-empty classes and find biggest class.
657 : */
658 542 : total_count = maxoff;
659 5420 : for (j = 0; j < CLS_COUNT; j++)
660 : {
661 4878 : if (count_in_classes[j] > 0)
662 : {
663 576 : if (count_in_classes[j] > biggest_class_count)
664 : {
665 560 : biggest_class_count = count_in_classes[j];
666 560 : biggest_class = j;
667 : }
668 576 : non_empty_classes_count++;
669 : }
670 : }
671 :
672 : Assert(non_empty_classes_count > 0);
673 :
674 542 : if (non_empty_classes_count == 1)
675 : {
676 : /* One non-empty class, so split inside class */
677 516 : if ((biggest_class & ~CLS_CONTAIN_EMPTY) == CLS_NORMAL)
678 : {
679 : /* double sorting split for normal ranges */
680 474 : range_gist_double_sorting_split(typcache, entryvec, v);
681 : }
682 42 : else if ((biggest_class & ~CLS_CONTAIN_EMPTY) == CLS_LOWER_INF)
683 : {
684 : /* upper bound sorting split for (-inf, x) ranges */
685 0 : range_gist_single_sorting_split(typcache, entryvec, v, true);
686 : }
687 42 : else if ((biggest_class & ~CLS_CONTAIN_EMPTY) == CLS_UPPER_INF)
688 : {
689 : /* lower bound sorting split for (x, +inf) ranges */
690 0 : range_gist_single_sorting_split(typcache, entryvec, v, false);
691 : }
692 : else
693 : {
694 : /* trivial split for all (-inf, +inf) or all empty ranges */
695 42 : range_gist_fallback_split(typcache, entryvec, v);
696 : }
697 : }
698 : else
699 : {
700 : /*
701 : * Class based split.
702 : *
703 : * To which side of the split should each class go? Initialize them
704 : * all to go to the left side.
705 : */
706 : SplitLR classes_groups[CLS_COUNT];
707 :
708 26 : memset(classes_groups, 0, sizeof(classes_groups));
709 :
710 26 : if (count_in_classes[CLS_NORMAL] > 0)
711 : {
712 : /* separate normal ranges if any */
713 26 : classes_groups[CLS_NORMAL] = SPLIT_RIGHT;
714 : }
715 : else
716 : {
717 : /*----------
718 : * Try to split classes in one of two ways:
719 : * 1) containing infinities - not containing infinities
720 : * 2) containing empty - not containing empty
721 : *
722 : * Select the way which balances the ranges between left and right
723 : * the best. If split in these ways is not possible, there are at
724 : * most 3 classes, so just separate biggest class.
725 : *----------
726 : */
727 : int infCount,
728 : nonInfCount;
729 : int emptyCount,
730 : nonEmptyCount;
731 :
732 0 : nonInfCount =
733 0 : count_in_classes[CLS_NORMAL] +
734 0 : count_in_classes[CLS_CONTAIN_EMPTY] +
735 0 : count_in_classes[CLS_EMPTY];
736 0 : infCount = total_count - nonInfCount;
737 :
738 0 : nonEmptyCount =
739 0 : count_in_classes[CLS_NORMAL] +
740 0 : count_in_classes[CLS_LOWER_INF] +
741 0 : count_in_classes[CLS_UPPER_INF] +
742 0 : count_in_classes[CLS_LOWER_INF | CLS_UPPER_INF];
743 0 : emptyCount = total_count - nonEmptyCount;
744 :
745 0 : if (infCount > 0 && nonInfCount > 0 &&
746 0 : (abs(infCount - nonInfCount) <=
747 0 : abs(emptyCount - nonEmptyCount)))
748 : {
749 0 : classes_groups[CLS_NORMAL] = SPLIT_RIGHT;
750 0 : classes_groups[CLS_CONTAIN_EMPTY] = SPLIT_RIGHT;
751 0 : classes_groups[CLS_EMPTY] = SPLIT_RIGHT;
752 : }
753 0 : else if (emptyCount > 0 && nonEmptyCount > 0)
754 : {
755 0 : classes_groups[CLS_NORMAL] = SPLIT_RIGHT;
756 0 : classes_groups[CLS_LOWER_INF] = SPLIT_RIGHT;
757 0 : classes_groups[CLS_UPPER_INF] = SPLIT_RIGHT;
758 0 : classes_groups[CLS_LOWER_INF | CLS_UPPER_INF] = SPLIT_RIGHT;
759 : }
760 : else
761 : {
762 : /*
763 : * Either total_count == emptyCount or total_count ==
764 : * infCount.
765 : */
766 0 : classes_groups[biggest_class] = SPLIT_RIGHT;
767 : }
768 : }
769 :
770 26 : range_gist_class_split(typcache, entryvec, v, classes_groups);
771 : }
772 :
773 542 : PG_RETURN_POINTER(v);
774 : }
775 :
776 : /* equality comparator for GiST */
777 : Datum
778 91704 : range_gist_same(PG_FUNCTION_ARGS)
779 : {
780 91704 : RangeType *r1 = PG_GETARG_RANGE_P(0);
781 91704 : RangeType *r2 = PG_GETARG_RANGE_P(1);
782 91704 : bool *result = (bool *) PG_GETARG_POINTER(2);
783 :
784 : /*
785 : * range_eq will ignore the RANGE_CONTAIN_EMPTY flag, so we have to check
786 : * that for ourselves. More generally, if the entries have been properly
787 : * normalized, then unequal flags bytes must mean unequal ranges ... so
788 : * let's just test all the flag bits at once.
789 : */
790 91704 : if (range_get_flags(r1) != range_get_flags(r2))
791 54 : *result = false;
792 : else
793 : {
794 : TypeCacheEntry *typcache;
795 :
796 91650 : typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1));
797 :
798 91650 : *result = range_eq_internal(typcache, r1, r2);
799 : }
800 :
801 91704 : PG_RETURN_POINTER(result);
802 : }
803 :
804 : /*
805 : *----------------------------------------------------------
806 : * STATIC FUNCTIONS
807 : *----------------------------------------------------------
808 : */
809 :
810 : /*
811 : * Return the smallest range that contains r1 and r2
812 : *
813 : * This differs from regular range_union in two critical ways:
814 : * 1. It won't throw an error for non-adjacent r1 and r2, but just absorb
815 : * the intervening values into the result range.
816 : * 2. We track whether any empty range has been union'd into the result,
817 : * so that contained_by searches can be indexed. Note that this means
818 : * that *all* unions formed within the GiST index must go through here.
819 : */
820 : static RangeType *
821 268606 : range_super_union(TypeCacheEntry *typcache, RangeType *r1, RangeType *r2)
822 : {
823 : RangeType *result;
824 : RangeBound lower1,
825 : lower2;
826 : RangeBound upper1,
827 : upper2;
828 : bool empty1,
829 : empty2;
830 : char flags1,
831 : flags2;
832 : RangeBound *result_lower;
833 : RangeBound *result_upper;
834 :
835 268606 : range_deserialize(typcache, r1, &lower1, &upper1, &empty1);
836 268606 : range_deserialize(typcache, r2, &lower2, &upper2, &empty2);
837 268606 : flags1 = range_get_flags(r1);
838 268606 : flags2 = range_get_flags(r2);
839 :
840 268606 : if (empty1)
841 : {
842 : /* We can return r2 as-is if it already is or contains empty */
843 31050 : if (flags2 & (RANGE_EMPTY | RANGE_CONTAIN_EMPTY))
844 31050 : return r2;
845 : /* Else we'd better copy it (modify-in-place isn't safe) */
846 0 : r2 = rangeCopy(r2);
847 0 : range_set_contain_empty(r2);
848 0 : return r2;
849 : }
850 237556 : if (empty2)
851 : {
852 : /* We can return r1 as-is if it already is or contains empty */
853 3432 : if (flags1 & (RANGE_EMPTY | RANGE_CONTAIN_EMPTY))
854 3414 : return r1;
855 : /* Else we'd better copy it (modify-in-place isn't safe) */
856 18 : r1 = rangeCopy(r1);
857 18 : range_set_contain_empty(r1);
858 18 : return r1;
859 : }
860 :
861 234124 : if (range_cmp_bounds(typcache, &lower1, &lower2) <= 0)
862 234018 : result_lower = &lower1;
863 : else
864 106 : result_lower = &lower2;
865 :
866 234124 : if (range_cmp_bounds(typcache, &upper1, &upper2) >= 0)
867 44008 : result_upper = &upper1;
868 : else
869 190116 : result_upper = &upper2;
870 :
871 : /* optimization to avoid constructing a new range */
872 234124 : if (result_lower == &lower1 && result_upper == &upper1 &&
873 43990 : ((flags1 & RANGE_CONTAIN_EMPTY) || !(flags2 & RANGE_CONTAIN_EMPTY)))
874 43990 : return r1;
875 190134 : if (result_lower == &lower2 && result_upper == &upper2 &&
876 88 : ((flags2 & RANGE_CONTAIN_EMPTY) || !(flags1 & RANGE_CONTAIN_EMPTY)))
877 88 : return r2;
878 :
879 190046 : result = make_range(typcache, result_lower, result_upper, false, NULL);
880 :
881 190046 : if ((flags1 & RANGE_CONTAIN_EMPTY) || (flags2 & RANGE_CONTAIN_EMPTY))
882 0 : range_set_contain_empty(result);
883 :
884 190046 : return result;
885 : }
886 :
887 : static bool
888 5868 : multirange_union_range_equal(TypeCacheEntry *typcache,
889 : const RangeType *r,
890 : const MultirangeType *mr)
891 : {
892 : RangeBound lower1,
893 : upper1,
894 : lower2,
895 : upper2,
896 : tmp;
897 : bool empty;
898 :
899 5868 : if (RangeIsEmpty(r) || MultirangeIsEmpty(mr))
900 3000 : return (RangeIsEmpty(r) && MultirangeIsEmpty(mr));
901 :
902 2868 : range_deserialize(typcache, r, &lower1, &upper1, &empty);
903 : Assert(!empty);
904 2868 : multirange_get_bounds(typcache, mr, 0, &lower2, &tmp);
905 2868 : multirange_get_bounds(typcache, mr, mr->rangeCount - 1, &tmp, &upper2);
906 :
907 3048 : return (range_cmp_bounds(typcache, &lower1, &lower2) == 0 &&
908 180 : range_cmp_bounds(typcache, &upper1, &upper2) == 0);
909 : }
910 :
911 : /*
912 : * GiST consistent test on an index internal page with range query
913 : */
914 : static bool
915 6748 : range_gist_consistent_int_range(TypeCacheEntry *typcache,
916 : StrategyNumber strategy,
917 : const RangeType *key,
918 : const RangeType *query)
919 : {
920 6748 : switch (strategy)
921 : {
922 724 : case RANGESTRAT_BEFORE:
923 724 : if (RangeIsEmpty(key) || RangeIsEmpty(query))
924 224 : return false;
925 500 : return (!range_overright_internal(typcache, key, query));
926 724 : case RANGESTRAT_OVERLEFT:
927 724 : if (RangeIsEmpty(key) || RangeIsEmpty(query))
928 224 : return false;
929 500 : return (!range_after_internal(typcache, key, query));
930 724 : case RANGESTRAT_OVERLAPS:
931 724 : return range_overlaps_internal(typcache, key, query);
932 724 : case RANGESTRAT_OVERRIGHT:
933 724 : if (RangeIsEmpty(key) || RangeIsEmpty(query))
934 224 : return false;
935 500 : return (!range_before_internal(typcache, key, query));
936 724 : case RANGESTRAT_AFTER:
937 724 : if (RangeIsEmpty(key) || RangeIsEmpty(query))
938 224 : return false;
939 500 : return (!range_overleft_internal(typcache, key, query));
940 724 : case RANGESTRAT_ADJACENT:
941 724 : if (RangeIsEmpty(key) || RangeIsEmpty(query))
942 224 : return false;
943 500 : if (range_adjacent_internal(typcache, key, query))
944 2 : return true;
945 498 : return range_overlaps_internal(typcache, key, query);
946 1284 : case RANGESTRAT_CONTAINS:
947 1284 : return range_contains_internal(typcache, key, query);
948 724 : case RANGESTRAT_CONTAINED_BY:
949 :
950 : /*
951 : * Empty ranges are contained by anything, so if key is or
952 : * contains any empty ranges, we must descend into it. Otherwise,
953 : * descend only if key overlaps the query.
954 : */
955 724 : if (RangeIsOrContainsEmpty(key))
956 72 : return true;
957 652 : return range_overlaps_internal(typcache, key, query);
958 396 : case RANGESTRAT_EQ:
959 :
960 : /*
961 : * If query is empty, descend only if the key is or contains any
962 : * empty ranges. Otherwise, descend if key contains query.
963 : */
964 396 : if (RangeIsEmpty(query))
965 0 : return RangeIsOrContainsEmpty(key);
966 396 : return range_contains_internal(typcache, key, query);
967 0 : default:
968 0 : elog(ERROR, "unrecognized range strategy: %d", strategy);
969 : return false; /* keep compiler quiet */
970 : }
971 : }
972 :
973 : /*
974 : * GiST consistent test on an index internal page with multirange query
975 : */
976 : static bool
977 6844 : range_gist_consistent_int_multirange(TypeCacheEntry *typcache,
978 : StrategyNumber strategy,
979 : const RangeType *key,
980 : const MultirangeType *query)
981 : {
982 6844 : switch (strategy)
983 : {
984 724 : case RANGESTRAT_BEFORE:
985 724 : if (RangeIsEmpty(key) || MultirangeIsEmpty(query))
986 224 : return false;
987 500 : return (!range_overright_multirange_internal(typcache, key, query));
988 724 : case RANGESTRAT_OVERLEFT:
989 724 : if (RangeIsEmpty(key) || MultirangeIsEmpty(query))
990 224 : return false;
991 500 : return (!range_after_multirange_internal(typcache, key, query));
992 724 : case RANGESTRAT_OVERLAPS:
993 724 : return range_overlaps_multirange_internal(typcache, key, query);
994 724 : case RANGESTRAT_OVERRIGHT:
995 724 : if (RangeIsEmpty(key) || MultirangeIsEmpty(query))
996 224 : return false;
997 500 : return (!range_before_multirange_internal(typcache, key, query));
998 724 : case RANGESTRAT_AFTER:
999 724 : if (RangeIsEmpty(key) || MultirangeIsEmpty(query))
1000 224 : return false;
1001 500 : return (!range_overleft_multirange_internal(typcache, key, query));
1002 724 : case RANGESTRAT_ADJACENT:
1003 724 : if (RangeIsEmpty(key) || MultirangeIsEmpty(query))
1004 224 : return false;
1005 500 : if (range_adjacent_multirange_internal(typcache, key, query))
1006 2 : return true;
1007 498 : return range_overlaps_multirange_internal(typcache, key, query);
1008 1448 : case RANGESTRAT_CONTAINS:
1009 1448 : return range_contains_multirange_internal(typcache, key, query);
1010 724 : case RANGESTRAT_CONTAINED_BY:
1011 :
1012 : /*
1013 : * Empty ranges are contained by anything, so if key is or
1014 : * contains any empty ranges, we must descend into it. Otherwise,
1015 : * descend only if key overlaps the query.
1016 : */
1017 724 : if (RangeIsOrContainsEmpty(key))
1018 72 : return true;
1019 652 : return range_overlaps_multirange_internal(typcache, key, query);
1020 328 : case RANGESTRAT_EQ:
1021 :
1022 : /*
1023 : * If query is empty, descend only if the key is or contains any
1024 : * empty ranges. Otherwise, descend if key contains query.
1025 : */
1026 328 : if (MultirangeIsEmpty(query))
1027 164 : return RangeIsOrContainsEmpty(key);
1028 164 : return range_contains_multirange_internal(typcache, key, query);
1029 0 : default:
1030 0 : elog(ERROR, "unrecognized range strategy: %d", strategy);
1031 : return false; /* keep compiler quiet */
1032 : }
1033 : }
1034 :
1035 : /*
1036 : * GiST consistent test on an index internal page with element query
1037 : */
1038 : static bool
1039 560 : range_gist_consistent_int_element(TypeCacheEntry *typcache,
1040 : StrategyNumber strategy,
1041 : const RangeType *key,
1042 : Datum query)
1043 : {
1044 560 : switch (strategy)
1045 : {
1046 560 : case RANGESTRAT_CONTAINS_ELEM:
1047 560 : return range_contains_elem_internal(typcache, key, query);
1048 0 : default:
1049 0 : elog(ERROR, "unrecognized range strategy: %d", strategy);
1050 : return false; /* keep compiler quiet */
1051 : }
1052 : }
1053 :
1054 : /*
1055 : * GiST consistent test on an index leaf page with range query
1056 : */
1057 : static bool
1058 446426 : range_gist_consistent_leaf_range(TypeCacheEntry *typcache,
1059 : StrategyNumber strategy,
1060 : const RangeType *key,
1061 : const RangeType *query)
1062 : {
1063 446426 : switch (strategy)
1064 : {
1065 17092 : case RANGESTRAT_BEFORE:
1066 17092 : return range_before_internal(typcache, key, query);
1067 36748 : case RANGESTRAT_OVERLEFT:
1068 36748 : return range_overleft_internal(typcache, key, query);
1069 16376 : case RANGESTRAT_OVERLAPS:
1070 16376 : return range_overlaps_internal(typcache, key, query);
1071 81600 : case RANGESTRAT_OVERRIGHT:
1072 81600 : return range_overright_internal(typcache, key, query);
1073 72894 : case RANGESTRAT_AFTER:
1074 72894 : return range_after_internal(typcache, key, query);
1075 37040 : case RANGESTRAT_ADJACENT:
1076 37040 : return range_adjacent_internal(typcache, key, query);
1077 129238 : case RANGESTRAT_CONTAINS:
1078 129238 : return range_contains_internal(typcache, key, query);
1079 32926 : case RANGESTRAT_CONTAINED_BY:
1080 32926 : return range_contained_by_internal(typcache, key, query);
1081 22512 : case RANGESTRAT_EQ:
1082 22512 : return range_eq_internal(typcache, key, query);
1083 0 : default:
1084 0 : elog(ERROR, "unrecognized range strategy: %d", strategy);
1085 : return false; /* keep compiler quiet */
1086 : }
1087 : }
1088 :
1089 : /*
1090 : * GiST consistent test on an index leaf page with multirange query
1091 : */
1092 : static bool
1093 454264 : range_gist_consistent_leaf_multirange(TypeCacheEntry *typcache,
1094 : StrategyNumber strategy,
1095 : const RangeType *key,
1096 : const MultirangeType *query)
1097 : {
1098 454264 : switch (strategy)
1099 : {
1100 17092 : case RANGESTRAT_BEFORE:
1101 17092 : return range_before_multirange_internal(typcache, key, query);
1102 36748 : case RANGESTRAT_OVERLEFT:
1103 36748 : return range_overleft_multirange_internal(typcache, key, query);
1104 17872 : case RANGESTRAT_OVERLAPS:
1105 17872 : return range_overlaps_multirange_internal(typcache, key, query);
1106 81600 : case RANGESTRAT_OVERRIGHT:
1107 81600 : return range_overright_multirange_internal(typcache, key, query);
1108 72894 : case RANGESTRAT_AFTER:
1109 72894 : return range_after_multirange_internal(typcache, key, query);
1110 37040 : case RANGESTRAT_ADJACENT:
1111 37040 : return range_adjacent_multirange_internal(typcache, key, query);
1112 151438 : case RANGESTRAT_CONTAINS:
1113 151438 : return range_contains_multirange_internal(typcache, key, query);
1114 33712 : case RANGESTRAT_CONTAINED_BY:
1115 33712 : return multirange_contains_range_internal(typcache, query, key);
1116 5868 : case RANGESTRAT_EQ:
1117 5868 : return multirange_union_range_equal(typcache, key, query);
1118 0 : default:
1119 0 : elog(ERROR, "unrecognized range strategy: %d", strategy);
1120 : return false; /* keep compiler quiet */
1121 : }
1122 : }
1123 :
1124 : /*
1125 : * GiST consistent test on an index leaf page with element query
1126 : */
1127 : static bool
1128 10438 : range_gist_consistent_leaf_element(TypeCacheEntry *typcache,
1129 : StrategyNumber strategy,
1130 : const RangeType *key,
1131 : Datum query)
1132 : {
1133 10438 : switch (strategy)
1134 : {
1135 10438 : case RANGESTRAT_CONTAINS_ELEM:
1136 10438 : return range_contains_elem_internal(typcache, key, query);
1137 0 : default:
1138 0 : elog(ERROR, "unrecognized range strategy: %d", strategy);
1139 : return false; /* keep compiler quiet */
1140 : }
1141 : }
1142 :
1143 : /*
1144 : * Trivial split: half of entries will be placed on one page
1145 : * and the other half on the other page.
1146 : */
1147 : static void
1148 42 : range_gist_fallback_split(TypeCacheEntry *typcache,
1149 : GistEntryVector *entryvec,
1150 : GIST_SPLITVEC *v)
1151 : {
1152 42 : RangeType *left_range = NULL;
1153 42 : RangeType *right_range = NULL;
1154 : OffsetNumber i,
1155 : maxoff,
1156 : split_idx;
1157 :
1158 42 : maxoff = entryvec->n - 1;
1159 : /* Split entries before this to left page, after to right: */
1160 42 : split_idx = (maxoff - FirstOffsetNumber) / 2 + FirstOffsetNumber;
1161 :
1162 42 : v->spl_nleft = 0;
1163 42 : v->spl_nright = 0;
1164 16194 : for (i = FirstOffsetNumber; i <= maxoff; i++)
1165 : {
1166 16152 : RangeType *range = DatumGetRangeTypeP(entryvec->vector[i].key);
1167 :
1168 16152 : if (i < split_idx)
1169 8046 : PLACE_LEFT(range, i);
1170 : else
1171 8106 : PLACE_RIGHT(range, i);
1172 : }
1173 :
1174 42 : v->spl_ldatum = RangeTypePGetDatum(left_range);
1175 42 : v->spl_rdatum = RangeTypePGetDatum(right_range);
1176 42 : }
1177 :
1178 : /*
1179 : * Split based on classes of ranges.
1180 : *
1181 : * See get_gist_range_class for class definitions.
1182 : * classes_groups is an array of length CLS_COUNT indicating the side of the
1183 : * split to which each class should go.
1184 : */
1185 : static void
1186 26 : range_gist_class_split(TypeCacheEntry *typcache,
1187 : GistEntryVector *entryvec,
1188 : GIST_SPLITVEC *v,
1189 : SplitLR *classes_groups)
1190 : {
1191 26 : RangeType *left_range = NULL;
1192 26 : RangeType *right_range = NULL;
1193 : OffsetNumber i,
1194 : maxoff;
1195 :
1196 26 : maxoff = entryvec->n - 1;
1197 :
1198 26 : v->spl_nleft = 0;
1199 26 : v->spl_nright = 0;
1200 8062 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1201 : {
1202 8036 : RangeType *range = DatumGetRangeTypeP(entryvec->vector[i].key);
1203 : int class;
1204 :
1205 : /* Get class of range */
1206 8036 : class = get_gist_range_class(range);
1207 :
1208 : /* Place range to appropriate page */
1209 8036 : if (classes_groups[class] == SPLIT_LEFT)
1210 4510 : PLACE_LEFT(range, i);
1211 : else
1212 : {
1213 : Assert(classes_groups[class] == SPLIT_RIGHT);
1214 3526 : PLACE_RIGHT(range, i);
1215 : }
1216 : }
1217 :
1218 26 : v->spl_ldatum = RangeTypePGetDatum(left_range);
1219 26 : v->spl_rdatum = RangeTypePGetDatum(right_range);
1220 26 : }
1221 :
1222 : /*
1223 : * Sorting based split. First half of entries according to the sort will be
1224 : * placed to one page, and second half of entries will be placed to other
1225 : * page. use_upper_bound parameter indicates whether to use upper or lower
1226 : * bound for sorting.
1227 : */
1228 : static void
1229 0 : range_gist_single_sorting_split(TypeCacheEntry *typcache,
1230 : GistEntryVector *entryvec,
1231 : GIST_SPLITVEC *v,
1232 : bool use_upper_bound)
1233 : {
1234 : SingleBoundSortItem *sortItems;
1235 0 : RangeType *left_range = NULL;
1236 0 : RangeType *right_range = NULL;
1237 : OffsetNumber i,
1238 : maxoff,
1239 : split_idx;
1240 :
1241 0 : maxoff = entryvec->n - 1;
1242 :
1243 : sortItems = (SingleBoundSortItem *)
1244 0 : palloc(maxoff * sizeof(SingleBoundSortItem));
1245 :
1246 : /*
1247 : * Prepare auxiliary array and sort the values.
1248 : */
1249 0 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1250 : {
1251 0 : RangeType *range = DatumGetRangeTypeP(entryvec->vector[i].key);
1252 : RangeBound bound2;
1253 : bool empty;
1254 :
1255 0 : sortItems[i - 1].index = i;
1256 : /* Put appropriate bound into array */
1257 0 : if (use_upper_bound)
1258 0 : range_deserialize(typcache, range, &bound2,
1259 0 : &sortItems[i - 1].bound, &empty);
1260 : else
1261 0 : range_deserialize(typcache, range, &sortItems[i - 1].bound,
1262 : &bound2, &empty);
1263 : Assert(!empty);
1264 : }
1265 :
1266 0 : qsort_arg(sortItems, maxoff, sizeof(SingleBoundSortItem),
1267 : single_bound_cmp, typcache);
1268 :
1269 0 : split_idx = maxoff / 2;
1270 :
1271 0 : v->spl_nleft = 0;
1272 0 : v->spl_nright = 0;
1273 :
1274 0 : for (i = 0; i < maxoff; i++)
1275 : {
1276 0 : int idx = sortItems[i].index;
1277 0 : RangeType *range = DatumGetRangeTypeP(entryvec->vector[idx].key);
1278 :
1279 0 : if (i < split_idx)
1280 0 : PLACE_LEFT(range, idx);
1281 : else
1282 0 : PLACE_RIGHT(range, idx);
1283 : }
1284 :
1285 0 : v->spl_ldatum = RangeTypePGetDatum(left_range);
1286 0 : v->spl_rdatum = RangeTypePGetDatum(right_range);
1287 0 : }
1288 :
1289 : /*
1290 : * Double sorting split algorithm.
1291 : *
1292 : * The algorithm considers dividing ranges into two groups. The first (left)
1293 : * group contains general left bound. The second (right) group contains
1294 : * general right bound. The challenge is to find upper bound of left group
1295 : * and lower bound of right group so that overlap of groups is minimal and
1296 : * ratio of distribution is acceptable. Algorithm finds for each lower bound of
1297 : * right group minimal upper bound of left group, and for each upper bound of
1298 : * left group maximal lower bound of right group. For each found pair
1299 : * range_gist_consider_split considers replacement of currently selected
1300 : * split with the new one.
1301 : *
1302 : * After that, all the entries are divided into three groups:
1303 : * 1) Entries which should be placed to the left group
1304 : * 2) Entries which should be placed to the right group
1305 : * 3) "Common entries" which can be placed to either group without affecting
1306 : * amount of overlap.
1307 : *
1308 : * The common ranges are distributed by difference of distance from lower
1309 : * bound of common range to lower bound of right group and distance from upper
1310 : * bound of common range to upper bound of left group.
1311 : *
1312 : * For details see:
1313 : * "A new double sorting-based node splitting algorithm for R-tree",
1314 : * A. Korotkov
1315 : * http://syrcose.ispras.ru/2011/files/SYRCoSE2011_Proceedings.pdf#page=36
1316 : */
1317 : static void
1318 474 : range_gist_double_sorting_split(TypeCacheEntry *typcache,
1319 : GistEntryVector *entryvec,
1320 : GIST_SPLITVEC *v)
1321 : {
1322 : ConsiderSplitContext context;
1323 : OffsetNumber i,
1324 : maxoff;
1325 474 : RangeType *left_range = NULL,
1326 474 : *right_range = NULL;
1327 : int common_entries_count;
1328 : NonEmptyRange *by_lower,
1329 : *by_upper;
1330 : CommonEntry *common_entries;
1331 : int nentries,
1332 : i1,
1333 : i2;
1334 : RangeBound *right_lower,
1335 : *left_upper;
1336 :
1337 474 : memset(&context, 0, sizeof(ConsiderSplitContext));
1338 474 : context.typcache = typcache;
1339 474 : context.has_subtype_diff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
1340 :
1341 474 : maxoff = entryvec->n - 1;
1342 474 : nentries = context.entries_count = maxoff - FirstOffsetNumber + 1;
1343 474 : context.first = true;
1344 :
1345 : /* Allocate arrays for sorted range bounds */
1346 474 : by_lower = (NonEmptyRange *) palloc(nentries * sizeof(NonEmptyRange));
1347 474 : by_upper = (NonEmptyRange *) palloc(nentries * sizeof(NonEmptyRange));
1348 :
1349 : /* Fill arrays of bounds */
1350 129342 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1351 : {
1352 128868 : RangeType *range = DatumGetRangeTypeP(entryvec->vector[i].key);
1353 : bool empty;
1354 :
1355 128868 : range_deserialize(typcache, range,
1356 128868 : &by_lower[i - FirstOffsetNumber].lower,
1357 128868 : &by_lower[i - FirstOffsetNumber].upper,
1358 : &empty);
1359 : Assert(!empty);
1360 : }
1361 :
1362 : /*
1363 : * Make two arrays of range bounds: one sorted by lower bound and another
1364 : * sorted by upper bound.
1365 : */
1366 474 : memcpy(by_upper, by_lower, nentries * sizeof(NonEmptyRange));
1367 474 : qsort_arg(by_lower, nentries, sizeof(NonEmptyRange),
1368 : interval_cmp_lower, typcache);
1369 474 : qsort_arg(by_upper, nentries, sizeof(NonEmptyRange),
1370 : interval_cmp_upper, typcache);
1371 :
1372 : /*----------
1373 : * The goal is to form a left and right range, so that every entry
1374 : * range is contained by either left or right interval (or both).
1375 : *
1376 : * For example, with the ranges (0,1), (1,3), (2,3), (2,4):
1377 : *
1378 : * 0 1 2 3 4
1379 : * +-+
1380 : * +---+
1381 : * +-+
1382 : * +---+
1383 : *
1384 : * The left and right ranges are of the form (0,a) and (b,4).
1385 : * We first consider splits where b is the lower bound of an entry.
1386 : * We iterate through all entries, and for each b, calculate the
1387 : * smallest possible a. Then we consider splits where a is the
1388 : * upper bound of an entry, and for each a, calculate the greatest
1389 : * possible b.
1390 : *
1391 : * In the above example, the first loop would consider splits:
1392 : * b=0: (0,1)-(0,4)
1393 : * b=1: (0,1)-(1,4)
1394 : * b=2: (0,3)-(2,4)
1395 : *
1396 : * And the second loop:
1397 : * a=1: (0,1)-(1,4)
1398 : * a=3: (0,3)-(2,4)
1399 : * a=4: (0,4)-(2,4)
1400 : *----------
1401 : */
1402 :
1403 : /*
1404 : * Iterate over lower bound of right group, finding smallest possible
1405 : * upper bound of left group.
1406 : */
1407 474 : i1 = 0;
1408 474 : i2 = 0;
1409 474 : right_lower = &by_lower[i1].lower;
1410 474 : left_upper = &by_upper[i2].lower;
1411 : while (true)
1412 : {
1413 : /*
1414 : * Find next lower bound of right group.
1415 : */
1416 512418 : while (i1 < nentries &&
1417 255972 : range_cmp_bounds(typcache, right_lower,
1418 255972 : &by_lower[i1].lower) == 0)
1419 : {
1420 128868 : if (range_cmp_bounds(typcache, &by_lower[i1].upper,
1421 : left_upper) > 0)
1422 109758 : left_upper = &by_lower[i1].upper;
1423 128868 : i1++;
1424 : }
1425 127578 : if (i1 >= nentries)
1426 474 : break;
1427 127104 : right_lower = &by_lower[i1].lower;
1428 :
1429 : /*
1430 : * Find count of ranges which anyway should be placed to the left
1431 : * group.
1432 : */
1433 493578 : while (i2 < nentries &&
1434 237876 : range_cmp_bounds(typcache, &by_upper[i2].upper,
1435 : left_upper) <= 0)
1436 128598 : i2++;
1437 :
1438 : /*
1439 : * Consider found split to see if it's better than what we had.
1440 : */
1441 127104 : range_gist_consider_split(&context, right_lower, i1, left_upper, i2);
1442 : }
1443 :
1444 : /*
1445 : * Iterate over upper bound of left group finding greatest possible lower
1446 : * bound of right group.
1447 : */
1448 474 : i1 = nentries - 1;
1449 474 : i2 = nentries - 1;
1450 474 : right_lower = &by_lower[i1].upper;
1451 474 : left_upper = &by_upper[i2].upper;
1452 : while (true)
1453 : {
1454 : /*
1455 : * Find next upper bound of left group.
1456 : */
1457 514998 : while (i2 >= 0 &&
1458 257262 : range_cmp_bounds(typcache, left_upper,
1459 257262 : &by_upper[i2].upper) == 0)
1460 : {
1461 128868 : if (range_cmp_bounds(typcache, &by_upper[i2].lower,
1462 : right_lower) < 0)
1463 109752 : right_lower = &by_upper[i2].lower;
1464 128868 : i2--;
1465 : }
1466 128868 : if (i2 < 0)
1467 474 : break;
1468 128394 : left_upper = &by_upper[i2].upper;
1469 :
1470 : /*
1471 : * Find count of intervals which anyway should be placed to the right
1472 : * group.
1473 : */
1474 494868 : while (i1 >= 0 &&
1475 237876 : range_cmp_bounds(typcache, &by_lower[i1].lower,
1476 : right_lower) >= 0)
1477 128598 : i1--;
1478 :
1479 : /*
1480 : * Consider found split to see if it's better than what we had.
1481 : */
1482 128394 : range_gist_consider_split(&context, right_lower, i1 + 1,
1483 : left_upper, i2 + 1);
1484 : }
1485 :
1486 : /*
1487 : * If we failed to find any acceptable splits, use trivial split.
1488 : */
1489 474 : if (context.first)
1490 : {
1491 0 : range_gist_fallback_split(typcache, entryvec, v);
1492 0 : return;
1493 : }
1494 :
1495 : /*
1496 : * Ok, we have now selected bounds of the groups. Now we have to
1497 : * distribute entries themselves. At first we distribute entries which can
1498 : * be placed unambiguously and collect "common entries" to array.
1499 : */
1500 :
1501 : /* Allocate vectors for results */
1502 474 : v->spl_left = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
1503 474 : v->spl_right = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
1504 474 : v->spl_nleft = 0;
1505 474 : v->spl_nright = 0;
1506 :
1507 : /*
1508 : * Allocate an array for "common entries" - entries which can be placed to
1509 : * either group without affecting overlap along selected axis.
1510 : */
1511 474 : common_entries_count = 0;
1512 474 : common_entries = (CommonEntry *) palloc(nentries * sizeof(CommonEntry));
1513 :
1514 : /*
1515 : * Distribute entries which can be distributed unambiguously, and collect
1516 : * common entries.
1517 : */
1518 129342 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1519 : {
1520 : RangeType *range;
1521 : RangeBound lower,
1522 : upper;
1523 : bool empty;
1524 :
1525 : /*
1526 : * Get upper and lower bounds along selected axis.
1527 : */
1528 128868 : range = DatumGetRangeTypeP(entryvec->vector[i].key);
1529 :
1530 128868 : range_deserialize(typcache, range, &lower, &upper, &empty);
1531 :
1532 128868 : if (range_cmp_bounds(typcache, &upper, context.left_upper) <= 0)
1533 : {
1534 : /* Fits in the left group */
1535 59466 : if (range_cmp_bounds(typcache, &lower, context.right_lower) >= 0)
1536 : {
1537 : /* Fits also in the right group, so "common entry" */
1538 11706 : common_entries[common_entries_count].index = i;
1539 11706 : if (context.has_subtype_diff)
1540 : {
1541 : /*
1542 : * delta = (lower - context.right_lower) -
1543 : * (context.left_upper - upper)
1544 : */
1545 11706 : common_entries[common_entries_count].delta =
1546 11706 : call_subtype_diff(typcache,
1547 : lower.val,
1548 23412 : context.right_lower->val) -
1549 11706 : call_subtype_diff(typcache,
1550 11706 : context.left_upper->val,
1551 : upper.val);
1552 : }
1553 : else
1554 : {
1555 : /* Without subtype_diff, take all deltas as zero */
1556 0 : common_entries[common_entries_count].delta = 0;
1557 : }
1558 11706 : common_entries_count++;
1559 : }
1560 : else
1561 : {
1562 : /* Doesn't fit to the right group, so join to the left group */
1563 47760 : PLACE_LEFT(range, i);
1564 : }
1565 : }
1566 : else
1567 : {
1568 : /*
1569 : * Each entry should fit on either left or right group. Since this
1570 : * entry didn't fit in the left group, it better fit in the right
1571 : * group.
1572 : */
1573 : Assert(range_cmp_bounds(typcache, &lower,
1574 : context.right_lower) >= 0);
1575 69402 : PLACE_RIGHT(range, i);
1576 : }
1577 : }
1578 :
1579 : /*
1580 : * Distribute "common entries", if any.
1581 : */
1582 474 : if (common_entries_count > 0)
1583 : {
1584 : /*
1585 : * Sort "common entries" by calculated deltas in order to distribute
1586 : * the most ambiguous entries first.
1587 : */
1588 204 : qsort(common_entries, common_entries_count, sizeof(CommonEntry),
1589 : common_entry_cmp);
1590 :
1591 : /*
1592 : * Distribute "common entries" between groups according to sorting.
1593 : */
1594 11910 : for (i = 0; i < common_entries_count; i++)
1595 : {
1596 : RangeType *range;
1597 11706 : int idx = common_entries[i].index;
1598 :
1599 11706 : range = DatumGetRangeTypeP(entryvec->vector[idx].key);
1600 :
1601 : /*
1602 : * Check if we have to place this entry in either group to achieve
1603 : * LIMIT_RATIO.
1604 : */
1605 11706 : if (i < context.common_left)
1606 0 : PLACE_LEFT(range, idx);
1607 : else
1608 11706 : PLACE_RIGHT(range, idx);
1609 : }
1610 : }
1611 :
1612 474 : v->spl_ldatum = PointerGetDatum(left_range);
1613 474 : v->spl_rdatum = PointerGetDatum(right_range);
1614 : }
1615 :
1616 : /*
1617 : * Consider replacement of currently selected split with a better one
1618 : * during range_gist_double_sorting_split.
1619 : */
1620 : static void
1621 255498 : range_gist_consider_split(ConsiderSplitContext *context,
1622 : RangeBound *right_lower, int min_left_count,
1623 : RangeBound *left_upper, int max_left_count)
1624 : {
1625 : int left_count,
1626 : right_count;
1627 : float4 ratio,
1628 : overlap;
1629 :
1630 : /*
1631 : * Calculate entries distribution ratio assuming most uniform distribution
1632 : * of common entries.
1633 : */
1634 255498 : if (min_left_count >= (context->entries_count + 1) / 2)
1635 112838 : left_count = min_left_count;
1636 142660 : else if (max_left_count <= context->entries_count / 2)
1637 111906 : left_count = max_left_count;
1638 : else
1639 30754 : left_count = context->entries_count / 2;
1640 255498 : right_count = context->entries_count - left_count;
1641 :
1642 : /*
1643 : * Ratio of split: quotient between size of smaller group and total
1644 : * entries count. This is necessarily 0.5 or less; if it's less than
1645 : * LIMIT_RATIO then we will never accept the new split.
1646 : */
1647 255498 : ratio = ((float4) Min(left_count, right_count)) /
1648 255498 : ((float4) context->entries_count);
1649 :
1650 255498 : if (ratio > LIMIT_RATIO)
1651 : {
1652 125222 : bool selectthis = false;
1653 :
1654 : /*
1655 : * The ratio is acceptable, so compare current split with previously
1656 : * selected one. We search for minimal overlap (allowing negative
1657 : * values) and minimal ratio secondarily. If subtype_diff is
1658 : * available, it's used for overlap measure. Without subtype_diff we
1659 : * use number of "common entries" as an overlap measure.
1660 : */
1661 125222 : if (context->has_subtype_diff)
1662 125222 : overlap = call_subtype_diff(context->typcache,
1663 : left_upper->val,
1664 : right_lower->val);
1665 : else
1666 0 : overlap = max_left_count - min_left_count;
1667 :
1668 : /* If there is no previous selection, select this split */
1669 125222 : if (context->first)
1670 474 : selectthis = true;
1671 : else
1672 : {
1673 : /*
1674 : * Choose the new split if it has a smaller overlap, or same
1675 : * overlap but better ratio.
1676 : */
1677 124748 : if (overlap < context->overlap ||
1678 111036 : (overlap == context->overlap && ratio > context->ratio))
1679 34586 : selectthis = true;
1680 : }
1681 :
1682 125222 : if (selectthis)
1683 : {
1684 : /* save information about selected split */
1685 35060 : context->first = false;
1686 35060 : context->ratio = ratio;
1687 35060 : context->overlap = overlap;
1688 35060 : context->right_lower = right_lower;
1689 35060 : context->left_upper = left_upper;
1690 35060 : context->common_left = max_left_count - left_count;
1691 35060 : context->common_right = left_count - min_left_count;
1692 : }
1693 : }
1694 255498 : }
1695 :
1696 : /*
1697 : * Find class number for range.
1698 : *
1699 : * The class number is a valid combination of the properties of the
1700 : * range. Note: the highest possible number is 8, because CLS_EMPTY
1701 : * can't be combined with anything else.
1702 : */
1703 : static int
1704 161092 : get_gist_range_class(RangeType *range)
1705 : {
1706 : int classNumber;
1707 : char flags;
1708 :
1709 161092 : flags = range_get_flags(range);
1710 161092 : if (flags & RANGE_EMPTY)
1711 : {
1712 23016 : classNumber = CLS_EMPTY;
1713 : }
1714 : else
1715 : {
1716 138076 : classNumber = 0;
1717 138076 : if (flags & RANGE_LB_INF)
1718 1600 : classNumber |= CLS_LOWER_INF;
1719 138076 : if (flags & RANGE_UB_INF)
1720 556 : classNumber |= CLS_UPPER_INF;
1721 138076 : if (flags & RANGE_CONTAIN_EMPTY)
1722 0 : classNumber |= CLS_CONTAIN_EMPTY;
1723 : }
1724 161092 : return classNumber;
1725 : }
1726 :
1727 : /*
1728 : * Comparison function for range_gist_single_sorting_split.
1729 : */
1730 : static int
1731 0 : single_bound_cmp(const void *a, const void *b, void *arg)
1732 : {
1733 0 : SingleBoundSortItem *i1 = (SingleBoundSortItem *) a;
1734 0 : SingleBoundSortItem *i2 = (SingleBoundSortItem *) b;
1735 0 : TypeCacheEntry *typcache = (TypeCacheEntry *) arg;
1736 :
1737 0 : return range_cmp_bounds(typcache, &i1->bound, &i2->bound);
1738 : }
1739 :
1740 : /*
1741 : * Compare NonEmptyRanges by lower bound.
1742 : */
1743 : static int
1744 513130 : interval_cmp_lower(const void *a, const void *b, void *arg)
1745 : {
1746 513130 : NonEmptyRange *i1 = (NonEmptyRange *) a;
1747 513130 : NonEmptyRange *i2 = (NonEmptyRange *) b;
1748 513130 : TypeCacheEntry *typcache = (TypeCacheEntry *) arg;
1749 :
1750 513130 : return range_cmp_bounds(typcache, &i1->lower, &i2->lower);
1751 : }
1752 :
1753 : /*
1754 : * Compare NonEmptyRanges by upper bound.
1755 : */
1756 : static int
1757 481248 : interval_cmp_upper(const void *a, const void *b, void *arg)
1758 : {
1759 481248 : NonEmptyRange *i1 = (NonEmptyRange *) a;
1760 481248 : NonEmptyRange *i2 = (NonEmptyRange *) b;
1761 481248 : TypeCacheEntry *typcache = (TypeCacheEntry *) arg;
1762 :
1763 481248 : return range_cmp_bounds(typcache, &i1->upper, &i2->upper);
1764 : }
1765 :
1766 : /*
1767 : * Compare CommonEntrys by their deltas.
1768 : */
1769 : static int
1770 11502 : common_entry_cmp(const void *i1, const void *i2)
1771 : {
1772 11502 : double delta1 = ((CommonEntry *) i1)->delta;
1773 11502 : double delta2 = ((CommonEntry *) i2)->delta;
1774 :
1775 11502 : if (delta1 < delta2)
1776 11502 : return -1;
1777 0 : else if (delta1 > delta2)
1778 0 : return 1;
1779 : else
1780 0 : return 0;
1781 : }
1782 :
1783 : /*
1784 : * Convenience function to invoke type-specific subtype_diff function.
1785 : * Caller must have already checked that there is one for the range type.
1786 : */
1787 : static float8
1788 1135386 : call_subtype_diff(TypeCacheEntry *typcache, Datum val1, Datum val2)
1789 : {
1790 : float8 value;
1791 :
1792 1135386 : value = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
1793 : typcache->rng_collation,
1794 : val1, val2));
1795 : /* Cope with buggy subtype_diff function by returning zero */
1796 1135386 : if (value >= 0.0)
1797 1135386 : return value;
1798 0 : return 0.0;
1799 : }
|