Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * rangetypes_gist.c
4 : * GiST support for range types.
5 : *
6 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/utils/adt/rangetypes_gist.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/gist.h"
18 : #include "access/stratnum.h"
19 : #include "utils/datum.h"
20 : #include "utils/float.h"
21 : #include "utils/fmgrprotos.h"
22 : #include "utils/multirangetypes.h"
23 : #include "utils/rangetypes.h"
24 :
25 : /*
26 : * Range class properties used to segregate different classes of ranges in
27 : * GiST. Each unique combination of properties is a class. CLS_EMPTY cannot
28 : * be combined with anything else.
29 : */
30 : #define CLS_NORMAL 0 /* Ordinary finite range (no bits set) */
31 : #define CLS_LOWER_INF 1 /* Lower bound is infinity */
32 : #define CLS_UPPER_INF 2 /* Upper bound is infinity */
33 : #define CLS_CONTAIN_EMPTY 4 /* Contains underlying empty ranges */
34 : #define CLS_EMPTY 8 /* Special class for empty ranges */
35 :
36 : #define CLS_COUNT 9 /* # of classes; includes all combinations of
37 : * properties. CLS_EMPTY doesn't combine with
38 : * anything else, so it's only 2^3 + 1. */
39 :
40 : /*
41 : * Minimum accepted ratio of split for items of the same class. If the items
42 : * are of different classes, we will separate along those lines regardless of
43 : * the ratio.
44 : */
45 : #define LIMIT_RATIO 0.3
46 :
47 : /* Constants for fixed penalty values */
48 : #define INFINITE_BOUND_PENALTY 2.0
49 : #define CONTAIN_EMPTY_PENALTY 1.0
50 : #define DEFAULT_SUBTYPE_DIFF_PENALTY 1.0
51 :
52 : /*
53 : * Per-item data for range_gist_single_sorting_split.
54 : */
55 : typedef struct
56 : {
57 : int index;
58 : RangeBound bound;
59 : } SingleBoundSortItem;
60 :
61 : /* place on left or right side of split? */
62 : typedef enum
63 : {
64 : SPLIT_LEFT = 0, /* makes initialization to SPLIT_LEFT easier */
65 : SPLIT_RIGHT,
66 : } SplitLR;
67 :
68 : /*
69 : * Context for range_gist_consider_split.
70 : */
71 : typedef struct
72 : {
73 : TypeCacheEntry *typcache; /* typcache for range type */
74 : bool has_subtype_diff; /* does it have subtype_diff? */
75 : int entries_count; /* total number of entries being split */
76 :
77 : /* Information about currently selected split follows */
78 :
79 : bool first; /* true if no split was selected yet */
80 :
81 : RangeBound *left_upper; /* upper bound of left interval */
82 : RangeBound *right_lower; /* lower bound of right interval */
83 :
84 : float4 ratio; /* split ratio */
85 : float4 overlap; /* overlap between left and right predicate */
86 : int common_left; /* # common entries destined for each side */
87 : int common_right;
88 : } ConsiderSplitContext;
89 :
90 : /*
91 : * Bounds extracted from a non-empty range, for use in
92 : * range_gist_double_sorting_split.
93 : */
94 : typedef struct
95 : {
96 : RangeBound lower;
97 : RangeBound upper;
98 : } NonEmptyRange;
99 :
100 : /*
101 : * Represents information about an entry that can be placed in either group
102 : * without affecting overlap over selected axis ("common entry").
103 : */
104 : typedef struct
105 : {
106 : /* Index of entry in the initial array */
107 : int index;
108 : /* Delta between closeness of range to each of the two groups */
109 : double delta;
110 : } CommonEntry;
111 :
112 : /* Helper macros to place an entry in the left or right group during split */
113 : /* Note direct access to variables v, typcache, left_range, right_range */
114 : #define PLACE_LEFT(range, off) \
115 : do { \
116 : if (v->spl_nleft > 0) \
117 : left_range = range_super_union(typcache, left_range, range); \
118 : else \
119 : left_range = (range); \
120 : v->spl_left[v->spl_nleft++] = (off); \
121 : } while(0)
122 :
123 : #define PLACE_RIGHT(range, off) \
124 : do { \
125 : if (v->spl_nright > 0) \
126 : right_range = range_super_union(typcache, right_range, range); \
127 : else \
128 : right_range = (range); \
129 : v->spl_right[v->spl_nright++] = (off); \
130 : } while(0)
131 :
132 : /* Copy a RangeType datum (hardwires typbyval and typlen for ranges...) */
133 : #define rangeCopy(r) \
134 : ((RangeType *) DatumGetPointer(datumCopy(PointerGetDatum(r), \
135 : false, -1)))
136 :
137 : static RangeType *range_super_union(TypeCacheEntry *typcache, RangeType *r1,
138 : RangeType *r2);
139 : static bool range_gist_consistent_int_range(TypeCacheEntry *typcache,
140 : StrategyNumber strategy,
141 : const RangeType *key,
142 : const RangeType *query);
143 : static bool range_gist_consistent_int_multirange(TypeCacheEntry *typcache,
144 : StrategyNumber strategy,
145 : const RangeType *key,
146 : const MultirangeType *query);
147 : static bool range_gist_consistent_int_element(TypeCacheEntry *typcache,
148 : StrategyNumber strategy,
149 : const RangeType *key,
150 : Datum query);
151 : static bool range_gist_consistent_leaf_range(TypeCacheEntry *typcache,
152 : StrategyNumber strategy,
153 : const RangeType *key,
154 : const RangeType *query);
155 : static bool range_gist_consistent_leaf_multirange(TypeCacheEntry *typcache,
156 : StrategyNumber strategy,
157 : const RangeType *key,
158 : const MultirangeType *query);
159 : static bool range_gist_consistent_leaf_element(TypeCacheEntry *typcache,
160 : StrategyNumber strategy,
161 : const RangeType *key,
162 : Datum query);
163 : static void range_gist_fallback_split(TypeCacheEntry *typcache,
164 : GistEntryVector *entryvec,
165 : GIST_SPLITVEC *v);
166 : static void range_gist_class_split(TypeCacheEntry *typcache,
167 : GistEntryVector *entryvec,
168 : GIST_SPLITVEC *v,
169 : SplitLR *classes_groups);
170 : static void range_gist_single_sorting_split(TypeCacheEntry *typcache,
171 : GistEntryVector *entryvec,
172 : GIST_SPLITVEC *v,
173 : bool use_upper_bound);
174 : static void range_gist_double_sorting_split(TypeCacheEntry *typcache,
175 : GistEntryVector *entryvec,
176 : GIST_SPLITVEC *v);
177 : static void range_gist_consider_split(ConsiderSplitContext *context,
178 : RangeBound *right_lower, int min_left_count,
179 : RangeBound *left_upper, int max_left_count);
180 : static int get_gist_range_class(RangeType *range);
181 : static int single_bound_cmp(const void *a, const void *b, void *arg);
182 : static int interval_cmp_lower(const void *a, const void *b, void *arg);
183 : static int interval_cmp_upper(const void *a, const void *b, void *arg);
184 : static int common_entry_cmp(const void *i1, const void *i2);
185 : static float8 call_subtype_diff(TypeCacheEntry *typcache,
186 : Datum val1, Datum val2);
187 :
188 :
189 : /* GiST query consistency check */
190 : Datum
191 653946 : range_gist_consistent(PG_FUNCTION_ARGS)
192 : {
193 653946 : GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
194 653946 : Datum query = PG_GETARG_DATUM(1);
195 653946 : StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
196 : bool result;
197 653946 : Oid subtype = PG_GETARG_OID(3);
198 653946 : bool *recheck = (bool *) PG_GETARG_POINTER(4);
199 653946 : RangeType *key = DatumGetRangeTypeP(entry->key);
200 : TypeCacheEntry *typcache;
201 :
202 : /* All operators served by this function are exact */
203 653946 : *recheck = false;
204 :
205 653946 : typcache = range_get_typcache(fcinfo, RangeTypeGetOid(key));
206 :
207 : /*
208 : * Perform consistent checking using function corresponding to key type
209 : * (leaf or internal) and query subtype (range, multirange, or element).
210 : * Note that invalid subtype means that query type matches key type
211 : * (range).
212 : */
213 653946 : if (GIST_LEAF(entry))
214 : {
215 646026 : if (!OidIsValid(subtype) || subtype == ANYRANGEOID)
216 330188 : result = range_gist_consistent_leaf_range(typcache, strategy, key,
217 330188 : DatumGetRangeTypeP(query));
218 315838 : else if (subtype == ANYMULTIRANGEOID)
219 307262 : result = range_gist_consistent_leaf_multirange(typcache, strategy, key,
220 307262 : DatumGetMultirangeTypeP(query));
221 : else
222 8576 : result = range_gist_consistent_leaf_element(typcache, strategy,
223 : key, query);
224 : }
225 : else
226 : {
227 7920 : if (!OidIsValid(subtype) || subtype == ANYRANGEOID)
228 3960 : result = range_gist_consistent_int_range(typcache, strategy, key,
229 3960 : DatumGetRangeTypeP(query));
230 3960 : else if (subtype == ANYMULTIRANGEOID)
231 3564 : result = range_gist_consistent_int_multirange(typcache, strategy, key,
232 3564 : DatumGetMultirangeTypeP(query));
233 : else
234 396 : result = range_gist_consistent_int_element(typcache, strategy,
235 : key, query);
236 : }
237 653946 : PG_RETURN_BOOL(result);
238 : }
239 :
240 : /*
241 : * GiST compress method for multiranges: multirange is approximated as union
242 : * range with no gaps.
243 : */
244 : Datum
245 40462 : multirange_gist_compress(PG_FUNCTION_ARGS)
246 : {
247 40462 : GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
248 :
249 40462 : if (entry->leafkey)
250 : {
251 23196 : MultirangeType *mr = DatumGetMultirangeTypeP(entry->key);
252 : RangeType *r;
253 : TypeCacheEntry *typcache;
254 23196 : GISTENTRY *retval = palloc(sizeof(GISTENTRY));
255 :
256 23196 : typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr));
257 23196 : r = multirange_get_union_range(typcache->rngtype, mr);
258 :
259 23196 : gistentryinit(*retval, RangeTypePGetDatum(r),
260 : entry->rel, entry->page, entry->offset, false);
261 :
262 23196 : PG_RETURN_POINTER(retval);
263 : }
264 :
265 17266 : PG_RETURN_POINTER(entry);
266 : }
267 :
268 : /* GiST query consistency check for multiranges */
269 : Datum
270 275006 : multirange_gist_consistent(PG_FUNCTION_ARGS)
271 : {
272 275006 : GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
273 275006 : Datum query = PG_GETARG_DATUM(1);
274 275006 : StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
275 : bool result;
276 275006 : Oid subtype = PG_GETARG_OID(3);
277 275006 : bool *recheck = (bool *) PG_GETARG_POINTER(4);
278 275006 : RangeType *key = DatumGetRangeTypeP(entry->key);
279 : TypeCacheEntry *typcache;
280 :
281 : /*
282 : * All operators served by this function are inexact because multirange is
283 : * approximated by union range with no gaps.
284 : */
285 275006 : *recheck = true;
286 :
287 275006 : typcache = range_get_typcache(fcinfo, RangeTypeGetOid(key));
288 :
289 : /*
290 : * Perform consistent checking using function corresponding to key type
291 : * (leaf or internal) and query subtype (range, multirange, or element).
292 : * Note that invalid subtype means that query type matches key type
293 : * (multirange).
294 : */
295 275006 : if (GIST_LEAF(entry))
296 : {
297 268850 : if (!OidIsValid(subtype) || subtype == ANYMULTIRANGEOID)
298 148720 : result = range_gist_consistent_leaf_multirange(typcache, strategy, key,
299 148720 : DatumGetMultirangeTypeP(query));
300 120130 : else if (subtype == ANYRANGEOID)
301 117444 : result = range_gist_consistent_leaf_range(typcache, strategy, key,
302 117444 : DatumGetRangeTypeP(query));
303 : else
304 2686 : result = range_gist_consistent_leaf_element(typcache, strategy,
305 : key, query);
306 : }
307 : else
308 : {
309 6156 : if (!OidIsValid(subtype) || subtype == ANYMULTIRANGEOID)
310 3240 : result = range_gist_consistent_int_multirange(typcache, strategy, key,
311 3240 : DatumGetMultirangeTypeP(query));
312 2916 : else if (subtype == ANYRANGEOID)
313 2754 : result = range_gist_consistent_int_range(typcache, strategy, key,
314 2754 : DatumGetRangeTypeP(query));
315 : else
316 162 : result = range_gist_consistent_int_element(typcache, strategy,
317 : key, query);
318 : }
319 275006 : PG_RETURN_BOOL(result);
320 : }
321 :
322 : /* form union range */
323 : Datum
324 91888 : range_gist_union(PG_FUNCTION_ARGS)
325 : {
326 91888 : GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
327 91888 : GISTENTRY *ent = entryvec->vector;
328 : RangeType *result_range;
329 : TypeCacheEntry *typcache;
330 : int i;
331 :
332 91888 : result_range = DatumGetRangeTypeP(ent[0].key);
333 :
334 91888 : typcache = range_get_typcache(fcinfo, RangeTypeGetOid(result_range));
335 :
336 208952 : for (i = 1; i < entryvec->n; i++)
337 : {
338 117064 : result_range = range_super_union(typcache, result_range,
339 117064 : DatumGetRangeTypeP(ent[i].key));
340 : }
341 :
342 91888 : PG_RETURN_RANGE_P(result_range);
343 : }
344 :
345 : /*
346 : * We store ranges as ranges in GiST indexes, so we do not need
347 : * compress, decompress, or fetch functions. Note this implies a limit
348 : * on the size of range values that can be indexed.
349 : */
350 :
351 : /*
352 : * GiST page split penalty function.
353 : *
354 : * The penalty function has the following goals (in order from most to least
355 : * important):
356 : * - Keep normal ranges separate
357 : * - Avoid broadening the class of the original predicate
358 : * - Avoid broadening (as determined by subtype_diff) the original predicate
359 : * - Favor adding ranges to narrower original predicates
360 : */
361 : Datum
362 1251682 : range_gist_penalty(PG_FUNCTION_ARGS)
363 : {
364 1251682 : GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0);
365 1251682 : GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
366 1251682 : float *penalty = (float *) PG_GETARG_POINTER(2);
367 1251682 : RangeType *orig = DatumGetRangeTypeP(origentry->key);
368 1251682 : RangeType *new = DatumGetRangeTypeP(newentry->key);
369 : TypeCacheEntry *typcache;
370 : bool has_subtype_diff;
371 : RangeBound orig_lower,
372 : new_lower,
373 : orig_upper,
374 : new_upper;
375 : bool orig_empty,
376 : new_empty;
377 :
378 1251682 : if (RangeTypeGetOid(orig) != RangeTypeGetOid(new))
379 0 : elog(ERROR, "range types do not match");
380 :
381 1251682 : typcache = range_get_typcache(fcinfo, RangeTypeGetOid(orig));
382 :
383 1251682 : has_subtype_diff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
384 :
385 1251682 : range_deserialize(typcache, orig, &orig_lower, &orig_upper, &orig_empty);
386 1251682 : range_deserialize(typcache, new, &new_lower, &new_upper, &new_empty);
387 :
388 : /*
389 : * Distinct branches for handling distinct classes of ranges. Note that
390 : * penalty values only need to be commensurate within the same class of
391 : * new range.
392 : */
393 1251682 : if (new_empty)
394 : {
395 : /* Handle insertion of empty range */
396 255106 : if (orig_empty)
397 : {
398 : /*
399 : * The best case is to insert it to empty original range.
400 : * Insertion here means no broadening of original range. Also
401 : * original range is the most narrow.
402 : */
403 16618 : *penalty = 0.0;
404 : }
405 238488 : else if (RangeIsOrContainsEmpty(orig))
406 : {
407 : /*
408 : * The second case is to insert empty range into range which
409 : * contains at least one underlying empty range. There is still
410 : * no broadening of original range, but original range is not as
411 : * narrow as possible.
412 : */
413 3414 : *penalty = CONTAIN_EMPTY_PENALTY;
414 : }
415 235074 : else if (orig_lower.infinite && orig_upper.infinite)
416 : {
417 : /*
418 : * Original range requires broadening. (-inf; +inf) is most far
419 : * from normal range in this case.
420 : */
421 0 : *penalty = 2 * CONTAIN_EMPTY_PENALTY;
422 : }
423 235074 : else if (orig_lower.infinite || orig_upper.infinite)
424 : {
425 : /*
426 : * (-inf, x) or (x, +inf) original ranges are closer to normal
427 : * ranges, so it's worse to mix it with empty ranges.
428 : */
429 0 : *penalty = 3 * CONTAIN_EMPTY_PENALTY;
430 : }
431 : else
432 : {
433 : /*
434 : * The least preferred case is broadening of normal range.
435 : */
436 235074 : *penalty = 4 * CONTAIN_EMPTY_PENALTY;
437 : }
438 : }
439 996576 : else if (new_lower.infinite && new_upper.infinite)
440 : {
441 : /* Handle insertion of (-inf, +inf) range */
442 0 : if (orig_lower.infinite && orig_upper.infinite)
443 : {
444 : /*
445 : * Best case is inserting to (-inf, +inf) original range.
446 : */
447 0 : *penalty = 0.0;
448 : }
449 0 : else if (orig_lower.infinite || orig_upper.infinite)
450 : {
451 : /*
452 : * When original range is (-inf, x) or (x, +inf) it requires
453 : * broadening of original range (extension of one bound to
454 : * infinity).
455 : */
456 0 : *penalty = INFINITE_BOUND_PENALTY;
457 : }
458 : else
459 : {
460 : /*
461 : * Insertion to normal original range is least preferred.
462 : */
463 0 : *penalty = 2 * INFINITE_BOUND_PENALTY;
464 : }
465 :
466 0 : if (RangeIsOrContainsEmpty(orig))
467 : {
468 : /*
469 : * Original range is narrower when it doesn't contain empty
470 : * ranges. Add additional penalty otherwise.
471 : */
472 0 : *penalty += CONTAIN_EMPTY_PENALTY;
473 : }
474 : }
475 996576 : else if (new_lower.infinite)
476 : {
477 : /* Handle insertion of (-inf, x) range */
478 41902 : if (!orig_empty && orig_lower.infinite)
479 : {
480 1782 : if (orig_upper.infinite)
481 : {
482 : /*
483 : * (-inf, +inf) range won't be extended by insertion of (-inf,
484 : * x) range. It's a less desirable case than insertion to
485 : * (-inf, y) original range without extension, because in that
486 : * case original range is narrower. But we can't express that
487 : * in single float value.
488 : */
489 0 : *penalty = 0.0;
490 : }
491 : else
492 : {
493 1782 : if (range_cmp_bounds(typcache, &new_upper, &orig_upper) > 0)
494 : {
495 : /*
496 : * Get extension of original range using subtype_diff. Use
497 : * constant if subtype_diff unavailable.
498 : */
499 1280 : if (has_subtype_diff)
500 1280 : *penalty = call_subtype_diff(typcache,
501 : new_upper.val,
502 : orig_upper.val);
503 : else
504 0 : *penalty = DEFAULT_SUBTYPE_DIFF_PENALTY;
505 : }
506 : else
507 : {
508 : /* No extension of original range */
509 502 : *penalty = 0.0;
510 : }
511 : }
512 : }
513 : else
514 : {
515 : /*
516 : * If lower bound of original range is not -inf, then extension of
517 : * it is infinity.
518 : */
519 40120 : *penalty = get_float4_infinity();
520 : }
521 : }
522 954674 : else if (new_upper.infinite)
523 : {
524 : /* Handle insertion of (x, +inf) range */
525 30390 : if (!orig_empty && orig_upper.infinite)
526 : {
527 1782 : if (orig_lower.infinite)
528 : {
529 : /*
530 : * (-inf, +inf) range won't be extended by insertion of (x,
531 : * +inf) range. It's a less desirable case than insertion to
532 : * (y, +inf) original range without extension, because in that
533 : * case original range is narrower. But we can't express that
534 : * in single float value.
535 : */
536 396 : *penalty = 0.0;
537 : }
538 : else
539 : {
540 1386 : if (range_cmp_bounds(typcache, &new_lower, &orig_lower) < 0)
541 : {
542 : /*
543 : * Get extension of original range using subtype_diff. Use
544 : * constant if subtype_diff unavailable.
545 : */
546 0 : if (has_subtype_diff)
547 0 : *penalty = call_subtype_diff(typcache,
548 : orig_lower.val,
549 : new_lower.val);
550 : else
551 0 : *penalty = DEFAULT_SUBTYPE_DIFF_PENALTY;
552 : }
553 : else
554 : {
555 : /* No extension of original range */
556 1386 : *penalty = 0.0;
557 : }
558 : }
559 : }
560 : else
561 : {
562 : /*
563 : * If upper bound of original range is not +inf, then extension of
564 : * it is infinity.
565 : */
566 28608 : *penalty = get_float4_infinity();
567 : }
568 : }
569 : else
570 : {
571 : /* Handle insertion of normal (non-empty, non-infinite) range */
572 924284 : if (orig_empty || orig_lower.infinite || orig_upper.infinite)
573 : {
574 : /*
575 : * Avoid mixing normal ranges with infinite and empty ranges.
576 : */
577 76140 : *penalty = get_float4_infinity();
578 : }
579 : else
580 : {
581 : /*
582 : * Calculate extension of original range by calling subtype_diff.
583 : * Use constant if subtype_diff unavailable.
584 : */
585 848144 : float8 diff = 0.0;
586 :
587 848144 : if (range_cmp_bounds(typcache, &new_lower, &orig_lower) < 0)
588 : {
589 283268 : if (has_subtype_diff)
590 283268 : diff += call_subtype_diff(typcache,
591 : orig_lower.val,
592 : new_lower.val);
593 : else
594 0 : diff += DEFAULT_SUBTYPE_DIFF_PENALTY;
595 : }
596 848144 : if (range_cmp_bounds(typcache, &new_upper, &orig_upper) > 0)
597 : {
598 699662 : if (has_subtype_diff)
599 699662 : diff += call_subtype_diff(typcache,
600 : new_upper.val,
601 : orig_upper.val);
602 : else
603 0 : diff += DEFAULT_SUBTYPE_DIFF_PENALTY;
604 : }
605 848144 : *penalty = diff;
606 : }
607 : }
608 :
609 1251682 : PG_RETURN_POINTER(penalty);
610 : }
611 :
612 : /*
613 : * The GiST PickSplit method for ranges
614 : *
615 : * Primarily, we try to segregate ranges of different classes. If splitting
616 : * ranges of the same class, use the appropriate split method for that class.
617 : */
618 : Datum
619 540 : range_gist_picksplit(PG_FUNCTION_ARGS)
620 : {
621 540 : GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
622 540 : GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
623 : TypeCacheEntry *typcache;
624 : OffsetNumber i;
625 : RangeType *pred_left;
626 : int nbytes;
627 : OffsetNumber maxoff;
628 : int count_in_classes[CLS_COUNT];
629 : int j;
630 540 : int non_empty_classes_count = 0;
631 540 : int biggest_class = -1;
632 540 : int biggest_class_count = 0;
633 : int total_count;
634 :
635 : /* use first item to look up range type's info */
636 540 : pred_left = DatumGetRangeTypeP(entryvec->vector[FirstOffsetNumber].key);
637 540 : typcache = range_get_typcache(fcinfo, RangeTypeGetOid(pred_left));
638 :
639 540 : maxoff = entryvec->n - 1;
640 540 : nbytes = (maxoff + 1) * sizeof(OffsetNumber);
641 540 : v->spl_left = (OffsetNumber *) palloc(nbytes);
642 540 : v->spl_right = (OffsetNumber *) palloc(nbytes);
643 :
644 : /*
645 : * Get count distribution of range classes.
646 : */
647 540 : memset(count_in_classes, 0, sizeof(count_in_classes));
648 153072 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
649 : {
650 152532 : RangeType *range = DatumGetRangeTypeP(entryvec->vector[i].key);
651 :
652 152532 : count_in_classes[get_gist_range_class(range)]++;
653 : }
654 :
655 : /*
656 : * Count non-empty classes and find biggest class.
657 : */
658 540 : total_count = maxoff;
659 5400 : for (j = 0; j < CLS_COUNT; j++)
660 : {
661 4860 : if (count_in_classes[j] > 0)
662 : {
663 566 : if (count_in_classes[j] > biggest_class_count)
664 : {
665 558 : biggest_class_count = count_in_classes[j];
666 558 : biggest_class = j;
667 : }
668 566 : non_empty_classes_count++;
669 : }
670 : }
671 :
672 : Assert(non_empty_classes_count > 0);
673 :
674 540 : if (non_empty_classes_count == 1)
675 : {
676 : /* One non-empty class, so split inside class */
677 518 : if ((biggest_class & ~CLS_CONTAIN_EMPTY) == CLS_NORMAL)
678 : {
679 : /* double sorting split for normal ranges */
680 476 : range_gist_double_sorting_split(typcache, entryvec, v);
681 : }
682 42 : else if ((biggest_class & ~CLS_CONTAIN_EMPTY) == CLS_LOWER_INF)
683 : {
684 : /* upper bound sorting split for (-inf, x) ranges */
685 0 : range_gist_single_sorting_split(typcache, entryvec, v, true);
686 : }
687 42 : else if ((biggest_class & ~CLS_CONTAIN_EMPTY) == CLS_UPPER_INF)
688 : {
689 : /* lower bound sorting split for (x, +inf) ranges */
690 0 : range_gist_single_sorting_split(typcache, entryvec, v, false);
691 : }
692 : else
693 : {
694 : /* trivial split for all (-inf, +inf) or all empty ranges */
695 42 : range_gist_fallback_split(typcache, entryvec, v);
696 : }
697 : }
698 : else
699 : {
700 : /*
701 : * Class based split.
702 : *
703 : * To which side of the split should each class go? Initialize them
704 : * all to go to the left side.
705 : */
706 : SplitLR classes_groups[CLS_COUNT];
707 :
708 22 : memset(classes_groups, 0, sizeof(classes_groups));
709 :
710 22 : if (count_in_classes[CLS_NORMAL] > 0)
711 : {
712 : /* separate normal ranges if any */
713 22 : classes_groups[CLS_NORMAL] = SPLIT_RIGHT;
714 : }
715 : else
716 : {
717 : /*----------
718 : * Try to split classes in one of two ways:
719 : * 1) containing infinities - not containing infinities
720 : * 2) containing empty - not containing empty
721 : *
722 : * Select the way which balances the ranges between left and right
723 : * the best. If split in these ways is not possible, there are at
724 : * most 3 classes, so just separate biggest class.
725 : *----------
726 : */
727 : int infCount,
728 : nonInfCount;
729 : int emptyCount,
730 : nonEmptyCount;
731 :
732 0 : nonInfCount =
733 0 : count_in_classes[CLS_NORMAL] +
734 0 : count_in_classes[CLS_CONTAIN_EMPTY] +
735 0 : count_in_classes[CLS_EMPTY];
736 0 : infCount = total_count - nonInfCount;
737 :
738 0 : nonEmptyCount =
739 0 : count_in_classes[CLS_NORMAL] +
740 0 : count_in_classes[CLS_LOWER_INF] +
741 0 : count_in_classes[CLS_UPPER_INF] +
742 0 : count_in_classes[CLS_LOWER_INF | CLS_UPPER_INF];
743 0 : emptyCount = total_count - nonEmptyCount;
744 :
745 0 : if (infCount > 0 && nonInfCount > 0 &&
746 0 : (abs(infCount - nonInfCount) <=
747 0 : abs(emptyCount - nonEmptyCount)))
748 : {
749 0 : classes_groups[CLS_NORMAL] = SPLIT_RIGHT;
750 0 : classes_groups[CLS_CONTAIN_EMPTY] = SPLIT_RIGHT;
751 0 : classes_groups[CLS_EMPTY] = SPLIT_RIGHT;
752 : }
753 0 : else if (emptyCount > 0 && nonEmptyCount > 0)
754 : {
755 0 : classes_groups[CLS_NORMAL] = SPLIT_RIGHT;
756 0 : classes_groups[CLS_LOWER_INF] = SPLIT_RIGHT;
757 0 : classes_groups[CLS_UPPER_INF] = SPLIT_RIGHT;
758 0 : classes_groups[CLS_LOWER_INF | CLS_UPPER_INF] = SPLIT_RIGHT;
759 : }
760 : else
761 : {
762 : /*
763 : * Either total_count == emptyCount or total_count ==
764 : * infCount.
765 : */
766 0 : classes_groups[biggest_class] = SPLIT_RIGHT;
767 : }
768 : }
769 :
770 22 : range_gist_class_split(typcache, entryvec, v, classes_groups);
771 : }
772 :
773 540 : PG_RETURN_POINTER(v);
774 : }
775 :
776 : /* equality comparator for GiST */
777 : Datum
778 91704 : range_gist_same(PG_FUNCTION_ARGS)
779 : {
780 91704 : RangeType *r1 = PG_GETARG_RANGE_P(0);
781 91704 : RangeType *r2 = PG_GETARG_RANGE_P(1);
782 91704 : bool *result = (bool *) PG_GETARG_POINTER(2);
783 :
784 : /*
785 : * range_eq will ignore the RANGE_CONTAIN_EMPTY flag, so we have to check
786 : * that for ourselves. More generally, if the entries have been properly
787 : * normalized, then unequal flags bytes must mean unequal ranges ... so
788 : * let's just test all the flag bits at once.
789 : */
790 91704 : if (range_get_flags(r1) != range_get_flags(r2))
791 54 : *result = false;
792 : else
793 : {
794 : TypeCacheEntry *typcache;
795 :
796 91650 : typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1));
797 :
798 91650 : *result = range_eq_internal(typcache, r1, r2);
799 : }
800 :
801 91704 : PG_RETURN_POINTER(result);
802 : }
803 :
804 : /*
805 : *----------------------------------------------------------
806 : * STATIC FUNCTIONS
807 : *----------------------------------------------------------
808 : */
809 :
810 : /*
811 : * Return the smallest range that contains r1 and r2
812 : *
813 : * This differs from regular range_union in two critical ways:
814 : * 1. It won't throw an error for non-adjacent r1 and r2, but just absorb
815 : * the intervening values into the result range.
816 : * 2. We track whether any empty range has been union'd into the result,
817 : * so that contained_by searches can be indexed. Note that this means
818 : * that *all* unions formed within the GiST index must go through here.
819 : */
820 : static RangeType *
821 268608 : range_super_union(TypeCacheEntry *typcache, RangeType *r1, RangeType *r2)
822 : {
823 : RangeType *result;
824 : RangeBound lower1,
825 : lower2;
826 : RangeBound upper1,
827 : upper2;
828 : bool empty1,
829 : empty2;
830 : char flags1,
831 : flags2;
832 : RangeBound *result_lower;
833 : RangeBound *result_upper;
834 :
835 268608 : range_deserialize(typcache, r1, &lower1, &upper1, &empty1);
836 268608 : range_deserialize(typcache, r2, &lower2, &upper2, &empty2);
837 268608 : flags1 = range_get_flags(r1);
838 268608 : flags2 = range_get_flags(r2);
839 :
840 268608 : if (empty1)
841 : {
842 : /* We can return r2 as-is if it already is or contains empty */
843 31050 : if (flags2 & (RANGE_EMPTY | RANGE_CONTAIN_EMPTY))
844 31050 : return r2;
845 : /* Else we'd better copy it (modify-in-place isn't safe) */
846 0 : r2 = rangeCopy(r2);
847 0 : range_set_contain_empty(r2);
848 0 : return r2;
849 : }
850 237558 : if (empty2)
851 : {
852 : /* We can return r1 as-is if it already is or contains empty */
853 3432 : if (flags1 & (RANGE_EMPTY | RANGE_CONTAIN_EMPTY))
854 3414 : return r1;
855 : /* Else we'd better copy it (modify-in-place isn't safe) */
856 18 : r1 = rangeCopy(r1);
857 18 : range_set_contain_empty(r1);
858 18 : return r1;
859 : }
860 :
861 234126 : if (range_cmp_bounds(typcache, &lower1, &lower2) <= 0)
862 234022 : result_lower = &lower1;
863 : else
864 104 : result_lower = &lower2;
865 :
866 234126 : if (range_cmp_bounds(typcache, &upper1, &upper2) >= 0)
867 44320 : result_upper = &upper1;
868 : else
869 189806 : result_upper = &upper2;
870 :
871 : /* optimization to avoid constructing a new range */
872 234126 : if (result_lower == &lower1 && result_upper == &upper1 &&
873 44302 : ((flags1 & RANGE_CONTAIN_EMPTY) || !(flags2 & RANGE_CONTAIN_EMPTY)))
874 44302 : return r1;
875 189824 : if (result_lower == &lower2 && result_upper == &upper2 &&
876 86 : ((flags2 & RANGE_CONTAIN_EMPTY) || !(flags1 & RANGE_CONTAIN_EMPTY)))
877 86 : return r2;
878 :
879 189738 : result = make_range(typcache, result_lower, result_upper, false, NULL);
880 :
881 189738 : if ((flags1 & RANGE_CONTAIN_EMPTY) || (flags2 & RANGE_CONTAIN_EMPTY))
882 0 : range_set_contain_empty(result);
883 :
884 189738 : return result;
885 : }
886 :
887 : static bool
888 6130 : multirange_union_range_equal(TypeCacheEntry *typcache,
889 : const RangeType *r,
890 : const MultirangeType *mr)
891 : {
892 : RangeBound lower1,
893 : upper1,
894 : lower2,
895 : upper2,
896 : tmp;
897 : bool empty;
898 :
899 6130 : if (RangeIsEmpty(r) || MultirangeIsEmpty(mr))
900 3000 : return (RangeIsEmpty(r) && MultirangeIsEmpty(mr));
901 :
902 3130 : range_deserialize(typcache, r, &lower1, &upper1, &empty);
903 : Assert(!empty);
904 3130 : multirange_get_bounds(typcache, mr, 0, &lower2, &tmp);
905 3130 : multirange_get_bounds(typcache, mr, mr->rangeCount - 1, &tmp, &upper2);
906 :
907 3310 : return (range_cmp_bounds(typcache, &lower1, &lower2) == 0 &&
908 180 : range_cmp_bounds(typcache, &upper1, &upper2) == 0);
909 : }
910 :
911 : /*
912 : * GiST consistent test on an index internal page with range query
913 : */
914 : static bool
915 6714 : range_gist_consistent_int_range(TypeCacheEntry *typcache,
916 : StrategyNumber strategy,
917 : const RangeType *key,
918 : const RangeType *query)
919 : {
920 6714 : switch (strategy)
921 : {
922 720 : case RANGESTRAT_BEFORE:
923 720 : if (RangeIsEmpty(key) || RangeIsEmpty(query))
924 222 : return false;
925 498 : return (!range_overright_internal(typcache, key, query));
926 720 : case RANGESTRAT_OVERLEFT:
927 720 : if (RangeIsEmpty(key) || RangeIsEmpty(query))
928 222 : return false;
929 498 : return (!range_after_internal(typcache, key, query));
930 720 : case RANGESTRAT_OVERLAPS:
931 720 : return range_overlaps_internal(typcache, key, query);
932 720 : case RANGESTRAT_OVERRIGHT:
933 720 : if (RangeIsEmpty(key) || RangeIsEmpty(query))
934 222 : return false;
935 498 : return (!range_before_internal(typcache, key, query));
936 720 : case RANGESTRAT_AFTER:
937 720 : if (RangeIsEmpty(key) || RangeIsEmpty(query))
938 222 : return false;
939 498 : return (!range_overleft_internal(typcache, key, query));
940 720 : case RANGESTRAT_ADJACENT:
941 720 : if (RangeIsEmpty(key) || RangeIsEmpty(query))
942 222 : return false;
943 498 : if (range_adjacent_internal(typcache, key, query))
944 0 : return true;
945 498 : return range_overlaps_internal(typcache, key, query);
946 1278 : case RANGESTRAT_CONTAINS:
947 1278 : return range_contains_internal(typcache, key, query);
948 720 : case RANGESTRAT_CONTAINED_BY:
949 :
950 : /*
951 : * Empty ranges are contained by anything, so if key is or
952 : * contains any empty ranges, we must descend into it. Otherwise,
953 : * descend only if key overlaps the query.
954 : */
955 720 : if (RangeIsOrContainsEmpty(key))
956 72 : return true;
957 648 : return range_overlaps_internal(typcache, key, query);
958 396 : case RANGESTRAT_EQ:
959 :
960 : /*
961 : * If query is empty, descend only if the key is or contains any
962 : * empty ranges. Otherwise, descend if key contains query.
963 : */
964 396 : if (RangeIsEmpty(query))
965 0 : return RangeIsOrContainsEmpty(key);
966 396 : return range_contains_internal(typcache, key, query);
967 0 : default:
968 0 : elog(ERROR, "unrecognized range strategy: %d", strategy);
969 : return false; /* keep compiler quiet */
970 : }
971 : }
972 :
973 : /*
974 : * GiST consistent test on an index internal page with multirange query
975 : */
976 : static bool
977 6804 : range_gist_consistent_int_multirange(TypeCacheEntry *typcache,
978 : StrategyNumber strategy,
979 : const RangeType *key,
980 : const MultirangeType *query)
981 : {
982 6804 : switch (strategy)
983 : {
984 720 : case RANGESTRAT_BEFORE:
985 720 : if (RangeIsEmpty(key) || MultirangeIsEmpty(query))
986 222 : return false;
987 498 : return (!range_overright_multirange_internal(typcache, key, query));
988 720 : case RANGESTRAT_OVERLEFT:
989 720 : if (RangeIsEmpty(key) || MultirangeIsEmpty(query))
990 222 : return false;
991 498 : return (!range_after_multirange_internal(typcache, key, query));
992 720 : case RANGESTRAT_OVERLAPS:
993 720 : return range_overlaps_multirange_internal(typcache, key, query);
994 720 : case RANGESTRAT_OVERRIGHT:
995 720 : if (RangeIsEmpty(key) || MultirangeIsEmpty(query))
996 222 : return false;
997 498 : return (!range_before_multirange_internal(typcache, key, query));
998 720 : case RANGESTRAT_AFTER:
999 720 : if (RangeIsEmpty(key) || MultirangeIsEmpty(query))
1000 222 : return false;
1001 498 : return (!range_overleft_multirange_internal(typcache, key, query));
1002 720 : case RANGESTRAT_ADJACENT:
1003 720 : if (RangeIsEmpty(key) || MultirangeIsEmpty(query))
1004 222 : return false;
1005 498 : if (range_adjacent_multirange_internal(typcache, key, query))
1006 0 : return true;
1007 498 : return range_overlaps_multirange_internal(typcache, key, query);
1008 1440 : case RANGESTRAT_CONTAINS:
1009 1440 : return range_contains_multirange_internal(typcache, key, query);
1010 720 : case RANGESTRAT_CONTAINED_BY:
1011 :
1012 : /*
1013 : * Empty ranges are contained by anything, so if key is or
1014 : * contains any empty ranges, we must descend into it. Otherwise,
1015 : * descend only if key overlaps the query.
1016 : */
1017 720 : if (RangeIsOrContainsEmpty(key))
1018 72 : return true;
1019 648 : return range_overlaps_multirange_internal(typcache, key, query);
1020 324 : case RANGESTRAT_EQ:
1021 :
1022 : /*
1023 : * If query is empty, descend only if the key is or contains any
1024 : * empty ranges. Otherwise, descend if key contains query.
1025 : */
1026 324 : if (MultirangeIsEmpty(query))
1027 162 : return RangeIsOrContainsEmpty(key);
1028 162 : return range_contains_multirange_internal(typcache, key, query);
1029 0 : default:
1030 0 : elog(ERROR, "unrecognized range strategy: %d", strategy);
1031 : return false; /* keep compiler quiet */
1032 : }
1033 : }
1034 :
1035 : /*
1036 : * GiST consistent test on an index internal page with element query
1037 : */
1038 : static bool
1039 558 : range_gist_consistent_int_element(TypeCacheEntry *typcache,
1040 : StrategyNumber strategy,
1041 : const RangeType *key,
1042 : Datum query)
1043 : {
1044 558 : switch (strategy)
1045 : {
1046 558 : case RANGESTRAT_CONTAINS_ELEM:
1047 558 : return range_contains_elem_internal(typcache, key, query);
1048 0 : default:
1049 0 : elog(ERROR, "unrecognized range strategy: %d", strategy);
1050 : return false; /* keep compiler quiet */
1051 : }
1052 : }
1053 :
1054 : /*
1055 : * GiST consistent test on an index leaf page with range query
1056 : */
1057 : static bool
1058 447632 : range_gist_consistent_leaf_range(TypeCacheEntry *typcache,
1059 : StrategyNumber strategy,
1060 : const RangeType *key,
1061 : const RangeType *query)
1062 : {
1063 447632 : switch (strategy)
1064 : {
1065 17330 : case RANGESTRAT_BEFORE:
1066 17330 : return range_before_internal(typcache, key, query);
1067 36180 : case RANGESTRAT_OVERLEFT:
1068 36180 : return range_overleft_internal(typcache, key, query);
1069 16580 : case RANGESTRAT_OVERLAPS:
1070 16580 : return range_overlaps_internal(typcache, key, query);
1071 81600 : case RANGESTRAT_OVERRIGHT:
1072 81600 : return range_overright_internal(typcache, key, query);
1073 74006 : case RANGESTRAT_AFTER:
1074 74006 : return range_after_internal(typcache, key, query);
1075 36180 : case RANGESTRAT_ADJACENT:
1076 36180 : return range_adjacent_internal(typcache, key, query);
1077 130062 : case RANGESTRAT_CONTAINS:
1078 130062 : return range_contains_internal(typcache, key, query);
1079 33120 : case RANGESTRAT_CONTAINED_BY:
1080 33120 : return range_contained_by_internal(typcache, key, query);
1081 22574 : case RANGESTRAT_EQ:
1082 22574 : return range_eq_internal(typcache, key, query);
1083 0 : default:
1084 0 : elog(ERROR, "unrecognized range strategy: %d", strategy);
1085 : return false; /* keep compiler quiet */
1086 : }
1087 : }
1088 :
1089 : /*
1090 : * GiST consistent test on an index leaf page with multirange query
1091 : */
1092 : static bool
1093 455982 : range_gist_consistent_leaf_multirange(TypeCacheEntry *typcache,
1094 : StrategyNumber strategy,
1095 : const RangeType *key,
1096 : const MultirangeType *query)
1097 : {
1098 455982 : switch (strategy)
1099 : {
1100 17330 : case RANGESTRAT_BEFORE:
1101 17330 : return range_before_multirange_internal(typcache, key, query);
1102 36180 : case RANGESTRAT_OVERLEFT:
1103 36180 : return range_overleft_multirange_internal(typcache, key, query);
1104 18358 : case RANGESTRAT_OVERLAPS:
1105 18358 : return range_overlaps_multirange_internal(typcache, key, query);
1106 81600 : case RANGESTRAT_OVERRIGHT:
1107 81600 : return range_overright_multirange_internal(typcache, key, query);
1108 74006 : case RANGESTRAT_AFTER:
1109 74006 : return range_after_multirange_internal(typcache, key, query);
1110 36180 : case RANGESTRAT_ADJACENT:
1111 36180 : return range_adjacent_multirange_internal(typcache, key, query);
1112 152262 : case RANGESTRAT_CONTAINS:
1113 152262 : return range_contains_multirange_internal(typcache, key, query);
1114 33936 : case RANGESTRAT_CONTAINED_BY:
1115 33936 : return multirange_contains_range_internal(typcache, query, key);
1116 6130 : case RANGESTRAT_EQ:
1117 6130 : return multirange_union_range_equal(typcache, key, query);
1118 0 : default:
1119 0 : elog(ERROR, "unrecognized range strategy: %d", strategy);
1120 : return false; /* keep compiler quiet */
1121 : }
1122 : }
1123 :
1124 : /*
1125 : * GiST consistent test on an index leaf page with element query
1126 : */
1127 : static bool
1128 11262 : range_gist_consistent_leaf_element(TypeCacheEntry *typcache,
1129 : StrategyNumber strategy,
1130 : const RangeType *key,
1131 : Datum query)
1132 : {
1133 11262 : switch (strategy)
1134 : {
1135 11262 : case RANGESTRAT_CONTAINS_ELEM:
1136 11262 : return range_contains_elem_internal(typcache, key, query);
1137 0 : default:
1138 0 : elog(ERROR, "unrecognized range strategy: %d", strategy);
1139 : return false; /* keep compiler quiet */
1140 : }
1141 : }
1142 :
1143 : /*
1144 : * Trivial split: half of entries will be placed on one page
1145 : * and the other half on the other page.
1146 : */
1147 : static void
1148 42 : range_gist_fallback_split(TypeCacheEntry *typcache,
1149 : GistEntryVector *entryvec,
1150 : GIST_SPLITVEC *v)
1151 : {
1152 42 : RangeType *left_range = NULL;
1153 42 : RangeType *right_range = NULL;
1154 : OffsetNumber i,
1155 : maxoff,
1156 : split_idx;
1157 :
1158 42 : maxoff = entryvec->n - 1;
1159 : /* Split entries before this to left page, after to right: */
1160 42 : split_idx = (maxoff - FirstOffsetNumber) / 2 + FirstOffsetNumber;
1161 :
1162 42 : v->spl_nleft = 0;
1163 42 : v->spl_nright = 0;
1164 16194 : for (i = FirstOffsetNumber; i <= maxoff; i++)
1165 : {
1166 16152 : RangeType *range = DatumGetRangeTypeP(entryvec->vector[i].key);
1167 :
1168 16152 : if (i < split_idx)
1169 8046 : PLACE_LEFT(range, i);
1170 : else
1171 8106 : PLACE_RIGHT(range, i);
1172 : }
1173 :
1174 42 : v->spl_ldatum = RangeTypePGetDatum(left_range);
1175 42 : v->spl_rdatum = RangeTypePGetDatum(right_range);
1176 42 : }
1177 :
1178 : /*
1179 : * Split based on classes of ranges.
1180 : *
1181 : * See get_gist_range_class for class definitions.
1182 : * classes_groups is an array of length CLS_COUNT indicating the side of the
1183 : * split to which each class should go.
1184 : */
1185 : static void
1186 22 : range_gist_class_split(TypeCacheEntry *typcache,
1187 : GistEntryVector *entryvec,
1188 : GIST_SPLITVEC *v,
1189 : SplitLR *classes_groups)
1190 : {
1191 22 : RangeType *left_range = NULL;
1192 22 : RangeType *right_range = NULL;
1193 : OffsetNumber i,
1194 : maxoff;
1195 :
1196 22 : maxoff = entryvec->n - 1;
1197 :
1198 22 : v->spl_nleft = 0;
1199 22 : v->spl_nright = 0;
1200 7010 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1201 : {
1202 6988 : RangeType *range = DatumGetRangeTypeP(entryvec->vector[i].key);
1203 : int class;
1204 :
1205 : /* Get class of range */
1206 6988 : class = get_gist_range_class(range);
1207 :
1208 : /* Place range to appropriate page */
1209 6988 : if (classes_groups[class] == SPLIT_LEFT)
1210 3986 : PLACE_LEFT(range, i);
1211 : else
1212 : {
1213 : Assert(classes_groups[class] == SPLIT_RIGHT);
1214 3002 : PLACE_RIGHT(range, i);
1215 : }
1216 : }
1217 :
1218 22 : v->spl_ldatum = RangeTypePGetDatum(left_range);
1219 22 : v->spl_rdatum = RangeTypePGetDatum(right_range);
1220 22 : }
1221 :
1222 : /*
1223 : * Sorting based split. First half of entries according to the sort will be
1224 : * placed to one page, and second half of entries will be placed to other
1225 : * page. use_upper_bound parameter indicates whether to use upper or lower
1226 : * bound for sorting.
1227 : */
1228 : static void
1229 0 : range_gist_single_sorting_split(TypeCacheEntry *typcache,
1230 : GistEntryVector *entryvec,
1231 : GIST_SPLITVEC *v,
1232 : bool use_upper_bound)
1233 : {
1234 : SingleBoundSortItem *sortItems;
1235 0 : RangeType *left_range = NULL;
1236 0 : RangeType *right_range = NULL;
1237 : OffsetNumber i,
1238 : maxoff,
1239 : split_idx;
1240 :
1241 0 : maxoff = entryvec->n - 1;
1242 :
1243 : sortItems = (SingleBoundSortItem *)
1244 0 : palloc(maxoff * sizeof(SingleBoundSortItem));
1245 :
1246 : /*
1247 : * Prepare auxiliary array and sort the values.
1248 : */
1249 0 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1250 : {
1251 0 : RangeType *range = DatumGetRangeTypeP(entryvec->vector[i].key);
1252 : RangeBound bound2;
1253 : bool empty;
1254 :
1255 0 : sortItems[i - 1].index = i;
1256 : /* Put appropriate bound into array */
1257 0 : if (use_upper_bound)
1258 0 : range_deserialize(typcache, range, &bound2,
1259 0 : &sortItems[i - 1].bound, &empty);
1260 : else
1261 0 : range_deserialize(typcache, range, &sortItems[i - 1].bound,
1262 : &bound2, &empty);
1263 : Assert(!empty);
1264 : }
1265 :
1266 0 : qsort_arg(sortItems, maxoff, sizeof(SingleBoundSortItem),
1267 : single_bound_cmp, typcache);
1268 :
1269 0 : split_idx = maxoff / 2;
1270 :
1271 0 : v->spl_nleft = 0;
1272 0 : v->spl_nright = 0;
1273 :
1274 0 : for (i = 0; i < maxoff; i++)
1275 : {
1276 0 : int idx = sortItems[i].index;
1277 0 : RangeType *range = DatumGetRangeTypeP(entryvec->vector[idx].key);
1278 :
1279 0 : if (i < split_idx)
1280 0 : PLACE_LEFT(range, idx);
1281 : else
1282 0 : PLACE_RIGHT(range, idx);
1283 : }
1284 :
1285 0 : v->spl_ldatum = RangeTypePGetDatum(left_range);
1286 0 : v->spl_rdatum = RangeTypePGetDatum(right_range);
1287 0 : }
1288 :
1289 : /*
1290 : * Double sorting split algorithm.
1291 : *
1292 : * The algorithm considers dividing ranges into two groups. The first (left)
1293 : * group contains general left bound. The second (right) group contains
1294 : * general right bound. The challenge is to find upper bound of left group
1295 : * and lower bound of right group so that overlap of groups is minimal and
1296 : * ratio of distribution is acceptable. Algorithm finds for each lower bound of
1297 : * right group minimal upper bound of left group, and for each upper bound of
1298 : * left group maximal lower bound of right group. For each found pair
1299 : * range_gist_consider_split considers replacement of currently selected
1300 : * split with the new one.
1301 : *
1302 : * After that, all the entries are divided into three groups:
1303 : * 1) Entries which should be placed to the left group
1304 : * 2) Entries which should be placed to the right group
1305 : * 3) "Common entries" which can be placed to either group without affecting
1306 : * amount of overlap.
1307 : *
1308 : * The common ranges are distributed by difference of distance from lower
1309 : * bound of common range to lower bound of right group and distance from upper
1310 : * bound of common range to upper bound of left group.
1311 : *
1312 : * For details see:
1313 : * "A new double sorting-based node splitting algorithm for R-tree",
1314 : * A. Korotkov
1315 : * http://syrcose.ispras.ru/2011/files/SYRCoSE2011_Proceedings.pdf#page=36
1316 : */
1317 : static void
1318 476 : range_gist_double_sorting_split(TypeCacheEntry *typcache,
1319 : GistEntryVector *entryvec,
1320 : GIST_SPLITVEC *v)
1321 : {
1322 : ConsiderSplitContext context;
1323 : OffsetNumber i,
1324 : maxoff;
1325 476 : RangeType *left_range = NULL,
1326 476 : *right_range = NULL;
1327 : int common_entries_count;
1328 : NonEmptyRange *by_lower,
1329 : *by_upper;
1330 : CommonEntry *common_entries;
1331 : int nentries,
1332 : i1,
1333 : i2;
1334 : RangeBound *right_lower,
1335 : *left_upper;
1336 :
1337 476 : memset(&context, 0, sizeof(ConsiderSplitContext));
1338 476 : context.typcache = typcache;
1339 476 : context.has_subtype_diff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
1340 :
1341 476 : maxoff = entryvec->n - 1;
1342 476 : nentries = context.entries_count = maxoff - FirstOffsetNumber + 1;
1343 476 : context.first = true;
1344 :
1345 : /* Allocate arrays for sorted range bounds */
1346 476 : by_lower = (NonEmptyRange *) palloc(nentries * sizeof(NonEmptyRange));
1347 476 : by_upper = (NonEmptyRange *) palloc(nentries * sizeof(NonEmptyRange));
1348 :
1349 : /* Fill arrays of bounds */
1350 129868 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1351 : {
1352 129392 : RangeType *range = DatumGetRangeTypeP(entryvec->vector[i].key);
1353 : bool empty;
1354 :
1355 129392 : range_deserialize(typcache, range,
1356 129392 : &by_lower[i - FirstOffsetNumber].lower,
1357 129392 : &by_lower[i - FirstOffsetNumber].upper,
1358 : &empty);
1359 : Assert(!empty);
1360 : }
1361 :
1362 : /*
1363 : * Make two arrays of range bounds: one sorted by lower bound and another
1364 : * sorted by upper bound.
1365 : */
1366 476 : memcpy(by_upper, by_lower, nentries * sizeof(NonEmptyRange));
1367 476 : qsort_arg(by_lower, nentries, sizeof(NonEmptyRange),
1368 : interval_cmp_lower, typcache);
1369 476 : qsort_arg(by_upper, nentries, sizeof(NonEmptyRange),
1370 : interval_cmp_upper, typcache);
1371 :
1372 : /*----------
1373 : * The goal is to form a left and right range, so that every entry
1374 : * range is contained by either left or right interval (or both).
1375 : *
1376 : * For example, with the ranges (0,1), (1,3), (2,3), (2,4):
1377 : *
1378 : * 0 1 2 3 4
1379 : * +-+
1380 : * +---+
1381 : * +-+
1382 : * +---+
1383 : *
1384 : * The left and right ranges are of the form (0,a) and (b,4).
1385 : * We first consider splits where b is the lower bound of an entry.
1386 : * We iterate through all entries, and for each b, calculate the
1387 : * smallest possible a. Then we consider splits where a is the
1388 : * upper bound of an entry, and for each a, calculate the greatest
1389 : * possible b.
1390 : *
1391 : * In the above example, the first loop would consider splits:
1392 : * b=0: (0,1)-(0,4)
1393 : * b=1: (0,1)-(1,4)
1394 : * b=2: (0,3)-(2,4)
1395 : *
1396 : * And the second loop:
1397 : * a=1: (0,1)-(1,4)
1398 : * a=3: (0,3)-(2,4)
1399 : * a=4: (0,4)-(2,4)
1400 : *----------
1401 : */
1402 :
1403 : /*
1404 : * Iterate over lower bound of right group, finding smallest possible
1405 : * upper bound of left group.
1406 : */
1407 476 : i1 = 0;
1408 476 : i2 = 0;
1409 476 : right_lower = &by_lower[i1].lower;
1410 476 : left_upper = &by_upper[i2].lower;
1411 : while (true)
1412 : {
1413 : /*
1414 : * Find next lower bound of right group.
1415 : */
1416 514020 : while (i1 < nentries &&
1417 256772 : range_cmp_bounds(typcache, right_lower,
1418 256772 : &by_lower[i1].lower) == 0)
1419 : {
1420 129392 : if (range_cmp_bounds(typcache, &by_lower[i1].upper,
1421 : left_upper) > 0)
1422 109952 : left_upper = &by_lower[i1].upper;
1423 129392 : i1++;
1424 : }
1425 127856 : if (i1 >= nentries)
1426 476 : break;
1427 127380 : right_lower = &by_lower[i1].lower;
1428 :
1429 : /*
1430 : * Find count of ranges which anyway should be placed to the left
1431 : * group.
1432 : */
1433 495092 : while (i2 < nentries &&
1434 238590 : range_cmp_bounds(typcache, &by_upper[i2].upper,
1435 : left_upper) <= 0)
1436 129122 : i2++;
1437 :
1438 : /*
1439 : * Consider found split to see if it's better than what we had.
1440 : */
1441 127380 : range_gist_consider_split(&context, right_lower, i1, left_upper, i2);
1442 : }
1443 :
1444 : /*
1445 : * Iterate over upper bound of left group finding greatest possible lower
1446 : * bound of right group.
1447 : */
1448 476 : i1 = nentries - 1;
1449 476 : i2 = nentries - 1;
1450 476 : right_lower = &by_lower[i1].upper;
1451 476 : left_upper = &by_upper[i2].upper;
1452 : while (true)
1453 : {
1454 : /*
1455 : * Find next upper bound of left group.
1456 : */
1457 517092 : while (i2 >= 0 &&
1458 258308 : range_cmp_bounds(typcache, left_upper,
1459 258308 : &by_upper[i2].upper) == 0)
1460 : {
1461 129392 : if (range_cmp_bounds(typcache, &by_upper[i2].lower,
1462 : right_lower) < 0)
1463 109944 : right_lower = &by_upper[i2].lower;
1464 129392 : i2--;
1465 : }
1466 129392 : if (i2 < 0)
1467 476 : break;
1468 128916 : left_upper = &by_upper[i2].upper;
1469 :
1470 : /*
1471 : * Find count of intervals which anyway should be placed to the right
1472 : * group.
1473 : */
1474 496628 : while (i1 >= 0 &&
1475 238590 : range_cmp_bounds(typcache, &by_lower[i1].lower,
1476 : right_lower) >= 0)
1477 129122 : i1--;
1478 :
1479 : /*
1480 : * Consider found split to see if it's better than what we had.
1481 : */
1482 128916 : range_gist_consider_split(&context, right_lower, i1 + 1,
1483 : left_upper, i2 + 1);
1484 : }
1485 :
1486 : /*
1487 : * If we failed to find any acceptable splits, use trivial split.
1488 : */
1489 476 : if (context.first)
1490 : {
1491 0 : range_gist_fallback_split(typcache, entryvec, v);
1492 0 : return;
1493 : }
1494 :
1495 : /*
1496 : * Ok, we have now selected bounds of the groups. Now we have to
1497 : * distribute entries themselves. At first we distribute entries which can
1498 : * be placed unambiguously and collect "common entries" to array.
1499 : */
1500 :
1501 : /* Allocate vectors for results */
1502 476 : v->spl_left = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
1503 476 : v->spl_right = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
1504 476 : v->spl_nleft = 0;
1505 476 : v->spl_nright = 0;
1506 :
1507 : /*
1508 : * Allocate an array for "common entries" - entries which can be placed to
1509 : * either group without affecting overlap along selected axis.
1510 : */
1511 476 : common_entries_count = 0;
1512 476 : common_entries = (CommonEntry *) palloc(nentries * sizeof(CommonEntry));
1513 :
1514 : /*
1515 : * Distribute entries which can be distributed unambiguously, and collect
1516 : * common entries.
1517 : */
1518 129868 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1519 : {
1520 : RangeType *range;
1521 : RangeBound lower,
1522 : upper;
1523 : bool empty;
1524 :
1525 : /*
1526 : * Get upper and lower bounds along selected axis.
1527 : */
1528 129392 : range = DatumGetRangeTypeP(entryvec->vector[i].key);
1529 :
1530 129392 : range_deserialize(typcache, range, &lower, &upper, &empty);
1531 :
1532 129392 : if (range_cmp_bounds(typcache, &upper, context.left_upper) <= 0)
1533 : {
1534 : /* Fits in the left group */
1535 59624 : if (range_cmp_bounds(typcache, &lower, context.right_lower) >= 0)
1536 : {
1537 : /* Fits also in the right group, so "common entry" */
1538 11860 : common_entries[common_entries_count].index = i;
1539 11860 : if (context.has_subtype_diff)
1540 : {
1541 : /*
1542 : * delta = (lower - context.right_lower) -
1543 : * (context.left_upper - upper)
1544 : */
1545 11860 : common_entries[common_entries_count].delta =
1546 11860 : call_subtype_diff(typcache,
1547 : lower.val,
1548 23720 : context.right_lower->val) -
1549 11860 : call_subtype_diff(typcache,
1550 11860 : context.left_upper->val,
1551 : upper.val);
1552 : }
1553 : else
1554 : {
1555 : /* Without subtype_diff, take all deltas as zero */
1556 0 : common_entries[common_entries_count].delta = 0;
1557 : }
1558 11860 : common_entries_count++;
1559 : }
1560 : else
1561 : {
1562 : /* Doesn't fit to the right group, so join to the left group */
1563 47764 : PLACE_LEFT(range, i);
1564 : }
1565 : }
1566 : else
1567 : {
1568 : /*
1569 : * Each entry should fit on either left or right group. Since this
1570 : * entry didn't fit in the left group, it better fit in the right
1571 : * group.
1572 : */
1573 : Assert(range_cmp_bounds(typcache, &lower,
1574 : context.right_lower) >= 0);
1575 69768 : PLACE_RIGHT(range, i);
1576 : }
1577 : }
1578 :
1579 : /*
1580 : * Distribute "common entries", if any.
1581 : */
1582 476 : if (common_entries_count > 0)
1583 : {
1584 : /*
1585 : * Sort "common entries" by calculated deltas in order to distribute
1586 : * the most ambiguous entries first.
1587 : */
1588 206 : qsort(common_entries, common_entries_count, sizeof(CommonEntry),
1589 : common_entry_cmp);
1590 :
1591 : /*
1592 : * Distribute "common entries" between groups according to sorting.
1593 : */
1594 12066 : for (i = 0; i < common_entries_count; i++)
1595 : {
1596 : RangeType *range;
1597 11860 : int idx = common_entries[i].index;
1598 :
1599 11860 : range = DatumGetRangeTypeP(entryvec->vector[idx].key);
1600 :
1601 : /*
1602 : * Check if we have to place this entry in either group to achieve
1603 : * LIMIT_RATIO.
1604 : */
1605 11860 : if (i < context.common_left)
1606 0 : PLACE_LEFT(range, idx);
1607 : else
1608 11860 : PLACE_RIGHT(range, idx);
1609 : }
1610 : }
1611 :
1612 476 : v->spl_ldatum = PointerGetDatum(left_range);
1613 476 : v->spl_rdatum = PointerGetDatum(right_range);
1614 : }
1615 :
1616 : /*
1617 : * Consider replacement of currently selected split with a better one
1618 : * during range_gist_double_sorting_split.
1619 : */
1620 : static void
1621 256296 : range_gist_consider_split(ConsiderSplitContext *context,
1622 : RangeBound *right_lower, int min_left_count,
1623 : RangeBound *left_upper, int max_left_count)
1624 : {
1625 : int left_count,
1626 : right_count;
1627 : float4 ratio,
1628 : overlap;
1629 :
1630 : /*
1631 : * Calculate entries distribution ratio assuming most uniform distribution
1632 : * of common entries.
1633 : */
1634 256296 : if (min_left_count >= (context->entries_count + 1) / 2)
1635 113092 : left_count = min_left_count;
1636 143204 : else if (max_left_count <= context->entries_count / 2)
1637 112168 : left_count = max_left_count;
1638 : else
1639 31036 : left_count = context->entries_count / 2;
1640 256296 : right_count = context->entries_count - left_count;
1641 :
1642 : /*
1643 : * Ratio of split: quotient between size of smaller group and total
1644 : * entries count. This is necessarily 0.5 or less; if it's less than
1645 : * LIMIT_RATIO then we will never accept the new split.
1646 : */
1647 256296 : ratio = ((float4) Min(left_count, right_count)) /
1648 256296 : ((float4) context->entries_count);
1649 :
1650 256296 : if (ratio > LIMIT_RATIO)
1651 : {
1652 125710 : bool selectthis = false;
1653 :
1654 : /*
1655 : * The ratio is acceptable, so compare current split with previously
1656 : * selected one. We search for minimal overlap (allowing negative
1657 : * values) and minimal ratio secondarily. If subtype_diff is
1658 : * available, it's used for overlap measure. Without subtype_diff we
1659 : * use number of "common entries" as an overlap measure.
1660 : */
1661 125710 : if (context->has_subtype_diff)
1662 125710 : overlap = call_subtype_diff(context->typcache,
1663 : left_upper->val,
1664 : right_lower->val);
1665 : else
1666 0 : overlap = max_left_count - min_left_count;
1667 :
1668 : /* If there is no previous selection, select this split */
1669 125710 : if (context->first)
1670 476 : selectthis = true;
1671 : else
1672 : {
1673 : /*
1674 : * Choose the new split if it has a smaller overlap, or same
1675 : * overlap but better ratio.
1676 : */
1677 125234 : if (overlap < context->overlap ||
1678 111394 : (overlap == context->overlap && ratio > context->ratio))
1679 34714 : selectthis = true;
1680 : }
1681 :
1682 125710 : if (selectthis)
1683 : {
1684 : /* save information about selected split */
1685 35190 : context->first = false;
1686 35190 : context->ratio = ratio;
1687 35190 : context->overlap = overlap;
1688 35190 : context->right_lower = right_lower;
1689 35190 : context->left_upper = left_upper;
1690 35190 : context->common_left = max_left_count - left_count;
1691 35190 : context->common_right = left_count - min_left_count;
1692 : }
1693 : }
1694 256296 : }
1695 :
1696 : /*
1697 : * Find class number for range.
1698 : *
1699 : * The class number is a valid combination of the properties of the
1700 : * range. Note: the highest possible number is 8, because CLS_EMPTY
1701 : * can't be combined with anything else.
1702 : */
1703 : static int
1704 159520 : get_gist_range_class(RangeType *range)
1705 : {
1706 : int classNumber;
1707 : char flags;
1708 :
1709 159520 : flags = range_get_flags(range);
1710 159520 : if (flags & RANGE_EMPTY)
1711 : {
1712 23016 : classNumber = CLS_EMPTY;
1713 : }
1714 : else
1715 : {
1716 136504 : classNumber = 0;
1717 136504 : if (flags & RANGE_LB_INF)
1718 800 : classNumber |= CLS_LOWER_INF;
1719 136504 : if (flags & RANGE_UB_INF)
1720 308 : classNumber |= CLS_UPPER_INF;
1721 136504 : if (flags & RANGE_CONTAIN_EMPTY)
1722 0 : classNumber |= CLS_CONTAIN_EMPTY;
1723 : }
1724 159520 : return classNumber;
1725 : }
1726 :
1727 : /*
1728 : * Comparison function for range_gist_single_sorting_split.
1729 : */
1730 : static int
1731 0 : single_bound_cmp(const void *a, const void *b, void *arg)
1732 : {
1733 0 : SingleBoundSortItem *i1 = (SingleBoundSortItem *) a;
1734 0 : SingleBoundSortItem *i2 = (SingleBoundSortItem *) b;
1735 0 : TypeCacheEntry *typcache = (TypeCacheEntry *) arg;
1736 :
1737 0 : return range_cmp_bounds(typcache, &i1->bound, &i2->bound);
1738 : }
1739 :
1740 : /*
1741 : * Compare NonEmptyRanges by lower bound.
1742 : */
1743 : static int
1744 517102 : interval_cmp_lower(const void *a, const void *b, void *arg)
1745 : {
1746 517102 : NonEmptyRange *i1 = (NonEmptyRange *) a;
1747 517102 : NonEmptyRange *i2 = (NonEmptyRange *) b;
1748 517102 : TypeCacheEntry *typcache = (TypeCacheEntry *) arg;
1749 :
1750 517102 : return range_cmp_bounds(typcache, &i1->lower, &i2->lower);
1751 : }
1752 :
1753 : /*
1754 : * Compare NonEmptyRanges by upper bound.
1755 : */
1756 : static int
1757 486090 : interval_cmp_upper(const void *a, const void *b, void *arg)
1758 : {
1759 486090 : NonEmptyRange *i1 = (NonEmptyRange *) a;
1760 486090 : NonEmptyRange *i2 = (NonEmptyRange *) b;
1761 486090 : TypeCacheEntry *typcache = (TypeCacheEntry *) arg;
1762 :
1763 486090 : return range_cmp_bounds(typcache, &i1->upper, &i2->upper);
1764 : }
1765 :
1766 : /*
1767 : * Compare CommonEntrys by their deltas.
1768 : */
1769 : static int
1770 11654 : common_entry_cmp(const void *i1, const void *i2)
1771 : {
1772 11654 : double delta1 = ((CommonEntry *) i1)->delta;
1773 11654 : double delta2 = ((CommonEntry *) i2)->delta;
1774 :
1775 11654 : if (delta1 < delta2)
1776 11654 : return -1;
1777 0 : else if (delta1 > delta2)
1778 0 : return 1;
1779 : else
1780 0 : return 0;
1781 : }
1782 :
1783 : /*
1784 : * Convenience function to invoke type-specific subtype_diff function.
1785 : * Caller must have already checked that there is one for the range type.
1786 : */
1787 : static float8
1788 1133640 : call_subtype_diff(TypeCacheEntry *typcache, Datum val1, Datum val2)
1789 : {
1790 : float8 value;
1791 :
1792 1133640 : value = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
1793 : typcache->rng_collation,
1794 : val1, val2));
1795 : /* Cope with buggy subtype_diff function by returning zero */
1796 1133640 : if (value >= 0.0)
1797 1133640 : return value;
1798 0 : return 0.0;
1799 : }
|