Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * rangetypes_spgist.c
4 : * implementation of quad tree over ranges mapped to 2d-points for SP-GiST.
5 : *
6 : * Quad tree is a data structure similar to a binary tree, but is adapted to
7 : * 2d data. Each inner node of a quad tree contains a point (centroid) which
8 : * divides the 2d-space into 4 quadrants. Each quadrant is associated with a
9 : * child node.
10 : *
11 : * Ranges are mapped to 2d-points so that the lower bound is one dimension,
12 : * and the upper bound is another. By convention, we visualize the lower bound
13 : * to be the horizontal axis, and upper bound the vertical axis.
14 : *
15 : * One quirk with this mapping is the handling of empty ranges. An empty range
16 : * doesn't have lower and upper bounds, so it cannot be mapped to 2d space in
17 : * a straightforward way. To cope with that, the root node can have a 5th
18 : * quadrant, which is reserved for empty ranges. Furthermore, there can be
19 : * inner nodes in the tree with no centroid. They contain only two child nodes,
20 : * one for empty ranges and another for non-empty ones. Such a node can appear
21 : * as the root node, or in the tree under the 5th child of the root node (in
22 : * which case it will only contain empty nodes).
23 : *
24 : * The SP-GiST picksplit function uses medians along both axes as the centroid.
25 : * This implementation only uses the comparison function of the range element
26 : * datatype, therefore it works for any range type.
27 : *
28 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
29 : * Portions Copyright (c) 1994, Regents of the University of California
30 : *
31 : * IDENTIFICATION
32 : * src/backend/utils/adt/rangetypes_spgist.c
33 : *
34 : *-------------------------------------------------------------------------
35 : */
36 :
37 : #include "postgres.h"
38 :
39 : #include "access/spgist.h"
40 : #include "access/stratnum.h"
41 : #include "catalog/pg_type.h"
42 : #include "utils/datum.h"
43 : #include "utils/fmgrprotos.h"
44 : #include "utils/rangetypes.h"
45 :
46 : static int16 getQuadrant(TypeCacheEntry *typcache, const RangeType *centroid,
47 : const RangeType *tst);
48 : static int bound_cmp(const void *a, const void *b, void *arg);
49 :
50 : static int adjacent_inner_consistent(TypeCacheEntry *typcache,
51 : const RangeBound *arg, const RangeBound *centroid,
52 : const RangeBound *prev);
53 : static int adjacent_cmp_bounds(TypeCacheEntry *typcache, const RangeBound *arg,
54 : const RangeBound *centroid);
55 :
56 : /*
57 : * SP-GiST 'config' interface function.
58 : */
59 : Datum
60 110 : spg_range_quad_config(PG_FUNCTION_ARGS)
61 : {
62 : /* spgConfigIn *cfgin = (spgConfigIn *) PG_GETARG_POINTER(0); */
63 110 : spgConfigOut *cfg = (spgConfigOut *) PG_GETARG_POINTER(1);
64 :
65 110 : cfg->prefixType = ANYRANGEOID;
66 110 : cfg->labelType = VOIDOID; /* we don't need node labels */
67 110 : cfg->canReturnData = true;
68 110 : cfg->longValuesOK = false;
69 110 : PG_RETURN_VOID();
70 : }
71 :
72 : /*----------
73 : * Determine which quadrant a 2d-mapped range falls into, relative to the
74 : * centroid.
75 : *
76 : * Quadrants are numbered like this:
77 : *
78 : * 4 | 1
79 : * ----+----
80 : * 3 | 2
81 : *
82 : * Where the lower bound of range is the horizontal axis and upper bound the
83 : * vertical axis.
84 : *
85 : * Ranges on one of the axes are taken to lie in the quadrant with higher value
86 : * along perpendicular axis. That is, a value on the horizontal axis is taken
87 : * to belong to quadrant 1 or 4, and a value on the vertical axis is taken to
88 : * belong to quadrant 1 or 2. A range equal to centroid is taken to lie in
89 : * quadrant 1.
90 : *
91 : * Empty ranges are taken to lie in the special quadrant 5.
92 : *----------
93 : */
94 : static int16
95 758706 : getQuadrant(TypeCacheEntry *typcache, const RangeType *centroid, const RangeType *tst)
96 : {
97 : RangeBound centroidLower,
98 : centroidUpper;
99 : bool centroidEmpty;
100 : RangeBound lower,
101 : upper;
102 : bool empty;
103 :
104 758706 : range_deserialize(typcache, centroid, ¢roidLower, ¢roidUpper,
105 : ¢roidEmpty);
106 758706 : range_deserialize(typcache, tst, &lower, &upper, &empty);
107 :
108 758706 : if (empty)
109 12000 : return 5;
110 :
111 746706 : if (range_cmp_bounds(typcache, &lower, ¢roidLower) >= 0)
112 : {
113 680318 : if (range_cmp_bounds(typcache, &upper, ¢roidUpper) >= 0)
114 679640 : return 1;
115 : else
116 678 : return 2;
117 : }
118 : else
119 : {
120 66388 : if (range_cmp_bounds(typcache, &upper, ¢roidUpper) >= 0)
121 14606 : return 4;
122 : else
123 51782 : return 3;
124 : }
125 : }
126 :
127 : /*
128 : * Choose SP-GiST function: choose path for addition of new range.
129 : */
130 : Datum
131 714824 : spg_range_quad_choose(PG_FUNCTION_ARGS)
132 : {
133 714824 : spgChooseIn *in = (spgChooseIn *) PG_GETARG_POINTER(0);
134 714824 : spgChooseOut *out = (spgChooseOut *) PG_GETARG_POINTER(1);
135 714824 : RangeType *inRange = DatumGetRangeTypeP(in->datum),
136 : *centroid;
137 : int16 quadrant;
138 : TypeCacheEntry *typcache;
139 :
140 714824 : if (in->allTheSame)
141 : {
142 12940 : out->resultType = spgMatchNode;
143 : /* nodeN will be set by core */
144 12940 : out->result.matchNode.levelAdd = 0;
145 12940 : out->result.matchNode.restDatum = RangeTypePGetDatum(inRange);
146 12940 : PG_RETURN_VOID();
147 : }
148 :
149 701884 : typcache = range_get_typcache(fcinfo, RangeTypeGetOid(inRange));
150 :
151 : /*
152 : * A node with no centroid divides ranges purely on whether they're empty
153 : * or not. All empty ranges go to child node 0, all non-empty ranges go to
154 : * node 1.
155 : */
156 701884 : if (!in->hasPrefix)
157 : {
158 0 : out->resultType = spgMatchNode;
159 0 : if (RangeIsEmpty(inRange))
160 0 : out->result.matchNode.nodeN = 0;
161 : else
162 0 : out->result.matchNode.nodeN = 1;
163 0 : out->result.matchNode.levelAdd = 1;
164 0 : out->result.matchNode.restDatum = RangeTypePGetDatum(inRange);
165 0 : PG_RETURN_VOID();
166 : }
167 :
168 701884 : centroid = DatumGetRangeTypeP(in->prefixDatum);
169 701884 : quadrant = getQuadrant(typcache, centroid, inRange);
170 :
171 : Assert(quadrant <= in->nNodes);
172 :
173 : /* Select node matching to quadrant number */
174 701884 : out->resultType = spgMatchNode;
175 701884 : out->result.matchNode.nodeN = quadrant - 1;
176 701884 : out->result.matchNode.levelAdd = 1;
177 701884 : out->result.matchNode.restDatum = RangeTypePGetDatum(inRange);
178 :
179 701884 : PG_RETURN_VOID();
180 : }
181 :
182 : /*
183 : * Bound comparison for sorting.
184 : */
185 : static int
186 702118 : bound_cmp(const void *a, const void *b, void *arg)
187 : {
188 702118 : RangeBound *ba = (RangeBound *) a;
189 702118 : RangeBound *bb = (RangeBound *) b;
190 702118 : TypeCacheEntry *typcache = (TypeCacheEntry *) arg;
191 :
192 702118 : return range_cmp_bounds(typcache, ba, bb);
193 : }
194 :
195 : /*
196 : * Picksplit SP-GiST function: split ranges into nodes. Select "centroid"
197 : * range and distribute ranges according to quadrants.
198 : */
199 : Datum
200 482 : spg_range_quad_picksplit(PG_FUNCTION_ARGS)
201 : {
202 482 : spgPickSplitIn *in = (spgPickSplitIn *) PG_GETARG_POINTER(0);
203 482 : spgPickSplitOut *out = (spgPickSplitOut *) PG_GETARG_POINTER(1);
204 : int i;
205 : int j;
206 : int nonEmptyCount;
207 : RangeType *centroid;
208 : bool empty;
209 : TypeCacheEntry *typcache;
210 :
211 : /* Use the median values of lower and upper bounds as the centroid range */
212 : RangeBound *lowerBounds,
213 : *upperBounds;
214 :
215 482 : typcache = range_get_typcache(fcinfo,
216 482 : RangeTypeGetOid(DatumGetRangeTypeP(in->datums[0])));
217 :
218 : /* Allocate memory for bounds */
219 482 : lowerBounds = palloc(sizeof(RangeBound) * in->nTuples);
220 482 : upperBounds = palloc(sizeof(RangeBound) * in->nTuples);
221 482 : j = 0;
222 :
223 : /* Deserialize bounds of ranges, count non-empty ranges */
224 63906 : for (i = 0; i < in->nTuples; i++)
225 : {
226 63424 : range_deserialize(typcache, DatumGetRangeTypeP(in->datums[i]),
227 63424 : &lowerBounds[j], &upperBounds[j], &empty);
228 63424 : if (!empty)
229 56798 : j++;
230 : }
231 482 : nonEmptyCount = j;
232 :
233 : /*
234 : * All the ranges are empty. The best we can do is to construct an inner
235 : * node with no centroid, and put all ranges into node 0. If non-empty
236 : * ranges are added later, they will be routed to node 1.
237 : */
238 482 : if (nonEmptyCount == 0)
239 : {
240 72 : out->nNodes = 2;
241 72 : out->hasPrefix = false;
242 : /* Prefix is empty */
243 72 : out->prefixDatum = PointerGetDatum(NULL);
244 72 : out->nodeLabels = NULL;
245 :
246 72 : out->mapTuplesToNodes = palloc(sizeof(int) * in->nTuples);
247 72 : out->leafTupleDatums = palloc(sizeof(Datum) * in->nTuples);
248 :
249 : /* Place all ranges into node 0 */
250 6698 : for (i = 0; i < in->nTuples; i++)
251 : {
252 6626 : RangeType *range = DatumGetRangeTypeP(in->datums[i]);
253 :
254 6626 : out->leafTupleDatums[i] = RangeTypePGetDatum(range);
255 6626 : out->mapTuplesToNodes[i] = 0;
256 : }
257 72 : PG_RETURN_VOID();
258 : }
259 :
260 : /* Sort range bounds in order to find medians */
261 410 : qsort_arg(lowerBounds, nonEmptyCount, sizeof(RangeBound),
262 : bound_cmp, typcache);
263 410 : qsort_arg(upperBounds, nonEmptyCount, sizeof(RangeBound),
264 : bound_cmp, typcache);
265 :
266 : /* Construct "centroid" range from medians of lower and upper bounds */
267 410 : centroid = range_serialize(typcache, &lowerBounds[nonEmptyCount / 2],
268 410 : &upperBounds[nonEmptyCount / 2], false, NULL);
269 410 : out->hasPrefix = true;
270 410 : out->prefixDatum = RangeTypePGetDatum(centroid);
271 :
272 : /* Create node for empty ranges only if it is a root node */
273 410 : out->nNodes = (in->level == 0) ? 5 : 4;
274 410 : out->nodeLabels = NULL; /* we don't need node labels */
275 :
276 410 : out->mapTuplesToNodes = palloc(sizeof(int) * in->nTuples);
277 410 : out->leafTupleDatums = palloc(sizeof(Datum) * in->nTuples);
278 :
279 : /*
280 : * Assign ranges to corresponding nodes according to quadrants relative to
281 : * "centroid" range.
282 : */
283 57208 : for (i = 0; i < in->nTuples; i++)
284 : {
285 56798 : RangeType *range = DatumGetRangeTypeP(in->datums[i]);
286 56798 : int16 quadrant = getQuadrant(typcache, centroid, range);
287 :
288 56798 : out->leafTupleDatums[i] = RangeTypePGetDatum(range);
289 56798 : out->mapTuplesToNodes[i] = quadrant - 1;
290 : }
291 :
292 410 : PG_RETURN_VOID();
293 : }
294 :
295 : /*
296 : * SP-GiST consistent function for inner nodes: check which nodes are
297 : * consistent with given set of queries.
298 : */
299 : Datum
300 1794 : spg_range_quad_inner_consistent(PG_FUNCTION_ARGS)
301 : {
302 1794 : spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0);
303 1794 : spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1);
304 : int which;
305 : int i;
306 : MemoryContext oldCtx;
307 :
308 : /*
309 : * For adjacent search we need also previous centroid (if any) to improve
310 : * the precision of the consistent check. In this case needPrevious flag
311 : * is set and centroid is passed into traversalValue.
312 : */
313 1794 : bool needPrevious = false;
314 :
315 1794 : if (in->allTheSame)
316 : {
317 : /* Report that all nodes should be visited */
318 144 : out->nNodes = in->nNodes;
319 144 : out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes);
320 1296 : for (i = 0; i < in->nNodes; i++)
321 1152 : out->nodeNumbers[i] = i;
322 144 : PG_RETURN_VOID();
323 : }
324 :
325 1650 : if (!in->hasPrefix)
326 : {
327 : /*
328 : * No centroid on this inner node. Such a node has two child nodes,
329 : * the first for empty ranges, and the second for non-empty ones.
330 : */
331 : Assert(in->nNodes == 2);
332 :
333 : /*
334 : * Nth bit of which variable means that (N - 1)th node should be
335 : * visited. Initially all bits are set. Bits of nodes which should be
336 : * skipped will be unset.
337 : */
338 0 : which = (1 << 1) | (1 << 2);
339 0 : for (i = 0; i < in->nkeys; i++)
340 : {
341 0 : StrategyNumber strategy = in->scankeys[i].sk_strategy;
342 : bool empty;
343 :
344 : /*
345 : * The only strategy when second argument of operator is not range
346 : * is RANGESTRAT_CONTAINS_ELEM.
347 : */
348 0 : if (strategy != RANGESTRAT_CONTAINS_ELEM)
349 0 : empty = RangeIsEmpty(DatumGetRangeTypeP(in->scankeys[i].sk_argument));
350 : else
351 0 : empty = false;
352 :
353 0 : switch (strategy)
354 : {
355 0 : case RANGESTRAT_BEFORE:
356 : case RANGESTRAT_OVERLEFT:
357 : case RANGESTRAT_OVERLAPS:
358 : case RANGESTRAT_OVERRIGHT:
359 : case RANGESTRAT_AFTER:
360 : case RANGESTRAT_ADJACENT:
361 : /* These strategies return false if any argument is empty */
362 0 : if (empty)
363 0 : which = 0;
364 : else
365 0 : which &= (1 << 2);
366 0 : break;
367 :
368 0 : case RANGESTRAT_CONTAINS:
369 :
370 : /*
371 : * All ranges contain an empty range. Only non-empty
372 : * ranges can contain a non-empty range.
373 : */
374 0 : if (!empty)
375 0 : which &= (1 << 2);
376 0 : break;
377 :
378 0 : case RANGESTRAT_CONTAINED_BY:
379 :
380 : /*
381 : * Only an empty range is contained by an empty range.
382 : * Both empty and non-empty ranges can be contained by a
383 : * non-empty range.
384 : */
385 0 : if (empty)
386 0 : which &= (1 << 1);
387 0 : break;
388 :
389 0 : case RANGESTRAT_CONTAINS_ELEM:
390 0 : which &= (1 << 2);
391 0 : break;
392 :
393 0 : case RANGESTRAT_EQ:
394 0 : if (empty)
395 0 : which &= (1 << 1);
396 : else
397 0 : which &= (1 << 2);
398 0 : break;
399 :
400 0 : default:
401 0 : elog(ERROR, "unrecognized range strategy: %d", strategy);
402 : break;
403 : }
404 0 : if (which == 0)
405 0 : break; /* no need to consider remaining conditions */
406 : }
407 : }
408 : else
409 : {
410 : RangeBound centroidLower,
411 : centroidUpper;
412 : bool centroidEmpty;
413 : TypeCacheEntry *typcache;
414 : RangeType *centroid;
415 :
416 : /* This node has a centroid. Fetch it. */
417 1650 : centroid = DatumGetRangeTypeP(in->prefixDatum);
418 1650 : typcache = range_get_typcache(fcinfo,
419 : RangeTypeGetOid(centroid));
420 1650 : range_deserialize(typcache, centroid, ¢roidLower, ¢roidUpper,
421 : ¢roidEmpty);
422 :
423 : Assert(in->nNodes == 4 || in->nNodes == 5);
424 :
425 : /*
426 : * Nth bit of which variable means that (N - 1)th node (Nth quadrant)
427 : * should be visited. Initially all bits are set. Bits of nodes which
428 : * can be skipped will be unset.
429 : */
430 1650 : which = (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4) | (1 << 5);
431 :
432 3300 : for (i = 0; i < in->nkeys; i++)
433 : {
434 : StrategyNumber strategy;
435 : RangeBound lower,
436 : upper;
437 : bool empty;
438 1650 : RangeType *range = NULL;
439 :
440 1650 : RangeType *prevCentroid = NULL;
441 : RangeBound prevLower,
442 : prevUpper;
443 : bool prevEmpty;
444 :
445 : /* Restrictions on range bounds according to scan strategy */
446 1650 : RangeBound *minLower = NULL,
447 1650 : *maxLower = NULL,
448 1650 : *minUpper = NULL,
449 1650 : *maxUpper = NULL;
450 :
451 : /* Are the restrictions on range bounds inclusive? */
452 1650 : bool inclusive = true;
453 1650 : bool strictEmpty = true;
454 : int cmp,
455 : which1,
456 : which2;
457 :
458 1650 : strategy = in->scankeys[i].sk_strategy;
459 :
460 : /*
461 : * RANGESTRAT_CONTAINS_ELEM is just like RANGESTRAT_CONTAINS, but
462 : * the argument is a single element. Expand the single element to
463 : * a range containing only the element, and treat it like
464 : * RANGESTRAT_CONTAINS.
465 : */
466 1650 : if (strategy == RANGESTRAT_CONTAINS_ELEM)
467 : {
468 48 : lower.inclusive = true;
469 48 : lower.infinite = false;
470 48 : lower.lower = true;
471 48 : lower.val = in->scankeys[i].sk_argument;
472 :
473 48 : upper.inclusive = true;
474 48 : upper.infinite = false;
475 48 : upper.lower = false;
476 48 : upper.val = in->scankeys[i].sk_argument;
477 :
478 48 : empty = false;
479 :
480 48 : strategy = RANGESTRAT_CONTAINS;
481 : }
482 : else
483 : {
484 1602 : range = DatumGetRangeTypeP(in->scankeys[i].sk_argument);
485 1602 : range_deserialize(typcache, range, &lower, &upper, &empty);
486 : }
487 :
488 : /*
489 : * Most strategies are handled by forming a bounding box from the
490 : * search key, defined by a minLower, maxLower, minUpper,
491 : * maxUpper. Some modify 'which' directly, to specify exactly
492 : * which quadrants need to be visited.
493 : *
494 : * For most strategies, nothing matches an empty search key, and
495 : * an empty range never matches a non-empty key. If a strategy
496 : * does not behave like that wrt. empty ranges, set strictEmpty to
497 : * false.
498 : */
499 1650 : switch (strategy)
500 : {
501 24 : case RANGESTRAT_BEFORE:
502 :
503 : /*
504 : * Range A is before range B if upper bound of A is lower
505 : * than lower bound of B.
506 : */
507 24 : maxUpper = &lower;
508 24 : inclusive = false;
509 24 : break;
510 :
511 132 : case RANGESTRAT_OVERLEFT:
512 :
513 : /*
514 : * Range A is overleft to range B if upper bound of A is
515 : * less than or equal to upper bound of B.
516 : */
517 132 : maxUpper = &upper;
518 132 : break;
519 :
520 48 : case RANGESTRAT_OVERLAPS:
521 :
522 : /*
523 : * Non-empty ranges overlap, if lower bound of each range
524 : * is lower or equal to upper bound of the other range.
525 : */
526 48 : maxLower = &upper;
527 48 : minUpper = &lower;
528 48 : break;
529 :
530 398 : case RANGESTRAT_OVERRIGHT:
531 :
532 : /*
533 : * Range A is overright to range B if lower bound of A is
534 : * greater than or equal to lower bound of B.
535 : */
536 398 : minLower = &lower;
537 398 : break;
538 :
539 362 : case RANGESTRAT_AFTER:
540 :
541 : /*
542 : * Range A is after range B if lower bound of A is greater
543 : * than upper bound of B.
544 : */
545 362 : minLower = &upper;
546 362 : inclusive = false;
547 362 : break;
548 :
549 132 : case RANGESTRAT_ADJACENT:
550 132 : if (empty)
551 0 : break; /* Skip to strictEmpty check. */
552 :
553 : /*
554 : * Previously selected quadrant could exclude possibility
555 : * for lower or upper bounds to be adjacent. Deserialize
556 : * previous centroid range if present for checking this.
557 : */
558 132 : if (in->traversalValue)
559 : {
560 114 : prevCentroid = in->traversalValue;
561 114 : range_deserialize(typcache, prevCentroid,
562 : &prevLower, &prevUpper, &prevEmpty);
563 : }
564 :
565 : /*
566 : * For a range's upper bound to be adjacent to the
567 : * argument's lower bound, it will be found along the line
568 : * adjacent to (and just below) Y=lower. Therefore, if the
569 : * argument's lower bound is less than the centroid's
570 : * upper bound, the line falls in quadrants 2 and 3; if
571 : * greater, the line falls in quadrants 1 and 4. (see
572 : * adjacent_cmp_bounds for description of edge cases).
573 : */
574 132 : cmp = adjacent_inner_consistent(typcache, &lower,
575 : ¢roidUpper,
576 : prevCentroid ? &prevUpper : NULL);
577 132 : if (cmp > 0)
578 12 : which1 = (1 << 1) | (1 << 4);
579 120 : else if (cmp < 0)
580 30 : which1 = (1 << 2) | (1 << 3);
581 : else
582 90 : which1 = 0;
583 :
584 : /*
585 : * Also search for ranges's adjacent to argument's upper
586 : * bound. They will be found along the line adjacent to
587 : * (and just right of) X=upper, which falls in quadrants 3
588 : * and 4, or 1 and 2.
589 : */
590 132 : cmp = adjacent_inner_consistent(typcache, &upper,
591 : ¢roidLower,
592 : prevCentroid ? &prevLower : NULL);
593 132 : if (cmp > 0)
594 78 : which2 = (1 << 1) | (1 << 2);
595 54 : else if (cmp < 0)
596 42 : which2 = (1 << 3) | (1 << 4);
597 : else
598 12 : which2 = 0;
599 :
600 : /* We must chase down ranges adjacent to either bound. */
601 132 : which &= which1 | which2;
602 :
603 132 : needPrevious = true;
604 132 : break;
605 :
606 506 : case RANGESTRAT_CONTAINS:
607 :
608 : /*
609 : * Non-empty range A contains non-empty range B if lower
610 : * bound of A is lower or equal to lower bound of range B
611 : * and upper bound of range A is greater than or equal to
612 : * upper bound of range A.
613 : *
614 : * All non-empty ranges contain an empty range.
615 : */
616 506 : strictEmpty = false;
617 506 : if (!empty)
618 : {
619 96 : which &= (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4);
620 96 : maxLower = &lower;
621 96 : minUpper = &upper;
622 : }
623 506 : break;
624 :
625 24 : case RANGESTRAT_CONTAINED_BY:
626 : /* The opposite of contains. */
627 24 : strictEmpty = false;
628 24 : if (empty)
629 : {
630 : /* An empty range is only contained by an empty range */
631 0 : which &= (1 << 5);
632 : }
633 : else
634 : {
635 24 : minLower = &lower;
636 24 : maxUpper = &upper;
637 : }
638 24 : break;
639 :
640 24 : case RANGESTRAT_EQ:
641 :
642 : /*
643 : * Equal range can be only in the same quadrant where
644 : * argument would be placed to.
645 : */
646 24 : strictEmpty = false;
647 24 : which &= (1 << getQuadrant(typcache, centroid, range));
648 24 : break;
649 :
650 0 : default:
651 0 : elog(ERROR, "unrecognized range strategy: %d", strategy);
652 : break;
653 : }
654 :
655 1650 : if (strictEmpty)
656 : {
657 1096 : if (empty)
658 : {
659 : /* Scan key is empty, no branches are satisfying */
660 0 : which = 0;
661 0 : break;
662 : }
663 : else
664 : {
665 : /* Shouldn't visit tree branch with empty ranges */
666 1096 : which &= (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4);
667 : }
668 : }
669 :
670 : /*
671 : * Using the bounding box, see which quadrants we have to descend
672 : * into.
673 : */
674 1650 : if (minLower)
675 : {
676 : /*
677 : * If the centroid's lower bound is less than or equal to the
678 : * minimum lower bound, anything in the 3rd and 4th quadrants
679 : * will have an even smaller lower bound, and thus can't
680 : * match.
681 : */
682 784 : if (range_cmp_bounds(typcache, ¢roidLower, minLower) <= 0)
683 96 : which &= (1 << 1) | (1 << 2) | (1 << 5);
684 : }
685 1650 : if (maxLower)
686 : {
687 : /*
688 : * If the centroid's lower bound is greater than the maximum
689 : * lower bound, anything in the 1st and 2nd quadrants will
690 : * also have a greater than or equal lower bound, and thus
691 : * can't match. If the centroid's lower bound is equal to the
692 : * maximum lower bound, we can still exclude the 1st and 2nd
693 : * quadrants if we're looking for a value strictly greater
694 : * than the maximum.
695 : */
696 :
697 144 : cmp = range_cmp_bounds(typcache, ¢roidLower, maxLower);
698 144 : if (cmp > 0 || (!inclusive && cmp == 0))
699 108 : which &= (1 << 3) | (1 << 4) | (1 << 5);
700 : }
701 1650 : if (minUpper)
702 : {
703 : /*
704 : * If the centroid's upper bound is less than or equal to the
705 : * minimum upper bound, anything in the 2nd and 3rd quadrants
706 : * will have an even smaller upper bound, and thus can't
707 : * match.
708 : */
709 144 : if (range_cmp_bounds(typcache, ¢roidUpper, minUpper) <= 0)
710 0 : which &= (1 << 1) | (1 << 4) | (1 << 5);
711 : }
712 1650 : if (maxUpper)
713 : {
714 : /*
715 : * If the centroid's upper bound is greater than the maximum
716 : * upper bound, anything in the 1st and 4th quadrants will
717 : * also have a greater than or equal upper bound, and thus
718 : * can't match. If the centroid's upper bound is equal to the
719 : * maximum upper bound, we can still exclude the 1st and 4th
720 : * quadrants if we're looking for a value strictly greater
721 : * than the maximum.
722 : */
723 :
724 180 : cmp = range_cmp_bounds(typcache, ¢roidUpper, maxUpper);
725 180 : if (cmp > 0 || (!inclusive && cmp == 0))
726 84 : which &= (1 << 2) | (1 << 3) | (1 << 5);
727 : }
728 :
729 1650 : if (which == 0)
730 0 : break; /* no need to consider remaining conditions */
731 : }
732 : }
733 :
734 : /* We must descend into the quadrant(s) identified by 'which' */
735 1650 : out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes);
736 1650 : if (needPrevious)
737 132 : out->traversalValues = (void **) palloc(sizeof(void *) * in->nNodes);
738 1650 : out->nNodes = 0;
739 :
740 : /*
741 : * Elements of traversalValues should be allocated in
742 : * traversalMemoryContext
743 : */
744 1650 : oldCtx = MemoryContextSwitchTo(in->traversalMemoryContext);
745 :
746 8388 : for (i = 1; i <= in->nNodes; i++)
747 : {
748 6738 : if (which & (1 << i))
749 : {
750 : /* Save previous prefix if needed */
751 5742 : if (needPrevious)
752 : {
753 : Datum previousCentroid;
754 :
755 : /*
756 : * We know, that in->prefixDatum in this place is varlena,
757 : * because it's range
758 : */
759 294 : previousCentroid = datumCopy(in->prefixDatum, false, -1);
760 294 : out->traversalValues[out->nNodes] = (void *) previousCentroid;
761 : }
762 5742 : out->nodeNumbers[out->nNodes] = i - 1;
763 5742 : out->nNodes++;
764 : }
765 : }
766 :
767 1650 : MemoryContextSwitchTo(oldCtx);
768 :
769 1650 : PG_RETURN_VOID();
770 : }
771 :
772 : /*
773 : * adjacent_cmp_bounds
774 : *
775 : * Given an argument and centroid bound, this function determines if any
776 : * bounds that are adjacent to the argument are smaller than, or greater than
777 : * or equal to centroid. For brevity, we call the arg < centroid "left", and
778 : * arg >= centroid case "right". This corresponds to how the quadrants are
779 : * arranged, if you imagine that "left" is equivalent to "down" and "right"
780 : * is equivalent to "up".
781 : *
782 : * For the "left" case, returns -1, and for the "right" case, returns 1.
783 : */
784 : static int
785 390 : adjacent_cmp_bounds(TypeCacheEntry *typcache, const RangeBound *arg,
786 : const RangeBound *centroid)
787 : {
788 : int cmp;
789 :
790 : Assert(arg->lower != centroid->lower);
791 :
792 390 : cmp = range_cmp_bounds(typcache, arg, centroid);
793 :
794 390 : if (centroid->lower)
795 : {
796 : /*------
797 : * The argument is an upper bound, we are searching for adjacent lower
798 : * bounds. A matching adjacent lower bound must be *larger* than the
799 : * argument, but only just.
800 : *
801 : * The following table illustrates the desired result with a fixed
802 : * argument bound, and different centroids. The CMP column shows
803 : * the value of 'cmp' variable, and ADJ shows whether the argument
804 : * and centroid are adjacent, per bounds_adjacent(). (N) means we
805 : * don't need to check for that case, because it's implied by CMP.
806 : * With the argument range [..., 500), the adjacent range we're
807 : * searching for is [500, ...):
808 : *
809 : * ARGUMENT CENTROID CMP ADJ
810 : * [..., 500) [498, ...) > (N) [500, ...) is to the right
811 : * [..., 500) [499, ...) = (N) [500, ...) is to the right
812 : * [..., 500) [500, ...) < Y [500, ...) is to the right
813 : * [..., 500) [501, ...) < N [500, ...) is to the left
814 : *
815 : * So, we must search left when the argument is smaller than, and not
816 : * adjacent, to the centroid. Otherwise search right.
817 : *------
818 : */
819 234 : if (cmp < 0 && !bounds_adjacent(typcache, *arg, *centroid))
820 72 : return -1;
821 : else
822 162 : return 1;
823 : }
824 : else
825 : {
826 : /*------
827 : * The argument is a lower bound, we are searching for adjacent upper
828 : * bounds. A matching adjacent upper bound must be *smaller* than the
829 : * argument, but only just.
830 : *
831 : * ARGUMENT CENTROID CMP ADJ
832 : * [500, ...) [..., 499) > (N) [..., 500) is to the right
833 : * [500, ...) [..., 500) > (Y) [..., 500) is to the right
834 : * [500, ...) [..., 501) = (N) [..., 500) is to the left
835 : * [500, ...) [..., 502) < (N) [..., 500) is to the left
836 : *
837 : * We must search left when the argument is smaller than or equal to
838 : * the centroid. Otherwise search right. We don't need to check
839 : * whether the argument is adjacent with the centroid, because it
840 : * doesn't matter.
841 : *------
842 : */
843 156 : if (cmp <= 0)
844 144 : return -1;
845 : else
846 12 : return 1;
847 : }
848 : }
849 :
850 : /*----------
851 : * adjacent_inner_consistent
852 : *
853 : * Like adjacent_cmp_bounds, but also takes into account the previous
854 : * level's centroid. We might've traversed left (or right) at the previous
855 : * node, in search for ranges adjacent to the other bound, even though we
856 : * already ruled out the possibility for any matches in that direction for
857 : * this bound. By comparing the argument with the previous centroid, and
858 : * the previous centroid with the current centroid, we can determine which
859 : * direction we should've moved in at previous level, and which direction we
860 : * actually moved.
861 : *
862 : * If there can be any matches to the left, returns -1. If to the right,
863 : * returns 1. If there can be no matches below this centroid, because we
864 : * already ruled them out at the previous level, returns 0.
865 : *
866 : * XXX: Comparing just the previous and current level isn't foolproof; we
867 : * might still search some branches unnecessarily. For example, imagine that
868 : * we are searching for value 15, and we traverse the following centroids
869 : * (only considering one bound for the moment):
870 : *
871 : * Level 1: 20
872 : * Level 2: 50
873 : * Level 3: 25
874 : *
875 : * At this point, previous centroid is 50, current centroid is 25, and the
876 : * target value is to the left. But because we already moved right from
877 : * centroid 20 to 50 in the first level, there cannot be any values < 20 in
878 : * the current branch. But we don't know that just by looking at the previous
879 : * and current centroid, so we traverse left, unnecessarily. The reason we are
880 : * down this branch is that we're searching for matches with the *other*
881 : * bound. If we kept track of which bound we are searching for explicitly,
882 : * instead of deducing that from the previous and current centroid, we could
883 : * avoid some unnecessary work.
884 : *----------
885 : */
886 : static int
887 264 : adjacent_inner_consistent(TypeCacheEntry *typcache, const RangeBound *arg,
888 : const RangeBound *centroid, const RangeBound *prev)
889 : {
890 264 : if (prev)
891 : {
892 : int prevcmp;
893 : int cmp;
894 :
895 : /*
896 : * Which direction were we supposed to traverse at previous level,
897 : * left or right?
898 : */
899 228 : prevcmp = adjacent_cmp_bounds(typcache, arg, prev);
900 :
901 : /* and which direction did we actually go? */
902 228 : cmp = range_cmp_bounds(typcache, centroid, prev);
903 :
904 : /* if the two don't agree, there's nothing to see here */
905 228 : if ((prevcmp < 0 && cmp >= 0) || (prevcmp > 0 && cmp < 0))
906 102 : return 0;
907 : }
908 :
909 162 : return adjacent_cmp_bounds(typcache, arg, centroid);
910 : }
911 :
912 : /*
913 : * SP-GiST consistent function for leaf nodes: check leaf value against query
914 : * using corresponding function.
915 : */
916 : Datum
917 227626 : spg_range_quad_leaf_consistent(PG_FUNCTION_ARGS)
918 : {
919 227626 : spgLeafConsistentIn *in = (spgLeafConsistentIn *) PG_GETARG_POINTER(0);
920 227626 : spgLeafConsistentOut *out = (spgLeafConsistentOut *) PG_GETARG_POINTER(1);
921 227626 : RangeType *leafRange = DatumGetRangeTypeP(in->leafDatum);
922 : TypeCacheEntry *typcache;
923 : bool res;
924 : int i;
925 :
926 : /* all tests are exact */
927 227626 : out->recheck = false;
928 :
929 : /* leafDatum is what it is... */
930 227626 : out->leafValue = in->leafDatum;
931 :
932 227626 : typcache = range_get_typcache(fcinfo, RangeTypeGetOid(leafRange));
933 :
934 : /* Perform the required comparison(s) */
935 227626 : res = true;
936 434518 : for (i = 0; i < in->nkeys; i++)
937 : {
938 227626 : Datum keyDatum = in->scankeys[i].sk_argument;
939 :
940 : /* Call the function corresponding to the scan strategy */
941 227626 : switch (in->scankeys[i].sk_strategy)
942 : {
943 2856 : case RANGESTRAT_BEFORE:
944 2856 : res = range_before_internal(typcache, leafRange,
945 2856 : DatumGetRangeTypeP(keyDatum));
946 2856 : break;
947 18210 : case RANGESTRAT_OVERLEFT:
948 18210 : res = range_overleft_internal(typcache, leafRange,
949 18210 : DatumGetRangeTypeP(keyDatum));
950 18210 : break;
951 2900 : case RANGESTRAT_OVERLAPS:
952 2900 : res = range_overlaps_internal(typcache, leafRange,
953 2900 : DatumGetRangeTypeP(keyDatum));
954 2900 : break;
955 59500 : case RANGESTRAT_OVERRIGHT:
956 59500 : res = range_overright_internal(typcache, leafRange,
957 59500 : DatumGetRangeTypeP(keyDatum));
958 59500 : break;
959 43428 : case RANGESTRAT_AFTER:
960 43428 : res = range_after_internal(typcache, leafRange,
961 43428 : DatumGetRangeTypeP(keyDatum));
962 43428 : break;
963 5316 : case RANGESTRAT_ADJACENT:
964 5316 : res = range_adjacent_internal(typcache, leafRange,
965 5316 : DatumGetRangeTypeP(keyDatum));
966 5316 : break;
967 77300 : case RANGESTRAT_CONTAINS:
968 77300 : res = range_contains_internal(typcache, leafRange,
969 77300 : DatumGetRangeTypeP(keyDatum));
970 77300 : break;
971 13944 : case RANGESTRAT_CONTAINED_BY:
972 13944 : res = range_contained_by_internal(typcache, leafRange,
973 13944 : DatumGetRangeTypeP(keyDatum));
974 13944 : break;
975 2900 : case RANGESTRAT_CONTAINS_ELEM:
976 2900 : res = range_contains_elem_internal(typcache, leafRange,
977 : keyDatum);
978 2900 : break;
979 1272 : case RANGESTRAT_EQ:
980 1272 : res = range_eq_internal(typcache, leafRange,
981 1272 : DatumGetRangeTypeP(keyDatum));
982 1272 : break;
983 0 : default:
984 0 : elog(ERROR, "unrecognized range strategy: %d",
985 : in->scankeys[i].sk_strategy);
986 : break;
987 : }
988 :
989 : /*
990 : * If leaf datum doesn't match to a query key, no need to check
991 : * subsequent keys.
992 : */
993 227626 : if (!res)
994 20734 : break;
995 : }
996 :
997 227626 : PG_RETURN_BOOL(res);
998 : }
|