Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * spgtextproc.c
4 : * implementation of radix tree (compressed trie) over text
5 : *
6 : * In a text_ops SPGiST index, inner tuples can have a prefix which is the
7 : * common prefix of all strings indexed under that tuple. The node labels
8 : * represent the next byte of the string(s) after the prefix. Assuming we
9 : * always use the longest possible prefix, we will get more than one node
10 : * label unless the prefix length is restricted by SPGIST_MAX_PREFIX_LENGTH.
11 : *
12 : * To reconstruct the indexed string for any index entry, concatenate the
13 : * inner-tuple prefixes and node labels starting at the root and working
14 : * down to the leaf entry, then append the datum in the leaf entry.
15 : * (While descending the tree, "level" is the number of bytes reconstructed
16 : * so far.)
17 : *
18 : * However, there are two special cases for node labels: -1 indicates that
19 : * there are no more bytes after the prefix-so-far, and -2 indicates that we
20 : * had to split an existing allTheSame tuple (in such a case we have to create
21 : * a node label that doesn't correspond to any string byte). In either case,
22 : * the node label does not contribute anything to the reconstructed string.
23 : *
24 : * Previously, we used a node label of zero for both special cases, but
25 : * this was problematic because one can't tell whether a string ending at
26 : * the current level can be pushed down into such a child node. For
27 : * backwards compatibility, we still support such node labels for reading;
28 : * but no new entries will ever be pushed down into a zero-labeled child.
29 : * No new entries ever get pushed into a -2-labeled child, either.
30 : *
31 : *
32 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
33 : * Portions Copyright (c) 1994, Regents of the University of California
34 : *
35 : * IDENTIFICATION
36 : * src/backend/access/spgist/spgtextproc.c
37 : *
38 : *-------------------------------------------------------------------------
39 : */
40 : #include "postgres.h"
41 :
42 : #include "access/spgist.h"
43 : #include "catalog/pg_type.h"
44 : #include "common/int.h"
45 : #include "mb/pg_wchar.h"
46 : #include "utils/datum.h"
47 : #include "utils/fmgrprotos.h"
48 : #include "utils/pg_locale.h"
49 : #include "utils/varlena.h"
50 : #include "varatt.h"
51 :
52 :
/*
 * Upper bound on the length of an inner-tuple prefix.
 *
 * Worst case, a text radix tree inner tuple has 258 nodes: one per possible
 * byte value plus the two special labels.  On MAXALIGN=8 machines each node
 * can occupy 16 bytes, and the whole inner tuple has to fit on one index
 * page of BLCKSZ bytes.  We do not try to compute page-header and
 * tuple-header overhead exactly; instead 100 bytes are reserved for it
 * (real overhead should be at most 56 bytes at this writing, so that figure
 * includes slop).  Prefixes of up to BLCKSZ - 258 * 16 - 100 bytes are
 * therefore safe.
 *
 * Since 258 * 16 exceeds 4K, no prefix length is safe when BLCKSZ is below
 * 8K: with enough distinct next-byte values at one spot in the tree,
 * "SPGiST inner tuple size exceeds maximum" can always be reached.
 * Nonstandard block sizes appear to be rare in the field, so we accept that
 * for now and fall back to a maximum prefix of 32 bytes whenever BLCKSZ is
 * configured smaller than the default.
 */
#define SPGIST_MAX_PREFIX_LENGTH	Max((int) (BLCKSZ - 258 * 16 - 100), 32)
71 :
/*
 * Collation-aware operators on text use the matching btree strategy number
 * shifted up by SPG_STRATEGY_ADDITION.
 *
 * Collation-aware strategies currently in use, and the btree strategy each
 * one maps to:
 *		11	BTLessStrategyNumber
 *		12	BTLessEqualStrategyNumber
 *		14	BTGreaterEqualStrategyNumber
 *		15	BTGreaterStrategyNumber
 *
 * The prefix strategy (RTPrefixStrategyNumber) is explicitly excluded by
 * the test below, so it is never treated as collation-aware.
 */
#define SPG_STRATEGY_ADDITION	(10)
#define SPG_IS_COLLATION_AWARE_STRATEGY(s) \
	((s) > SPG_STRATEGY_ADDITION && (s) != RTPrefixStrategyNumber)
85 :
86 : /* Struct for sorting values in picksplit */
87 : typedef struct spgNodePtr
88 : {
89 : Datum d;
90 : int i;
91 : int16 c;
92 : } spgNodePtr;
93 :
94 :
95 : Datum
96 42 : spg_text_config(PG_FUNCTION_ARGS)
97 : {
98 : #ifdef NOT_USED
99 : spgConfigIn *cfgin = (spgConfigIn *) PG_GETARG_POINTER(0);
100 : #endif
101 42 : spgConfigOut *cfg = (spgConfigOut *) PG_GETARG_POINTER(1);
102 :
103 42 : cfg->prefixType = TEXTOID;
104 42 : cfg->labelType = INT2OID;
105 42 : cfg->canReturnData = true;
106 42 : cfg->longValuesOK = true; /* suffixing will shorten long values */
107 42 : PG_RETURN_VOID();
108 : }
109 :
110 : /*
111 : * Form a text datum from the given not-necessarily-null-terminated string,
112 : * using short varlena header format if possible
113 : */
114 : static Datum
115 129304 : formTextDatum(const char *data, int datalen)
116 : {
117 : char *p;
118 :
119 129304 : p = (char *) palloc(datalen + VARHDRSZ);
120 :
121 129304 : if (datalen + VARHDRSZ_SHORT <= VARATT_SHORT_MAX)
122 : {
123 129304 : SET_VARSIZE_SHORT(p, datalen + VARHDRSZ_SHORT);
124 129304 : if (datalen)
125 121723 : memcpy(p + VARHDRSZ_SHORT, data, datalen);
126 : }
127 : else
128 : {
129 0 : SET_VARSIZE(p, datalen + VARHDRSZ);
130 0 : memcpy(p + VARHDRSZ, data, datalen);
131 : }
132 :
133 129304 : return PointerGetDatum(p);
134 : }
135 :
/*
 * Return the number of leading bytes that a and b have in common.
 *
 * lena and lenb give the number of valid bytes in a and b; neither string
 * needs to be null-terminated.  The result never exceeds the shorter length.
 */
static int
commonPrefix(const char *a, const char *b, int lena, int lenb)
{
	int			limit = (lena < lenb) ? lena : lenb;
	int			n;

	for (n = 0; n < limit; n++)
	{
		if (a[n] != b[n])
			break;
	}

	return n;
}
153 :
154 : /*
155 : * Binary search an array of int16 datums for a match to c
156 : *
157 : * On success, *i gets the match location; on failure, it gets where to insert
158 : */
159 : static bool
160 104384 : searchChar(const Datum *nodeLabels, int nNodes, int16 c, int *i)
161 : {
162 104384 : int StopLow = 0,
163 104384 : StopHigh = nNodes;
164 :
165 286621 : while (StopLow < StopHigh)
166 : {
167 285938 : int StopMiddle = (StopLow + StopHigh) >> 1;
168 285938 : int16 middle = DatumGetInt16(nodeLabels[StopMiddle]);
169 :
170 285938 : if (c < middle)
171 90436 : StopHigh = StopMiddle;
172 195502 : else if (c > middle)
173 91801 : StopLow = StopMiddle + 1;
174 : else
175 : {
176 103701 : *i = StopMiddle;
177 103701 : return true;
178 : }
179 : }
180 :
181 683 : *i = StopHigh;
182 683 : return false;
183 : }
184 :
185 : Datum
186 104698 : spg_text_choose(PG_FUNCTION_ARGS)
187 : {
188 104698 : spgChooseIn *in = (spgChooseIn *) PG_GETARG_POINTER(0);
189 104698 : spgChooseOut *out = (spgChooseOut *) PG_GETARG_POINTER(1);
190 104698 : text *inText = DatumGetTextPP(in->datum);
191 104698 : char *inStr = VARDATA_ANY(inText);
192 104698 : int inSize = VARSIZE_ANY_EXHDR(inText);
193 104698 : char *prefixStr = NULL;
194 104698 : int prefixSize = 0;
195 104698 : int commonLen = 0;
196 104698 : int16 nodeChar = 0;
197 104698 : int i = 0;
198 :
199 : /* Check for prefix match, set nodeChar to first byte after prefix */
200 104698 : if (in->hasPrefix)
201 : {
202 41353 : text *prefixText = DatumGetTextPP(in->prefixDatum);
203 :
204 41353 : prefixStr = VARDATA_ANY(prefixText);
205 41353 : prefixSize = VARSIZE_ANY_EXHDR(prefixText);
206 :
207 41353 : commonLen = commonPrefix(inStr + in->level,
208 : prefixStr,
209 41353 : inSize - in->level,
210 : prefixSize);
211 :
212 41353 : if (commonLen == prefixSize)
213 : {
214 41039 : if (inSize - in->level > commonLen)
215 37961 : nodeChar = *(unsigned char *) (inStr + in->level + commonLen);
216 : else
217 3078 : nodeChar = -1;
218 : }
219 : else
220 : {
221 : /* Must split tuple because incoming value doesn't match prefix */
222 314 : out->resultType = spgSplitTuple;
223 :
224 314 : if (commonLen == 0)
225 : {
226 11 : out->result.splitTuple.prefixHasPrefix = false;
227 : }
228 : else
229 : {
230 303 : out->result.splitTuple.prefixHasPrefix = true;
231 303 : out->result.splitTuple.prefixPrefixDatum =
232 303 : formTextDatum(prefixStr, commonLen);
233 : }
234 314 : out->result.splitTuple.prefixNNodes = 1;
235 314 : out->result.splitTuple.prefixNodeLabels = palloc_object(Datum);
236 628 : out->result.splitTuple.prefixNodeLabels[0] =
237 314 : Int16GetDatum(*(unsigned char *) (prefixStr + commonLen));
238 :
239 314 : out->result.splitTuple.childNodeN = 0;
240 :
241 314 : if (prefixSize - commonLen == 1)
242 : {
243 308 : out->result.splitTuple.postfixHasPrefix = false;
244 : }
245 : else
246 : {
247 6 : out->result.splitTuple.postfixHasPrefix = true;
248 6 : out->result.splitTuple.postfixPrefixDatum =
249 6 : formTextDatum(prefixStr + commonLen + 1,
250 6 : prefixSize - commonLen - 1);
251 : }
252 :
253 314 : PG_RETURN_VOID();
254 : }
255 : }
256 63345 : else if (inSize > in->level)
257 : {
258 62866 : nodeChar = *(unsigned char *) (inStr + in->level);
259 : }
260 : else
261 : {
262 479 : nodeChar = -1;
263 : }
264 :
265 : /* Look up nodeChar in the node label array */
266 104384 : if (searchChar(in->nodeLabels, in->nNodes, nodeChar, &i))
267 : {
268 : /*
269 : * Descend to existing node. (If in->allTheSame, the core code will
270 : * ignore our nodeN specification here, but that's OK. We still have
271 : * to provide the correct levelAdd and restDatum values, and those are
272 : * the same regardless of which node gets chosen by core.)
273 : */
274 : int levelAdd;
275 :
276 103701 : out->resultType = spgMatchNode;
277 103701 : out->result.matchNode.nodeN = i;
278 103701 : levelAdd = commonLen;
279 103701 : if (nodeChar >= 0)
280 100147 : levelAdd++;
281 103701 : out->result.matchNode.levelAdd = levelAdd;
282 103701 : if (inSize - in->level - levelAdd > 0)
283 100144 : out->result.matchNode.restDatum =
284 100144 : formTextDatum(inStr + in->level + levelAdd,
285 100144 : inSize - in->level - levelAdd);
286 : else
287 3557 : out->result.matchNode.restDatum =
288 3557 : formTextDatum(NULL, 0);
289 : }
290 683 : else if (in->allTheSame)
291 : {
292 : /*
293 : * Can't use AddNode action, so split the tuple. The upper tuple has
294 : * the same prefix as before and uses a dummy node label -2 for the
295 : * lower tuple. The lower tuple has no prefix and the same node
296 : * labels as the original tuple.
297 : *
298 : * Note: it might seem tempting to shorten the upper tuple's prefix,
299 : * if it has one, then use its last byte as label for the lower tuple.
300 : * But that doesn't win since we know the incoming value matches the
301 : * whole prefix: we'd just end up splitting the lower tuple again.
302 : */
303 3 : out->resultType = spgSplitTuple;
304 3 : out->result.splitTuple.prefixHasPrefix = in->hasPrefix;
305 3 : out->result.splitTuple.prefixPrefixDatum = in->prefixDatum;
306 3 : out->result.splitTuple.prefixNNodes = 1;
307 3 : out->result.splitTuple.prefixNodeLabels = palloc_object(Datum);
308 3 : out->result.splitTuple.prefixNodeLabels[0] = Int16GetDatum(-2);
309 3 : out->result.splitTuple.childNodeN = 0;
310 3 : out->result.splitTuple.postfixHasPrefix = false;
311 : }
312 : else
313 : {
314 : /* Add a node for the not-previously-seen nodeChar value */
315 680 : out->resultType = spgAddNode;
316 680 : out->result.addNode.nodeLabel = Int16GetDatum(nodeChar);
317 680 : out->result.addNode.nodeN = i;
318 : }
319 :
320 104384 : PG_RETURN_VOID();
321 : }
322 :
323 : /* qsort comparator to sort spgNodePtr structs by "c" */
324 : static int
325 58287 : cmpNodePtr(const void *a, const void *b)
326 : {
327 58287 : const spgNodePtr *aa = (const spgNodePtr *) a;
328 58287 : const spgNodePtr *bb = (const spgNodePtr *) b;
329 :
330 58287 : return pg_cmp_s16(aa->c, bb->c);
331 : }
332 :
333 : Datum
334 261 : spg_text_picksplit(PG_FUNCTION_ARGS)
335 : {
336 261 : spgPickSplitIn *in = (spgPickSplitIn *) PG_GETARG_POINTER(0);
337 261 : spgPickSplitOut *out = (spgPickSplitOut *) PG_GETARG_POINTER(1);
338 261 : text *text0 = DatumGetTextPP(in->datums[0]);
339 : int i,
340 : commonLen;
341 : spgNodePtr *nodes;
342 :
343 : /* Identify longest common prefix, if any */
344 261 : commonLen = VARSIZE_ANY_EXHDR(text0);
345 6782 : for (i = 1; i < in->nTuples && commonLen > 0; i++)
346 : {
347 6521 : text *texti = DatumGetTextPP(in->datums[i]);
348 6521 : int tmp = commonPrefix(VARDATA_ANY(text0),
349 6521 : VARDATA_ANY(texti),
350 6521 : VARSIZE_ANY_EXHDR(text0),
351 6521 : VARSIZE_ANY_EXHDR(texti));
352 :
353 6521 : if (tmp < commonLen)
354 215 : commonLen = tmp;
355 : }
356 :
357 : /*
358 : * Limit the prefix length, if necessary, to ensure that the resulting
359 : * inner tuple will fit on a page.
360 : */
361 261 : commonLen = Min(commonLen, SPGIST_MAX_PREFIX_LENGTH);
362 :
363 : /* Set node prefix to be that string, if it's not empty */
364 261 : if (commonLen == 0)
365 : {
366 215 : out->hasPrefix = false;
367 : }
368 : else
369 : {
370 46 : out->hasPrefix = true;
371 46 : out->prefixDatum = formTextDatum(VARDATA_ANY(text0), commonLen);
372 : }
373 :
374 : /* Extract the node label (first non-common byte) from each value */
375 261 : nodes = palloc_array(spgNodePtr, in->nTuples);
376 :
377 25509 : for (i = 0; i < in->nTuples; i++)
378 : {
379 25248 : text *texti = DatumGetTextPP(in->datums[i]);
380 :
381 25248 : if (commonLen < VARSIZE_ANY_EXHDR(texti))
382 22105 : nodes[i].c = *(unsigned char *) (VARDATA_ANY(texti) + commonLen);
383 : else
384 3143 : nodes[i].c = -1; /* use -1 if string is all common */
385 25248 : nodes[i].i = i;
386 25248 : nodes[i].d = in->datums[i];
387 : }
388 :
389 : /*
390 : * Sort by label values so that we can group the values into nodes. This
391 : * also ensures that the nodes are ordered by label value, allowing the
392 : * use of binary search in searchChar.
393 : */
394 261 : qsort(nodes, in->nTuples, sizeof(*nodes), cmpNodePtr);
395 :
396 : /* And emit results */
397 261 : out->nNodes = 0;
398 261 : out->nodeLabels = palloc_array(Datum, in->nTuples);
399 261 : out->mapTuplesToNodes = palloc_array(int, in->nTuples);
400 261 : out->leafTupleDatums = palloc_array(Datum, in->nTuples);
401 :
402 25509 : for (i = 0; i < in->nTuples; i++)
403 : {
404 25248 : text *texti = DatumGetTextPP(nodes[i].d);
405 : Datum leafD;
406 :
407 25248 : if (i == 0 || nodes[i].c != nodes[i - 1].c)
408 : {
409 1625 : out->nodeLabels[out->nNodes] = Int16GetDatum(nodes[i].c);
410 1625 : out->nNodes++;
411 : }
412 :
413 25248 : if (commonLen < VARSIZE_ANY_EXHDR(texti))
414 22105 : leafD = formTextDatum(VARDATA_ANY(texti) + commonLen + 1,
415 22105 : VARSIZE_ANY_EXHDR(texti) - commonLen - 1);
416 : else
417 3143 : leafD = formTextDatum(NULL, 0);
418 :
419 25248 : out->leafTupleDatums[nodes[i].i] = leafD;
420 25248 : out->mapTuplesToNodes[nodes[i].i] = out->nNodes - 1;
421 : }
422 :
423 261 : PG_RETURN_VOID();
424 : }
425 :
426 : Datum
427 858 : spg_text_inner_consistent(PG_FUNCTION_ARGS)
428 : {
429 858 : spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0);
430 858 : spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1);
431 858 : bool collate_is_c = pg_newlocale_from_collation(PG_GET_COLLATION())->collate_is_c;
432 : text *reconstructedValue;
433 : text *reconstrText;
434 : int maxReconstrLen;
435 858 : text *prefixText = NULL;
436 858 : int prefixSize = 0;
437 : int i;
438 :
439 : /*
440 : * Reconstruct values represented at this tuple, including parent data,
441 : * prefix of this tuple if any, and the node label if it's non-dummy.
442 : * in->level should be the length of the previously reconstructed value,
443 : * and the number of bytes added here is prefixSize or prefixSize + 1.
444 : *
445 : * Note: we assume that in->reconstructedValue isn't toasted and doesn't
446 : * have a short varlena header. This is okay because it must have been
447 : * created by a previous invocation of this routine, and we always emit
448 : * long-format reconstructed values.
449 : */
450 858 : reconstructedValue = (text *) DatumGetPointer(in->reconstructedValue);
451 : Assert(reconstructedValue == NULL ? in->level == 0 :
452 : VARSIZE_ANY_EXHDR(reconstructedValue) == in->level);
453 :
454 858 : maxReconstrLen = in->level + 1;
455 858 : if (in->hasPrefix)
456 : {
457 162 : prefixText = DatumGetTextPP(in->prefixDatum);
458 162 : prefixSize = VARSIZE_ANY_EXHDR(prefixText);
459 162 : maxReconstrLen += prefixSize;
460 : }
461 :
462 858 : reconstrText = palloc(VARHDRSZ + maxReconstrLen);
463 858 : SET_VARSIZE(reconstrText, VARHDRSZ + maxReconstrLen);
464 :
465 858 : if (in->level)
466 768 : memcpy(VARDATA(reconstrText),
467 768 : VARDATA(reconstructedValue),
468 768 : in->level);
469 858 : if (prefixSize)
470 162 : memcpy(((char *) VARDATA(reconstrText)) + in->level,
471 162 : VARDATA_ANY(prefixText),
472 : prefixSize);
473 : /* last byte of reconstrText will be filled in below */
474 :
475 : /*
476 : * Scan the child nodes. For each one, complete the reconstructed value
477 : * and see if it's consistent with the query. If so, emit an entry into
478 : * the output arrays.
479 : */
480 858 : out->nodeNumbers = palloc_array(int, in->nNodes);
481 858 : out->levelAdds = palloc_array(int, in->nNodes);
482 858 : out->reconstructedValues = palloc_array(Datum, in->nNodes);
483 858 : out->nNodes = 0;
484 :
485 9080 : for (i = 0; i < in->nNodes; i++)
486 : {
487 8222 : int16 nodeChar = DatumGetInt16(in->nodeLabels[i]);
488 : int thisLen;
489 8222 : bool res = true;
490 : int j;
491 :
492 : /* If nodeChar is a dummy value, don't include it in data */
493 8222 : if (nodeChar <= 0)
494 1598 : thisLen = maxReconstrLen - 1;
495 : else
496 : {
497 6624 : ((unsigned char *) VARDATA(reconstrText))[maxReconstrLen - 1] = nodeChar;
498 6624 : thisLen = maxReconstrLen;
499 : }
500 :
501 14194 : for (j = 0; j < in->nkeys; j++)
502 : {
503 8222 : StrategyNumber strategy = in->scankeys[j].sk_strategy;
504 : text *inText;
505 : int inSize;
506 : int r;
507 :
508 : /*
509 : * If it's a collation-aware operator, but the collation is C, we
510 : * can treat it as non-collation-aware. With non-C collation we
511 : * need to traverse whole tree :-( so there's no point in making
512 : * any check here. (Note also that our reconstructed value may
513 : * well end with a partial multibyte character, so that applying
514 : * any encoding-sensitive test to it would be risky anyhow.)
515 : */
516 8222 : if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy))
517 : {
518 5144 : if (collate_is_c)
519 312 : strategy -= SPG_STRATEGY_ADDITION;
520 : else
521 4832 : continue;
522 : }
523 :
524 3390 : inText = DatumGetTextPP(in->scankeys[j].sk_argument);
525 3390 : inSize = VARSIZE_ANY_EXHDR(inText);
526 :
527 3390 : r = memcmp(VARDATA(reconstrText), VARDATA_ANY(inText),
528 3390 : Min(inSize, thisLen));
529 :
530 3390 : switch (strategy)
531 : {
532 704 : case BTLessStrategyNumber:
533 : case BTLessEqualStrategyNumber:
534 704 : if (r > 0)
535 400 : res = false;
536 704 : break;
537 1734 : case BTEqualStrategyNumber:
538 1734 : if (r != 0 || inSize < thisLen)
539 1050 : res = false;
540 1734 : break;
541 544 : case BTGreaterEqualStrategyNumber:
542 : case BTGreaterStrategyNumber:
543 544 : if (r < 0)
544 416 : res = false;
545 544 : break;
546 408 : case RTPrefixStrategyNumber:
547 408 : if (r != 0)
548 384 : res = false;
549 408 : break;
550 0 : default:
551 0 : elog(ERROR, "unrecognized strategy number: %d",
552 : in->scankeys[j].sk_strategy);
553 : break;
554 : }
555 :
556 3390 : if (!res)
557 2250 : break; /* no need to consider remaining conditions */
558 : }
559 :
560 8222 : if (res)
561 : {
562 5972 : out->nodeNumbers[out->nNodes] = i;
563 5972 : out->levelAdds[out->nNodes] = thisLen - in->level;
564 5972 : SET_VARSIZE(reconstrText, VARHDRSZ + thisLen);
565 11944 : out->reconstructedValues[out->nNodes] =
566 5972 : datumCopy(PointerGetDatum(reconstrText), false, -1);
567 5972 : out->nNodes++;
568 : }
569 : }
570 :
571 858 : PG_RETURN_VOID();
572 : }
573 :
574 : Datum
575 117750 : spg_text_leaf_consistent(PG_FUNCTION_ARGS)
576 : {
577 117750 : spgLeafConsistentIn *in = (spgLeafConsistentIn *) PG_GETARG_POINTER(0);
578 117750 : spgLeafConsistentOut *out = (spgLeafConsistentOut *) PG_GETARG_POINTER(1);
579 117750 : int level = in->level;
580 : text *leafValue,
581 117750 : *reconstrValue = NULL;
582 : char *fullValue;
583 : int fullLen;
584 : bool res;
585 : int j;
586 :
587 : /* all tests are exact */
588 117750 : out->recheck = false;
589 :
590 117750 : leafValue = DatumGetTextPP(in->leafDatum);
591 :
592 : /* As above, in->reconstructedValue isn't toasted or short. */
593 117750 : if (DatumGetPointer(in->reconstructedValue))
594 117738 : reconstrValue = (text *) DatumGetPointer(in->reconstructedValue);
595 :
596 : Assert(reconstrValue == NULL ? level == 0 :
597 : VARSIZE_ANY_EXHDR(reconstrValue) == level);
598 :
599 : /* Reconstruct the full string represented by this leaf tuple */
600 117750 : fullLen = level + VARSIZE_ANY_EXHDR(leafValue);
601 117750 : if (VARSIZE_ANY_EXHDR(leafValue) == 0 && level > 0)
602 : {
603 37176 : fullValue = VARDATA(reconstrValue);
604 37176 : out->leafValue = PointerGetDatum(reconstrValue);
605 : }
606 : else
607 : {
608 80574 : text *fullText = palloc(VARHDRSZ + fullLen);
609 :
610 80574 : SET_VARSIZE(fullText, VARHDRSZ + fullLen);
611 80574 : fullValue = VARDATA(fullText);
612 80574 : if (level)
613 80562 : memcpy(fullValue, VARDATA(reconstrValue), level);
614 80574 : if (VARSIZE_ANY_EXHDR(leafValue) > 0)
615 80574 : memcpy(fullValue + level, VARDATA_ANY(leafValue),
616 : VARSIZE_ANY_EXHDR(leafValue));
617 80574 : out->leafValue = PointerGetDatum(fullText);
618 : }
619 :
620 : /* Perform the required comparison(s) */
621 117750 : res = true;
622 131523 : for (j = 0; j < in->nkeys; j++)
623 : {
624 117750 : StrategyNumber strategy = in->scankeys[j].sk_strategy;
625 117750 : text *query = DatumGetTextPP(in->scankeys[j].sk_argument);
626 117750 : int queryLen = VARSIZE_ANY_EXHDR(query);
627 : int r;
628 :
629 117750 : if (strategy == RTPrefixStrategyNumber)
630 : {
631 : /*
632 : * if level >= length of query then reconstrValue must begin with
633 : * query (prefix) string, so we don't need to check it again.
634 : */
635 384 : res = (level >= queryLen) ||
636 192 : DatumGetBool(DirectFunctionCall2Coll(text_starts_with,
637 : PG_GET_COLLATION(),
638 : out->leafValue,
639 : PointerGetDatum(query)));
640 :
641 192 : if (!res) /* no need to consider remaining conditions */
642 168 : break;
643 :
644 24 : continue;
645 : }
646 :
647 117558 : if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy))
648 : {
649 : /* Collation-aware comparison */
650 101364 : strategy -= SPG_STRATEGY_ADDITION;
651 :
652 : /* If asserts enabled, verify encoding of reconstructed string */
653 : Assert(pg_verifymbstr(fullValue, fullLen, false));
654 :
655 101364 : r = varstr_cmp(fullValue, fullLen,
656 101364 : VARDATA_ANY(query), queryLen,
657 : PG_GET_COLLATION());
658 : }
659 : else
660 : {
661 : /* Non-collation-aware comparison */
662 16194 : r = memcmp(fullValue, VARDATA_ANY(query), Min(queryLen, fullLen));
663 :
664 16194 : if (r == 0)
665 : {
666 12081 : if (queryLen > fullLen)
667 6012 : r = -1;
668 6069 : else if (queryLen < fullLen)
669 0 : r = 1;
670 : }
671 : }
672 :
673 117558 : switch (strategy)
674 : {
675 27188 : case BTLessStrategyNumber:
676 27188 : res = (r < 0);
677 27188 : break;
678 27188 : case BTLessEqualStrategyNumber:
679 27188 : res = (r <= 0);
680 27188 : break;
681 12150 : case BTEqualStrategyNumber:
682 12150 : res = (r == 0);
683 12150 : break;
684 25516 : case BTGreaterEqualStrategyNumber:
685 25516 : res = (r >= 0);
686 25516 : break;
687 25516 : case BTGreaterStrategyNumber:
688 25516 : res = (r > 0);
689 25516 : break;
690 0 : default:
691 0 : elog(ERROR, "unrecognized strategy number: %d",
692 : in->scankeys[j].sk_strategy);
693 : res = false;
694 : break;
695 : }
696 :
697 117558 : if (!res)
698 103809 : break; /* no need to consider remaining conditions */
699 : }
700 :
701 117750 : PG_RETURN_BOOL(res);
702 : }
|