Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * spgtextproc.c
4 : * implementation of radix tree (compressed trie) over text
5 : *
6 : * In a text_ops SPGiST index, inner tuples can have a prefix which is the
7 : * common prefix of all strings indexed under that tuple. The node labels
8 : * represent the next byte of the string(s) after the prefix. Assuming we
9 : * always use the longest possible prefix, we will get more than one node
10 : * label unless the prefix length is restricted by SPGIST_MAX_PREFIX_LENGTH.
11 : *
12 : * To reconstruct the indexed string for any index entry, concatenate the
13 : * inner-tuple prefixes and node labels starting at the root and working
14 : * down to the leaf entry, then append the datum in the leaf entry.
15 : * (While descending the tree, "level" is the number of bytes reconstructed
16 : * so far.)
17 : *
18 : * However, there are two special cases for node labels: -1 indicates that
19 : * there are no more bytes after the prefix-so-far, and -2 indicates that we
20 : * had to split an existing allTheSame tuple (in such a case we have to create
21 : * a node label that doesn't correspond to any string byte). In either case,
22 : * the node label does not contribute anything to the reconstructed string.
23 : *
24 : * Previously, we used a node label of zero for both special cases, but
25 : * this was problematic because one can't tell whether a string ending at
26 : * the current level can be pushed down into such a child node. For
27 : * backwards compatibility, we still support such node labels for reading;
28 : * but no new entries will ever be pushed down into a zero-labeled child.
29 : * No new entries ever get pushed into a -2-labeled child, either.
30 : *
31 : *
32 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
33 : * Portions Copyright (c) 1994, Regents of the University of California
34 : *
35 : * IDENTIFICATION
36 : * src/backend/access/spgist/spgtextproc.c
37 : *
38 : *-------------------------------------------------------------------------
39 : */
40 : #include "postgres.h"
41 :
42 : #include "access/spgist.h"
43 : #include "catalog/pg_type.h"
44 : #include "common/int.h"
45 : #include "mb/pg_wchar.h"
46 : #include "utils/datum.h"
47 : #include "utils/fmgrprotos.h"
48 : #include "utils/pg_locale.h"
49 : #include "utils/varlena.h"
50 : #include "varatt.h"
51 :
52 :
/*
 * Upper bound on the length of an inner-tuple prefix.
 *
 * In the worst case, an inner tuple in a text radix tree could have as many
 * as 258 nodes (one for each possible byte value, plus the two special
 * cases).  Each node can take 16 bytes on MAXALIGN=8 machines.  The inner
 * tuple must fit on an index page of size BLCKSZ.  Rather than assuming we
 * know the exact amount of overhead imposed by page headers, tuple headers,
 * etc, we leave 100 bytes for that (the actual overhead should be no more
 * than 56 bytes at this writing, so there is slop in this number).
 * So we can safely create prefixes up to BLCKSZ - 258 * 16 - 100 bytes long.
 *
 * Unfortunately, because 258 * 16 is over 4K, there is no safe prefix length
 * when BLCKSZ is less than 8K; it is always possible to get "SPGiST inner
 * tuple size exceeds maximum" if there are too many distinct next-byte values
 * at a given place in the tree.  Since use of nonstandard block sizes appears
 * to be negligible in the field, we just live with that fact for now,
 * choosing a max prefix size of 32 bytes when BLCKSZ is configured smaller
 * than default.  (The Max() below is what enforces that 32-byte floor.)
 */
#define SPGIST_MAX_PREFIX_LENGTH Max((int) (BLCKSZ - 258 * 16 - 100), 32)
71 :
/*
 * Strategy for collation aware operator on text is equal to btree strategy
 * plus value of 10 (SPG_STRATEGY_ADDITION).
 *
 * Current collation aware strategies and their corresponding btree strategies:
 * 11 BTLessStrategyNumber
 * 12 BTLessEqualStrategyNumber
 * 14 BTGreaterEqualStrategyNumber
 * 15 BTGreaterStrategyNumber
 *
 * SPG_IS_COLLATION_AWARE_STRATEGY(s) is true for any strategy number above
 * SPG_STRATEGY_ADDITION except RTPrefixStrategyNumber, which is explicitly
 * excluded.  Note that the macro evaluates its argument more than once.
 */
#define SPG_STRATEGY_ADDITION (10)
#define SPG_IS_COLLATION_AWARE_STRATEGY(s) ((s) > SPG_STRATEGY_ADDITION \
&& (s) != RTPrefixStrategyNumber)
85 :
/*
 * Struct for sorting values in picksplit.
 *
 * One entry is built per input tuple; the array is then sorted by "c" so
 * that tuples sharing a node label become adjacent.
 */
typedef struct spgNodePtr
{
Datum d;	/* the input datum itself */
int i;		/* position of the datum in the picksplit input array */
int16 c;	/* node label: first byte after the common prefix, or -1 */
} spgNodePtr;
93 :
94 :
95 : Datum
96 80 : spg_text_config(PG_FUNCTION_ARGS)
97 : {
98 : /* spgConfigIn *cfgin = (spgConfigIn *) PG_GETARG_POINTER(0); */
99 80 : spgConfigOut *cfg = (spgConfigOut *) PG_GETARG_POINTER(1);
100 :
101 80 : cfg->prefixType = TEXTOID;
102 80 : cfg->labelType = INT2OID;
103 80 : cfg->canReturnData = true;
104 80 : cfg->longValuesOK = true; /* suffixing will shorten long values */
105 80 : PG_RETURN_VOID();
106 : }
107 :
108 : /*
109 : * Form a text datum from the given not-necessarily-null-terminated string,
110 : * using short varlena header format if possible
111 : */
112 : static Datum
113 257796 : formTextDatum(const char *data, int datalen)
114 : {
115 : char *p;
116 :
117 257796 : p = (char *) palloc(datalen + VARHDRSZ);
118 :
119 257796 : if (datalen + VARHDRSZ_SHORT <= VARATT_SHORT_MAX)
120 : {
121 257796 : SET_VARSIZE_SHORT(p, datalen + VARHDRSZ_SHORT);
122 257796 : if (datalen)
123 242680 : memcpy(p + VARHDRSZ_SHORT, data, datalen);
124 : }
125 : else
126 : {
127 0 : SET_VARSIZE(p, datalen + VARHDRSZ);
128 0 : memcpy(p + VARHDRSZ, data, datalen);
129 : }
130 :
131 257796 : return PointerGetDatum(p);
132 : }
133 :
/*
 * Return the number of leading bytes that a and b have in common.
 *
 * Only the first lena bytes of a and the first lenb bytes of b are examined,
 * so neither string needs to be null-terminated.
 */
static int
commonPrefix(const char *a, const char *b, int lena, int lenb)
{
	int			limit = (lena < lenb) ? lena : lenb;
	int			matched;

	for (matched = 0; matched < limit; matched++)
	{
		if (a[matched] != b[matched])
			break;
	}

	return matched;
}
151 :
152 : /*
153 : * Binary search an array of int16 datums for a match to c
154 : *
155 : * On success, *i gets the match location; on failure, it gets where to insert
156 : */
157 : static bool
158 208612 : searchChar(Datum *nodeLabels, int nNodes, int16 c, int *i)
159 : {
160 208612 : int StopLow = 0,
161 208612 : StopHigh = nNodes;
162 :
163 572934 : while (StopLow < StopHigh)
164 : {
165 571568 : int StopMiddle = (StopLow + StopHigh) >> 1;
166 571568 : int16 middle = DatumGetInt16(nodeLabels[StopMiddle]);
167 :
168 571568 : if (c < middle)
169 180906 : StopHigh = StopMiddle;
170 390662 : else if (c > middle)
171 183416 : StopLow = StopMiddle + 1;
172 : else
173 : {
174 207246 : *i = StopMiddle;
175 207246 : return true;
176 : }
177 : }
178 :
179 1366 : *i = StopHigh;
180 1366 : return false;
181 : }
182 :
183 : Datum
184 209240 : spg_text_choose(PG_FUNCTION_ARGS)
185 : {
186 209240 : spgChooseIn *in = (spgChooseIn *) PG_GETARG_POINTER(0);
187 209240 : spgChooseOut *out = (spgChooseOut *) PG_GETARG_POINTER(1);
188 209240 : text *inText = DatumGetTextPP(in->datum);
189 209240 : char *inStr = VARDATA_ANY(inText);
190 209240 : int inSize = VARSIZE_ANY_EXHDR(inText);
191 209240 : char *prefixStr = NULL;
192 209240 : int prefixSize = 0;
193 209240 : int commonLen = 0;
194 209240 : int16 nodeChar = 0;
195 209240 : int i = 0;
196 :
197 : /* Check for prefix match, set nodeChar to first byte after prefix */
198 209240 : if (in->hasPrefix)
199 : {
200 82820 : text *prefixText = DatumGetTextPP(in->prefixDatum);
201 :
202 82820 : prefixStr = VARDATA_ANY(prefixText);
203 82820 : prefixSize = VARSIZE_ANY_EXHDR(prefixText);
204 :
205 82820 : commonLen = commonPrefix(inStr + in->level,
206 : prefixStr,
207 82820 : inSize - in->level,
208 : prefixSize);
209 :
210 82820 : if (commonLen == prefixSize)
211 : {
212 82192 : if (inSize - in->level > commonLen)
213 76036 : nodeChar = *(unsigned char *) (inStr + in->level + commonLen);
214 : else
215 6156 : nodeChar = -1;
216 : }
217 : else
218 : {
219 : /* Must split tuple because incoming value doesn't match prefix */
220 628 : out->resultType = spgSplitTuple;
221 :
222 628 : if (commonLen == 0)
223 : {
224 22 : out->result.splitTuple.prefixHasPrefix = false;
225 : }
226 : else
227 : {
228 606 : out->result.splitTuple.prefixHasPrefix = true;
229 606 : out->result.splitTuple.prefixPrefixDatum =
230 606 : formTextDatum(prefixStr, commonLen);
231 : }
232 628 : out->result.splitTuple.prefixNNodes = 1;
233 628 : out->result.splitTuple.prefixNodeLabels =
234 628 : (Datum *) palloc(sizeof(Datum));
235 1256 : out->result.splitTuple.prefixNodeLabels[0] =
236 628 : Int16GetDatum(*(unsigned char *) (prefixStr + commonLen));
237 :
238 628 : out->result.splitTuple.childNodeN = 0;
239 :
240 628 : if (prefixSize - commonLen == 1)
241 : {
242 616 : out->result.splitTuple.postfixHasPrefix = false;
243 : }
244 : else
245 : {
246 12 : out->result.splitTuple.postfixHasPrefix = true;
247 12 : out->result.splitTuple.postfixPrefixDatum =
248 12 : formTextDatum(prefixStr + commonLen + 1,
249 12 : prefixSize - commonLen - 1);
250 : }
251 :
252 628 : PG_RETURN_VOID();
253 : }
254 : }
255 126420 : else if (inSize > in->level)
256 : {
257 125526 : nodeChar = *(unsigned char *) (inStr + in->level);
258 : }
259 : else
260 : {
261 894 : nodeChar = -1;
262 : }
263 :
264 : /* Look up nodeChar in the node label array */
265 208612 : if (searchChar(in->nodeLabels, in->nNodes, nodeChar, &i))
266 : {
267 : /*
268 : * Descend to existing node. (If in->allTheSame, the core code will
269 : * ignore our nodeN specification here, but that's OK. We still have
270 : * to provide the correct levelAdd and restDatum values, and those are
271 : * the same regardless of which node gets chosen by core.)
272 : */
273 : int levelAdd;
274 :
275 207246 : out->resultType = spgMatchNode;
276 207246 : out->result.matchNode.nodeN = i;
277 207246 : levelAdd = commonLen;
278 207246 : if (nodeChar >= 0)
279 200202 : levelAdd++;
280 207246 : out->result.matchNode.levelAdd = levelAdd;
281 207246 : if (inSize - in->level - levelAdd > 0)
282 200196 : out->result.matchNode.restDatum =
283 200196 : formTextDatum(inStr + in->level + levelAdd,
284 200196 : inSize - in->level - levelAdd);
285 : else
286 7050 : out->result.matchNode.restDatum =
287 7050 : formTextDatum(NULL, 0);
288 : }
289 1366 : else if (in->allTheSame)
290 : {
291 : /*
292 : * Can't use AddNode action, so split the tuple. The upper tuple has
293 : * the same prefix as before and uses a dummy node label -2 for the
294 : * lower tuple. The lower tuple has no prefix and the same node
295 : * labels as the original tuple.
296 : *
297 : * Note: it might seem tempting to shorten the upper tuple's prefix,
298 : * if it has one, then use its last byte as label for the lower tuple.
299 : * But that doesn't win since we know the incoming value matches the
300 : * whole prefix: we'd just end up splitting the lower tuple again.
301 : */
302 6 : out->resultType = spgSplitTuple;
303 6 : out->result.splitTuple.prefixHasPrefix = in->hasPrefix;
304 6 : out->result.splitTuple.prefixPrefixDatum = in->prefixDatum;
305 6 : out->result.splitTuple.prefixNNodes = 1;
306 6 : out->result.splitTuple.prefixNodeLabels = (Datum *) palloc(sizeof(Datum));
307 6 : out->result.splitTuple.prefixNodeLabels[0] = Int16GetDatum(-2);
308 6 : out->result.splitTuple.childNodeN = 0;
309 6 : out->result.splitTuple.postfixHasPrefix = false;
310 : }
311 : else
312 : {
313 : /* Add a node for the not-previously-seen nodeChar value */
314 1360 : out->resultType = spgAddNode;
315 1360 : out->result.addNode.nodeLabel = Int16GetDatum(nodeChar);
316 1360 : out->result.addNode.nodeN = i;
317 : }
318 :
319 208612 : PG_RETURN_VOID();
320 : }
321 :
322 : /* qsort comparator to sort spgNodePtr structs by "c" */
323 : static int
324 114158 : cmpNodePtr(const void *a, const void *b)
325 : {
326 114158 : const spgNodePtr *aa = (const spgNodePtr *) a;
327 114158 : const spgNodePtr *bb = (const spgNodePtr *) b;
328 :
329 114158 : return pg_cmp_s16(aa->c, bb->c);
330 : }
331 :
332 : Datum
333 518 : spg_text_picksplit(PG_FUNCTION_ARGS)
334 : {
335 518 : spgPickSplitIn *in = (spgPickSplitIn *) PG_GETARG_POINTER(0);
336 518 : spgPickSplitOut *out = (spgPickSplitOut *) PG_GETARG_POINTER(1);
337 518 : text *text0 = DatumGetTextPP(in->datums[0]);
338 : int i,
339 : commonLen;
340 : spgNodePtr *nodes;
341 :
342 : /* Identify longest common prefix, if any */
343 518 : commonLen = VARSIZE_ANY_EXHDR(text0);
344 12848 : for (i = 1; i < in->nTuples && commonLen > 0; i++)
345 : {
346 12330 : text *texti = DatumGetTextPP(in->datums[i]);
347 36990 : int tmp = commonPrefix(VARDATA_ANY(text0),
348 12330 : VARDATA_ANY(texti),
349 12330 : VARSIZE_ANY_EXHDR(text0),
350 12330 : VARSIZE_ANY_EXHDR(texti));
351 :
352 12330 : if (tmp < commonLen)
353 418 : commonLen = tmp;
354 : }
355 :
356 : /*
357 : * Limit the prefix length, if necessary, to ensure that the resulting
358 : * inner tuple will fit on a page.
359 : */
360 518 : commonLen = Min(commonLen, SPGIST_MAX_PREFIX_LENGTH);
361 :
362 : /* Set node prefix to be that string, if it's not empty */
363 518 : if (commonLen == 0)
364 : {
365 426 : out->hasPrefix = false;
366 : }
367 : else
368 : {
369 92 : out->hasPrefix = true;
370 92 : out->prefixDatum = formTextDatum(VARDATA_ANY(text0), commonLen);
371 : }
372 :
373 : /* Extract the node label (first non-common byte) from each value */
374 518 : nodes = (spgNodePtr *) palloc(sizeof(spgNodePtr) * in->nTuples);
375 :
376 50358 : for (i = 0; i < in->nTuples; i++)
377 : {
378 49840 : text *texti = DatumGetTextPP(in->datums[i]);
379 :
380 49840 : if (commonLen < VARSIZE_ANY_EXHDR(texti))
381 43496 : nodes[i].c = *(unsigned char *) (VARDATA_ANY(texti) + commonLen);
382 : else
383 6344 : nodes[i].c = -1; /* use -1 if string is all common */
384 49840 : nodes[i].i = i;
385 49840 : nodes[i].d = in->datums[i];
386 : }
387 :
388 : /*
389 : * Sort by label values so that we can group the values into nodes. This
390 : * also ensures that the nodes are ordered by label value, allowing the
391 : * use of binary search in searchChar.
392 : */
393 518 : qsort(nodes, in->nTuples, sizeof(*nodes), cmpNodePtr);
394 :
395 : /* And emit results */
396 518 : out->nNodes = 0;
397 518 : out->nodeLabels = (Datum *) palloc(sizeof(Datum) * in->nTuples);
398 518 : out->mapTuplesToNodes = (int *) palloc(sizeof(int) * in->nTuples);
399 518 : out->leafTupleDatums = (Datum *) palloc(sizeof(Datum) * in->nTuples);
400 :
401 50358 : for (i = 0; i < in->nTuples; i++)
402 : {
403 49840 : text *texti = DatumGetTextPP(nodes[i].d);
404 : Datum leafD;
405 :
406 49840 : if (i == 0 || nodes[i].c != nodes[i - 1].c)
407 : {
408 3218 : out->nodeLabels[out->nNodes] = Int16GetDatum(nodes[i].c);
409 3218 : out->nNodes++;
410 : }
411 :
412 49840 : if (commonLen < VARSIZE_ANY_EXHDR(texti))
413 43496 : leafD = formTextDatum(VARDATA_ANY(texti) + commonLen + 1,
414 43496 : VARSIZE_ANY_EXHDR(texti) - commonLen - 1);
415 : else
416 6344 : leafD = formTextDatum(NULL, 0);
417 :
418 49840 : out->leafTupleDatums[nodes[i].i] = leafD;
419 49840 : out->mapTuplesToNodes[nodes[i].i] = out->nNodes - 1;
420 : }
421 :
422 518 : PG_RETURN_VOID();
423 : }
424 :
425 : Datum
426 1732 : spg_text_inner_consistent(PG_FUNCTION_ARGS)
427 : {
428 1732 : spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0);
429 1732 : spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1);
430 1732 : bool collate_is_c = pg_newlocale_from_collation(PG_GET_COLLATION())->collate_is_c;
431 : text *reconstructedValue;
432 : text *reconstrText;
433 : int maxReconstrLen;
434 1732 : text *prefixText = NULL;
435 1732 : int prefixSize = 0;
436 : int i;
437 :
438 : /*
439 : * Reconstruct values represented at this tuple, including parent data,
440 : * prefix of this tuple if any, and the node label if it's non-dummy.
441 : * in->level should be the length of the previously reconstructed value,
442 : * and the number of bytes added here is prefixSize or prefixSize + 1.
443 : *
444 : * Note: we assume that in->reconstructedValue isn't toasted and doesn't
445 : * have a short varlena header. This is okay because it must have been
446 : * created by a previous invocation of this routine, and we always emit
447 : * long-format reconstructed values.
448 : */
449 1732 : reconstructedValue = (text *) DatumGetPointer(in->reconstructedValue);
450 : Assert(reconstructedValue == NULL ? in->level == 0 :
451 : VARSIZE_ANY_EXHDR(reconstructedValue) == in->level);
452 :
453 1732 : maxReconstrLen = in->level + 1;
454 1732 : if (in->hasPrefix)
455 : {
456 324 : prefixText = DatumGetTextPP(in->prefixDatum);
457 324 : prefixSize = VARSIZE_ANY_EXHDR(prefixText);
458 324 : maxReconstrLen += prefixSize;
459 : }
460 :
461 1732 : reconstrText = palloc(VARHDRSZ + maxReconstrLen);
462 1732 : SET_VARSIZE(reconstrText, VARHDRSZ + maxReconstrLen);
463 :
464 1732 : if (in->level)
465 1552 : memcpy(VARDATA(reconstrText),
466 1552 : VARDATA(reconstructedValue),
467 1552 : in->level);
468 1732 : if (prefixSize)
469 324 : memcpy(((char *) VARDATA(reconstrText)) + in->level,
470 324 : VARDATA_ANY(prefixText),
471 : prefixSize);
472 : /* last byte of reconstrText will be filled in below */
473 :
474 : /*
475 : * Scan the child nodes. For each one, complete the reconstructed value
476 : * and see if it's consistent with the query. If so, emit an entry into
477 : * the output arrays.
478 : */
479 1732 : out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes);
480 1732 : out->levelAdds = (int *) palloc(sizeof(int) * in->nNodes);
481 1732 : out->reconstructedValues = (Datum *) palloc(sizeof(Datum) * in->nNodes);
482 1732 : out->nNodes = 0;
483 :
484 18304 : for (i = 0; i < in->nNodes; i++)
485 : {
486 16572 : int16 nodeChar = DatumGetInt16(in->nodeLabels[i]);
487 : int thisLen;
488 16572 : bool res = true;
489 : int j;
490 :
491 : /* If nodeChar is a dummy value, don't include it in data */
492 16572 : if (nodeChar <= 0)
493 3324 : thisLen = maxReconstrLen - 1;
494 : else
495 : {
496 13248 : ((unsigned char *) VARDATA(reconstrText))[maxReconstrLen - 1] = nodeChar;
497 13248 : thisLen = maxReconstrLen;
498 : }
499 :
500 28644 : for (j = 0; j < in->nkeys; j++)
501 : {
502 16572 : StrategyNumber strategy = in->scankeys[j].sk_strategy;
503 : text *inText;
504 : int inSize;
505 : int r;
506 :
507 : /*
508 : * If it's a collation-aware operator, but the collation is C, we
509 : * can treat it as non-collation-aware. With non-C collation we
510 : * need to traverse whole tree :-( so there's no point in making
511 : * any check here. (Note also that our reconstructed value may
512 : * well end with a partial multibyte character, so that applying
513 : * any encoding-sensitive test to it would be risky anyhow.)
514 : */
515 16572 : if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy))
516 : {
517 10416 : if (collate_is_c)
518 624 : strategy -= SPG_STRATEGY_ADDITION;
519 : else
520 9792 : continue;
521 : }
522 :
523 6780 : inText = DatumGetTextPP(in->scankeys[j].sk_argument);
524 6780 : inSize = VARSIZE_ANY_EXHDR(inText);
525 :
526 6780 : r = memcmp(VARDATA(reconstrText), VARDATA_ANY(inText),
527 6780 : Min(inSize, thisLen));
528 :
529 6780 : switch (strategy)
530 : {
531 1408 : case BTLessStrategyNumber:
532 : case BTLessEqualStrategyNumber:
533 1408 : if (r > 0)
534 800 : res = false;
535 1408 : break;
536 3468 : case BTEqualStrategyNumber:
537 3468 : if (r != 0 || inSize < thisLen)
538 2100 : res = false;
539 3468 : break;
540 1088 : case BTGreaterEqualStrategyNumber:
541 : case BTGreaterStrategyNumber:
542 1088 : if (r < 0)
543 832 : res = false;
544 1088 : break;
545 816 : case RTPrefixStrategyNumber:
546 816 : if (r != 0)
547 768 : res = false;
548 816 : break;
549 0 : default:
550 0 : elog(ERROR, "unrecognized strategy number: %d",
551 : in->scankeys[j].sk_strategy);
552 : break;
553 : }
554 :
555 6780 : if (!res)
556 4500 : break; /* no need to consider remaining conditions */
557 : }
558 :
559 16572 : if (res)
560 : {
561 12072 : out->nodeNumbers[out->nNodes] = i;
562 12072 : out->levelAdds[out->nNodes] = thisLen - in->level;
563 12072 : SET_VARSIZE(reconstrText, VARHDRSZ + thisLen);
564 24144 : out->reconstructedValues[out->nNodes] =
565 12072 : datumCopy(PointerGetDatum(reconstrText), false, -1);
566 12072 : out->nNodes++;
567 : }
568 : }
569 :
570 1732 : PG_RETURN_VOID();
571 : }
572 :
573 : Datum
574 235500 : spg_text_leaf_consistent(PG_FUNCTION_ARGS)
575 : {
576 235500 : spgLeafConsistentIn *in = (spgLeafConsistentIn *) PG_GETARG_POINTER(0);
577 235500 : spgLeafConsistentOut *out = (spgLeafConsistentOut *) PG_GETARG_POINTER(1);
578 235500 : int level = in->level;
579 : text *leafValue,
580 235500 : *reconstrValue = NULL;
581 : char *fullValue;
582 : int fullLen;
583 : bool res;
584 : int j;
585 :
586 : /* all tests are exact */
587 235500 : out->recheck = false;
588 :
589 235500 : leafValue = DatumGetTextPP(in->leafDatum);
590 :
591 : /* As above, in->reconstructedValue isn't toasted or short. */
592 235500 : if (DatumGetPointer(in->reconstructedValue))
593 235476 : reconstrValue = (text *) DatumGetPointer(in->reconstructedValue);
594 :
595 : Assert(reconstrValue == NULL ? level == 0 :
596 : VARSIZE_ANY_EXHDR(reconstrValue) == level);
597 :
598 : /* Reconstruct the full string represented by this leaf tuple */
599 235500 : fullLen = level + VARSIZE_ANY_EXHDR(leafValue);
600 235500 : if (VARSIZE_ANY_EXHDR(leafValue) == 0 && level > 0)
601 : {
602 74352 : fullValue = VARDATA(reconstrValue);
603 74352 : out->leafValue = PointerGetDatum(reconstrValue);
604 : }
605 : else
606 : {
607 161148 : text *fullText = palloc(VARHDRSZ + fullLen);
608 :
609 161148 : SET_VARSIZE(fullText, VARHDRSZ + fullLen);
610 161148 : fullValue = VARDATA(fullText);
611 161148 : if (level)
612 161124 : memcpy(fullValue, VARDATA(reconstrValue), level);
613 161148 : if (VARSIZE_ANY_EXHDR(leafValue) > 0)
614 161148 : memcpy(fullValue + level, VARDATA_ANY(leafValue),
615 161148 : VARSIZE_ANY_EXHDR(leafValue));
616 161148 : out->leafValue = PointerGetDatum(fullText);
617 : }
618 :
619 : /* Perform the required comparison(s) */
620 235500 : res = true;
621 263046 : for (j = 0; j < in->nkeys; j++)
622 : {
623 235500 : StrategyNumber strategy = in->scankeys[j].sk_strategy;
624 235500 : text *query = DatumGetTextPP(in->scankeys[j].sk_argument);
625 235500 : int queryLen = VARSIZE_ANY_EXHDR(query);
626 : int r;
627 :
628 235500 : if (strategy == RTPrefixStrategyNumber)
629 : {
630 : /*
631 : * if level >= length of query then reconstrValue must begin with
632 : * query (prefix) string, so we don't need to check it again.
633 : */
634 768 : res = (level >= queryLen) ||
635 384 : DatumGetBool(DirectFunctionCall2Coll(text_starts_with,
636 : PG_GET_COLLATION(),
637 : out->leafValue,
638 : PointerGetDatum(query)));
639 :
640 384 : if (!res) /* no need to consider remaining conditions */
641 336 : break;
642 :
643 48 : continue;
644 : }
645 :
646 235116 : if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy))
647 : {
648 : /* Collation-aware comparison */
649 202728 : strategy -= SPG_STRATEGY_ADDITION;
650 :
651 : /* If asserts enabled, verify encoding of reconstructed string */
652 : Assert(pg_verifymbstr(fullValue, fullLen, false));
653 :
654 202728 : r = varstr_cmp(fullValue, fullLen,
655 202728 : VARDATA_ANY(query), queryLen,
656 : PG_GET_COLLATION());
657 : }
658 : else
659 : {
660 : /* Non-collation-aware comparison */
661 32388 : r = memcmp(fullValue, VARDATA_ANY(query), Min(queryLen, fullLen));
662 :
663 32388 : if (r == 0)
664 : {
665 24162 : if (queryLen > fullLen)
666 12024 : r = -1;
667 12138 : else if (queryLen < fullLen)
668 0 : r = 1;
669 : }
670 : }
671 :
672 235116 : switch (strategy)
673 : {
674 54376 : case BTLessStrategyNumber:
675 54376 : res = (r < 0);
676 54376 : break;
677 54376 : case BTLessEqualStrategyNumber:
678 54376 : res = (r <= 0);
679 54376 : break;
680 24300 : case BTEqualStrategyNumber:
681 24300 : res = (r == 0);
682 24300 : break;
683 51032 : case BTGreaterEqualStrategyNumber:
684 51032 : res = (r >= 0);
685 51032 : break;
686 51032 : case BTGreaterStrategyNumber:
687 51032 : res = (r > 0);
688 51032 : break;
689 0 : default:
690 0 : elog(ERROR, "unrecognized strategy number: %d",
691 : in->scankeys[j].sk_strategy);
692 : res = false;
693 : break;
694 : }
695 :
696 235116 : if (!res)
697 207618 : break; /* no need to consider remaining conditions */
698 : }
699 :
700 235500 : PG_RETURN_BOOL(res);
701 : }
|