LCOV - PostgreSQL 19devel - src/backend/access/spgist/spgtextproc.c

LCOV - code coverage report

Current view:	top level - src/backend/access/spgist - spgtextproc.c (source / functions)		Hit	Total	Coverage
Test:	PostgreSQL 19devel	Lines:	279	286	97.6 %
Date:	2026-02-09 00:18:05	Functions:	9	9	100.0 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * spgtextproc.c
       4             :  *    implementation of radix tree (compressed trie) over text
       5             :  *
       6             :  * In a text_ops SPGiST index, inner tuples can have a prefix which is the
       7             :  * common prefix of all strings indexed under that tuple.  The node labels
       8             :  * represent the next byte of the string(s) after the prefix.  Assuming we
       9             :  * always use the longest possible prefix, we will get more than one node
      10             :  * label unless the prefix length is restricted by SPGIST_MAX_PREFIX_LENGTH.
      11             :  *
      12             :  * To reconstruct the indexed string for any index entry, concatenate the
      13             :  * inner-tuple prefixes and node labels starting at the root and working
      14             :  * down to the leaf entry, then append the datum in the leaf entry.
      15             :  * (While descending the tree, "level" is the number of bytes reconstructed
      16             :  * so far.)
      17             :  *
      18             :  * However, there are two special cases for node labels: -1 indicates that
      19             :  * there are no more bytes after the prefix-so-far, and -2 indicates that we
      20             :  * had to split an existing allTheSame tuple (in such a case we have to create
      21             :  * a node label that doesn't correspond to any string byte).  In either case,
      22             :  * the node label does not contribute anything to the reconstructed string.
      23             :  *
      24             :  * Previously, we used a node label of zero for both special cases, but
      25             :  * this was problematic because one can't tell whether a string ending at
      26             :  * the current level can be pushed down into such a child node.  For
      27             :  * backwards compatibility, we still support such node labels for reading;
      28             :  * but no new entries will ever be pushed down into a zero-labeled child.
      29             :  * No new entries ever get pushed into a -2-labeled child, either.
      30             :  *
      31             :  *
      32             :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      33             :  * Portions Copyright (c) 1994, Regents of the University of California
      34             :  *
      35             :  * IDENTIFICATION
      36             :  *          src/backend/access/spgist/spgtextproc.c
      37             :  *
      38             :  *-------------------------------------------------------------------------
      39             :  */
      40             : #include "postgres.h"
      41             : 
      42             : #include "access/spgist.h"
      43             : #include "catalog/pg_type.h"
      44             : #include "common/int.h"
      45             : #include "mb/pg_wchar.h"
      46             : #include "utils/datum.h"
      47             : #include "utils/fmgrprotos.h"
      48             : #include "utils/pg_locale.h"
      49             : #include "utils/varlena.h"
      50             : #include "varatt.h"
      51             : 
      52             : 
      /*
       * Upper bound on the length of an inner-tuple prefix.
       *
       * In the worst case an inner tuple of a text radix tree carries 258 nodes:
       * one per possible byte value plus the two special labels.  A node can
       * occupy 16 bytes on MAXALIGN=8 machines, and the whole inner tuple has to
       * fit on an index page of BLCKSZ bytes.  Instead of computing the exact
       * overhead of page headers, tuple headers and so on, 100 bytes are set
       * aside for it (the real figure should be no more than 56 bytes at this
       * writing, so there is some slop).  Prefixes of up to
       * BLCKSZ - 258 * 16 - 100 bytes are therefore safe to create.
       *
       * Because 258 * 16 is more than 4K, no prefix length is truly safe when
       * BLCKSZ is below 8K: "SPGiST inner tuple size exceeds maximum" can always
       * happen if too many distinct next-byte values show up at one place in the
       * tree.  Use of nonstandard block sizes appears to be negligible in the
       * field, so we live with that and settle for a 32-byte maximum prefix when
       * BLCKSZ is configured smaller than the default.
       */
      #define SPGIST_MAX_PREFIX_LENGTH    Max((int) (BLCKSZ - 258 * 16 - 100), 32)
      71             : 
      /*
       * The strategy number of a collation-aware operator on text equals the
       * corresponding btree strategy number plus 10.
       *
       * Collation-aware strategies currently in use, with their btree
       * counterparts:
       *      11  BTLessStrategyNumber
       *      12  BTLessEqualStrategyNumber
       *      14  BTGreaterEqualStrategyNumber
       *      15  BTGreaterStrategyNumber
       *
       * RTPrefixStrategyNumber is explicitly excluded by the test macro, so it
       * is never treated as collation-aware.
       */
      #define SPG_STRATEGY_ADDITION   (10)
      #define SPG_IS_COLLATION_AWARE_STRATEGY(s) ((s) > SPG_STRATEGY_ADDITION \
                                               && (s) != RTPrefixStrategyNumber)
      85             : 
      86             : /* Struct for sorting values in picksplit */
      87             : typedef struct spgNodePtr
      88             : {
      89             :     Datum       d;
      90             :     int         i;
      91             :     int16       c;
      92             : } spgNodePtr;
      93             : 
      94             : 
      95             : Datum
      96          84 : spg_text_config(PG_FUNCTION_ARGS)
      97             : {
      98             : #ifdef NOT_USED
      99             :     spgConfigIn *cfgin = (spgConfigIn *) PG_GETARG_POINTER(0);
     100             : #endif
     101          84 :     spgConfigOut *cfg = (spgConfigOut *) PG_GETARG_POINTER(1);
     102             : 
     103          84 :     cfg->prefixType = TEXTOID;
     104          84 :     cfg->labelType = INT2OID;
     105          84 :     cfg->canReturnData = true;
     106          84 :     cfg->longValuesOK = true;    /* suffixing will shorten long values */
     107          84 :     PG_RETURN_VOID();
     108             : }
     109             : 
     110             : /*
     111             :  * Form a text datum from the given not-necessarily-null-terminated string,
     112             :  * using short varlena header format if possible
     113             :  */
     114             : static Datum
     115      258178 : formTextDatum(const char *data, int datalen)
     116             : {
     117             :     char       *p;
     118             : 
     119      258178 :     p = (char *) palloc(datalen + VARHDRSZ);
     120             : 
     121      258178 :     if (datalen + VARHDRSZ_SHORT <= VARATT_SHORT_MAX)
     122             :     {
     123      258178 :         SET_VARSIZE_SHORT(p, datalen + VARHDRSZ_SHORT);
     124      258178 :         if (datalen)
     125      243254 :             memcpy(p + VARHDRSZ_SHORT, data, datalen);
     126             :     }
     127             :     else
     128             :     {
     129           0 :         SET_VARSIZE(p, datalen + VARHDRSZ);
     130           0 :         memcpy(p + VARHDRSZ, data, datalen);
     131             :     }
     132             : 
     133      258178 :     return PointerGetDatum(p);
     134             : }
     135             : 
     /*
      * commonPrefix
      *      Return the number of leading bytes that a and b have in common.
      *
      * Only the first lena bytes of a and the first lenb bytes of b are
      * examined, so neither string needs to be null-terminated.
      */
     static int
     commonPrefix(const char *a, const char *b, int lena, int lenb)
     {
         int         limit = (lena < lenb) ? lena : lenb;
         int         n;

         /* stop at the first mismatch or at the end of the shorter string */
         for (n = 0; n < limit; n++)
         {
             if (a[n] != b[n])
                 break;
         }

         return n;
     }
     153             : 
     154             : /*
     155             :  * Binary search an array of int16 datums for a match to c
     156             :  *
     157             :  * On success, *i gets the match location; on failure, it gets where to insert
     158             :  */
     159             : static bool
     160      208690 : searchChar(const Datum *nodeLabels, int nNodes, int16 c, int *i)
     161             : {
     162      208690 :     int         StopLow = 0,
     163      208690 :                 StopHigh = nNodes;
     164             : 
     165      573036 :     while (StopLow < StopHigh)
     166             :     {
     167      571670 :         int         StopMiddle = (StopLow + StopHigh) >> 1;
     168      571670 :         int16       middle = DatumGetInt16(nodeLabels[StopMiddle]);
     169             : 
     170      571670 :         if (c < middle)
     171      180922 :             StopHigh = StopMiddle;
     172      390748 :         else if (c > middle)
     173      183424 :             StopLow = StopMiddle + 1;
     174             :         else
     175             :         {
     176      207324 :             *i = StopMiddle;
     177      207324 :             return true;
     178             :         }
     179             :     }
     180             : 
     181        1366 :     *i = StopHigh;
     182        1366 :     return false;
     183             : }
     184             : 
     185             : Datum
     186      209318 : spg_text_choose(PG_FUNCTION_ARGS)
     187             : {
     188      209318 :     spgChooseIn *in = (spgChooseIn *) PG_GETARG_POINTER(0);
     189      209318 :     spgChooseOut *out = (spgChooseOut *) PG_GETARG_POINTER(1);
     190      209318 :     text       *inText = DatumGetTextPP(in->datum);
     191      209318 :     char       *inStr = VARDATA_ANY(inText);
     192      209318 :     int         inSize = VARSIZE_ANY_EXHDR(inText);
     193      209318 :     char       *prefixStr = NULL;
     194      209318 :     int         prefixSize = 0;
     195      209318 :     int         commonLen = 0;
     196      209318 :     int16       nodeChar = 0;
     197      209318 :     int         i = 0;
     198             : 
     199             :     /* Check for prefix match, set nodeChar to first byte after prefix */
     200      209318 :     if (in->hasPrefix)
     201             :     {
     202       82828 :         text       *prefixText = DatumGetTextPP(in->prefixDatum);
     203             : 
     204       82828 :         prefixStr = VARDATA_ANY(prefixText);
     205       82828 :         prefixSize = VARSIZE_ANY_EXHDR(prefixText);
     206             : 
     207       82828 :         commonLen = commonPrefix(inStr + in->level,
     208             :                                  prefixStr,
     209       82828 :                                  inSize - in->level,
     210             :                                  prefixSize);
     211             : 
     212       82828 :         if (commonLen == prefixSize)
     213             :         {
     214       82200 :             if (inSize - in->level > commonLen)
     215       76044 :                 nodeChar = *(unsigned char *) (inStr + in->level + commonLen);
     216             :             else
     217        6156 :                 nodeChar = -1;
     218             :         }
     219             :         else
     220             :         {
     221             :             /* Must split tuple because incoming value doesn't match prefix */
     222         628 :             out->resultType = spgSplitTuple;
     223             : 
     224         628 :             if (commonLen == 0)
     225             :             {
     226          22 :                 out->result.splitTuple.prefixHasPrefix = false;
     227             :             }
     228             :             else
     229             :             {
     230         606 :                 out->result.splitTuple.prefixHasPrefix = true;
     231         606 :                 out->result.splitTuple.prefixPrefixDatum =
     232         606 :                     formTextDatum(prefixStr, commonLen);
     233             :             }
     234         628 :             out->result.splitTuple.prefixNNodes = 1;
     235         628 :             out->result.splitTuple.prefixNodeLabels = palloc_object(Datum);
     236        1256 :             out->result.splitTuple.prefixNodeLabels[0] =
     237         628 :                 Int16GetDatum(*(unsigned char *) (prefixStr + commonLen));
     238             : 
     239         628 :             out->result.splitTuple.childNodeN = 0;
     240             : 
     241         628 :             if (prefixSize - commonLen == 1)
     242             :             {
     243         616 :                 out->result.splitTuple.postfixHasPrefix = false;
     244             :             }
     245             :             else
     246             :             {
     247          12 :                 out->result.splitTuple.postfixHasPrefix = true;
     248          12 :                 out->result.splitTuple.postfixPrefixDatum =
     249          12 :                     formTextDatum(prefixStr + commonLen + 1,
     250          12 :                                   prefixSize - commonLen - 1);
     251             :             }
     252             : 
     253         628 :             PG_RETURN_VOID();
     254             :         }
     255             :     }
     256      126490 :     else if (inSize > in->level)
     257             :     {
     258      125526 :         nodeChar = *(unsigned char *) (inStr + in->level);
     259             :     }
     260             :     else
     261             :     {
     262         964 :         nodeChar = -1;
     263             :     }
     264             : 
     265             :     /* Look up nodeChar in the node label array */
     266      208690 :     if (searchChar(in->nodeLabels, in->nNodes, nodeChar, &i))
     267             :     {
     268             :         /*
     269             :          * Descend to existing node.  (If in->allTheSame, the core code will
     270             :          * ignore our nodeN specification here, but that's OK.  We still have
     271             :          * to provide the correct levelAdd and restDatum values, and those are
     272             :          * the same regardless of which node gets chosen by core.)
     273             :          */
     274             :         int         levelAdd;
     275             : 
     276      207324 :         out->resultType = spgMatchNode;
     277      207324 :         out->result.matchNode.nodeN = i;
     278      207324 :         levelAdd = commonLen;
     279      207324 :         if (nodeChar >= 0)
     280      200210 :             levelAdd++;
     281      207324 :         out->result.matchNode.levelAdd = levelAdd;
     282      207324 :         if (inSize - in->level - levelAdd > 0)
     283      200204 :             out->result.matchNode.restDatum =
     284      200204 :                 formTextDatum(inStr + in->level + levelAdd,
     285      200204 :                               inSize - in->level - levelAdd);
     286             :         else
     287        7120 :             out->result.matchNode.restDatum =
     288        7120 :                 formTextDatum(NULL, 0);
     289             :     }
     290        1366 :     else if (in->allTheSame)
     291             :     {
     292             :         /*
     293             :          * Can't use AddNode action, so split the tuple.  The upper tuple has
     294             :          * the same prefix as before and uses a dummy node label -2 for the
     295             :          * lower tuple.  The lower tuple has no prefix and the same node
     296             :          * labels as the original tuple.
     297             :          *
     298             :          * Note: it might seem tempting to shorten the upper tuple's prefix,
     299             :          * if it has one, then use its last byte as label for the lower tuple.
     300             :          * But that doesn't win since we know the incoming value matches the
     301             :          * whole prefix: we'd just end up splitting the lower tuple again.
     302             :          */
     303           6 :         out->resultType = spgSplitTuple;
     304           6 :         out->result.splitTuple.prefixHasPrefix = in->hasPrefix;
     305           6 :         out->result.splitTuple.prefixPrefixDatum = in->prefixDatum;
     306           6 :         out->result.splitTuple.prefixNNodes = 1;
     307           6 :         out->result.splitTuple.prefixNodeLabels = palloc_object(Datum);
     308           6 :         out->result.splitTuple.prefixNodeLabels[0] = Int16GetDatum(-2);
     309           6 :         out->result.splitTuple.childNodeN = 0;
     310           6 :         out->result.splitTuple.postfixHasPrefix = false;
     311             :     }
     312             :     else
     313             :     {
     314             :         /* Add a node for the not-previously-seen nodeChar value */
     315        1360 :         out->resultType = spgAddNode;
     316        1360 :         out->result.addNode.nodeLabel = Int16GetDatum(nodeChar);
     317        1360 :         out->result.addNode.nodeN = i;
     318             :     }
     319             : 
     320      208690 :     PG_RETURN_VOID();
     321             : }
     322             : 
     323             : /* qsort comparator to sort spgNodePtr structs by "c" */
     324             : static int
     325      115276 : cmpNodePtr(const void *a, const void *b)
     326             : {
     327      115276 :     const spgNodePtr *aa = (const spgNodePtr *) a;
     328      115276 :     const spgNodePtr *bb = (const spgNodePtr *) b;
     329             : 
     330      115276 :     return pg_cmp_s16(aa->c, bb->c);
     331             : }
     332             : 
     333             : Datum
     334         518 : spg_text_picksplit(PG_FUNCTION_ARGS)
     335             : {
     336         518 :     spgPickSplitIn *in = (spgPickSplitIn *) PG_GETARG_POINTER(0);
     337         518 :     spgPickSplitOut *out = (spgPickSplitOut *) PG_GETARG_POINTER(1);
     338         518 :     text       *text0 = DatumGetTextPP(in->datums[0]);
     339             :     int         i,
     340             :                 commonLen;
     341             :     spgNodePtr *nodes;
     342             : 
     343             :     /* Identify longest common prefix, if any */
     344         518 :     commonLen = VARSIZE_ANY_EXHDR(text0);
     345       13450 :     for (i = 1; i < in->nTuples && commonLen > 0; i++)
     346             :     {
     347       12932 :         text       *texti = DatumGetTextPP(in->datums[i]);
     348       12932 :         int         tmp = commonPrefix(VARDATA_ANY(text0),
     349       12932 :                                        VARDATA_ANY(texti),
     350       12932 :                                        VARSIZE_ANY_EXHDR(text0),
     351       12932 :                                        VARSIZE_ANY_EXHDR(texti));
     352             : 
     353       12932 :         if (tmp < commonLen)
     354         434 :             commonLen = tmp;
     355             :     }
     356             : 
     357             :     /*
     358             :      * Limit the prefix length, if necessary, to ensure that the resulting
     359             :      * inner tuple will fit on a page.
     360             :      */
     361         518 :     commonLen = Min(commonLen, SPGIST_MAX_PREFIX_LENGTH);
     362             : 
     363             :     /* Set node prefix to be that string, if it's not empty */
     364         518 :     if (commonLen == 0)
     365             :     {
     366         426 :         out->hasPrefix = false;
     367             :     }
     368             :     else
     369             :     {
     370          92 :         out->hasPrefix = true;
     371          92 :         out->prefixDatum = formTextDatum(VARDATA_ANY(text0), commonLen);
     372             :     }
     373             : 
     374             :     /* Extract the node label (first non-common byte) from each value */
     375         518 :     nodes = palloc_array(spgNodePtr, in->nTuples);
     376             : 
     377       50662 :     for (i = 0; i < in->nTuples; i++)
     378             :     {
     379       50144 :         text       *texti = DatumGetTextPP(in->datums[i]);
     380             : 
     381       50144 :         if (commonLen < VARSIZE_ANY_EXHDR(texti))
     382       44082 :             nodes[i].c = *(unsigned char *) (VARDATA_ANY(texti) + commonLen);
     383             :         else
     384        6062 :             nodes[i].c = -1;    /* use -1 if string is all common */
     385       50144 :         nodes[i].i = i;
     386       50144 :         nodes[i].d = in->datums[i];
     387             :     }
     388             : 
     389             :     /*
     390             :      * Sort by label values so that we can group the values into nodes.  This
     391             :      * also ensures that the nodes are ordered by label value, allowing the
     392             :      * use of binary search in searchChar.
     393             :      */
     394         518 :     qsort(nodes, in->nTuples, sizeof(*nodes), cmpNodePtr);
     395             : 
     396             :     /* And emit results */
     397         518 :     out->nNodes = 0;
     398         518 :     out->nodeLabels = palloc_array(Datum, in->nTuples);
     399         518 :     out->mapTuplesToNodes = palloc_array(int, in->nTuples);
     400         518 :     out->leafTupleDatums = palloc_array(Datum, in->nTuples);
     401             : 
     402       50662 :     for (i = 0; i < in->nTuples; i++)
     403             :     {
     404       50144 :         text       *texti = DatumGetTextPP(nodes[i].d);
     405             :         Datum       leafD;
     406             : 
     407       50144 :         if (i == 0 || nodes[i].c != nodes[i - 1].c)
     408             :         {
     409        3226 :             out->nodeLabels[out->nNodes] = Int16GetDatum(nodes[i].c);
     410        3226 :             out->nNodes++;
     411             :         }
     412             : 
     413       50144 :         if (commonLen < VARSIZE_ANY_EXHDR(texti))
     414       44082 :             leafD = formTextDatum(VARDATA_ANY(texti) + commonLen + 1,
     415       44082 :                                   VARSIZE_ANY_EXHDR(texti) - commonLen - 1);
     416             :         else
     417        6062 :             leafD = formTextDatum(NULL, 0);
     418             : 
     419       50144 :         out->leafTupleDatums[nodes[i].i] = leafD;
     420       50144 :         out->mapTuplesToNodes[nodes[i].i] = out->nNodes - 1;
     421             :     }
     422             : 
     423         518 :     PG_RETURN_VOID();
     424             : }
     425             : 
     426             : Datum
     427        1692 : spg_text_inner_consistent(PG_FUNCTION_ARGS)
     428             : {
     429        1692 :     spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0);
     430        1692 :     spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1);
     431        1692 :     bool        collate_is_c = pg_newlocale_from_collation(PG_GET_COLLATION())->collate_is_c;
     432             :     text       *reconstructedValue;
     433             :     text       *reconstrText;
     434             :     int         maxReconstrLen;
     435        1692 :     text       *prefixText = NULL;
     436        1692 :     int         prefixSize = 0;
     437             :     int         i;
     438             : 
     439             :     /*
     440             :      * Reconstruct values represented at this tuple, including parent data,
     441             :      * prefix of this tuple if any, and the node label if it's non-dummy.
     442             :      * in->level should be the length of the previously reconstructed value,
     443             :      * and the number of bytes added here is prefixSize or prefixSize + 1.
     444             :      *
     445             :      * Note: we assume that in->reconstructedValue isn't toasted and doesn't
     446             :      * have a short varlena header.  This is okay because it must have been
     447             :      * created by a previous invocation of this routine, and we always emit
     448             :      * long-format reconstructed values.
     449             :      */
     450        1692 :     reconstructedValue = (text *) DatumGetPointer(in->reconstructedValue);
     451             :     Assert(reconstructedValue == NULL ? in->level == 0 :
     452             :            VARSIZE_ANY_EXHDR(reconstructedValue) == in->level);
     453             : 
     454        1692 :     maxReconstrLen = in->level + 1;
     455        1692 :     if (in->hasPrefix)
     456             :     {
     457         324 :         prefixText = DatumGetTextPP(in->prefixDatum);
     458         324 :         prefixSize = VARSIZE_ANY_EXHDR(prefixText);
     459         324 :         maxReconstrLen += prefixSize;
     460             :     }
     461             : 
     462        1692 :     reconstrText = palloc(VARHDRSZ + maxReconstrLen);
     463        1692 :     SET_VARSIZE(reconstrText, VARHDRSZ + maxReconstrLen);
     464             : 
     465        1692 :     if (in->level)
     466        1512 :         memcpy(VARDATA(reconstrText),
     467        1512 :                VARDATA(reconstructedValue),
     468        1512 :                in->level);
     469        1692 :     if (prefixSize)
     470         324 :         memcpy(((char *) VARDATA(reconstrText)) + in->level,
     471         324 :                VARDATA_ANY(prefixText),
     472             :                prefixSize);
     473             :     /* last byte of reconstrText will be filled in below */
     474             : 
     475             :     /*
     476             :      * Scan the child nodes.  For each one, complete the reconstructed value
     477             :      * and see if it's consistent with the query.  If so, emit an entry into
     478             :      * the output arrays.
     479             :      */
     480        1692 :     out->nodeNumbers = palloc_array(int, in->nNodes);
     481        1692 :     out->levelAdds = palloc_array(int, in->nNodes);
     482        1692 :     out->reconstructedValues = palloc_array(Datum, in->nNodes);
     483        1692 :     out->nNodes = 0;
     484             : 
     485       17944 :     for (i = 0; i < in->nNodes; i++)
     486             :     {
     487       16252 :         int16       nodeChar = DatumGetInt16(in->nodeLabels[i]);
     488             :         int         thisLen;
     489       16252 :         bool        res = true;
     490             :         int         j;
     491             : 
     492             :         /* If nodeChar is a dummy value, don't include it in data */
     493       16252 :         if (nodeChar <= 0)
     494        3004 :             thisLen = maxReconstrLen - 1;
     495             :         else
     496             :         {
     497       13248 :             ((unsigned char *) VARDATA(reconstrText))[maxReconstrLen - 1] = nodeChar;
     498       13248 :             thisLen = maxReconstrLen;
     499             :         }
     500             : 
     501       28004 :         for (j = 0; j < in->nkeys; j++)
     502             :         {
     503       16252 :             StrategyNumber strategy = in->scankeys[j].sk_strategy;
     504             :             text       *inText;
     505             :             int         inSize;
     506             :             int         r;
     507             : 
     508             :             /*
     509             :              * If it's a collation-aware operator, but the collation is C, we
     510             :              * can treat it as non-collation-aware.  With non-C collation we
     511             :              * need to traverse whole tree :-( so there's no point in making
     512             :              * any check here.  (Note also that our reconstructed value may
     513             :              * well end with a partial multibyte character, so that applying
     514             :              * any encoding-sensitive test to it would be risky anyhow.)
     515             :              */
     516       16252 :             if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy))
     517             :             {
     518       10160 :                 if (collate_is_c)
     519         624 :                     strategy -= SPG_STRATEGY_ADDITION;
     520             :                 else
     521        9536 :                     continue;
     522             :             }
     523             : 
     524        6716 :             inText = DatumGetTextPP(in->scankeys[j].sk_argument);
     525        6716 :             inSize = VARSIZE_ANY_EXHDR(inText);
     526             : 
     527        6716 :             r = memcmp(VARDATA(reconstrText), VARDATA_ANY(inText),
     528        6716 :                        Min(inSize, thisLen));
     529             : 
     530        6716 :             switch (strategy)
     531             :             {
     532        1408 :                 case BTLessStrategyNumber:
     533             :                 case BTLessEqualStrategyNumber:
     534        1408 :                     if (r > 0)
     535         800 :                         res = false;
     536        1408 :                     break;
     537        3404 :                 case BTEqualStrategyNumber:
     538        3404 :                     if (r != 0 || inSize < thisLen)
     539        2100 :                         res = false;
     540        3404 :                     break;
     541        1088 :                 case BTGreaterEqualStrategyNumber:
     542             :                 case BTGreaterStrategyNumber:
     543        1088 :                     if (r < 0)
     544         832 :                         res = false;
     545        1088 :                     break;
     546         816 :                 case RTPrefixStrategyNumber:
     547         816 :                     if (r != 0)
     548         768 :                         res = false;
     549         816 :                     break;
     550           0 :                 default:
     551           0 :                     elog(ERROR, "unrecognized strategy number: %d",
     552             :                          in->scankeys[j].sk_strategy);
     553             :                     break;
     554             :             }
     555             : 
     556        6716 :             if (!res)
     557        4500 :                 break;          /* no need to consider remaining conditions */
     558             :         }
     559             : 
     560       16252 :         if (res)
     561             :         {
     562       11752 :             out->nodeNumbers[out->nNodes] = i;
     563       11752 :             out->levelAdds[out->nNodes] = thisLen - in->level;
     564       11752 :             SET_VARSIZE(reconstrText, VARHDRSZ + thisLen);
     565       23504 :             out->reconstructedValues[out->nNodes] =
     566       11752 :                 datumCopy(PointerGetDatum(reconstrText), false, -1);
     567       11752 :             out->nNodes++;
     568             :         }
     569             :     }
     570             : 
     571        1692 :     PG_RETURN_VOID();
     572             : }
     573             : /* SP-GiST text_ops leaf-consistent function: rebuilds the full indexed string for a leaf tuple, stores it in out->leafValue, and returns true only if every scan key is satisfied. */
     574             : Datum
     575      235500 : spg_text_leaf_consistent(PG_FUNCTION_ARGS)
     576             : {
     577      235500 :     spgLeafConsistentIn *in = (spgLeafConsistentIn *) PG_GETARG_POINTER(0);
     578      235500 :     spgLeafConsistentOut *out = (spgLeafConsistentOut *) PG_GETARG_POINTER(1);
     579      235500 :     int         level = in->level;  /* number of bytes reconstructed so far during the descent */
     580             :     text       *leafValue,
     581      235500 :                *reconstrValue = NULL;
     582             :     char       *fullValue;  /* data bytes of the full string, without varlena header */
     583             :     int         fullLen;    /* length of fullValue in bytes */
     584             :     bool        res;
     585             :     int         j;
     586             : 
     587             :     /* all tests are exact, so no recheck is requested */
     588      235500 :     out->recheck = false;
     589             : 
     590      235500 :     leafValue = DatumGetTextPP(in->leafDatum);
     591             : 
     592             :     /* As above, in->reconstructedValue isn't toasted or short (VARDATA() is applied to it directly below). */
     593      235500 :     if (DatumGetPointer(in->reconstructedValue))
     594      235476 :         reconstrValue = (text *) DatumGetPointer(in->reconstructedValue);
     595             :     /* Sanity check: with no reconstructed value level must be 0, otherwise its data length must equal level */
     596             :     Assert(reconstrValue == NULL ? level == 0 :
     597             :            VARSIZE_ANY_EXHDR(reconstrValue) == level);
     598             : 
     599             :     /* Reconstruct the full string represented by this leaf tuple: level bytes of prefix followed by the leaf datum */
     600      235500 :     fullLen = level + VARSIZE_ANY_EXHDR(leafValue);
     601      235500 :     if (VARSIZE_ANY_EXHDR(leafValue) == 0 && level > 0)
     602             :     {   /* empty leaf datum: the reconstructed value already is the full string, so reuse it without copying */
     603       74352 :         fullValue = VARDATA(reconstrValue);
     604       74352 :         out->leafValue = PointerGetDatum(reconstrValue);
     605             :     }
     606             :     else
     607             :     {   /* otherwise allocate a new text value and fill it with the prefix bytes followed by the leaf bytes */
     608      161148 :         text       *fullText = palloc(VARHDRSZ + fullLen);
     609             : 
     610      161148 :         SET_VARSIZE(fullText, VARHDRSZ + fullLen);
     611      161148 :         fullValue = VARDATA(fullText);
     612      161148 :         if (level)
     613      161124 :             memcpy(fullValue, VARDATA(reconstrValue), level);
     614      161148 :         if (VARSIZE_ANY_EXHDR(leafValue) > 0)
     615      161148 :             memcpy(fullValue + level, VARDATA_ANY(leafValue),
     616             :                    VARSIZE_ANY_EXHDR(leafValue));
     617      161148 :         out->leafValue = PointerGetDatum(fullText);
     618             :     }
     619             : 
     620             :     /* Perform the required comparison(s); the loop stops at the first scan key that fails */
     621      235500 :     res = true;
     622      263046 :     for (j = 0; j < in->nkeys; j++)
     623             :     {
     624      235500 :         StrategyNumber strategy = in->scankeys[j].sk_strategy;
     625      235500 :         text       *query = DatumGetTextPP(in->scankeys[j].sk_argument);
     626      235500 :         int         queryLen = VARSIZE_ANY_EXHDR(query);
     627             :         int         r;  /* three-way comparison result: full string versus query */
     628             : 
     629      235500 :         if (strategy == RTPrefixStrategyNumber)
     630             :         {
     631             :             /*
     632             :              * If level >= length of query then reconstrValue must begin with
     633             :              * the query (prefix) string, so we don't need to check it again.
     634             :              */
     635         768 :             res = (level >= queryLen) ||
     636         384 :                 DatumGetBool(DirectFunctionCall2Coll(text_starts_with,
     637             :                                                      PG_GET_COLLATION(),
     638             :                                                      out->leafValue,
     639             :                                                      PointerGetDatum(query)));
     640             : 
     641         384 :             if (!res)           /* no need to consider remaining conditions */
     642         336 :                 break;
     643             : 
     644          48 :             continue;           /* prefix key satisfied; move on to the next scan key */
     645             :         }
     646             : 
     647      235116 :         if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy))
     648             :         {
     649             :             /* Collation-aware comparison; subtract the offset so the switch below sees a plain BT strategy */
     650      202728 :             strategy -= SPG_STRATEGY_ADDITION;
     651             : 
     652             :             /* If asserts enabled, verify encoding of reconstructed string */
     653             :             Assert(pg_verifymbstr(fullValue, fullLen, false));
     654             : 
     655      202728 :             r = varstr_cmp(fullValue, fullLen,
     656      202728 :                            VARDATA_ANY(query), queryLen,
     657             :                            PG_GET_COLLATION());
     658             :         }
     659             :         else
     660             :         {
     661             :             /* Non-collation-aware comparison: bytewise over the common length */
     662       32388 :             r = memcmp(fullValue, VARDATA_ANY(query), Min(queryLen, fullLen));
     663             :             /* on a tie over the common length, the shorter string compares as smaller */
     664       32388 :             if (r == 0)
     665             :             {
     666       24162 :                 if (queryLen > fullLen)
     667       12024 :                     r = -1;
     668       12138 :                 else if (queryLen < fullLen)
     669           0 :                     r = 1;
     670             :             }
     671             :         }
     672             :         /* Convert the three-way result r into the boolean answer for this strategy */
     673      235116 :         switch (strategy)
     674             :         {
     675       54376 :             case BTLessStrategyNumber:
     676       54376 :                 res = (r < 0);
     677       54376 :                 break;
     678       54376 :             case BTLessEqualStrategyNumber:
     679       54376 :                 res = (r <= 0);
     680       54376 :                 break;
     681       24300 :             case BTEqualStrategyNumber:
     682       24300 :                 res = (r == 0);
     683       24300 :                 break;
     684       51032 :             case BTGreaterEqualStrategyNumber:
     685       51032 :                 res = (r >= 0);
     686       51032 :                 break;
     687       51032 :             case BTGreaterStrategyNumber:
     688       51032 :                 res = (r > 0);
     689       51032 :                 break;
     690           0 :             default:
     691           0 :                 elog(ERROR, "unrecognized strategy number: %d",
     692             :                      in->scankeys[j].sk_strategy);
     693             :                 res = false;    /* NOTE(review): presumably only reached if elog(ERROR) were to return -- confirm */
     694             :                 break;
     695             :         }
     696             : 
     697      235116 :         if (!res)
     698      207618 :             break;              /* no need to consider remaining conditions */
     699             :     }
     700             : 
     701      235500 :     PG_RETURN_BOOL(res);
     702             : }

Generated by: LCOV version 1.16