LCOV - code coverage report
Current view: top level - src/backend/utils/adt - tsquery_cleanup.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 76.7 % 159 122
Test Date: 2026-04-07 14:16:30 Functions: 77.8 % 9 7
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * tsquery_cleanup.c
       4              :  *   Cleanup query from NOT values and/or stopword
       5              :  *   Utility functions to correct work.
       6              :  *
       7              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       8              :  *
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *    src/backend/utils/adt/tsquery_cleanup.c
      12              :  *
      13              :  *-------------------------------------------------------------------------
      14              :  */
      15              : 
      16              : #include "postgres.h"
      17              : 
      18              : #include "miscadmin.h"
      19              : #include "tsearch/ts_utils.h"
      20              : #include "varatt.h"
      21              : 
      22              : typedef struct NODE
      23              : {
      24              :     struct NODE *left;
      25              :     struct NODE *right;
      26              :     QueryItem  *valnode;
      27              : } NODE;
      28              : 
      29              : /*
      30              :  * make query tree from plain view of query
      31              :  */
      32              : static NODE *
      33         2425 : maketree(QueryItem *in)
      34              : {
      35         2425 :     NODE       *node = palloc_object(NODE);
      36              : 
      37              :     /* since this function recurses, it could be driven to stack overflow. */
      38         2425 :     check_stack_depth();
      39              : 
      40         2425 :     node->valnode = in;
      41         2425 :     node->right = node->left = NULL;
      42         2425 :     if (in->type == QI_OPR)
      43              :     {
      44         1070 :         node->right = maketree(in + 1);
      45         1070 :         if (in->qoperator.oper != OP_NOT)
      46         1000 :             node->left = maketree(in + in->qoperator.left);
      47              :     }
      48         2425 :     return node;
      49              : }
      50              : 
      51              : /*
      52              :  * Internal state for plaintree and plainnode
      53              :  */
      54              : typedef struct
      55              : {
      56              :     QueryItem  *ptr;
      57              :     int         len;            /* allocated size of ptr */
      58              :     int         cur;            /* number of elements in ptr */
      59              : } PLAINTREE;
      60              : 
      61              : static void
      62         1328 : plainnode(PLAINTREE *state, NODE *node)
      63              : {
      64              :     /* since this function recurses, it could be driven to stack overflow. */
      65         1328 :     check_stack_depth();
      66              : 
      67         1328 :     if (state->cur == state->len)
      68              :     {
      69            0 :         state->len *= 2;
      70            0 :         state->ptr = (QueryItem *) repalloc(state->ptr, state->len * sizeof(QueryItem));
      71              :     }
      72         1328 :     memcpy(&(state->ptr[state->cur]), node->valnode, sizeof(QueryItem));
      73         1328 :     if (node->valnode->type == QI_VAL)
      74          805 :         state->cur++;
      75          523 :     else if (node->valnode->qoperator.oper == OP_NOT)
      76              :     {
      77           55 :         state->ptr[state->cur].qoperator.left = 1;
      78           55 :         state->cur++;
      79           55 :         plainnode(state, node->right);
      80              :     }
      81              :     else
      82              :     {
      83          468 :         int         cur = state->cur;
      84              : 
      85          468 :         state->cur++;
      86          468 :         plainnode(state, node->right);
      87          468 :         state->ptr[cur].qoperator.left = state->cur - cur;
      88          468 :         plainnode(state, node->left);
      89              :     }
      90         1328 :     pfree(node);
      91         1328 : }
      92              : 
      93              : /*
      94              :  * make plain view of tree from a NODE-tree representation
      95              :  */
      96              : static QueryItem *
      97          337 : plaintree(NODE *root, int *len)
      98              : {
      99              :     PLAINTREE   pl;
     100              : 
     101          337 :     pl.cur = 0;
     102          337 :     pl.len = 16;
     103          337 :     if (root && (root->valnode->type == QI_VAL || root->valnode->type == QI_OPR))
     104              :     {
     105          337 :         pl.ptr = (QueryItem *) palloc(pl.len * sizeof(QueryItem));
     106          337 :         plainnode(&pl, root);
     107              :     }
     108              :     else
     109            0 :         pl.ptr = NULL;
     110          337 :     *len = pl.cur;
     111          337 :     return pl.ptr;
     112              : }
     113              : 
     114              : static void
     115           35 : freetree(NODE *node)
     116              : {
     117              :     /* since this function recurses, it could be driven to stack overflow. */
     118           35 :     check_stack_depth();
     119              : 
     120           35 :     if (!node)
     121            0 :         return;
     122           35 :     if (node->left)
     123            0 :         freetree(node->left);
     124           35 :     if (node->right)
     125            0 :         freetree(node->right);
     126           35 :     pfree(node);
     127              : }
     128              : 
     129              : /*
     130              :  * clean tree for ! operator.
     131              :  * It's useful for debug, but in
     132              :  * other case, such view is used with search in index.
     133              :  * Operator ! always return TRUE
     134              :  */
     135              : static NODE *
     136            0 : clean_NOT_intree(NODE *node)
     137              : {
     138              :     /* since this function recurses, it could be driven to stack overflow. */
     139            0 :     check_stack_depth();
     140              : 
     141            0 :     if (node->valnode->type == QI_VAL)
     142            0 :         return node;
     143              : 
     144            0 :     if (node->valnode->qoperator.oper == OP_NOT)
     145              :     {
     146            0 :         freetree(node);
     147            0 :         return NULL;
     148              :     }
     149              : 
     150              :     /* operator & or | */
     151            0 :     if (node->valnode->qoperator.oper == OP_OR)
     152              :     {
     153            0 :         if ((node->left = clean_NOT_intree(node->left)) == NULL ||
     154            0 :             (node->right = clean_NOT_intree(node->right)) == NULL)
     155              :         {
     156            0 :             freetree(node);
     157            0 :             return NULL;
     158              :         }
     159              :     }
     160              :     else
     161              :     {
     162            0 :         NODE       *res = node;
     163              : 
     164              :         Assert(node->valnode->qoperator.oper == OP_AND ||
     165              :                node->valnode->qoperator.oper == OP_PHRASE);
     166              : 
     167            0 :         node->left = clean_NOT_intree(node->left);
     168            0 :         node->right = clean_NOT_intree(node->right);
     169            0 :         if (node->left == NULL && node->right == NULL)
     170              :         {
     171            0 :             pfree(node);
     172            0 :             res = NULL;
     173              :         }
     174            0 :         else if (node->left == NULL)
     175              :         {
     176            0 :             res = node->right;
     177            0 :             pfree(node);
     178              :         }
     179            0 :         else if (node->right == NULL)
     180              :         {
     181            0 :             res = node->left;
     182            0 :             pfree(node);
     183              :         }
     184            0 :         return res;
     185              :     }
     186            0 :     return node;
     187              : }
     188              : 
     189              : QueryItem *
     190            0 : clean_NOT(QueryItem *ptr, int *len)
     191              : {
     192            0 :     NODE       *root = maketree(ptr);
     193              : 
     194            0 :     return plaintree(clean_NOT_intree(root), len);
     195              : }
     196              : 
     197              : 
     198              : /*
     199              :  * Remove QI_VALSTOP (stopword) nodes from query tree.
     200              :  *
     201              :  * Returns NULL if the query degenerates to nothing.  Input must not be NULL.
     202              :  *
     203              :  * When we remove a phrase operator due to removing one or both of its
     204              :  * arguments, we might need to adjust the distance of a parent phrase
     205              :  * operator.  For example, 'a' is a stopword, so:
     206              :  *      (b <-> a) <-> c  should become  b <2> c
     207              :  *      b <-> (a <-> c)  should become  b <2> c
     208              :  *      (b <-> (a <-> a)) <-> c  should become    b <3> c
     209              :  *      b <-> ((a <-> a) <-> c)  should become    b <3> c
     210              :  * To handle that, we define two output parameters:
     211              :  *      ladd: amount to add to a phrase distance to the left of this node
     212              :  *      radd: amount to add to a phrase distance to the right of this node
     213              :  * We need two outputs because we could need to bubble up adjustments to two
     214              :  * different parent phrase operators.  Consider
     215              :  *      w <-> (((a <-> x) <2> (y <3> a)) <-> z)
     216              :  * After we've removed the two a's and are considering the <2> node (which is
     217              :  * now just x <2> y), we have an ladd distance of 1 that needs to propagate
     218              :  * up to the topmost (leftmost) <->, and an radd distance of 3 that needs to
     219              :  * propagate to the rightmost <->, so that we'll end up with
     220              :  *      w <2> ((x <2> y) <4> z)
     221              :  * Near the bottom of the tree, we may have subtrees consisting only of
     222              :  * stopwords.  The distances of any phrase operators within such a subtree are
     223              :  * summed and propagated to both ladd and radd, since we don't know which side
     224              :  * of the lowest surviving phrase operator we are in.  The rule is that any
     225              :  * subtree that degenerates to NULL must return equal values of ladd and radd,
     226              :  * and the parent node dealing with it should incorporate only one of those.
     227              :  *
     228              :  * Currently, we only implement this adjustment for adjacent phrase operators.
     229              :  * Thus for example 'x <-> ((a <-> y) | z)' will become 'x <-> (y | z)', which
     230              :  * isn't ideal, but there is no way to represent the really desired semantics
     231              :  * without some redesign of the tsquery structure.  Certainly it would not be
     232              :  * any better to convert that to 'x <2> (y | z)'.  Since this is such a weird
     233              :  * corner case, let it go for now.  But we can fix it in cases where the
     234              :  * intervening non-phrase operator also gets removed, for example
     235              :  * '((x <-> a) | a) <-> y' will become 'x <2> y'.
     236              :  */
     237              : static NODE *
     238         2425 : clean_stopword_intree(NODE *node, int *ladd, int *radd)
     239              : {
     240              :     /* since this function recurses, it could be driven to stack overflow. */
     241         2425 :     check_stack_depth();
     242              : 
     243              :     /* default output parameters indicate no change in parent distance */
     244         2425 :     *ladd = *radd = 0;
     245              : 
     246         2425 :     if (node->valnode->type == QI_VAL)
     247          805 :         return node;
     248         1620 :     else if (node->valnode->type == QI_VALSTOP)
     249              :     {
     250          550 :         pfree(node);
     251          550 :         return NULL;
     252              :     }
     253              : 
     254              :     Assert(node->valnode->type == QI_OPR);
     255              : 
     256         1070 :     if (node->valnode->qoperator.oper == OP_NOT)
     257              :     {
     258              :         /* NOT doesn't change pattern width, so just report child distances */
     259           70 :         node->right = clean_stopword_intree(node->right, ladd, radd);
     260           70 :         if (!node->right)
     261              :         {
     262           15 :             freetree(node);
     263           15 :             return NULL;
     264              :         }
     265              :     }
     266              :     else
     267              :     {
     268         1000 :         NODE       *res = node;
     269              :         bool        isphrase;
     270              :         int         ndistance,
     271              :                     lladd,
     272              :                     lradd,
     273              :                     rladd,
     274              :                     rradd;
     275              : 
     276              :         /* First, recurse */
     277         1000 :         node->left = clean_stopword_intree(node->left, &lladd, &lradd);
     278         1000 :         node->right = clean_stopword_intree(node->right, &rladd, &rradd);
     279              : 
     280              :         /* Check if current node is OP_PHRASE, get its distance */
     281         1000 :         isphrase = (node->valnode->qoperator.oper == OP_PHRASE);
     282         1000 :         ndistance = isphrase ? node->valnode->qoperator.distance : 0;
     283              : 
     284         1000 :         if (node->left == NULL && node->right == NULL)
     285              :         {
     286              :             /*
     287              :              * When we collapse out a phrase node entirely, propagate its own
     288              :              * distance into both *ladd and *radd; it is the responsibility of
     289              :              * the parent node to count it only once.  Also, for a phrase
     290              :              * node, distances coming from children are summed and propagated
     291              :              * up to parent (we assume lladd == lradd and rladd == rradd, else
     292              :              * rule was broken at a lower level).  But if this isn't a phrase
     293              :              * node, take the larger of the two child distances; that
     294              :              * corresponds to what TS_execute will do in non-stopword cases.
     295              :              */
     296           20 :             if (isphrase)
     297            0 :                 *ladd = *radd = lladd + ndistance + rladd;
     298              :             else
     299           20 :                 *ladd = *radd = Max(lladd, rladd);
     300           20 :             freetree(node);
     301           20 :             return NULL;
     302              :         }
     303          980 :         else if (node->left == NULL)
     304              :         {
     305              :             /* Removing this operator and left subnode */
     306              :             /* lladd and lradd are equal/redundant, don't count both */
     307          209 :             if (isphrase)
     308              :             {
     309              :                 /* operator's own distance must propagate to left */
     310          135 :                 *ladd = lladd + ndistance + rladd;
     311          135 :                 *radd = rradd;
     312              :             }
     313              :             else
     314              :             {
     315              :                 /* at non-phrase op, just forget the left subnode entirely */
     316           74 :                 *ladd = rladd;
     317           74 :                 *radd = rradd;
     318              :             }
     319          209 :             res = node->right;
     320          209 :             pfree(node);
     321              :         }
     322          771 :         else if (node->right == NULL)
     323              :         {
     324              :             /* Removing this operator and right subnode */
     325              :             /* rladd and rradd are equal/redundant, don't count both */
     326          303 :             if (isphrase)
     327              :             {
     328              :                 /* operator's own distance must propagate to right */
     329          180 :                 *ladd = lladd;
     330          180 :                 *radd = lradd + ndistance + rradd;
     331              :             }
     332              :             else
     333              :             {
     334              :                 /* at non-phrase op, just forget the right subnode entirely */
     335          123 :                 *ladd = lladd;
     336          123 :                 *radd = lradd;
     337              :             }
     338          303 :             res = node->left;
     339          303 :             pfree(node);
     340              :         }
     341          468 :         else if (isphrase)
     342              :         {
     343              :             /* Absorb appropriate corrections at this level */
     344          285 :             node->valnode->qoperator.distance += lradd + rladd;
     345              :             /* Propagate up any unaccounted-for corrections */
     346          285 :             *ladd = lladd;
     347          285 :             *radd = rradd;
     348              :         }
     349              :         else
     350              :         {
     351              :             /* We're keeping a non-phrase operator, so ladd/radd remain 0 */
     352              :         }
     353              : 
     354          980 :         return res;
     355              :     }
     356           55 :     return node;
     357              : }
     358              : 
     359              : /*
     360              :  * Number of elements in query tree
     361              :  */
     362              : static int32
     363         1328 : calcstrlen(NODE *node)
     364              : {
     365         1328 :     int32       size = 0;
     366              : 
     367         1328 :     if (node->valnode->type == QI_VAL)
     368              :     {
     369          805 :         size = node->valnode->qoperand.length + 1;
     370              :     }
     371              :     else
     372              :     {
     373              :         Assert(node->valnode->type == QI_OPR);
     374              : 
     375          523 :         size = calcstrlen(node->right);
     376          523 :         if (node->valnode->qoperator.oper != OP_NOT)
     377          468 :             size += calcstrlen(node->left);
     378              :     }
     379              : 
     380         1328 :     return size;
     381              : }
     382              : 
     383              : /*
     384              :  * Remove QI_VALSTOP (stopword) nodes from TSQuery.
     385              :  */
     386              : TSQuery
     387          355 : cleanup_tsquery_stopwords(TSQuery in, bool noisy)
     388              : {
     389              :     int32       len,
     390              :                 lenstr,
     391              :                 commonlen,
     392              :                 i;
     393              :     NODE       *root;
     394              :     int         ladd,
     395              :                 radd;
     396              :     TSQuery     out;
     397              :     QueryItem  *items;
     398              :     char       *operands;
     399              : 
     400          355 :     if (in->size == 0)
     401            0 :         return in;
     402              : 
     403              :     /* eliminate stop words */
     404          355 :     root = clean_stopword_intree(maketree(GETQUERY(in)), &ladd, &radd);
     405          355 :     if (root == NULL)
     406              :     {
     407           18 :         if (noisy)
     408           18 :             ereport(NOTICE,
     409              :                     (errmsg("text-search query contains only stop words or doesn't contain lexemes, ignored")));
     410           18 :         out = palloc(HDRSIZETQ);
     411           18 :         out->size = 0;
     412           18 :         SET_VARSIZE(out, HDRSIZETQ);
     413           18 :         return out;
     414              :     }
     415              : 
     416              :     /*
     417              :      * Build TSQuery from plain view
     418              :      */
     419              : 
     420          337 :     lenstr = calcstrlen(root);
     421          337 :     items = plaintree(root, &len);
     422          337 :     commonlen = COMPUTESIZE(len, lenstr);
     423              : 
     424          337 :     out = palloc(commonlen);
     425          337 :     SET_VARSIZE(out, commonlen);
     426          337 :     out->size = len;
     427              : 
     428          337 :     memcpy(GETQUERY(out), items, len * sizeof(QueryItem));
     429              : 
     430          337 :     items = GETQUERY(out);
     431          337 :     operands = GETOPERAND(out);
     432         1665 :     for (i = 0; i < out->size; i++)
     433              :     {
     434         1328 :         QueryOperand *op = (QueryOperand *) &items[i];
     435              : 
     436         1328 :         if (op->type != QI_VAL)
     437          523 :             continue;
     438              : 
     439          805 :         memcpy(operands, GETOPERAND(in) + op->distance, op->length);
     440          805 :         operands[op->length] = '\0';
     441          805 :         op->distance = operands - GETOPERAND(out);
     442          805 :         operands += op->length + 1;
     443              :     }
     444              : 
     445          337 :     return out;
     446              : }
        

Generated by: LCOV version 2.0-1