LCOV - code coverage report
Current view: top level - src/backend/utils/adt - tsquery_cleanup.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 76.7 % 159 122
Test Date: 2026-02-17 17:20:33 Functions: 77.8 % 9 7
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * tsquery_cleanup.c
       4              :  *   Cleanup query from NOT values and/or stopword
       5              :  *   Utility functions to correct work.
       6              :  *
       7              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       8              :  *
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *    src/backend/utils/adt/tsquery_cleanup.c
      12              :  *
      13              :  *-------------------------------------------------------------------------
      14              :  */
      15              : 
      16              : #include "postgres.h"
      17              : 
      18              : #include "miscadmin.h"
      19              : #include "tsearch/ts_utils.h"
      20              : #include "varatt.h"
      21              : 
      22              : typedef struct NODE
      23              : {
      24              :     struct NODE *left;
      25              :     struct NODE *right;
      26              :     QueryItem  *valnode;
      27              : } NODE;
      28              : 
      29              : /*
      30              :  * make query tree from plain view of query
      31              :  */
      32              : static NODE *
      33         1464 : maketree(QueryItem *in)
      34              : {
      35         1464 :     NODE       *node = palloc_object(NODE);
      36              : 
      37              :     /* since this function recurses, it could be driven to stack overflow. */
      38         1464 :     check_stack_depth();
      39              : 
      40         1464 :     node->valnode = in;
      41         1464 :     node->right = node->left = NULL;
      42         1464 :     if (in->type == QI_OPR)
      43              :     {
      44          645 :         node->right = maketree(in + 1);
      45          645 :         if (in->qoperator.oper != OP_NOT)
      46          603 :             node->left = maketree(in + in->qoperator.left);
      47              :     }
      48         1464 :     return node;
      49              : }
      50              : 
      51              : /*
      52              :  * Internal state for plaintree and plainnode
      53              :  */
      54              : typedef struct
      55              : {
      56              :     QueryItem  *ptr;
      57              :     int         len;            /* allocated size of ptr */
      58              :     int         cur;            /* number of elements in ptr */
      59              : } PLAINTREE;
      60              : 
      61              : static void
      62          801 : plainnode(PLAINTREE *state, NODE *node)
      63              : {
      64              :     /* since this function recurses, it could be driven to stack overflow. */
      65          801 :     check_stack_depth();
      66              : 
      67          801 :     if (state->cur == state->len)
      68              :     {
      69            0 :         state->len *= 2;
      70            0 :         state->ptr = (QueryItem *) repalloc(state->ptr, state->len * sizeof(QueryItem));
      71              :     }
      72          801 :     memcpy(&(state->ptr[state->cur]), node->valnode, sizeof(QueryItem));
      73          801 :     if (node->valnode->type == QI_VAL)
      74          486 :         state->cur++;
      75          315 :     else if (node->valnode->qoperator.oper == OP_NOT)
      76              :     {
      77           33 :         state->ptr[state->cur].qoperator.left = 1;
      78           33 :         state->cur++;
      79           33 :         plainnode(state, node->right);
      80              :     }
      81              :     else
      82              :     {
      83          282 :         int         cur = state->cur;
      84              : 
      85          282 :         state->cur++;
      86          282 :         plainnode(state, node->right);
      87          282 :         state->ptr[cur].qoperator.left = state->cur - cur;
      88          282 :         plainnode(state, node->left);
      89              :     }
      90          801 :     pfree(node);
      91          801 : }
      92              : 
      93              : /*
      94              :  * make plain view of tree from a NODE-tree representation
      95              :  */
      96              : static QueryItem *
      97          204 : plaintree(NODE *root, int *len)
      98              : {
      99              :     PLAINTREE   pl;
     100              : 
     101          204 :     pl.cur = 0;
     102          204 :     pl.len = 16;
     103          204 :     if (root && (root->valnode->type == QI_VAL || root->valnode->type == QI_OPR))
     104              :     {
     105          204 :         pl.ptr = (QueryItem *) palloc(pl.len * sizeof(QueryItem));
     106          204 :         plainnode(&pl, root);
     107              :     }
     108              :     else
     109            0 :         pl.ptr = NULL;
     110          204 :     *len = pl.cur;
     111          204 :     return pl.ptr;
     112              : }
     113              : 
     114              : static void
     115           21 : freetree(NODE *node)
     116              : {
     117              :     /* since this function recurses, it could be driven to stack overflow. */
     118           21 :     check_stack_depth();
     119              : 
     120           21 :     if (!node)
     121            0 :         return;
     122           21 :     if (node->left)
     123            0 :         freetree(node->left);
     124           21 :     if (node->right)
     125            0 :         freetree(node->right);
     126           21 :     pfree(node);
     127              : }
     128              : 
     129              : /*
     130              :  * clean tree for ! operator.
     131              :  * It's useful for debug, but in
     132              :  * other case, such view is used with search in index.
     133              :  * Operator ! always return TRUE
     134              :  */
     135              : static NODE *
     136            0 : clean_NOT_intree(NODE *node)
     137              : {
     138              :     /* since this function recurses, it could be driven to stack overflow. */
     139            0 :     check_stack_depth();
     140              : 
     141            0 :     if (node->valnode->type == QI_VAL)
     142            0 :         return node;
     143              : 
     144            0 :     if (node->valnode->qoperator.oper == OP_NOT)
     145              :     {
     146            0 :         freetree(node);
     147            0 :         return NULL;
     148              :     }
     149              : 
     150              :     /* operator & or | */
     151            0 :     if (node->valnode->qoperator.oper == OP_OR)
     152              :     {
     153            0 :         if ((node->left = clean_NOT_intree(node->left)) == NULL ||
     154            0 :             (node->right = clean_NOT_intree(node->right)) == NULL)
     155              :         {
     156            0 :             freetree(node);
     157            0 :             return NULL;
     158              :         }
     159              :     }
     160              :     else
     161              :     {
     162            0 :         NODE       *res = node;
     163              : 
     164              :         Assert(node->valnode->qoperator.oper == OP_AND ||
     165              :                node->valnode->qoperator.oper == OP_PHRASE);
     166              : 
     167            0 :         node->left = clean_NOT_intree(node->left);
     168            0 :         node->right = clean_NOT_intree(node->right);
     169            0 :         if (node->left == NULL && node->right == NULL)
     170              :         {
     171            0 :             pfree(node);
     172            0 :             res = NULL;
     173              :         }
     174            0 :         else if (node->left == NULL)
     175              :         {
     176            0 :             res = node->right;
     177            0 :             pfree(node);
     178              :         }
     179            0 :         else if (node->right == NULL)
     180              :         {
     181            0 :             res = node->left;
     182            0 :             pfree(node);
     183              :         }
     184            0 :         return res;
     185              :     }
     186            0 :     return node;
     187              : }
     188              : 
     189              : QueryItem *
     190            0 : clean_NOT(QueryItem *ptr, int *len)
     191              : {
     192            0 :     NODE       *root = maketree(ptr);
     193              : 
     194            0 :     return plaintree(clean_NOT_intree(root), len);
     195              : }
     196              : 
     197              : 
     198              : /*
     199              :  * Remove QI_VALSTOP (stopword) nodes from query tree.
     200              :  *
     201              :  * Returns NULL if the query degenerates to nothing.  Input must not be NULL.
     202              :  *
     203              :  * When we remove a phrase operator due to removing one or both of its
     204              :  * arguments, we might need to adjust the distance of a parent phrase
     205              :  * operator.  For example, 'a' is a stopword, so:
     206              :  *      (b <-> a) <-> c  should become  b <2> c
     207              :  *      b <-> (a <-> c)  should become  b <2> c
     208              :  *      (b <-> (a <-> a)) <-> c  should become    b <3> c
     209              :  *      b <-> ((a <-> a) <-> c)  should become    b <3> c
     210              :  * To handle that, we define two output parameters:
     211              :  *      ladd: amount to add to a phrase distance to the left of this node
     212              :  *      radd: amount to add to a phrase distance to the right of this node
     213              :  * We need two outputs because we could need to bubble up adjustments to two
     214              :  * different parent phrase operators.  Consider
     215              :  *      w <-> (((a <-> x) <2> (y <3> a)) <-> z)
     216              :  * After we've removed the two a's and are considering the <2> node (which is
     217              :  * now just x <2> y), we have an ladd distance of 1 that needs to propagate
     218              :  * up to the topmost (leftmost) <->, and an radd distance of 3 that needs to
     219              :  * propagate to the rightmost <->, so that we'll end up with
     220              :  *      w <2> ((x <2> y) <4> z)
     221              :  * Near the bottom of the tree, we may have subtrees consisting only of
     222              :  * stopwords.  The distances of any phrase operators within such a subtree are
     223              :  * summed and propagated to both ladd and radd, since we don't know which side
     224              :  * of the lowest surviving phrase operator we are in.  The rule is that any
     225              :  * subtree that degenerates to NULL must return equal values of ladd and radd,
     226              :  * and the parent node dealing with it should incorporate only one of those.
     227              :  *
     228              :  * Currently, we only implement this adjustment for adjacent phrase operators.
     229              :  * Thus for example 'x <-> ((a <-> y) | z)' will become 'x <-> (y | z)', which
     230              :  * isn't ideal, but there is no way to represent the really desired semantics
     231              :  * without some redesign of the tsquery structure.  Certainly it would not be
     232              :  * any better to convert that to 'x <2> (y | z)'.  Since this is such a weird
     233              :  * corner case, let it go for now.  But we can fix it in cases where the
     234              :  * intervening non-phrase operator also gets removed, for example
     235              :  * '((x <-> a) | a) <-> y' will become 'x <2> y'.
     236              :  */
     237              : static NODE *
     238         1464 : clean_stopword_intree(NODE *node, int *ladd, int *radd)
     239              : {
     240              :     /* since this function recurses, it could be driven to stack overflow. */
     241         1464 :     check_stack_depth();
     242              : 
     243              :     /* default output parameters indicate no change in parent distance */
     244         1464 :     *ladd = *radd = 0;
     245              : 
     246         1464 :     if (node->valnode->type == QI_VAL)
     247          486 :         return node;
     248          978 :     else if (node->valnode->type == QI_VALSTOP)
     249              :     {
     250          333 :         pfree(node);
     251          333 :         return NULL;
     252              :     }
     253              : 
     254              :     Assert(node->valnode->type == QI_OPR);
     255              : 
     256          645 :     if (node->valnode->qoperator.oper == OP_NOT)
     257              :     {
     258              :         /* NOT doesn't change pattern width, so just report child distances */
     259           42 :         node->right = clean_stopword_intree(node->right, ladd, radd);
     260           42 :         if (!node->right)
     261              :         {
     262            9 :             freetree(node);
     263            9 :             return NULL;
     264              :         }
     265              :     }
     266              :     else
     267              :     {
     268          603 :         NODE       *res = node;
     269              :         bool        isphrase;
     270              :         int         ndistance,
     271              :                     lladd,
     272              :                     lradd,
     273              :                     rladd,
     274              :                     rradd;
     275              : 
     276              :         /* First, recurse */
     277          603 :         node->left = clean_stopword_intree(node->left, &lladd, &lradd);
     278          603 :         node->right = clean_stopword_intree(node->right, &rladd, &rradd);
     279              : 
     280              :         /* Check if current node is OP_PHRASE, get its distance */
     281          603 :         isphrase = (node->valnode->qoperator.oper == OP_PHRASE);
     282          603 :         ndistance = isphrase ? node->valnode->qoperator.distance : 0;
     283              : 
     284          603 :         if (node->left == NULL && node->right == NULL)
     285              :         {
     286              :             /*
     287              :              * When we collapse out a phrase node entirely, propagate its own
     288              :              * distance into both *ladd and *radd; it is the responsibility of
     289              :              * the parent node to count it only once.  Also, for a phrase
     290              :              * node, distances coming from children are summed and propagated
     291              :              * up to parent (we assume lladd == lradd and rladd == rradd, else
     292              :              * rule was broken at a lower level).  But if this isn't a phrase
     293              :              * node, take the larger of the two child distances; that
     294              :              * corresponds to what TS_execute will do in non-stopword cases.
     295              :              */
     296           12 :             if (isphrase)
     297            0 :                 *ladd = *radd = lladd + ndistance + rladd;
     298              :             else
     299           12 :                 *ladd = *radd = Max(lladd, rladd);
     300           12 :             freetree(node);
     301           12 :             return NULL;
     302              :         }
     303          591 :         else if (node->left == NULL)
     304              :         {
     305              :             /* Removing this operator and left subnode */
     306              :             /* lladd and lradd are equal/redundant, don't count both */
     307          126 :             if (isphrase)
     308              :             {
     309              :                 /* operator's own distance must propagate to left */
     310           81 :                 *ladd = lladd + ndistance + rladd;
     311           81 :                 *radd = rradd;
     312              :             }
     313              :             else
     314              :             {
     315              :                 /* at non-phrase op, just forget the left subnode entirely */
     316           45 :                 *ladd = rladd;
     317           45 :                 *radd = rradd;
     318              :             }
     319          126 :             res = node->right;
     320          126 :             pfree(node);
     321              :         }
     322          465 :         else if (node->right == NULL)
     323              :         {
     324              :             /* Removing this operator and right subnode */
     325              :             /* rladd and rradd are equal/redundant, don't count both */
     326          183 :             if (isphrase)
     327              :             {
     328              :                 /* operator's own distance must propagate to right */
     329          108 :                 *ladd = lladd;
     330          108 :                 *radd = lradd + ndistance + rradd;
     331              :             }
     332              :             else
     333              :             {
     334              :                 /* at non-phrase op, just forget the right subnode entirely */
     335           75 :                 *ladd = lladd;
     336           75 :                 *radd = lradd;
     337              :             }
     338          183 :             res = node->left;
     339          183 :             pfree(node);
     340              :         }
     341          282 :         else if (isphrase)
     342              :         {
     343              :             /* Absorb appropriate corrections at this level */
     344          171 :             node->valnode->qoperator.distance += lradd + rladd;
     345              :             /* Propagate up any unaccounted-for corrections */
     346          171 :             *ladd = lladd;
     347          171 :             *radd = rradd;
     348              :         }
     349              :         else
     350              :         {
     351              :             /* We're keeping a non-phrase operator, so ladd/radd remain 0 */
     352              :         }
     353              : 
     354          591 :         return res;
     355              :     }
     356           33 :     return node;
     357              : }
     358              : 
     359              : /*
     360              :  * Number of elements in query tree
     361              :  */
     362              : static int32
     363          801 : calcstrlen(NODE *node)
     364              : {
     365          801 :     int32       size = 0;
     366              : 
     367          801 :     if (node->valnode->type == QI_VAL)
     368              :     {
     369          486 :         size = node->valnode->qoperand.length + 1;
     370              :     }
     371              :     else
     372              :     {
     373              :         Assert(node->valnode->type == QI_OPR);
     374              : 
     375          315 :         size = calcstrlen(node->right);
     376          315 :         if (node->valnode->qoperator.oper != OP_NOT)
     377          282 :             size += calcstrlen(node->left);
     378              :     }
     379              : 
     380          801 :     return size;
     381              : }
     382              : 
     383              : /*
     384              :  * Remove QI_VALSTOP (stopword) nodes from TSQuery.
     385              :  */
     386              : TSQuery
     387          216 : cleanup_tsquery_stopwords(TSQuery in, bool noisy)
     388              : {
     389              :     int32       len,
     390              :                 lenstr,
     391              :                 commonlen,
     392              :                 i;
     393              :     NODE       *root;
     394              :     int         ladd,
     395              :                 radd;
     396              :     TSQuery     out;
     397              :     QueryItem  *items;
     398              :     char       *operands;
     399              : 
     400          216 :     if (in->size == 0)
     401            0 :         return in;
     402              : 
     403              :     /* eliminate stop words */
     404          216 :     root = clean_stopword_intree(maketree(GETQUERY(in)), &ladd, &radd);
     405          216 :     if (root == NULL)
     406              :     {
     407           12 :         if (noisy)
     408           12 :             ereport(NOTICE,
     409              :                     (errmsg("text-search query contains only stop words or doesn't contain lexemes, ignored")));
     410           12 :         out = palloc(HDRSIZETQ);
     411           12 :         out->size = 0;
     412           12 :         SET_VARSIZE(out, HDRSIZETQ);
     413           12 :         return out;
     414              :     }
     415              : 
     416              :     /*
     417              :      * Build TSQuery from plain view
     418              :      */
     419              : 
     420          204 :     lenstr = calcstrlen(root);
     421          204 :     items = plaintree(root, &len);
     422          204 :     commonlen = COMPUTESIZE(len, lenstr);
     423              : 
     424          204 :     out = palloc(commonlen);
     425          204 :     SET_VARSIZE(out, commonlen);
     426          204 :     out->size = len;
     427              : 
     428          204 :     memcpy(GETQUERY(out), items, len * sizeof(QueryItem));
     429              : 
     430          204 :     items = GETQUERY(out);
     431          204 :     operands = GETOPERAND(out);
     432         1005 :     for (i = 0; i < out->size; i++)
     433              :     {
     434          801 :         QueryOperand *op = (QueryOperand *) &items[i];
     435              : 
     436          801 :         if (op->type != QI_VAL)
     437          315 :             continue;
     438              : 
     439          486 :         memcpy(operands, GETOPERAND(in) + op->distance, op->length);
     440          486 :         operands[op->length] = '\0';
     441          486 :         op->distance = operands - GETOPERAND(out);
     442          486 :         operands += op->length + 1;
     443              :     }
     444              : 
     445          204 :     return out;
     446              : }
        

Generated by: LCOV version 2.0-1