LCOV - code coverage report
Current view: top level - src/backend/utils/adt - tsvector_op.c (source / functions) Hit Total Coverage
Test: PostgreSQL 14devel Lines: 958 1111 86.2 %
Date: 2021-01-26 03:06:49 Functions: 40 49 81.6 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * tsvector_op.c
       4             :  *    operations over tsvector
       5             :  *
       6             :  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
       7             :  *
       8             :  *
       9             :  * IDENTIFICATION
      10             :  *    src/backend/utils/adt/tsvector_op.c
      11             :  *
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : #include "postgres.h"
      15             : 
      16             : #include <limits.h>
      17             : 
      18             : #include "access/htup_details.h"
      19             : #include "catalog/namespace.h"
      20             : #include "catalog/pg_type.h"
      21             : #include "commands/trigger.h"
      22             : #include "executor/spi.h"
      23             : #include "funcapi.h"
      24             : #include "lib/qunique.h"
      25             : #include "mb/pg_wchar.h"
      26             : #include "miscadmin.h"
      27             : #include "parser/parse_coerce.h"
      28             : #include "tsearch/ts_utils.h"
      29             : #include "utils/array.h"
      30             : #include "utils/builtins.h"
      31             : #include "utils/lsyscache.h"
      32             : #include "utils/regproc.h"
      33             : #include "utils/rel.h"
      34             : 
      35             : /*
                     :  * CHKVAL: a bundle of four pointers.  Nothing in the part of the file
                     :  * reviewed here reads or writes it.
                     :  *
                     :  * NOTE(review): judging by the names only, arrb/arre look like the
                     :  * begin/end of a WordEntry array, and values/operand like the string
                     :  * storage of a tsvector and of a query -- confirm against the users.
                     :  */
      36             : typedef struct
      37             : {
      38             :     WordEntry  *arrb;
      39             :     WordEntry  *arre;
      40             :     char       *values;
      41             :     char       *operand;
      42             : } CHKVAL;
      43             : 
      44             : /*
                     :  * StatEntry: node of a binary tree (left/right child pointers) that
                     :  * carries one lexeme in its trailing flexible array member.
                     :  *
                     :  * STATENTRYHDRSZ is the size of the fixed-size part of the node, i.e. the
                     :  * offset of lexeme[] inside the struct.
                     :  *
                     :  * NOTE(review): lenlexeme presumably holds the byte length of lexeme[],
                     :  * and ndoc/nentry look like per-lexeme document/occurrence counters; the
                     :  * code that fills them is outside the reviewed part -- confirm.
                     :  */
      45             : typedef struct StatEntry
      46             : {
      47             :     uint32      ndoc;           /* zero indicates that we were already here
      48             :                                  * while walking through the tree */
      49             :     uint32      nentry;
      50             :     struct StatEntry *left;
      51             :     struct StatEntry *right;
      52             :     uint32      lenlexeme;
      53             :     char        lexeme[FLEXIBLE_ARRAY_MEMBER];
      54             : } StatEntry;
      55             : 
      56             : #define STATENTRYHDRSZ  (offsetof(StatEntry, lexeme))
      57             : /*
                     :  * TSVectorStat: container that owns the root of a StatEntry tree plus an
                     :  * array of StatEntry pointers ("stack") with a current position.
                     :  *
                     :  * NOTE(review): nothing in the reviewed part uses this struct.  From the
                     :  * names, weight looks like a weight filter, maxdepth like the deepest
                     :  * level of the tree, and stack/stackpos like an explicit stack used to
                     :  * walk the tree without recursion -- confirm against the users.
                     :  */
      58             : typedef struct
      59             : {
      60             :     int32       weight;
      61             : 
      62             :     uint32      maxdepth;
      63             : 
      64             :     StatEntry **stack;
      65             :     uint32      stackpos;
      66             : 
      67             :     StatEntry  *root;
      68             : } TSVectorStat;
      69             : 
      70             : /*
                     :  * Forward declarations of file-local (static) functions.
                     :  * tsvector_bsearch() is defined further down and is used by the
                     :  * setweight-by-filter and ts_delete implementations; the definitions of
                     :  * the other two are not in the part of the file reviewed here.
                     :  */
      71             : static TSTernaryValue TS_execute_recurse(QueryItem *curitem, void *arg,
      72             :                                          uint32 flags,
      73             :                                          TSExecuteCallback chkcond);
      74             : static int  tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len);
      75             : static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
      76             : 
      77             : 
      78             : /*
      79             :  * Order: haspos, len, word, for all positions (pos, weight)
                     :  *
                     :  * Three-way comparison of two tsvectors; returns a negative value, zero,
                     :  * or a positive value.  The checks are applied in this sequence and the
                     :  * first difference decides:
                     :  *
                     :  *  1. total varlena size (VARSIZE), smaller sorts first;
                     :  *  2. number of entries (size), smaller sorts first;
                     :  *  3. then entry by entry, in array order:
                     :  *     a. haspos flag -- the entry that HAS positions sorts first;
                     :  *     b. lexeme text, using the result of tsCompareString() as is;
                     :  *     c. if both have positions: the number of positions, where the
                     :  *        entry with MORE positions sorts first, and then each
                     :  *        position's pos and weight, where the LARGER value sorts
                     :  *        first.
                     :  *
                     :  * Note that steps 3a and 3c use a reversed sense (greater yields -1)
                     :  * compared with steps 1 and 2.
      80             :  */
      81             : static int
      82           2 : silly_cmp_tsvector(const TSVector a, const TSVector b)
      83             : {
      84           2 :     if (VARSIZE(a) < VARSIZE(b))
      85           0 :         return -1;
      86           2 :     else if (VARSIZE(a) > VARSIZE(b))
      87           0 :         return 1;
      88           2 :     else if (a->size < b->size)
      89           0 :         return -1;
      90           2 :     else if (a->size > b->size)
      91           0 :         return 1;
      92             :     else
      93             :     {
      94           2 :         WordEntry  *aptr = ARRPTR(a);
      95           2 :         WordEntry  *bptr = ARRPTR(b);
      96           2 :         int         i = 0;
      97             :         int         res;
      98             : 
      99             : 
     100           8 :         for (i = 0; i < a->size; i++) /* sizes are equal here, so a->size bounds both arrays */
     101             :         {
     102           6 :             if (aptr->haspos != bptr->haspos)
     103             :             {
     104           0 :                 return (aptr->haspos > bptr->haspos) ? -1 : 1;
     105             :             }
     106           6 :             else if ((res = tsCompareString(STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) != 0)
     107             :             {
     108           0 :                 return res;
     109             :             }
     110           6 :             else if (aptr->haspos) /* haspos is equal on both sides at this point */
     111             :             {
     112           0 :                 WordEntryPos *ap = POSDATAPTR(a, aptr);
     113           0 :                 WordEntryPos *bp = POSDATAPTR(b, bptr);
     114             :                 int         j;
     115             : 
     116           0 :                 if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
     117           0 :                     return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;
     118             : 
     119           0 :                 for (j = 0; j < POSDATALEN(a, aptr); j++)
     120             :                 {
     121           0 :                     if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
     122             :                     {
     123           0 :                         return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1;
     124             :                     }
     125           0 :                     else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp))
     126             :                     {
     127           0 :                         return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1;
     128             :                     }
     129           0 :                     ap++, bp++;
     130             :                 }
     131             :             }
     132             : 
     133           6 :             aptr++;
     134           6 :             bptr++;
     135             :         }
     136             :     }
     137             : 
     138           2 :     return 0;
     139             : }
     140             : /*
                     :  * TSVECTORCMPFUNC(type, action, ret) expands to the definition of a
                     :  * function named tsvector_<type>.  The generated function fetches its two
                     :  * tsvector arguments, calls silly_cmp_tsvector() on them, releases any
                     :  * argument copies, and returns "res action 0" through PG_RETURN_<ret>.
                     :  *
                     :  * The trailing "extern int no_such_variable" declaration exists only so
                     :  * that each use of the macro can be followed by a semicolon.
                     :  *
                     :  * For the "cmp" instantiation the action is "+", so the returned value is
                     :  * res + 0, i.e. the raw comparison result.
                     :  */
     141             : #define TSVECTORCMPFUNC( type, action, ret )            \
     142             : Datum                                                   \
     143             : tsvector_##type(PG_FUNCTION_ARGS)                       \
     144             : {                                                       \
     145             :     TSVector    a = PG_GETARG_TSVECTOR(0);              \
     146             :     TSVector    b = PG_GETARG_TSVECTOR(1);              \
     147             :     int         res = silly_cmp_tsvector(a, b);         \
     148             :     PG_FREE_IF_COPY(a,0);                               \
     149             :     PG_FREE_IF_COPY(b,1);                               \
     150             :     PG_RETURN_##ret( res action 0 );                    \
     151             : }   \
     152             : /* keep compiler quiet - no extra ; */                  \
     153             : extern int no_such_variable
     154             : 
     155           0 : TSVECTORCMPFUNC(lt, <, BOOL);
     156           0 : TSVECTORCMPFUNC(le, <=, BOOL);
     157           2 : TSVECTORCMPFUNC(eq, ==, BOOL);
     158           0 : TSVECTORCMPFUNC(ge, >=, BOOL);
     159           0 : TSVECTORCMPFUNC(gt, >, BOOL);
     160           0 : TSVECTORCMPFUNC(ne, !=, BOOL);
     161           0 : TSVECTORCMPFUNC(cmp, +, INT32);
     162             : /*
                     :  * tsvector_strip(tsvector) -> tsvector
                     :  *
                     :  * Build a new tsvector that contains the same lexemes as the input, in
                     :  * the same order, but with haspos cleared on every entry, so no position
                     :  * or weight data is carried over.
                     :  *
                     :  * The first loop adds up the lexeme lengths so that the result can be
                     :  * allocated (zero-filled) in one piece; the second loop copies the
                     :  * lexeme bytes back to back and records each entry's length and offset.
                     :  */
     163             : Datum
     164          60 : tsvector_strip(PG_FUNCTION_ARGS)
     165             : {
     166          60 :     TSVector    in = PG_GETARG_TSVECTOR(0);
     167             :     TSVector    out;
     168             :     int         i,
     169          60 :                 len = 0;
     170          60 :     WordEntry  *arrin = ARRPTR(in),
     171             :                *arrout;
     172             :     char       *cur;
     173             : 
     174         212 :     for (i = 0; i < in->size; i++)
     175         152 :         len += arrin[i].len;
     176             : 
     177          60 :     len = CALCDATASIZE(in->size, len); /* from here on len is the total allocation size */
     178          60 :     out = (TSVector) palloc0(len);
     179          60 :     SET_VARSIZE(out, len);
     180          60 :     out->size = in->size; /* must be set before ARRPTR/STRPTR are applied to out */
     181          60 :     arrout = ARRPTR(out);
     182          60 :     cur = STRPTR(out);
     183         212 :     for (i = 0; i < in->size; i++)
     184             :     {
     185         152 :         memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
     186         152 :         arrout[i].haspos = 0;
     187         152 :         arrout[i].len = arrin[i].len;
     188         152 :         arrout[i].pos = cur - STRPTR(out);
     189         152 :         cur += arrout[i].len;
     190             :     }
     191             : 
     192          60 :     PG_FREE_IF_COPY(in, 0);
     193          60 :     PG_RETURN_POINTER(out);
     194             : }
     195             : /*
                     :  * tsvector_length(tsvector) -> int4
                     :  *
                     :  * Return the number of entries (the size field) of the input tsvector.
                     :  * The value is read before the argument copy, if any, is released.
                     :  */
     196             : Datum
     197           4 : tsvector_length(PG_FUNCTION_ARGS)
     198             : {
     199           4 :     TSVector    in = PG_GETARG_TSVECTOR(0);
     200           4 :     int32       ret = in->size;
     201             : 
     202           4 :     PG_FREE_IF_COPY(in, 0);
     203           4 :     PG_RETURN_INT32(ret);
     204             : }
     205             : /*
                     :  * tsvector_setweight(tsvector, "char") -> tsvector
                     :  *
                     :  * Return a copy of the input in which every position of every entry
                     :  * carries the requested weight.  The weight letter is accepted in either
                     :  * case and maps to a number: A = 3, B = 2, C = 1, D = 0.  Any other
                     :  * character raises an error via elog(ERROR); the message prints the
                     :  * character as a number (%d).
                     :  *
                     :  * Entries whose POSDATALEN is zero are left untouched.
                     :  */
     206             : Datum
     207           8 : tsvector_setweight(PG_FUNCTION_ARGS)
     208             : {
     209           8 :     TSVector    in = PG_GETARG_TSVECTOR(0);
     210           8 :     char        cw = PG_GETARG_CHAR(1);
     211             :     TSVector    out;
     212             :     int         i,
     213             :                 j;
     214             :     WordEntry  *entry;
     215             :     WordEntryPos *p;
     216           8 :     int         w = 0;
     217             : 
     218           8 :     switch (cw)
     219             :     {
     220           0 :         case 'A':
     221             :         case 'a':
     222           0 :             w = 3;
     223           0 :             break;
     224           0 :         case 'B':
     225             :         case 'b':
     226           0 :             w = 2;
     227           0 :             break;
     228           8 :         case 'C':
     229             :         case 'c':
     230           8 :             w = 1;
     231           8 :             break;
     232           0 :         case 'D':
     233             :         case 'd':
     234           0 :             w = 0;
     235           0 :             break;
     236           0 :         default:
     237             :             /* internal error */
     238           0 :             elog(ERROR, "unrecognized weight: %d", cw);
     239             :     }
     240             : 
     241           8 :     out = (TSVector) palloc(VARSIZE(in)); /* byte-for-byte copy; only weights are changed below */
     242           8 :     memcpy(out, in, VARSIZE(in));
     243           8 :     entry = ARRPTR(out);
     244           8 :     i = out->size;
     245          40 :     while (i--)
     246             :     {
     247          32 :         if ((j = POSDATALEN(out, entry)) != 0)
     248             :         {
     249          32 :             p = POSDATAPTR(out, entry);
     250         112 :             while (j--)
     251             :             {
     252          80 :                 WEP_SETWEIGHT(*p, w);
     253          80 :                 p++;
     254             :             }
     255             :         }
     256          32 :         entry++;
     257             :     }
     258             : 
     259           8 :     PG_FREE_IF_COPY(in, 0);
     260           8 :     PG_RETURN_POINTER(out);
     261             : }
     262             : 
     263             : /*
     264             :  * setweight(tsin tsvector, char_weight "char", lexemes "text"[])
     265             :  *
     266             :  * Assign weight w to elements of tsin that are listed in lexemes.
                     :  *
                     :  * The weight letter is accepted in either case and maps to a number:
                     :  * A = 3, B = 2, C = 1, D = 0; any other character raises an error via
                     :  * elog(ERROR).
                     :  *
                     :  * The result is a byte-for-byte copy of tsin in which only the weights
                     :  * of the matching entries' positions are changed.  Array elements that
                     :  * are not found in tsin, or whose entry has no positions, are silently
                     :  * skipped.  A NULL array element raises
                     :  * ERRCODE_NULL_VALUE_NOT_ALLOWED.
                     :  *
                     :  * NOTE(review): the element text is read with VARDATA/VARSIZE, whereas
                     :  * compare_text_lexemes() in this file uses the _ANY variants -- confirm
                     :  * that deconstructed array elements always carry a full-size header.
     267             :  */
     268             : Datum
     269          20 : tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
     270             : {
     271          20 :     TSVector    tsin = PG_GETARG_TSVECTOR(0);
     272          20 :     char        char_weight = PG_GETARG_CHAR(1);
     273          20 :     ArrayType  *lexemes = PG_GETARG_ARRAYTYPE_P(2);
     274             : 
     275             :     TSVector    tsout;
     276             :     int         i,
     277             :                 j,
     278             :                 nlexemes,
     279             :                 weight;
     280             :     WordEntry  *entry;
     281             :     Datum      *dlexemes;
     282             :     bool       *nulls;
     283             : 
     284          20 :     switch (char_weight)
     285             :     {
     286           0 :         case 'A':
     287             :         case 'a':
     288           0 :             weight = 3;
     289           0 :             break;
     290           0 :         case 'B':
     291             :         case 'b':
     292           0 :             weight = 2;
     293           0 :             break;
     294          20 :         case 'C':
     295             :         case 'c':
     296          20 :             weight = 1;
     297          20 :             break;
     298           0 :         case 'D':
     299             :         case 'd':
     300           0 :             weight = 0;
     301           0 :             break;
     302           0 :         default:
     303             :             /* internal error */
     304           0 :             elog(ERROR, "unrecognized weight: %c", char_weight);
     305             :     }
     306             : 
     307          20 :     tsout = (TSVector) palloc(VARSIZE(tsin));
     308          20 :     memcpy(tsout, tsin, VARSIZE(tsin));
     309          20 :     entry = ARRPTR(tsout);
     310             : 
     311          20 :     deconstruct_array(lexemes, TEXTOID, -1, false, TYPALIGN_INT,
     312             :                       &dlexemes, &nulls, &nlexemes);
     313             : 
     314             :     /*
     315             :      * Assuming that lexemes array is significantly shorter than tsvector we
     316             :      * can iterate through lexemes performing binary search of each lexeme
     317             :      * from lexemes in tsvector.
     318             :      */
     319          52 :     for (i = 0; i < nlexemes; i++)
     320             :     {
     321             :         char       *lex;
     322             :         int         lex_len,
     323             :                     lex_pos;
     324             : 
     325          36 :         if (nulls[i])
     326           4 :             ereport(ERROR,
     327             :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
     328             :                      errmsg("lexeme array may not contain nulls")));
     329             : 
     330          32 :         lex = VARDATA(dlexemes[i]);
     331          32 :         lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
     332          32 :         lex_pos = tsvector_bsearch(tsout, lex, lex_len);
     333             : 
     334          32 :         if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0)
     335             :         {
     336          16 :             WordEntryPos *p = POSDATAPTR(tsout, entry + lex_pos);
     337             : 
     338          52 :             while (j--)
     339             :             {
     340          36 :                 WEP_SETWEIGHT(*p, weight);
     341          36 :                 p++;
     342             :             }
     343             :         }
     344             :     }
     345             : 
     346          16 :     PG_FREE_IF_COPY(tsin, 0);
     347          16 :     PG_FREE_IF_COPY(lexemes, 2);
     348             : 
     349          16 :     PG_RETURN_POINTER(tsout);
     350             : }
     351             : /*
                     :  * compareEntry(pa, a, pb, b): compare the lexeme text of two WordEntry
                     :  * pointers a and b with tsCompareString().  pa and pb are the base
                     :  * addresses to which each entry's pos offset is added to locate its
                     :  * bytes; the final tsCompareString() argument is passed as false.
                     :  */
     352             : #define compareEntry(pa, a, pb, b) \
     353             :     tsCompareString((pa) + (a)->pos, (a)->len,    \
     354             :                     (pb) + (b)->pos, (b)->len,    \
     355             :                     false)
     356             : 
     357             : /*
     358             :  * Add positions from src to dest after offsetting them by maxpos.
     359             :  * Return the number added (might be less than expected due to overflow)
                     :  *
                     :  * srcptr/destptr are the entries inside src/dest whose position lists are
                     :  * read and appended to.  If destptr has no positions yet (haspos is
                     :  * false) its position count is reset to zero before appending.
                     :  *
                     :  * Copying stops as soon as any of these holds:
                     :  *   - all positions of the source entry have been copied;
                     :  *   - the destination already holds MAXNUMPOS positions;
                     :  *   - the last position stored in the destination equals
                     :  *     MAXENTRYPOS - 1.
                     :  *
                     :  * Each copied position keeps its source weight; its position value is
                     :  * the source position plus maxpos, passed through LIMITPOS().
                     :  * destptr->haspos is set if at least one position was appended.
     360             :  */
     361             : static int32
     362           8 : add_pos(TSVector src, WordEntry *srcptr,
     363             :         TSVector dest, WordEntry *destptr,
     364             :         int32 maxpos)
     365             : {
     366           8 :     uint16     *clen = &_POSVECPTR(dest, destptr)->npos; /* updated in place through the pointer */
     367             :     int         i;
     368           8 :     uint16      slen = POSDATALEN(src, srcptr),
     369             :                 startlen;
     370           8 :     WordEntryPos *spos = POSDATAPTR(src, srcptr),
     371           8 :                *dpos = POSDATAPTR(dest, destptr);
     372             : 
     373           8 :     if (!destptr->haspos)
     374           0 :         *clen = 0;
     375             : 
     376           8 :     startlen = *clen; /* remembered to compute the number appended */
     377           8 :     for (i = 0;
     378          16 :          i < slen && *clen < MAXNUMPOS &&
     379           8 :          (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1);
     380           8 :          i++)
     381             :     {
     382           8 :         WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
     383           8 :         WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
     384           8 :         (*clen)++;
     385             :     }
     386             : 
     387           8 :     if (*clen != startlen)
     388           8 :         destptr->haspos = 1;
     389           8 :     return *clen - startlen;
     390             : }
     391             : 
     392             : /*
     393             :  * Perform binary search of given lexeme in TSVector.
     394             :  * Returns lexeme position in TSVector's entry array or -1 if lexeme wasn't
     395             :  * found.
                     :  *
                     :  * lexeme/lexeme_len give the bytes to look for as a pointer plus length.
                     :  * The search keeps a half-open range [StopLow, StopHigh) of candidate
                     :  * entries and compares with tsCompareString(), so it relies on the
                     :  * entry array being sorted in the order that function defines.
     396             :  */
     397             : static int
     398         132 : tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len)
     399             : {
     400         132 :     WordEntry  *arrin = ARRPTR(tsv);
     401         132 :     int         StopLow = 0,
     402         132 :                 StopHigh = tsv->size,
     403             :                 StopMiddle,
     404             :                 cmp;
     405             : 
     406         336 :     while (StopLow < StopHigh)
     407             :     {
     408         304 :         StopMiddle = (StopLow + StopHigh) / 2;
     409             : 
     410         608 :         cmp = tsCompareString(lexeme, lexeme_len,
     411         304 :                               STRPTR(tsv) + arrin[StopMiddle].pos,
     412         304 :                               arrin[StopMiddle].len,
     413             :                               false);
     414             : 
     415         304 :         if (cmp < 0)
     416         128 :             StopHigh = StopMiddle; /* target is before the middle entry */
     417         176 :         else if (cmp > 0)
     418          76 :             StopLow = StopMiddle + 1; /* target is after the middle entry */
     419             :         else                    /* found it */
     420         100 :             return StopMiddle;
     421             :     }
     422             : 
     423          32 :     return -1;
     424             : }
     425             : 
     426             : /*
     427             :  * qsort comparator functions
                     :  *
                     :  * compare_int: order two ints ascending.  Both arguments point to int
                     :  * values; the result is 0 when they are equal, 1 when the first is
                     :  * greater, and -1 otherwise.  Explicit comparisons are used rather than
                     :  * a subtraction.
     428             :  */
     429             : 
     430             : static int
     431          44 : compare_int(const void *va, const void *vb)
     432             : {
     433          44 :     int         a = *((const int *) va);
     434          44 :     int         b = *((const int *) vb);
     435             : 
     436          44 :     if (a == b)
     437           8 :         return 0;
     438          36 :     return (a > b) ? 1 : -1;
     439             : }
     440             : /*
                     :  * compare_text_lexemes: comparator for two Datums that each reference a
                     :  * text value.  The payload pointer and payload length of each value are
                     :  * obtained with VARDATA_ANY / VARSIZE_ANY_EXHDR and then handed to
                     :  * tsCompareString(), whose result is returned unchanged.
                     :  */
     441             : static int
     442          68 : compare_text_lexemes(const void *va, const void *vb)
     443             : {
     444          68 :     Datum       a = *((const Datum *) va);
     445          68 :     Datum       b = *((const Datum *) vb);
     446          68 :     char       *alex = VARDATA_ANY(a);
     447          68 :     int         alex_len = VARSIZE_ANY_EXHDR(a);
     448          68 :     char       *blex = VARDATA_ANY(b);
     449          68 :     int         blex_len = VARSIZE_ANY_EXHDR(b);
     450             : 
     451          68 :     return tsCompareString(alex, alex_len, blex, blex_len, false);
     452             : }
     453             : 
     454             : /*
     455             :  * Internal routine to delete lexemes from TSVector by array of offsets.
     456             :  *
     457             :  * int *indices_to_delete -- array of lexeme offsets to delete (modified here!)
     458             :  * int indices_count -- size of that array
     459             :  *
     460             :  * Returns new TSVector without given lexemes along with their positions
     461             :  * and weights.
                     :  *
                     :  * The caller's array is sorted and de-duplicated in place when it holds
                     :  * more than one element.  Every index must lie in [0, tsv->size); this is
                     :  * only checked by the Assert near the end.  The input tsvector itself is
                     :  * not modified.
     462             :  */
     463             : static TSVector
     464          40 : tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
     465             :                            int indices_count)
     466             : {
     467             :     TSVector    tsout;
     468          40 :     WordEntry  *arrin = ARRPTR(tsv),
     469             :                *arrout;
     470          40 :     char       *data = STRPTR(tsv),
     471             :                *dataout;
     472             :     int         i,              /* index in arrin */
     473             :                 j,              /* index in arrout */
     474             :                 k,              /* index in indices_to_delete */
     475             :                 curoff;         /* index in dataout area */
     476             : 
     477             :     /*
     478             :      * Sort the filter array to simplify membership checks below.  Also, get
     479             :      * rid of any duplicate entries, so that we can assume that indices_count
     480             :      * is exactly equal to the number of lexemes that will be removed.
     481             :      */
     482          40 :     if (indices_count > 1)
     483             :     {
     484          16 :         qsort(indices_to_delete, indices_count, sizeof(int), compare_int);
     485          16 :         indices_count = qunique(indices_to_delete, indices_count, sizeof(int),
     486             :                                 compare_int);
     487             :     }
     488             : 
     489             :     /*
     490             :      * Here we overestimate tsout size, since we don't know how much space is
     491             :      * used by the deleted lexeme(s).  We will set exact size below.
     492             :      */
     493          40 :     tsout = (TSVector) palloc0(VARSIZE(tsv));
     494             : 
     495             :     /* This count must be correct because STRPTR(tsout) relies on it. */
     496          40 :     tsout->size = tsv->size - indices_count;
     497             : 
     498             :     /*
     499             :      * Copy tsv to tsout, skipping lexemes listed in indices_to_delete.
     500             :      */
     501          40 :     arrout = ARRPTR(tsout);
     502          40 :     dataout = STRPTR(tsout);
     503          40 :     curoff = 0;
     504         240 :     for (i = j = k = 0; i < tsv->size; i++)
     505             :     {
     506             :         /*
     507             :          * If current i is present in indices_to_delete, skip this lexeme.
     508             :          * Since indices_to_delete is already sorted, we only need to check
     509             :          * the current (k'th) entry.
     510             :          */
     511         200 :         if (k < indices_count && i == indices_to_delete[k])
     512             :         {
     513          56 :             k++;
     514          56 :             continue;
     515             :         }
     516             : 
     517             :         /* Copy lexeme and its positions and weights */
     518         144 :         memcpy(dataout + curoff, data + arrin[i].pos, arrin[i].len);
     519         144 :         arrout[j].haspos = arrin[i].haspos;
     520         144 :         arrout[j].len = arrin[i].len;
     521         144 :         arrout[j].pos = curoff;
     522         144 :         curoff += arrin[i].len;
     523         144 :         if (arrin[i].haspos)
     524             :         {
                     :             /* bytes to copy: the uint16 position count plus the positions */
     525         104 :             int         len = POSDATALEN(tsv, arrin + i) * sizeof(WordEntryPos)
     526             :             + sizeof(uint16);
     527             : 
                     :             /* source and destination offsets are both rounded with SHORTALIGN */
     528         104 :             curoff = SHORTALIGN(curoff);
     529         208 :             memcpy(dataout + curoff,
     530         104 :                    STRPTR(tsv) + SHORTALIGN(arrin[i].pos + arrin[i].len),
     531             :                    len);
     532         104 :             curoff += len;
     533             :         }
     534             : 
     535         144 :         j++;
     536             :     }
     537             : 
     538             :     /*
     539             :      * k should now be exactly equal to indices_count. If it isn't then the
     540             :      * caller provided us with indices outside of [0, tsv->size) range and
     541             :      * estimation of tsout's size is wrong.
     542             :      */
     543             :     Assert(k == indices_count);
     544             : 
     545          40 :     SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, curoff));
     546          40 :     return tsout;
     547             : }
     548             : 
     549             : /*
     550             :  * Delete given lexeme from tsvector.
     551             :  * Implementation of user-level ts_delete(tsvector, text).
                     :  *
                     :  * The lexeme is located with tsvector_bsearch().  If it is not present,
                     :  * the input tsvector pointer itself is returned and no new value is
                     :  * built; that early return does not go through the PG_FREE_IF_COPY
                     :  * calls.  Otherwise the result comes from tsvector_delete_by_indices()
                     :  * called with the single found index.
     552             :  */
     553             : Datum
     554          24 : tsvector_delete_str(PG_FUNCTION_ARGS)
     555             : {
     556          24 :     TSVector    tsin = PG_GETARG_TSVECTOR(0),
     557             :                 tsout;
     558          24 :     text       *tlexeme = PG_GETARG_TEXT_PP(1);
     559          24 :     char       *lexeme = VARDATA_ANY(tlexeme);
     560          24 :     int         lexeme_len = VARSIZE_ANY_EXHDR(tlexeme),
     561             :                 skip_index;
     562             : 
     563          24 :     if ((skip_index = tsvector_bsearch(tsin, lexeme, lexeme_len)) == -1)
     564           8 :         PG_RETURN_POINTER(tsin);
     565             : 
     566          16 :     tsout = tsvector_delete_by_indices(tsin, &skip_index, 1);
     567             : 
     568          16 :     PG_FREE_IF_COPY(tsin, 0);
     569          16 :     PG_FREE_IF_COPY(tlexeme, 1);
     570          16 :     PG_RETURN_POINTER(tsout);
     571             : }
     572             : 
     573             : /*
     574             :  * Delete given array of lexemes from tsvector.
     575             :  * Implementation of user-level ts_delete(tsvector, text[]).
                     :  *
                     :  * Each array element is looked up with tsvector_bsearch(); the indices
                     :  * of the elements that are found are collected and handed to
                     :  * tsvector_delete_by_indices(), which also removes duplicate indices.
                     :  * Elements that are not present in the tsvector are ignored.  A NULL
                     :  * array element raises ERRCODE_NULL_VALUE_NOT_ALLOWED.
                     :  *
                     :  * Unlike tsvector_delete_str(), a new tsvector is always built, even
                     :  * when nothing matched.
                     :  *
                     :  * NOTE(review): the element text is read with VARDATA/VARSIZE, whereas
                     :  * compare_text_lexemes() in this file uses the _ANY variants -- confirm
                     :  * that deconstructed array elements always carry a full-size header.
     576             :  */
     577             : Datum
     578          28 : tsvector_delete_arr(PG_FUNCTION_ARGS)
     579             : {
     580          28 :     TSVector    tsin = PG_GETARG_TSVECTOR(0),
     581             :                 tsout;
     582          28 :     ArrayType  *lexemes = PG_GETARG_ARRAYTYPE_P(1);
     583             :     int         i,
     584             :                 nlex,
     585             :                 skip_count,
     586             :                *skip_indices;
     587             :     Datum      *dlexemes;
     588             :     bool       *nulls;
     589             : 
     590          28 :     deconstruct_array(lexemes, TEXTOID, -1, false, TYPALIGN_INT,
     591             :                       &dlexemes, &nulls, &nlex);
     592             : 
     593             :     /*
     594             :      * In typical use case array of lexemes to delete is relatively small. So
     595             :      * here we optimize things for that scenario: iterate through lexarr
     596             :      * performing binary search of each lexeme from lexarr in tsvector.
     597             :      */
     598          28 :     skip_indices = palloc0(nlex * sizeof(int)); /* room for the case where every element matches */
     599         104 :     for (i = skip_count = 0; i < nlex; i++)
     600             :     {
     601             :         char       *lex;
     602             :         int         lex_len,
     603             :                     lex_pos;
     604             : 
     605          80 :         if (nulls[i])
     606           4 :             ereport(ERROR,
     607             :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
     608             :                      errmsg("lexeme array may not contain nulls")));
     609             : 
     610          76 :         lex = VARDATA(dlexemes[i]);
     611          76 :         lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
     612          76 :         lex_pos = tsvector_bsearch(tsin, lex, lex_len);
     613             : 
     614          76 :         if (lex_pos >= 0)
     615          52 :             skip_indices[skip_count++] = lex_pos;
     616             :     }
     617             : 
     618          24 :     tsout = tsvector_delete_by_indices(tsin, skip_indices, skip_count);
     619             : 
     620          24 :     pfree(skip_indices);
     621          24 :     PG_FREE_IF_COPY(tsin, 0);
     622          24 :     PG_FREE_IF_COPY(lexemes, 1);
     623             : 
     624          24 :     PG_RETURN_POINTER(tsout);
     625             : }
     626             : 
     627             : /*
     628             :  * Expand tsvector as table with following columns:
     629             :  *     lexeme: lexeme text
     630             :  *     positions: int2 array of lexeme positions
     631             :  *     weights: text array of one-character weights corresponding to
                     :  *              positions
                     :  *
                     :  * This is a set-returning function: each call emits the row for the
                     :  * tsvector entry whose index equals funcctx->call_cntr, and the result
                     :  * set ends once call_cntr reaches tsin->size.  For an entry without
                     :  * position data, both positions and weights are returned as NULL.
                     :  *
                     :  * On the first call the argument is fetched with
                     :  * PG_GETARG_TSVECTOR_COPY while multi_call_memory_ctx is the current
                     :  * memory context, and kept in funcctx->user_fctx for the later calls.
     632             :  */
     633             : Datum
     634         120 : tsvector_unnest(PG_FUNCTION_ARGS)
     635             : {
     636             :     FuncCallContext *funcctx;
     637             :     TSVector    tsin;
     638             : 
     639         120 :     if (SRF_IS_FIRSTCALL())
     640             :     {
     641             :         MemoryContext oldcontext;
     642             :         TupleDesc   tupdesc;
     643             : 
     644          20 :         funcctx = SRF_FIRSTCALL_INIT();
     645          20 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
     646             : 
     647          20 :         tupdesc = CreateTemplateTupleDesc(3);
     648          20 :         TupleDescInitEntry(tupdesc, (AttrNumber) 1, "lexeme",
     649             :                            TEXTOID, -1, 0);
     650          20 :         TupleDescInitEntry(tupdesc, (AttrNumber) 2, "positions",
     651             :                            INT2ARRAYOID, -1, 0);
     652          20 :         TupleDescInitEntry(tupdesc, (AttrNumber) 3, "weights",
     653             :                            TEXTARRAYOID, -1, 0);
     654          20 :         funcctx->tuple_desc = BlessTupleDesc(tupdesc);
     655             : 
     656          20 :         funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);
     657             : 
     658          20 :         MemoryContextSwitchTo(oldcontext);
     659             :     }
     660             : 
     661         120 :     funcctx = SRF_PERCALL_SETUP();
     662         120 :     tsin = (TSVector) funcctx->user_fctx;
     663             : 
     664         120 :     if (funcctx->call_cntr < tsin->size)
     665             :     {
     666         100 :         WordEntry  *arrin = ARRPTR(tsin);
     667         100 :         char       *data = STRPTR(tsin);
     668             :         HeapTuple   tuple;
     669             :         int         j,
     670         100 :                     i = funcctx->call_cntr;
     671         100 :         bool        nulls[] = {false, false, false};
     672             :         Datum       values[3];
     673             : 
     674         100 :         values[0] = PointerGetDatum(cstring_to_text_with_len(data + arrin[i].pos, arrin[i].len));
     675             : 
     676         100 :         if (arrin[i].haspos)
     677             :         {
     678             :             WordEntryPosVector *posv;
     679             :             Datum      *positions;
     680             :             Datum      *weights;
     681             :             char        weight;
     682             : 
     683             :             /*
     684             :              * Internally tsvector stores position and weight in the same
     685             :              * uint16 (2 bits for weight, 14 for position). Here we extract
     686             :              * that in two separate arrays.
                     :              *
                     :              * The weight is reported as the letter 'D' minus the stored
                     :              * weight value, so a stored value of 0 yields 'D' and larger
                     :              * values yield earlier letters.
     687             :              */
     688          60 :             posv = _POSVECPTR(tsin, arrin + i);
     689          60 :             positions = palloc(posv->npos * sizeof(Datum));
     690          60 :             weights = palloc(posv->npos * sizeof(Datum));
     691         168 :             for (j = 0; j < posv->npos; j++)
     692             :             {
     693         108 :                 positions[j] = Int16GetDatum(WEP_GETPOS(posv->pos[j]));
     694         108 :                 weight = 'D' - WEP_GETWEIGHT(posv->pos[j]);
     695         108 :                 weights[j] = PointerGetDatum(cstring_to_text_with_len(&weight,
     696             :                                                                       1));
     697             :             }
     698             : 
     699          60 :             values[1] = PointerGetDatum(construct_array(positions, posv->npos,
     700             :                                                         INT2OID, 2, true, TYPALIGN_SHORT));
     701          60 :             values[2] = PointerGetDatum(construct_array(weights, posv->npos,
     702             :                                                         TEXTOID, -1, false, TYPALIGN_INT));
     703             :         }
     704             :         else
     705             :         {
     706          40 :             nulls[1] = nulls[2] = true; /* no position data for this entry */
     707             :         }
     708             : 
     709         100 :         tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
     710         100 :         SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
     711             :     }
     712             :     else
     713             :     {
     714          20 :         SRF_RETURN_DONE(funcctx);
     715             :     }
     716             : }
     717             : 
     718             : /*
     719             :  * Convert tsvector to array of lexemes.
                     :  *
                     :  * Returns a text array with one element per tsvector entry, in the
                     :  * order the entries are stored.  Only the lexeme bytes are copied;
                     :  * position data is not consulted.
     720             :  */
     721             : Datum
     722           8 : tsvector_to_array(PG_FUNCTION_ARGS)
     723             : {
     724           8 :     TSVector    tsin = PG_GETARG_TSVECTOR(0);
     725           8 :     WordEntry  *arrin = ARRPTR(tsin);
     726             :     Datum      *elements;
     727             :     int         i;
     728             :     ArrayType  *array;
     729             : 
     730           8 :     elements = palloc(tsin->size * sizeof(Datum));
     731             : 
     732          48 :     for (i = 0; i < tsin->size; i++)
     733             :     {
     734          40 :         elements[i] = PointerGetDatum(cstring_to_text_with_len(STRPTR(tsin) + arrin[i].pos,
     735             :                                                                arrin[i].len));
     736             :     }
     737             : 
     738           8 :     array = construct_array(elements, tsin->size, TEXTOID, -1, false, TYPALIGN_INT);
     739             : 
     740           8 :     pfree(elements);
     741           8 :     PG_FREE_IF_COPY(tsin, 0);
     742           8 :     PG_RETURN_POINTER(array);
     743             : }
     744             : 
     745             : /*
     746             :  * Build tsvector from array of lexemes.
                     :  *
                     :  * The argument is deconstructed as a text array.  A null element raises
                     :  * ERRCODE_NULL_VALUE_NOT_ALLOWED.  The remaining elements are sorted and
                     :  * de-duplicated with compare_text_lexemes, and each surviving element
                     :  * becomes one entry without position data (haspos = 0).
                     :  *
                     :  * NOTE(review): no range check on lex_len or on the accumulated string
                     :  * offset is visible here before they are stored in the WordEntry len
                     :  * and pos fields (tsvector_concat, by contrast, checks its final offset
                     :  * against MAXSTRPOS) -- confirm whether oversized input can reach this
                     :  * point.
                     :  * NOTE(review): zero-length elements are accepted as lexemes -- confirm
                     :  * that this is intended.
     747             :  */
     748             : Datum
     749          12 : array_to_tsvector(PG_FUNCTION_ARGS)
     750             : {
     751          12 :     ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
     752             :     TSVector    tsout;
     753             :     Datum      *dlexemes;
     754             :     WordEntry  *arrout;
     755             :     bool       *nulls;
     756             :     int         nitems,
     757             :                 i,
     758             :                 tslen,
     759          12 :                 datalen = 0;
     760             :     char       *cur;
     761             : 
     762          12 :     deconstruct_array(v, TEXTOID, -1, false, TYPALIGN_INT, &dlexemes, &nulls, &nitems);
     763             : 
     764             :     /* Reject nulls (maybe we should just ignore them, instead?) */
     765          64 :     for (i = 0; i < nitems; i++)
     766             :     {
     767          56 :         if (nulls[i])
     768           4 :             ereport(ERROR,
     769             :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
     770             :                      errmsg("lexeme array may not contain nulls")));
     771             :     }
     772             : 
     773             :     /* Sort and de-dup, because this is required for a valid tsvector. */
     774           8 :     if (nitems > 1)
     775             :     {
     776           8 :         qsort(dlexemes, nitems, sizeof(Datum), compare_text_lexemes);
     777           8 :         nitems = qunique(dlexemes, nitems, sizeof(Datum),
     778             :                          compare_text_lexemes);
     779             :     }
     780             : 
     781             :     /* Calculate space needed for surviving lexemes. */
     782          40 :     for (i = 0; i < nitems; i++)
     783          32 :         datalen += VARSIZE(dlexemes[i]) - VARHDRSZ;
     784           8 :     tslen = CALCDATASIZE(nitems, datalen);
     785             : 
     786             :     /* Allocate and fill tsvector. */
     787           8 :     tsout = (TSVector) palloc0(tslen);
     788           8 :     SET_VARSIZE(tsout, tslen);
     789           8 :     tsout->size = nitems;
     790             : 
     791           8 :     arrout = ARRPTR(tsout);
     792           8 :     cur = STRPTR(tsout);
     793          40 :     for (i = 0; i < nitems; i++)
     794             :     {
     795          32 :         char       *lex = VARDATA(dlexemes[i]);
     796          32 :         int         lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
     797             : 
     798          32 :         memcpy(cur, lex, lex_len);
     799          32 :         arrout[i].haspos = 0;
     800          32 :         arrout[i].len = lex_len;
     801          32 :         arrout[i].pos = cur - STRPTR(tsout); /* offset within string area */
     802          32 :         cur += lex_len;
     803             :     }
     804             : 
     805           8 :     PG_FREE_IF_COPY(v, 0);
     806           8 :     PG_RETURN_POINTER(tsout);
     807             : }
     808             : 
     809             : /*
     810             :  * ts_filter(): keep only lexemes with given weights in tsvector.
                     :  *
                     :  * The second argument is deconstructed as an array of CHAROID elements.
                     :  * Each element must be one of the letters A, B, C or D (either case);
                     :  * they are folded into a bit mask: A -> 8, B -> 4, C -> 2, D -> 1.  A
                     :  * null element raises ERRCODE_NULL_VALUE_NOT_ALLOWED and any other
                     :  * character raises ERRCODE_INVALID_PARAMETER_VALUE.
                     :  *
                     :  * A position is kept when bit (1 << WEP_GETWEIGHT(position)) is set in
                     :  * the mask.  Entries without position data, and entries for which no
                     :  * position survives, are left out of the result.
                     :  *
                     :  * The result is allocated at the input's size, filled using the input's
                     :  * entry count, and then shrunk: once the final entry count is known the
                     :  * string area is moved to its final place and the varlena size is set.
     811             :  */
     812             : Datum
     813          12 : tsvector_filter(PG_FUNCTION_ARGS)
     814             : {
     815          12 :     TSVector    tsin = PG_GETARG_TSVECTOR(0),
     816             :                 tsout;
     817          12 :     ArrayType  *weights = PG_GETARG_ARRAYTYPE_P(1);
     818          12 :     WordEntry  *arrin = ARRPTR(tsin),
     819             :                *arrout;
     820          12 :     char       *datain = STRPTR(tsin),
     821             :                *dataout;
     822             :     Datum      *dweights;
     823             :     bool       *nulls;
     824             :     int         nweights;
     825             :     int         i,
     826             :                 j;
     827          12 :     int         cur_pos = 0;
     828          12 :     char        mask = 0;
     829             : 
     830          12 :     deconstruct_array(weights, CHAROID, 1, true, TYPALIGN_CHAR,
     831             :                       &dweights, &nulls, &nweights);
     832             : 
     833          28 :     for (i = 0; i < nweights; i++)
     834             :     {
     835             :         char        char_weight;
     836             : 
     837          20 :         if (nulls[i])
     838           4 :             ereport(ERROR,
     839             :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
     840             :                      errmsg("weight array may not contain nulls")));
     841             : 
     842          16 :         char_weight = DatumGetChar(dweights[i]);
     843          16 :         switch (char_weight)
     844             :         {
     845          12 :             case 'A':
     846             :             case 'a':
     847          12 :                 mask = mask | 8;
     848          12 :                 break;
     849           4 :             case 'B':
     850             :             case 'b':
     851           4 :                 mask = mask | 4;
     852           4 :                 break;
     853           0 :             case 'C':
     854             :             case 'c':
     855           0 :                 mask = mask | 2;
     856           0 :                 break;
     857           0 :             case 'D':
     858             :             case 'd':
     859           0 :                 mask = mask | 1;
     860           0 :                 break;
     861           0 :             default:
     862           0 :                 ereport(ERROR,
     863             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     864             :                          errmsg("unrecognized weight: \"%c\"", char_weight)));
     865             :         }
     866             :     }
     867             : 
     868           8 :     tsout = (TSVector) palloc0(VARSIZE(tsin));
     869           8 :     tsout->size = tsin->size; /* provisional; replaced by j after the loop */
     870           8 :     arrout = ARRPTR(tsout);
     871           8 :     dataout = STRPTR(tsout);
     872             : 
     873          72 :     for (i = j = 0; i < tsin->size; i++)
     874             :     {
     875             :         WordEntryPosVector *posvin,
     876             :                    *posvout;
     877          64 :         int         npos = 0;
     878             :         int         k;
     879             : 
     880          64 :         if (!arrin[i].haspos)
     881          20 :             continue; /* entry without position data is not kept */
     882             : 
     883          44 :         posvin = _POSVECPTR(tsin, arrin + i);
     884          44 :         posvout = (WordEntryPosVector *)
     885          44 :             (dataout + SHORTALIGN(cur_pos + arrin[i].len));
     886             : 
     887          88 :         for (k = 0; k < posvin->npos; k++)
     888             :         {
     889          44 :             if (mask & (1 << WEP_GETWEIGHT(posvin->pos[k])))
     890          20 :                 posvout->pos[npos++] = posvin->pos[k];
     891             :         }
     892             : 
     893             :         /* if no satisfactory positions found, skip lexeme */
     894          44 :         if (!npos)
     895          24 :             continue;
     896             : 
     897          20 :         arrout[j].haspos = true;
     898          20 :         arrout[j].len = arrin[i].len;
     899          20 :         arrout[j].pos = cur_pos;
     900             : 
     901          20 :         memcpy(dataout + cur_pos, datain + arrin[i].pos, arrin[i].len);
     902          20 :         posvout->npos = npos;
     903          20 :         cur_pos += SHORTALIGN(arrin[i].len);
     904          20 :         cur_pos += POSDATALEN(tsout, arrout + j) * sizeof(WordEntryPos) +
     905             :             sizeof(uint16);
     906          20 :         j++;
     907             :     }
     908             : 
     909           8 :     tsout->size = j;
     910           8 :     if (dataout != STRPTR(tsout)) /* string area start changed with size */
     911           8 :         memmove(STRPTR(tsout), dataout, cur_pos);
     912             : 
     913           8 :     SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, cur_pos));
     914             : 
     915           8 :     PG_FREE_IF_COPY(tsin, 0);
     916           8 :     PG_RETURN_POINTER(tsout);
     917             : }
     918             : /*
                     :  * Concatenate two tsvectors.
                     :  *
                     :  * The entry arrays of in1 and in2 are merged in compareEntry order; a
                     :  * lexeme present in both inputs yields a single output entry.  Position
                     :  * data of in1 is copied unchanged.  Position data of in2 is appended
                     :  * through add_pos, which is given maxpos, the largest position found in
                     :  * in1.  When add_pos reports that it added nothing to an entry that had
                     :  * no in1 positions, the output entry is marked as having no positions.
                     :  *
                     :  * The output is first allocated at a conservative size and, after the
                     :  * merge, shrunk to the space actually used.  An error with
                     :  * ERRCODE_PROGRAM_LIMIT_EXCEEDED is raised if the resulting string area
                     :  * is longer than MAXSTRPOS.
                     :  */
     919             : Datum
     920           8 : tsvector_concat(PG_FUNCTION_ARGS)
     921             : {
     922           8 :     TSVector    in1 = PG_GETARG_TSVECTOR(0);
     923           8 :     TSVector    in2 = PG_GETARG_TSVECTOR(1);
     924             :     TSVector    out;
     925             :     WordEntry  *ptr;
     926             :     WordEntry  *ptr1,
     927             :                *ptr2;
     928             :     WordEntryPos *p;
     929           8 :     int         maxpos = 0,
     930             :                 i,
     931             :                 j,
     932             :                 i1,
     933             :                 i2,
     934             :                 dataoff,
     935             :                 output_bytes,
     936             :                 output_size;
     937             :     char       *data,
     938             :                *data1,
     939             :                *data2;
     940             : 
     941             :     /* Get max position in in1; we'll need this to offset in2's positions */
     942           8 :     ptr = ARRPTR(in1);
     943           8 :     i = in1->size;
     944          20 :     while (i--)
     945             :     {
     946          12 :         if ((j = POSDATALEN(in1, ptr)) != 0)
     947             :         {
     948          12 :             p = POSDATAPTR(in1, ptr);
     949          24 :             while (j--)
     950             :             {
     951          12 :                 if (WEP_GETPOS(*p) > maxpos)
     952           8 :                     maxpos = WEP_GETPOS(*p);
     953          12 :                 p++;
     954             :             }
     955             :         }
     956          12 :         ptr++;
     957             :     }
     958             : 
     959           8 :     ptr1 = ARRPTR(in1);
     960           8 :     ptr2 = ARRPTR(in2);
     961           8 :     data1 = STRPTR(in1);
     962           8 :     data2 = STRPTR(in2);
     963           8 :     i1 = in1->size;
     964           8 :     i2 = in2->size;
     965             : 
     966             :     /*
     967             :      * Conservative estimate of space needed.  We might need all the data in
     968             :      * both inputs, and conceivably add a pad byte before position data for
     969             :      * each item where there was none before.
     970             :      */
     971           8 :     output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;
     972             : 
     973           8 :     out = (TSVector) palloc0(output_bytes);
     974           8 :     SET_VARSIZE(out, output_bytes);
     975             : 
     976             :     /*
     977             :      * We must make out->size valid so that STRPTR(out) is sensible.  We'll
     978             :      * collapse out any unused space at the end.
     979             :      */
     980           8 :     out->size = in1->size + in2->size;
     981             : 
     982           8 :     ptr = ARRPTR(out);
     983           8 :     data = STRPTR(out);
     984           8 :     dataoff = 0;
     985          20 :     while (i1 && i2)
     986             :     {
     987          12 :         int         cmp = compareEntry(data1, ptr1, data2, ptr2);
     988             : 
     989          12 :         if (cmp < 0)
     990             :         {                       /* in1 first */
     991           4 :             ptr->haspos = ptr1->haspos;
     992           4 :             ptr->len = ptr1->len;
     993           4 :             memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
     994           4 :             ptr->pos = dataoff;
     995           4 :             dataoff += ptr1->len;
     996           4 :             if (ptr->haspos)
     997             :             {
     998           4 :                 dataoff = SHORTALIGN(dataoff);
     999           4 :                 memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
    1000           4 :                 dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
    1001             :             }
    1002             : 
    1003           4 :             ptr++;
    1004           4 :             ptr1++;
    1005           4 :             i1--;
    1006             :         }
    1007           8 :         else if (cmp > 0)
    1008             :         {                       /* in2 first */
    1009           4 :             ptr->haspos = ptr2->haspos;
    1010           4 :             ptr->len = ptr2->len;
    1011           4 :             memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
    1012           4 :             ptr->pos = dataoff;
    1013           4 :             dataoff += ptr2->len;
    1014           4 :             if (ptr->haspos)
    1015             :             {
    1016           0 :                 int         addlen = add_pos(in2, ptr2, out, ptr, maxpos);
    1017             : 
    1018           0 :                 if (addlen == 0)
    1019           0 :                     ptr->haspos = 0;
    1020             :                 else
    1021             :                 {
    1022           0 :                     dataoff = SHORTALIGN(dataoff);
    1023           0 :                     dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
    1024             :                 }
    1025             :             }
    1026             : 
    1027           4 :             ptr++;
    1028           4 :             ptr2++;
    1029           4 :             i2--;
    1030             :         }
    1031             :         else
    1032             :         {                       /* cmp == 0: one output entry for both */
    1033           4 :             ptr->haspos = ptr1->haspos | ptr2->haspos;
    1034           4 :             ptr->len = ptr1->len;
    1035           4 :             memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
    1036           4 :             ptr->pos = dataoff;
    1037           4 :             dataoff += ptr1->len;
    1038           4 :             if (ptr->haspos)
    1039             :             {
    1040           4 :                 if (ptr1->haspos)
    1041             :                 {
    1042           4 :                     dataoff = SHORTALIGN(dataoff);
    1043           4 :                     memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
    1044           4 :                     dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
    1045           4 :                     if (ptr2->haspos)
    1046           4 :                         dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
    1047             :                 }
    1048             :                 else            /* must have ptr2->haspos */
    1049             :                 {
    1050           0 :                     int         addlen = add_pos(in2, ptr2, out, ptr, maxpos);
    1051             : 
    1052           0 :                     if (addlen == 0)
    1053           0 :                         ptr->haspos = 0;
    1054             :                     else
    1055             :                     {
    1056           0 :                         dataoff = SHORTALIGN(dataoff);
    1057           0 :                         dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
    1058             :                     }
    1059             :                 }
    1060             :             }
    1061             : 
    1062           4 :             ptr++;
    1063           4 :             ptr1++;
    1064           4 :             ptr2++;
    1065           4 :             i1--;
    1066           4 :             i2--;
    1067             :         }
    1068             :     }
    1069             : 
    1070          12 :     while (i1) /* in2 exhausted: copy the remaining in1 entries */
    1071             :     {
    1072           4 :         ptr->haspos = ptr1->haspos;
    1073           4 :         ptr->len = ptr1->len;
    1074           4 :         memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
    1075           4 :         ptr->pos = dataoff;
    1076           4 :         dataoff += ptr1->len;
    1077           4 :         if (ptr->haspos)
    1078             :         {
    1079           4 :             dataoff = SHORTALIGN(dataoff);
    1080           4 :             memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
    1081           4 :             dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
    1082             :         }
    1083             : 
    1084           4 :         ptr++;
    1085           4 :         ptr1++;
    1086           4 :         i1--;
    1087             :     }
    1088             : 
    1089          12 :     while (i2) /* in1 exhausted: copy the remaining in2 entries */
    1090             :     {
    1091           4 :         ptr->haspos = ptr2->haspos;
    1092           4 :         ptr->len = ptr2->len;
    1093           4 :         memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
    1094           4 :         ptr->pos = dataoff;
    1095           4 :         dataoff += ptr2->len;
    1096           4 :         if (ptr->haspos)
    1097             :         {
    1098           4 :             int         addlen = add_pos(in2, ptr2, out, ptr, maxpos);
    1099             : 
    1100           4 :             if (addlen == 0)
    1101           0 :                 ptr->haspos = 0;
    1102             :             else
    1103             :             {
    1104           4 :                 dataoff = SHORTALIGN(dataoff);
    1105           4 :                 dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
    1106             :             }
    1107             :         }
    1108             : 
    1109           4 :         ptr++;
    1110           4 :         ptr2++;
    1111           4 :         i2--;
    1112             :     }
    1113             : 
    1114             :     /*
    1115             :      * Instead of checking each offset individually, we check for overflow of
    1116             :      * pos fields once at the end.
    1117             :      */
    1118           8 :     if (dataoff > MAXSTRPOS)
    1119           0 :         ereport(ERROR,
    1120             :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
    1121             :                  errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS)));
    1122             : 
    1123             :     /*
    1124             :      * Adjust sizes (asserting that we didn't overrun the original estimates)
    1125             :      * and collapse out any unused array entries.
    1126             :      */
    1127           8 :     output_size = ptr - ARRPTR(out);
    1128             :     Assert(output_size <= out->size);
    1129           8 :     out->size = output_size;
    1130           8 :     if (data != STRPTR(out))
    1131           4 :         memmove(STRPTR(out), data, dataoff);
    1132           8 :     output_bytes = CALCDATASIZE(out->size, dataoff);
    1133             :     Assert(output_bytes <= VARSIZE(out));
    1134           8 :     SET_VARSIZE(out, output_bytes);
    1135             : 
    1136           8 :     PG_FREE_IF_COPY(in1, 0);
    1137           8 :     PG_FREE_IF_COPY(in2, 1);
    1138           8 :     PG_RETURN_POINTER(out);
    1139             : }
    1140             : 
    1141             : /*
    1142             :  * Compare two strings by tsvector rules.
    1143             :  *
    1144             :  * if prefix = true then it returns zero value iff b has prefix a
                     :  *
                     :  * a/lena and b/lenb are byte strings with explicit lengths.
                     :  *
                     :  * With prefix = false the result is negative, zero or positive: the
                     :  * first Min(lena, lenb) bytes are compared with memcmp, and if those
                     :  * are equal the shorter string sorts first.
                     :  *
                     :  * With prefix = true an empty a matches anything (result 0).  Otherwise
                     :  * the result is the memcmp result over the common length, except that
                     :  * it is forced to 1 when those bytes are equal but a is longer than b.
    1145             :  */
    1146             : int32
    1147     4143838 : tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
    1148             : {
    1149             :     int         cmp;
    1150             : 
    1151     4143838 :     if (lena == 0)
    1152             :     {
    1153           0 :         if (prefix)
    1154           0 :             cmp = 0;            /* empty string is prefix of anything */
    1155             :         else
    1156           0 :             cmp = (lenb > 0) ? -1 : 0;
    1157             :     }
    1158     4143838 :     else if (lenb == 0)
    1159             :     {
    1160           0 :         cmp = (lena > 0) ? 1 : 0; /* lena is nonzero on this path */
    1161             :     }
    1162             :     else
    1163             :     {
    1164     4143838 :         cmp = memcmp(a, b, Min(lena, lenb));
    1165             : 
    1166     4143838 :         if (prefix)
    1167             :         {
    1168       10972 :             if (cmp == 0 && lena > lenb)
    1169           0 :                 cmp = 1;        /* a is longer, so not a prefix of b */
    1170             :         }
    1171     4132866 :         else if (cmp == 0 && lena != lenb)
    1172             :         {
    1173       21500 :             cmp = (lena < lenb) ? -1 : 1;
    1174             :         }
    1175             :     }
    1176             : 
    1177     4143838 :     return cmp;
    1178             : }
    1179             : 
    1180             : /*
    1181             :  * Check weight info and/or fill 'data' with the required positions
                     :  *
                     :  * chkval->values is the base of the tsvector's lexeme storage, entry is
                     :  * the matched tsvector entry, and val is the query operand (val->weight
                     :  * is a bit mask of acceptable weights, zero meaning no restriction).
                     :  * data may be NULL; if given, it must arrive with npos == 0.
                     :  *
                     :  * For an entry with position data:
                     :  *   - weight restriction and data: data->pos is a freshly palloc'd array
                     :  *     holding WEP_GETPOS() of each position whose weight bit is set in
                     :  *     val->weight (data->allocated = true).  If none qualifies the
                     :  *     array is freed again and the result is TS_NO.
                     :  *   - weight restriction only: TS_YES if any position has a matching
                     :  *     weight, else TS_NO.
                     :  *   - data only: data->pos is pointed at the entry's own position array
                     :  *     (data->allocated = false) and the result is TS_YES.
                     :  *   - neither: TS_YES.
                     :  *
                     :  * For an entry without position data the result is TS_MAYBE when data
                     :  * is requested and TS_YES otherwise.
    1182             :  */
    1183             : static TSTernaryValue
    1184       45412 : checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
    1185             :                ExecPhraseData *data)
    1186             : {
    1187       45412 :     TSTernaryValue result = TS_NO;
    1188             : 
    1189             :     Assert(data == NULL || data->npos == 0);
    1190             : 
    1191       45412 :     if (entry->haspos)
    1192             :     {
    1193             :         WordEntryPosVector *posvec;
    1194             : 
    1195             :         /*
    1196             :          * We can't use the _POSVECPTR macro here because the pointer to the
    1197             :          * tsvector's lexeme storage is already contained in chkval->values.
    1198             :          */
    1199        2992 :         posvec = (WordEntryPosVector *)
    1200        2992 :             (chkval->values + SHORTALIGN(entry->pos + entry->len));
    1201             : 
    1202        2992 :         if (val->weight && data)
    1203          32 :         {
    1204          32 :             WordEntryPos *posvec_iter = posvec->pos;
    1205             :             WordEntryPos *dptr;
    1206             : 
    1207             :             /*
    1208             :              * Filter position information by weights
    1209             :              */
    1210          32 :             dptr = data->pos = palloc(sizeof(WordEntryPos) * posvec->npos);
    1211          32 :             data->allocated = true;
    1212             : 
    1213             :             /* Is there a position with a matching weight? */
    1214          64 :             while (posvec_iter < posvec->pos + posvec->npos)
    1215             :             {
    1216             :                 /* If true, append this position to the data->pos */
    1217          32 :                 if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
    1218             :                 {
    1219          16 :                     *dptr = WEP_GETPOS(*posvec_iter);
    1220          16 :                     dptr++;
    1221             :                 }
    1222             : 
    1223          32 :                 posvec_iter++;
    1224             :             }
    1225             : 
    1226          32 :             data->npos = dptr - data->pos;
    1227             : 
    1228          32 :             if (data->npos > 0)
    1229          16 :                 result = TS_YES;
    1230             :             else
    1231             :             {
    1232          16 :                 pfree(data->pos); /* nothing matched: undo the allocation */
    1233          16 :                 data->pos = NULL;
    1234          16 :                 data->allocated = false;
    1235             :             }
    1236             :         }
    1237        2960 :         else if (val->weight)
    1238             :         {
    1239         304 :             WordEntryPos *posvec_iter = posvec->pos;
    1240             : 
    1241             :             /* Is there a position with a matching weight? */
    1242         460 :             while (posvec_iter < posvec->pos + posvec->npos)
    1243             :             {
    1244         336 :                 if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
    1245             :                 {
    1246         180 :                     result = TS_YES;
    1247         180 :                     break;      /* no need to go further */
    1248             :                 }
    1249             : 
    1250         156 :                 posvec_iter++;
    1251             :             }
    1252             :         }
    1253        2656 :         else if (data)
    1254             :         {
    1255        1516 :             data->npos = posvec->npos;
    1256        1516 :             data->pos = posvec->pos; /* not a copy: the entry's own array */
    1257        1516 :             data->allocated = false;
    1258        1516 :             result = TS_YES;
    1259             :         }
    1260             :         else
    1261             :         {
    1262             :             /* simplest case: no weight check, positions not needed */
    1263        1140 :             result = TS_YES;
    1264             :         }
    1265             :     }
    1266             :     else
    1267             :     {
    1268             :         /*
    1269             :          * Position info is lacking, so if the caller requires it, we can only
    1270             :          * say that maybe there is a match.
    1271             :          *
    1272             :          * Notice, however, that we *don't* check val->weight here.
    1273             :          * Historically, stripped tsvectors are considered to match queries
    1274             :          * whether or not the query has a weight restriction; that's a little
    1275             :          * dubious but we'll preserve the behavior.
    1276             :          */
    1277       42420 :         if (data)
    1278       15396 :             result = TS_MAYBE;
    1279             :         else
    1280       27024 :             result = TS_YES;
    1281             :     }
    1282             : 
    1283       45412 :     return result;
    1284             : }
    1285             : 
    1286             : /*
    1287             :  * TS_execute callback for matching a tsquery operand to plain tsvector data
                     :  *
                     :  * checkval is really a CHKVAL: arrb and arre bound the WordEntry array to
                     :  * search (arre itself is never examined), values is the storage that the
                     :  * entries' pos/len refer to, and operand is the storage holding the query
                     :  * operand's text (val->length bytes starting at offset val->distance).
                     :  *
                     :  * The array is binary-searched for an exact match, which presumes it is
                     :  * sorted consistently with tsCompareString().  If val->prefix is set, the
                     :  * entries from the search's stopping point onward are also scanned for as
                     :  * long as the operand is a prefix of them.
                     :  *
                     :  * If data is not NULL, the positions of the match are returned in it.  For
                     :  * a prefix search the positions of all matching entries are collected into
                     :  * one palloc'd, sorted, duplicate-free array.
                     :  *
                     :  * Returns TS_NO if no entry matches; otherwise the result is derived from
                     :  * checkclass_str()'s verdict(s) for the matching entries.  In a prefix
                     :  * search that must return positions, the result is TS_MAYBE (with no
                     :  * positions) as soon as any matching entry yields TS_MAYBE.
    1288             :  */
    1289             : static TSTernaryValue
    1290      189788 : checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
    1291             : {
    1292      189788 :     CHKVAL     *chkval = (CHKVAL *) checkval;
    1293      189788 :     WordEntry  *StopLow = chkval->arrb;
    1294      189788 :     WordEntry  *StopHigh = chkval->arre;
    1295      189788 :     WordEntry  *StopMiddle = StopHigh;
    1296      189788 :     TSTernaryValue res = TS_NO;
    1297             : 
    1298             :     /* Loop invariant: StopLow <= val < StopHigh */
    1299     1193668 :     while (StopLow < StopHigh)
    1300             :     {
    1301             :         int         difference;
    1302             : 
    1303     1039268 :         StopMiddle = StopLow + (StopHigh - StopLow) / 2;
    1304     3117804 :         difference = tsCompareString(chkval->operand + val->distance,
    1305     1039268 :                                      val->length,
    1306     1039268 :                                      chkval->values + StopMiddle->pos,
    1307     1039268 :                                      StopMiddle->len,
    1308             :                                      false);
    1309             : 
    1310     1039268 :         if (difference == 0)
    1311             :         {
    1312             :             /* Check weight info & fill 'data' with positions */
    1313       35388 :             res = checkclass_str(chkval, StopMiddle, val, data);
    1314       35388 :             break;
    1315             :         }
    1316     1003880 :         else if (difference > 0)
    1317      566180 :             StopLow = StopMiddle + 1;
    1318             :         else
    1319      437700 :             StopHigh = StopMiddle;
    1320             :     }
    1321             : 
    1322             :     /*
    1323             :      * If it's a prefix search, we should also consider lexemes that the
    1324             :      * search term is a prefix of (which will necessarily immediately follow
    1325             :      * the place we found in the above loop).  But we can skip them if there
    1326             :      * was a definite match on the exact term AND the caller doesn't need
    1327             :      * position info.
    1328             :      */
    1329      189788 :     if (val->prefix && (res != TS_YES || data))
    1330             :     {
    1331       11016 :         WordEntryPos *allpos = NULL;
    1332       11016 :         int         npos = 0,
    1333       11016 :                     totalpos = 0;
    1334             : 
    1335             :         /* no exact match found: begin the scan where the binary search ended */
    1336       11016 :         if (StopLow >= StopHigh)
    1337       11008 :             StopMiddle = StopHigh;
    1338             : 
    1339             :         /* we don't try to re-use any data from the initial match */
    1340       11016 :         if (data)
    1341             :         {
    1342          24 :             if (data->allocated)
    1343           0 :                 pfree(data->pos);
    1344          24 :             data->pos = NULL;
    1345          24 :             data->allocated = false;
    1346          24 :             data->npos = 0;
    1347             :         }
    1348       11016 :         res = TS_NO;
    1349             : 
    1350       21040 :         while ((res != TS_YES || data) &&
    1351       21704 :                StopMiddle < chkval->arre &&
    1352       31860 :                tsCompareString(chkval->operand + val->distance,
    1353       10620 :                                val->length,
    1354       10620 :                                chkval->values + StopMiddle->pos,
    1355       10620 :                                StopMiddle->len,
    1356             :                                true) == 0)
    1357             :         {
    1358             :             TSTernaryValue subres;
    1359             : 
    1360       10024 :             subres = checkclass_str(chkval, StopMiddle, val, data);
    1361             : 
    1362       10024 :             if (subres != TS_NO)
    1363             :             {
    1364        9984 :                 if (data)
    1365             :                 {
    1366             :                     /*
    1367             :                      * We need to join position information: this entry's
                     :                      * positions are appended to allpos, which starts out
                     :                      * with room for 256 entries and is doubled as needed.
    1368             :                      */
    1369          28 :                     if (subres == TS_MAYBE)
    1370             :                     {
    1371             :                         /*
    1372             :                          * No position info for this match, so we must report
    1373             :                          * MAYBE overall.
    1374             :                          */
    1375           0 :                         res = TS_MAYBE;
    1376             :                         /* forget any previous positions */
    1377           0 :                         npos = 0;
    1378             :                         /* don't leak storage */
    1379           0 :                         if (allpos)
    1380           0 :                             pfree(allpos);
    1381           0 :                         break;
    1382             :                     }
    1383             : 
    1384          52 :                     while (npos + data->npos > totalpos)
    1385             :                     {
    1386          24 :                         if (totalpos == 0)
    1387             :                         {
    1388          24 :                             totalpos = 256;
    1389          24 :                             allpos = palloc(sizeof(WordEntryPos) * totalpos);
    1390             :                         }
    1391             :                         else
    1392             :                         {
    1393           0 :                             totalpos *= 2;
    1394           0 :                             allpos = repalloc(allpos, sizeof(WordEntryPos) * totalpos);
    1395             :                         }
    1396             :                     }
    1397             : 
    1398          28 :                     memcpy(allpos + npos, data->pos, sizeof(WordEntryPos) * data->npos);
    1399          28 :                     npos += data->npos;
    1400             : 
    1401             :                     /* don't leak storage from individual matches */
    1402          28 :                     if (data->allocated)
    1403          16 :                         pfree(data->pos);
    1404          28 :                     data->pos = NULL;
    1405          28 :                     data->allocated = false;
    1406             :                     /* it's important to reset data->npos before next loop */
    1407          28 :                     data->npos = 0;
    1408             :                 }
    1409             :                 else
    1410             :                 {
    1411             :                     /* Don't need positions, just handle YES/MAYBE */
    1412        9956 :                     if (subres == TS_YES || res == TS_NO)
    1413        9956 :                         res = subres;
    1414             :                 }
    1415             :             }
    1416             : 
    1417       10024 :             StopMiddle++;
    1418             :         }
    1419             : 
    1420       11016 :         if (data && npos > 0)
    1421             :         {
    1422             :             /* Sort and de-duplicate the collected positions, then return them */
    1423          24 :             data->pos = allpos;
    1424          24 :             qsort(data->pos, npos, sizeof(WordEntryPos), compareWordEntryPos);
    1425          24 :             data->npos = qunique(data->pos, npos, sizeof(WordEntryPos),
    1426             :                                  compareWordEntryPos);
    1427          24 :             data->allocated = true;
    1428          24 :             res = TS_YES;
    1429             :         }
    1430             :     }
    1431             : 
    1432      189788 :     return res;
    1433             : }
    1434             : 
    1435             : /*
    1436             :  * Compute output position list for a tsquery operator in phrase mode.
    1437             :  *
    1438             :  * Merge the position lists in Ldata and Rdata as specified by "emit",
    1439             :  * returning the result list into *data.  The input position lists must be
    1440             :  * sorted and unique, and the output will be as well.
    1441             :  *
    1442             :  * data: pointer to initially-all-zeroes output struct, or NULL
    1443             :  * Ldata, Rdata: input position lists
    1444             :  * emit: bitmask of TSPO_XXX flags
    1445             :  * Loffset: offset to be added to Ldata positions before comparing/outputting
    1446             :  * Roffset: offset to be added to Rdata positions before comparing/outputting
    1447             :  * max_npos: maximum possible required size of output position array
    1448             :  *
    1449             :  * Loffset and Roffset should not be negative, else we risk trying to output
    1450             :  * negative positions, which won't fit into WordEntryPos.
    1451             :  *
                     :  * The output array is palloc'd, with room for max_npos entries, only when
                     :  * the first position is about to be stored; data->allocated is set at that
                     :  * time.  If nothing is emitted, *data is left untouched.
                     :  *
    1452             :  * The result is boolean (TS_YES or TS_NO), but for the caller's convenience
    1453             :  * we return it as TSTernaryValue.
    1454             :  *
    1455             :  * Returns TS_YES if any positions were emitted to *data; or if data is NULL,
    1456             :  * returns TS_YES if any positions would have been emitted.
    1457             :  */
    1458             : #define TSPO_L_ONLY     0x01    /* emit positions appearing only in L */
    1459             : #define TSPO_R_ONLY     0x02    /* emit positions appearing only in R */
    1460             : #define TSPO_BOTH       0x04    /* emit positions appearing in both L&R */
    1461             : 
    1462             : static TSTernaryValue
    1463       19964 : TS_phrase_output(ExecPhraseData *data,
    1464             :                  ExecPhraseData *Ldata,
    1465             :                  ExecPhraseData *Rdata,
    1466             :                  int emit,
    1467             :                  int Loffset,
    1468             :                  int Roffset,
    1469             :                  int max_npos)
    1470             : {
    1471             :     int         Lindex,
    1472             :                 Rindex;
    1473             : 
    1474             :     /* Loop until both inputs are exhausted */
    1475       19964 :     Lindex = Rindex = 0;
    1476       20616 :     while (Lindex < Ldata->npos || Rindex < Rdata->npos)
    1477             :     {
    1478             :         int         Lpos,
    1479             :                     Rpos;
    1480        1536 :         int         output_pos = 0;
    1481             : 
    1482             :         /*
    1483             :          * Fetch current values to compare.  WEP_GETPOS() is needed because
    1484             :          * ExecPhraseData->pos can point to a tsvector's WordEntryPosVector.
                     :          * An exhausted input is represented by INT_MAX, so that all
                     :          * remaining positions of the other input compare as smaller.
    1485             :          */
    1486        1536 :         if (Lindex < Ldata->npos)
    1487        1108 :             Lpos = WEP_GETPOS(Ldata->pos[Lindex]) + Loffset;
    1488             :         else
    1489             :         {
    1490             :             /* L array exhausted, so we're done if R_ONLY isn't set */
    1491         428 :             if (!(emit & TSPO_R_ONLY))
    1492         104 :                 break;
    1493         324 :             Lpos = INT_MAX;
    1494             :         }
    1495        1432 :         if (Rindex < Rdata->npos)
    1496        1260 :             Rpos = WEP_GETPOS(Rdata->pos[Rindex]) + Roffset;
    1497             :         else
    1498             :         {
    1499             :             /* R array exhausted, so we're done if L_ONLY isn't set */
    1500         172 :             if (!(emit & TSPO_L_ONLY))
    1501         116 :                 break;
    1502          56 :             Rpos = INT_MAX;
    1503             :         }
    1504             : 
    1505             :         /* Merge-join the two input lists; output_pos stays 0 if nothing to emit */
    1506        1316 :         if (Lpos < Rpos)
    1507             :         {
    1508             :             /* Lpos is not matched in Rdata, should we output it? */
    1509         320 :             if (emit & TSPO_L_ONLY)
    1510          76 :                 output_pos = Lpos;
    1511         320 :             Lindex++;
    1512             :         }
    1513         996 :         else if (Lpos == Rpos)
    1514             :         {
    1515             :             /* Lpos and Rpos match ... should we output it? */
    1516         496 :             if (emit & TSPO_BOTH)
    1517         432 :                 output_pos = Rpos;
    1518         496 :             Lindex++;
    1519         496 :             Rindex++;
    1520             :         }
    1521             :         else                    /* Lpos > Rpos */
    1522             :         {
    1523             :             /* Rpos is not matched in Ldata, should we output it? */
    1524         500 :             if (emit & TSPO_R_ONLY)
    1525         352 :                 output_pos = Rpos;
    1526         500 :             Rindex++;
    1527             :         }
    1528             : 
    1529        1316 :         if (output_pos > 0)
    1530             :         {
    1531         860 :             if (data)
    1532             :             {
    1533             :                 /* Store position, first allocating output array if needed */
    1534         196 :                 if (data->pos == NULL)
    1535             :                 {
    1536         164 :                     data->pos = (WordEntryPos *)
    1537         164 :                         palloc(max_npos * sizeof(WordEntryPos));
    1538         164 :                     data->allocated = true;
    1539             :                 }
    1540         196 :                 data->pos[data->npos++] = output_pos;
    1541             :             }
    1542             :             else
    1543             :             {
    1544             :                 /*
    1545             :                  * Exact positions not needed, so return TS_YES as soon as we
    1546             :                  * know there is at least one.
    1547             :                  */
    1548         664 :                 return TS_YES;
    1549             :             }
    1550             :         }
    1551             :     }
    1552             : 
    1553       19300 :     if (data && data->npos > 0)
    1554             :     {
    1555             :         /* Let's assert we didn't overrun the array */
    1556             :         Assert(data->npos <= max_npos);
    1557         164 :         return TS_YES;
    1558             :     }
    1559       19136 :     return TS_NO;
    1560             : }
    1561             : 
    1562             : /*
    1563             :  * Execute tsquery at or below an OP_PHRASE operator.
    1564             :  *
    1565             :  * This handles tsquery execution at recursion levels where we need to care
    1566             :  * about match locations.
    1567             :  *
    1568             :  * In addition to the same arguments used for TS_execute, the caller may pass
    1569             :  * a preinitialized-to-zeroes ExecPhraseData struct, to be filled with lexeme
    1570             :  * match position info on success.  data == NULL if no position data need be
    1571             :  * returned.  (In practice, outside callers pass NULL, and only the internal
    1572             :  * recursion cases pass a data pointer.)
    1573             :  * Note: the function assumes data != NULL for operators other than OP_PHRASE.
    1574             :  * This is OK because an outside call always starts from an OP_PHRASE node.
    1575             :  *
    1576             :  * The detailed semantics of the match data, given that the function returned
    1577             :  * TS_YES (successful match), are:
    1578             :  *
    1579             :  * npos > 0, negate = false:
    1580             :  *   query is matched at specified position(s) (and only those positions)
    1581             :  * npos > 0, negate = true:
    1582             :  *   query is matched at all positions *except* specified position(s)
    1583             :  * npos = 0, negate = true:
    1584             :  *   query is matched at all positions
    1585             :  * npos = 0, negate = false:
    1586             :  *   disallowed (this should result in TS_NO or TS_MAYBE, as appropriate)
    1587             :  *
    1588             :  * Successful matches also return a "width" value which is the match width in
    1589             :  * lexemes, less one.  Hence, "width" is zero for simple one-lexeme matches,
    1590             :  * and is the sum of the phrase operator distances for phrase matches.  Note
    1591             :  * that when width > 0, the listed positions represent the ends of matches not
    1592             :  * the starts.  (This unintuitive rule is needed to avoid possibly generating
    1593             :  * negative positions, which wouldn't fit into the WordEntryPos arrays.)
    1594             :  *
    1595             :  * If the TSExecuteCallback function reports that an operand is present
    1596             :  * but fails to provide position(s) for it, we will return TS_MAYBE when
    1597             :  * it is possible but not certain that the query is matched.
    1598             :  *
    1599             :  * When the function returns TS_NO or TS_MAYBE, it must return npos = 0,
    1600             :  * negate = false (which is the state initialized by the caller); but the
    1601             :  * "width" output in such cases is undefined.
    1602             :  */
    1603             : static TSTernaryValue
    1604      469190 : TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
    1605             :                   TSExecuteCallback chkcond,
    1606             :                   ExecPhraseData *data)
    1607             : {
    1608             :     ExecPhraseData Ldata,
    1609             :                 Rdata;
    1610             :     TSTernaryValue lmatch,
    1611             :                 rmatch;
    1612             :     int         Loffset,
    1613             :                 Roffset,
    1614             :                 maxwidth;
    1615             : 
    1616             :     /* since this function recurses, it could be driven to stack overflow */
    1617      469190 :     check_stack_depth();
    1618             : 
    1619      469190 :     if (curitem->type == QI_VAL)
    1620      230826 :         return chkcond(arg, (QueryOperand *) curitem, data);
    1621             : 
    1622      238364 :     switch (curitem->qoperator.oper)
    1623             :     {
    1624       80554 :         case OP_NOT:
    1625             : 
    1626             :             /*
    1627             :              * We need not touch data->width, since a NOT operation does not
    1628             :              * change the match width.
    1629             :              */
    1630       80554 :             if (flags & TS_EXEC_SKIP_NOT)
    1631             :             {
    1632             :                 /* with SKIP_NOT, report NOT as "match everywhere" */
    1633             :                 Assert(data->npos == 0 && !data->negate);
    1634           0 :                 data->negate = true;
    1635           0 :                 return TS_YES;
    1636             :             }
    1637       80554 :             switch (TS_phrase_execute(curitem + 1, arg, flags, chkcond, data))
    1638             :             {
    1639       70414 :                 case TS_NO:
    1640             :                     /* change "match nowhere" to "match everywhere" */
    1641             :                     Assert(data->npos == 0 && !data->negate);
    1642       70414 :                     data->negate = true;
    1643       70414 :                     return TS_YES;
    1644         260 :                 case TS_YES:
    1645         260 :                     if (data->npos > 0)
    1646             :                     {
    1647             :                         /* we have some positions, invert negate flag */
    1648         256 :                         data->negate = !data->negate;
    1649         256 :                         return TS_YES;
    1650             :                     }
    1651           4 :                     else if (data->negate)
    1652             :                     {
    1653             :                         /* change "match everywhere" to "match nowhere" */
    1654           4 :                         data->negate = false;
    1655           4 :                         return TS_NO;
    1656             :                     }
    1657             :                     /* Should not get here if result was TS_YES */
    1658             :                     Assert(false);
    1659           0 :                     break;
    1660        9880 :                 case TS_MAYBE:
    1661             :                     /* match positions are, and remain, uncertain */
    1662        9880 :                     return TS_MAYBE;
    1663             :             }
    1664           0 :             break;
    1665             : 
    1666      157706 :         case OP_PHRASE:
    1667             :         case OP_AND:
    1668      157706 :             memset(&Ldata, 0, sizeof(Ldata));
    1669      157706 :             memset(&Rdata, 0, sizeof(Rdata));
    1670             : 
    1671      157706 :             lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
    1672             :                                        arg, flags, chkcond, &Ldata);
    1673      157706 :             if (lmatch == TS_NO)
    1674       84510 :                 return TS_NO;
    1675             : 
    1676       73196 :             rmatch = TS_phrase_execute(curitem + 1,
    1677             :                                        arg, flags, chkcond, &Rdata);
    1678       73196 :             if (rmatch == TS_NO)
    1679       36168 :                 return TS_NO;
    1680             : 
    1681             :             /*
    1682             :              * If either operand has no position information, then we can't
    1683             :              * return reliable position data, only a MAYBE result.
    1684             :              */
    1685       37028 :             if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
    1686       17160 :                 return TS_MAYBE;
    1687             : 
    1688       19868 :             if (curitem->qoperator.oper == OP_PHRASE)
    1689             :             {
    1690             :                 /*
    1691             :                  * Compute Loffset and Roffset suitable for phrase match, and
    1692             :                  * compute overall width of whole phrase match.
    1693             :                  */
    1694       19864 :                 Loffset = curitem->qoperator.distance + Rdata.width;
    1695       19864 :                 Roffset = 0;
    1696       19864 :                 if (data)
    1697         252 :                     data->width = curitem->qoperator.distance +
    1698          84 :                         Ldata.width + Rdata.width;
    1699             :             }
    1700             :             else
    1701             :             {
    1702             :                 /*
    1703             :                  * For OP_AND, set output width and alignment like OP_OR (see
    1704             :                  * comment below)
    1705             :                  */
    1706           4 :                 maxwidth = Max(Ldata.width, Rdata.width);
    1707           4 :                 Loffset = maxwidth - Ldata.width;
    1708           4 :                 Roffset = maxwidth - Rdata.width;
    1709           4 :                 if (data)
    1710           4 :                     data->width = maxwidth;
    1711             :             }
    1712             : 
    1713       19868 :             if (Ldata.negate && Rdata.negate)
    1714             :             {
    1715             :                 /* !L & !R: treat as !(L | R) */
    1716       18960 :                 (void) TS_phrase_output(data, &Ldata, &Rdata,
    1717             :                                         TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
    1718             :                                         Loffset, Roffset,
    1719       18960 :                                         Ldata.npos + Rdata.npos);
    1720       18960 :                 if (data)
    1721           0 :                     data->negate = true;
    1722       18960 :                 return TS_YES;
    1723             :             }
    1724         908 :             else if (Ldata.negate)
    1725             :             {
    1726             :                 /* !L & R */
    1727         300 :                 return TS_phrase_output(data, &Ldata, &Rdata,
    1728             :                                         TSPO_R_ONLY,
    1729             :                                         Loffset, Roffset,
    1730             :                                         Rdata.npos);
    1731             :             }
    1732         608 :             else if (Rdata.negate)
    1733             :             {
    1734             :                 /* L & !R */
    1735           4 :                 return TS_phrase_output(data, &Ldata, &Rdata,
    1736             :                                         TSPO_L_ONLY,
    1737             :                                         Loffset, Roffset,
    1738             :                                         Ldata.npos);
    1739             :             }
    1740             :             else
    1741             :             {
    1742             :                 /* straight AND */
    1743         604 :                 return TS_phrase_output(data, &Ldata, &Rdata,
    1744             :                                         TSPO_BOTH,
    1745             :                                         Loffset, Roffset,
    1746         604 :                                         Min(Ldata.npos, Rdata.npos));
    1747             :             }
    1748             : 
    1749         104 :         case OP_OR:
    1750         104 :             memset(&Ldata, 0, sizeof(Ldata));
    1751         104 :             memset(&Rdata, 0, sizeof(Rdata));
    1752             : 
    1753         104 :             lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
    1754             :                                        arg, flags, chkcond, &Ldata);
    1755         104 :             rmatch = TS_phrase_execute(curitem + 1,
    1756             :                                        arg, flags, chkcond, &Rdata);
    1757             : 
    1758         104 :             if (lmatch == TS_NO && rmatch == TS_NO)
    1759           8 :                 return TS_NO;
    1760             : 
    1761             :             /*
    1762             :              * If either operand has no position information, then we can't
    1763             :              * return reliable position data, only a MAYBE result.
    1764             :              */
    1765          96 :             if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
    1766           0 :                 return TS_MAYBE;
    1767             : 
    1768             :             /*
    1769             :              * Cope with undefined output width from failed submatch.  (This
    1770             :              * takes less code than trying to ensure that all failure returns
    1771             :              * set data->width to zero.)
    1772             :              */
    1773          96 :             if (lmatch == TS_NO)
    1774          12 :                 Ldata.width = 0;
    1775          96 :             if (rmatch == TS_NO)
    1776          56 :                 Rdata.width = 0;
    1777             : 
    1778             :             /*
    1779             :              * For OP_AND and OP_OR, report the width of the wider of the two
    1780             :              * inputs, and align the narrower input's positions to the right
    1781             :              * end of that width.  This rule deals at least somewhat
    1782             :              * reasonably with cases like "x <-> (y | z <-> q)".
    1783             :              */
    1784          96 :             maxwidth = Max(Ldata.width, Rdata.width);
    1785          96 :             Loffset = maxwidth - Ldata.width;
    1786          96 :             Roffset = maxwidth - Rdata.width;
    1787          96 :             data->width = maxwidth;
    1788             : 
    1789          96 :             if (Ldata.negate && Rdata.negate)
    1790             :             {
    1791             :                 /* !L | !R: treat as !(L & R) */
    1792           4 :                 (void) TS_phrase_output(data, &Ldata, &Rdata,
    1793             :                                         TSPO_BOTH,
    1794             :                                         Loffset, Roffset,
    1795           4 :                                         Min(Ldata.npos, Rdata.npos));
    1796           4 :                 data->negate = true;
    1797           4 :                 return TS_YES;
    1798             :             }
    1799          92 :             else if (Ldata.negate)
    1800             :             {
    1801             :                 /* !L | R: treat as !(L & !R) */
    1802          20 :                 (void) TS_phrase_output(data, &Ldata, &Rdata,
    1803             :                                         TSPO_L_ONLY,
    1804             :                                         Loffset, Roffset,
    1805             :                                         Ldata.npos);
    1806          20 :                 data->negate = true;
    1807          20 :                 return TS_YES;
    1808             :             }
    1809          72 :             else if (Rdata.negate)
    1810             :             {
    1811             :                 /* L | !R: treat as !(!L & R) */
    1812           4 :                 (void) TS_phrase_output(data, &Ldata, &Rdata,
    1813             :                                         TSPO_R_ONLY,
    1814             :                                         Loffset, Roffset,
    1815             :                                         Rdata.npos);
    1816           4 :                 data->negate = true;
    1817           4 :                 return TS_YES;
    1818             :             }
    1819             :             else
    1820             :             {
    1821             :                 /* straight OR */
    1822          68 :                 return TS_phrase_output(data, &Ldata, &Rdata,
    1823             :                                         TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
    1824             :                                         Loffset, Roffset,
    1825          68 :                                         Ldata.npos + Rdata.npos);
    1826             :             }
    1827             : 
    1828           0 :         default:
    1829           0 :             elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
    1830             :     }
    1831             : 
    1832             :     /* not reachable, but keep compiler quiet */
    1833           0 :     return TS_NO;
    1834             : }
    1835             : 
    1836             : 
    1837             : /*
    1838             :  * Evaluate tsquery boolean expression.  Returns false only for TS_NO.
    1839             :  *
    1840             :  * curitem: current tsquery item (initially, the first one)
    1841             :  * arg: opaque value to pass through to callback function
    1842             :  * flags: bitmask of flag bits shown in ts_utils.h
    1843             :  * chkcond: callback function to check whether a primitive value is present
    1844             :  */
    1845             : bool
    1846      372310 : TS_execute(QueryItem *curitem, void *arg, uint32 flags,
    1847             :            TSExecuteCallback chkcond)
    1848             : {
    1849             :     /*
    1850             :      * If we get TS_MAYBE from the recursion, return true.  We could only see
    1851             :      * that result if the caller passed TS_EXEC_PHRASE_NO_POS, so there's no
    1852             :      * need to check again.
    1853             :      */
    1854      372310 :     return TS_execute_recurse(curitem, arg, flags, chkcond) != TS_NO;
    1855             : }
    1856             : 
    1857             : /*
    1858             :  * TS_execute recursion for operators above any phrase operator.  Here we do
    1859             :  * not need to worry about lexeme positions.  As soon as we hit an OP_PHRASE
    1860             :  * operator, we pass it off to TS_phrase_execute which does worry.
    1861             :  */
    1862             : static TSTernaryValue
    1863      704840 : TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags,
    1864             :                    TSExecuteCallback chkcond)
    1865             : {
    1866             :     TSTernaryValue lmatch;
    1867             : 
    1868             :     /* since this function recurses, it could be driven to stack overflow */
    1869      704840 :     check_stack_depth();
    1870             : 
    1871             :     /* ... and let's check for query cancel while we're at it */
    1872      704840 :     CHECK_FOR_INTERRUPTS();
    1873             : 
    1874      704840 :     if (curitem->type == QI_VAL)
    1875      282760 :         return chkcond(arg, (QueryOperand *) curitem,
    1876             :                        NULL /* don't need position info */ );
    1877             : 
    1878      422080 :     switch (curitem->qoperator.oper)
    1879             :     {
    1880      135868 :         case OP_NOT:
    1881      135868 :             if (flags & TS_EXEC_SKIP_NOT)
    1882           0 :                 return TS_YES;  /* NOT nodes are being skipped */
    1883      135868 :             switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
    1884             :             {
    1885      128216 :                 case TS_NO:
    1886      128216 :                     return TS_YES;
    1887        3260 :                 case TS_YES:
    1888        3260 :                     return TS_NO;
    1889        4392 :                 case TS_MAYBE:
    1890        4392 :                     return TS_MAYBE;
    1891             :             }
    1892           0 :             break;
    1893             : 
    1894       56150 :         case OP_AND:
    1895       56150 :             lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
    1896             :                                         flags, chkcond);
    1897       56150 :             if (lmatch == TS_NO)
    1898       44630 :                 return TS_NO;   /* left is NO: no need to test the right */
    1899       11520 :             switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
    1900             :             {
    1901        6840 :                 case TS_NO:
    1902        6840 :                     return TS_NO;
    1903        2200 :                 case TS_YES:
    1904        2200 :                     return lmatch;  /* YES or MAYBE, per the left side */
    1905        2480 :                 case TS_MAYBE:
    1906        2480 :                     return TS_MAYBE;
    1907             :             }
    1908           0 :             break;
    1909             : 
    1910       72536 :         case OP_OR:
    1911       72536 :             lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
    1912             :                                         flags, chkcond);
    1913       72536 :             if (lmatch == TS_YES)
    1914       16080 :                 return TS_YES;  /* left is YES: no need to test the right */
    1915       56456 :             switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
    1916             :             {
    1917       38338 :                 case TS_NO:
    1918       38338 :                     return lmatch;  /* NO or MAYBE, per the left side */
    1919        4928 :                 case TS_YES:
    1920        4928 :                     return TS_YES;
    1921       13190 :                 case TS_MAYBE:
    1922       13190 :                     return TS_MAYBE;
    1923             :             }
    1924           0 :             break;
    1925             : 
    1926      157526 :         case OP_PHRASE:
    1927             : 
    1928             :             /*
    1929             :              * If we get a MAYBE result, and the caller doesn't want that,
    1930             :              * convert it to NO.  It would be more consistent, perhaps, to
    1931             :              * return the result of TS_phrase_execute() verbatim and then
    1932             :              * convert MAYBE results at the top of the recursion.  But
    1933             :              * converting at the topmost phrase operator gives results that
    1934             :              * are bug-compatible with the old implementation, so do it like
    1935             :              * this for now.
    1936             :              */
    1937      157526 :             switch (TS_phrase_execute(curitem, arg, flags, chkcond, NULL))
    1938             :             {
    1939      120822 :                 case TS_NO:
    1940      120822 :                     return TS_NO;
    1941       19548 :                 case TS_YES:
    1942       19548 :                     return TS_YES;
    1943       17156 :                 case TS_MAYBE:
    1944       17156 :                     return (flags & TS_EXEC_PHRASE_NO_POS) ? TS_MAYBE : TS_NO;
    1945             :             }
    1946           0 :             break;
    1947             : 
    1948           0 :         default:
    1949           0 :             elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
    1950             :     }
    1951             : 
    1952             :     /* not reachable, but keep compiler quiet */
    1953           0 :     return TS_NO;
    1954             : }
    1955             : 
    1956             : /*
    1957             :  * Detect whether a tsquery boolean expression requires any positive matches
    1958             :  * to values shown in the tsquery.
    1959             :  *
    1960             :  * This is needed to know whether a GIN index search requires full index scan.
    1961             :  * For example, 'x & !y' requires a match of x, so it's sufficient to scan
    1962             :  * entries for x; but 'x | !y' could match rows containing neither x nor y.
    1963             :  */
    1964             : bool
    1965         556 : tsquery_requires_match(QueryItem *curitem)
    1966             : {
    1967             :     /* since this function recurses, it could be driven to stack overflow */
    1968         556 :     check_stack_depth();
    1969             : 
    1970         556 :     if (curitem->type == QI_VAL)
    1971         264 :         return true;            /* a plain operand is a required match */
    1972             : 
    1973         292 :     switch (curitem->qoperator.oper)
    1974             :     {
    1975         112 :         case OP_NOT:
    1976             : 
    1977             :             /*
    1978             :              * Assume there are no required matches underneath a NOT.  For
    1979             :              * some cases with nested NOTs, we could prove there's a required
    1980             :              * match, but it seems unlikely to be worth the trouble.
    1981             :              */
    1982         112 :             return false;
    1983             : 
    1984         136 :         case OP_PHRASE:
    1985             : 
    1986             :             /*
    1987             :              * Treat OP_PHRASE as OP_AND here (deliberate fall through)
    1988             :              */
    1989             :         case OP_AND:
    1990             :             /* If either side requires a match, we're good */
    1991         136 :             if (tsquery_requires_match(curitem + curitem->qoperator.left))
    1992         104 :                 return true;
    1993             :             else
    1994          32 :                 return tsquery_requires_match(curitem + 1);
    1995             : 
    1996          44 :         case OP_OR:
    1997             :             /* Both sides must require a match */
    1998          44 :             if (tsquery_requires_match(curitem + curitem->qoperator.left))
    1999          44 :                 return tsquery_requires_match(curitem + 1);
    2000             :             else
    2001           0 :                 return false;
    2002             : 
    2003           0 :         default:
    2004           0 :             elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
    2005             :     }
    2006             : 
    2007             :     /* not reachable, but keep compiler quiet */
    2008             :     return false;
    2009             : }
    2010             : 
    2011             : /*
    2012             :  * boolean operations (ts_match_qv is ts_match_vq with its arguments swapped)
    2013             :  */
    2014             : Datum
    2015          40 : ts_match_qv(PG_FUNCTION_ARGS)
    2016             : {
    2017          40 :     PG_RETURN_DATUM(DirectFunctionCall2(ts_match_vq,
    2018             :                                         PG_GETARG_DATUM(1),
    2019             :                                         PG_GETARG_DATUM(0)));
    2020             : }
    2021             : /* Match tsvector (arg 0) against tsquery (arg 1); returns a boolean Datum. */
    2022             : Datum
    2023      147136 : ts_match_vq(PG_FUNCTION_ARGS)
    2024             : {
    2025      147136 :     TSVector    val = PG_GETARG_TSVECTOR(0);
    2026      147136 :     TSQuery     query = PG_GETARG_TSQUERY(1);
    2027             :     CHKVAL      chkval;
    2028             :     bool        result;
    2029             : 
    2030             :     /* empty query matches nothing */
    2031      147136 :     if (!query->size)
    2032             :     {
    2033           0 :         PG_FREE_IF_COPY(val, 0);
    2034           0 :         PG_FREE_IF_COPY(query, 1);
    2035           0 :         PG_RETURN_BOOL(false);
    2036             :     }
    2037             :     /* chkval is handed to checkcondition_str as TS_execute's opaque arg */
    2038      147136 :     chkval.arrb = ARRPTR(val);
    2039      147136 :     chkval.arre = chkval.arrb + val->size;
    2040      147136 :     chkval.values = STRPTR(val);
    2041      147136 :     chkval.operand = GETOPERAND(query);
    2042      147136 :     result = TS_execute(GETQUERY(query),
    2043             :                         &chkval,
    2044             :                         TS_EXEC_EMPTY,
    2045             :                         checkcondition_str);
    2046             : 
    2047      147136 :     PG_FREE_IF_COPY(val, 0);
    2048      147136 :     PG_FREE_IF_COPY(query, 1);
    2049      147136 :     PG_RETURN_BOOL(result);
    2050             : }
    2051             : /* Match two non-tsearch arguments by converting both, then ts_match_vq. */
    2052             : Datum
    2053           0 : ts_match_tt(PG_FUNCTION_ARGS)
    2054             : {
    2055             :     TSVector    vector;
    2056             :     TSQuery     query;
    2057             :     bool        res;
    2058             :     /* arg 0 goes through to_tsvector, arg 1 through plainto_tsquery */
    2059           0 :     vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
    2060             :                                                   PG_GETARG_DATUM(0)));
    2061           0 :     query = DatumGetTSQuery(DirectFunctionCall1(plainto_tsquery,
    2062             :                                                 PG_GETARG_DATUM(1)));
    2063             : 
    2064           0 :     res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
    2065             :                                            TSVectorGetDatum(vector),
    2066             :                                            TSQueryGetDatum(query)));
    2067             :     /* both values were built in this function, so free them here */
    2068           0 :     pfree(vector);
    2069           0 :     pfree(query);
    2070             : 
    2071           0 :     PG_RETURN_BOOL(res);
    2072             : }
    2073             : /* Match arg 0 (converted with to_tsvector) against the tsquery in arg 1. */
    2074             : Datum
    2075           0 : ts_match_tq(PG_FUNCTION_ARGS)
    2076             : {
    2077             :     TSVector    vector;
    2078           0 :     TSQuery     query = PG_GETARG_TSQUERY(1);
    2079             :     bool        res;
    2080             :     /* only arg 0 needs converting; the query is used as passed */
    2081           0 :     vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
    2082             :                                                   PG_GETARG_DATUM(0)));
    2083             : 
    2084           0 :     res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
    2085             :                                            TSVectorGetDatum(vector),
    2086             :                                            TSQueryGetDatum(query)));
    2087             :     /* vector was built here; query is released only if it is a copy */
    2088           0 :     pfree(vector);
    2089           0 :     PG_FREE_IF_COPY(query, 1);
    2090             : 
    2091           0 :     PG_RETURN_BOOL(res);
    2092             : }
    2093             : 
    2094             : /*
    2095             :  * ts_stat statistic function support
    2096             :  */
    2097             : 
    2098             : 
    2099             : /*
    2100             :  * Returns the number of positions in value 'wptr' within tsvector 'txt',
    2101             :  * that have a weight equal to one of the weights in 'weight' bitmask.
    2102             :  */
    2103             : static int
    2104        5452 : check_weight(TSVector txt, WordEntry *wptr, int8 weight)
    2105             : {
    2106        5452 :     int         len = POSDATALEN(txt, wptr);
    2107        5452 :     int         num = 0;
    2108        5452 :     WordEntryPos *ptr = POSDATAPTR(txt, wptr);
    2109             :     /* bit i of 'weight' selects positions whose WEP_GETWEIGHT value is i */
    2110       11100 :     while (len--)
    2111             :     {
    2112        5648 :         if (weight & (1 << WEP_GETWEIGHT(*ptr)))
    2113           8 :             num++;
    2114        5648 :         ptr++;
    2115             :     }
    2116        5452 :     return num;
    2117             : }
    2118             : /* Compare StatEntry a's lexeme with WordEntry e's lexeme in tsvector t. */
    2119             : #define compareStatWord(a,e,t)                          \
    2120             :     tsCompareString((a)->lexeme, (a)->lenlexeme,      \
    2121             :                     STRPTR(t) + (e)->pos, (e)->len,       \
    2122             :                     false)
    2123             : /* Count entry 'off' of 'txt' in the stat tree, adding a node if needed. */
    2124             : static void
    2125      230416 : insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 off)
    2126             : {
    2127      230416 :     WordEntry  *we = ARRPTR(txt) + off;
    2128      230416 :     StatEntry  *node = stat->root,
    2129      230416 :                *pnode = NULL;
    2130             :     int         n,
    2131      230416 :                 res = 0;
    2132      230416 :     uint32      depth = 1;
    2133             :     /* n: occurrences to count; with a weight filter, only matching ones */
    2134      230416 :     if (stat->weight == 0)
    2135      115208 :         n = (we->haspos) ? POSDATALEN(txt, we) : 1;
    2136             :     else
    2137      115208 :         n = (we->haspos) ? check_weight(txt, we, stat->weight) : 0;
    2138             : 
    2139      230416 :     if (n == 0)
    2140      115204 :         return;                 /* nothing to insert */
    2141             :     /* search the binary tree for this lexeme, remembering the parent */
    2142     1163640 :     while (node)
    2143             :     {
    2144     1159064 :         res = compareStatWord(node, we, txt);
    2145             : 
    2146     1159064 :         if (res == 0)
    2147             :         {
    2148      110636 :             break;
    2149             :         }
    2150             :         else
    2151             :         {
    2152     1048428 :             pnode = node;
    2153     1048428 :             node = (res < 0) ? node->left : node->right;
    2154             :         }
    2155     1048428 :         depth++;
    2156             :     }
    2157             :     /* remember the deepest level seen; ts_setup_firstcall sizes its stack by it */
    2158      115212 :     if (depth > stat->maxdepth)
    2159          84 :         stat->maxdepth = depth;
    2160             :     /* not found: link a new node under pnode; found: just bump its counters */
    2161      115212 :     if (node == NULL)
    2162             :     {
    2163        4576 :         node = MemoryContextAlloc(persistentContext, STATENTRYHDRSZ + we->len);
    2164        4576 :         node->left = node->right = NULL;
    2165        4576 :         node->ndoc = 1;
    2166        4576 :         node->nentry = n;
    2167        4576 :         node->lenlexeme = we->len;
    2168        4576 :         memcpy(node->lexeme, STRPTR(txt) + we->pos, node->lenlexeme);
    2169             : 
    2170        4576 :         if (pnode == NULL)
    2171             :         {
    2172           8 :             stat->root = node;
    2173             :         }
    2174             :         else
    2175             :         {
    2176        4568 :             if (res < 0)
    2177        2248 :                 pnode->left = node;
    2178             :             else
    2179        2320 :                 pnode->right = node;
    2180             :         }
    2181             : 
    2182             :     }
    2183             :     else
    2184             :     {
    2185      110636 :         node->ndoc++;
    2186      110636 :         node->nentry += n;
    2187             :     }
    2188             : }
    2189             : /* Recursively insert entries of 'txt' in bisection order over [low, high). */
    2190             : static void
    2191      330256 : chooseNextStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt,
    2192             :                     uint32 low, uint32 high, uint32 offset)
    2193             : {
    2194             :     uint32      pos;
    2195      330256 :     uint32      middle = (low + high) >> 1;
    2196             :     /* midpoint of each half, shifted by 'offset'; skipped if outside txt */
    2197      330256 :     pos = (low + middle) >> 1;
    2198      330256 :     if (low != middle && pos >= offset && pos - offset < txt->size)
    2199      113552 :         insertStatEntry(persistentContext, stat, txt, pos - offset);
    2200      330256 :     pos = (high + middle + 1) >> 1;
    2201      330256 :     if (middle + 1 != high && pos >= offset && pos - offset < txt->size)
    2202      112856 :         insertStatEntry(persistentContext, stat, txt, pos - offset);
    2203             :     /* then recurse into each half that can still be split */
    2204      330256 :     if (low != middle)
    2205      165128 :         chooseNextStatEntry(persistentContext, stat, txt, low, middle, offset);
    2206      330256 :     if (high != middle + 1)
    2207      161120 :         chooseNextStatEntry(persistentContext, stat, txt, middle + 1, high, offset);
    2208      330256 : }
    2209             : 
    2210             : /*
    2211             :  * This is written like a custom aggregate function, because the
    2212             :  * original plan was to do just that. Unfortunately, an aggregate function
    2213             :  * can't return a set, so that plan was abandoned. If that limitation is
    2214             :  * lifted in the future, ts_stat could be a real aggregate function so that
    2215             :  * you could use it like this:
    2216             :  *
    2217             :  *   SELECT ts_stat(vector_column) FROM vector_table;
    2218             :  *
    2219             :  *  where vector_column is a tsvector-type column in vector_table.
    2220             :  */
    2221             : 
    2222             : static TSVectorStat *
    2223        4072 : ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
    2224             : {
    2225        4072 :     TSVector    txt = DatumGetTSVector(data);
    2226             :     uint32      i,
    2227        4072 :                 nbit = 0,
    2228             :                 offset;
    2229             : 
    2230        4072 :     if (stat == NULL)
    2231             :     {                           /* no state yet: create it */
    2232           0 :         stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
    2233           0 :         stat->maxdepth = 1;
    2234             :     }
    2235             : 
    2236             :     /* a NULL or empty tsvector contributes nothing to the statistics */
    2237        4072 :     if (txt == NULL || txt->size == 0)
    2238             :     {
    2239          64 :         if (txt && txt != (TSVector) DatumGetPointer(data))
    2240          64 :             pfree(txt);         /* txt is a copy distinct from the input datum */
    2241          64 :         return stat;
    2242             :     }
    2243             :     /* round size up to a power of two (nbit) and center txt inside that range */
    2244        4008 :     i = txt->size - 1;
    2245       28480 :     for (; i > 0; i >>= 1)
    2246       24472 :         nbit++;
    2247             : 
    2248        4008 :     nbit = 1 << nbit;
    2249        4008 :     offset = (nbit - txt->size) / 2;
    2250             :     /* insert the middle entry first, then the others in bisection order */
    2251        4008 :     insertStatEntry(persistentContext, stat, txt, (nbit >> 1) - offset);
    2252        4008 :     chooseNextStatEntry(persistentContext, stat, txt, 0, nbit, offset);
    2253             : 
    2254        4008 :     return stat;
    2255             : }
    2256             : /* First-call setup: init the tree-walk stack and the result tuple desc. */
    2257             : static void
    2258           8 : ts_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx,
    2259             :                    TSVectorStat *stat)
    2260             : {
    2261             :     TupleDesc   tupdesc;
    2262             :     MemoryContext oldcontext;
    2263             :     StatEntry  *node;
    2264             : 
    2265           8 :     funcctx->user_fctx = (void *) stat;
    2266             :     /* allocate the stack and tuple descriptor in the multi-call context */
    2267           8 :     oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
    2268             : 
    2269           8 :     stat->stack = palloc0(sizeof(StatEntry *) * (stat->maxdepth + 1));
    2270           8 :     stat->stackpos = 0;
    2271             : 
    2272           8 :     node = stat->root;
    2273             :     /* find leftmost value, pushing every node on the way down */
    2274           8 :     if (node == NULL)
    2275           0 :         stat->stack[stat->stackpos] = NULL;
    2276             :     else
    2277             :         for (;;)
    2278             :         {
    2279          24 :             stat->stack[stat->stackpos] = node;
    2280          32 :             if (node->left)
    2281             :             {
    2282          24 :                 stat->stackpos++;
    2283          24 :                 node = node->left;
    2284             :             }
    2285             :             else
    2286           8 :                 break;
    2287             :         }
    2288             :     Assert(stat->stackpos <= stat->maxdepth);
    2289             :     /* result rows are (word text, ndoc int4, nentry int4) */
    2290           8 :     tupdesc = CreateTemplateTupleDesc(3);
    2291           8 :     TupleDescInitEntry(tupdesc, (AttrNumber) 1, "word",
    2292             :                        TEXTOID, -1, 0);
    2293           8 :     TupleDescInitEntry(tupdesc, (AttrNumber) 2, "ndoc",
    2294             :                        INT4OID, -1, 0);
    2295           8 :     TupleDescInitEntry(tupdesc, (AttrNumber) 3, "nentry",
    2296             :                        INT4OID, -1, 0);
    2297           8 :     funcctx->tuple_desc = BlessTupleDesc(tupdesc);
    2298           8 :     funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
    2299             : 
    2300           8 :     MemoryContextSwitchTo(oldcontext);
    2301           8 : }
    2302             : /* Advance the stack-based tree walk; return the next entry, or NULL at end. */
    2303             : static StatEntry *
    2304        9152 : walkStatEntryTree(TSVectorStat *stat)
    2305             : {
    2306        9152 :     StatEntry  *node = stat->stack[stat->stackpos];
    2307             : 
    2308        9152 :     if (node == NULL)
    2309           0 :         return NULL;
    2310             : 
    2311        9152 :     if (node->ndoc != 0)
    2312             :     {
    2313             :         /* not yet returned (ndoc is zeroed after output): return the entry itself */
    2314        2256 :         return node;
    2315             :     }
    2316        6896 :     else if (node->right && node->right != stat->stack[stat->stackpos + 1])
    2317             :     {
    2318             :         /* descend into the right subtree, which has not been walked yet */
    2319        2320 :         stat->stackpos++;
    2320        2320 :         node = node->right;
    2321             : 
    2322             :         /* find the leftmost value of that subtree */
    2323             :         for (;;)
    2324             :         {
    2325        2224 :             stat->stack[stat->stackpos] = node;
    2326        4544 :             if (node->left)
    2327             :             {
    2328        2224 :                 stat->stackpos++;
    2329        2224 :                 node = node->left;
    2330             :             }
    2331             :             else
    2332        2320 :                 break;
    2333             :         }
    2334        2320 :         Assert(stat->stackpos <= stat->maxdepth);
    2335             :     }
    2336             :     else
    2337             :     {
    2338             :         /* left subtree, node itself and right subtree are all done: pop */
    2339        4576 :         if (stat->stackpos == 0)
    2340           8 :             return NULL;
    2341             : 
    2342        4568 :         stat->stackpos--;
    2343        4568 :         return walkStatEntryTree(stat);
    2344             :     }
    2345             : 
    2346        2320 :     return node;
    2347             : }
    2348             : /* Build the next result row as a tuple Datum; (Datum) 0 when none are left. */
    2349             : static Datum
    2350        4584 : ts_process_call(FuncCallContext *funcctx)
    2351             : {
    2352             :     TSVectorStat *st;
    2353             :     StatEntry  *entry;
    2354             : 
    2355        4584 :     st = (TSVectorStat *) funcctx->user_fctx;
    2356             : 
    2357        4584 :     entry = walkStatEntryTree(st);
    2358             : 
    2359        4584 :     if (entry != NULL)
    2360             :     {
    2361             :         Datum       result;
    2362             :         char       *values[3];
    2363             :         char        ndoc[16];
    2364             :         char        nentry[16];
    2365             :         HeapTuple   tuple;
    2366             :         /* the tuple is built from C strings; the lexeme needs a NUL added */
    2367        4576 :         values[0] = palloc(entry->lenlexeme + 1);
    2368        4576 :         memcpy(values[0], entry->lexeme, entry->lenlexeme);
    2369        4576 :         (values[0])[entry->lenlexeme] = '\0';
    2370        4576 :         sprintf(ndoc, "%d", entry->ndoc);
    2371        4576 :         values[1] = ndoc;
    2372        4576 :         sprintf(nentry, "%d", entry->nentry);
    2373        4576 :         values[2] = nentry;
    2374             : 
    2375        4576 :         tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
    2376        4576 :         result = HeapTupleGetDatum(tuple);
    2377             : 
    2378        4576 :         pfree(values[0]);
    2379             : 
    2380             :         /* mark entry as already visited (walkStatEntryTree tests ndoc) */
    2381        4576 :         entry->ndoc = 0;
    2382             : 
    2383        4576 :         return result;
    2384             :     }
    2385             : 
    2386           8 :     return (Datum) 0;
    2387             : }
    2388             : 
    2389             : static TSVectorStat *
    2390           8 : ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
    2391             : {
    2392           8 :     char       *query = text_to_cstring(txt);
    2393             :     TSVectorStat *stat;
    2394             :     bool        isnull;
    2395             :     Portal      portal;
    2396             :     SPIPlanPtr  plan;
    2397             : 
    2398           8 :     if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
    2399             :         /* internal error */
    2400           0 :         elog(ERROR, "SPI_prepare(\"%s\") failed", query);
    2401             : 
    2402           8 :     if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
    2403             :         /* internal error */
    2404           0 :         elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
    2405             : 
    2406           8 :     SPI_cursor_fetch(portal, true, 100);
    2407             : 
    2408           8 :     if (SPI_tuptable == NULL ||
    2409           8 :         SPI_tuptable->tupdesc->natts != 1 ||
    2410           8 :         !IsBinaryCoercible(SPI_gettypeid(SPI_tuptable->tupdesc, 1),
    2411             :                            TSVECTOROID))
    2412           0 :         ereport(ERROR,
    2413             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    2414             :                  errmsg("ts_stat query must return one tsvector column")));
    2415             : 
    2416           8 :     stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
    2417           8 :     stat->maxdepth = 1;
    2418             : 
    2419           8 :     if (ws)
    2420             :     {
    2421             :         char       *buf;
    2422             : 
    2423           4 :         buf = VARDATA_ANY(ws);
    2424          12 :         while (buf - VARDATA_ANY(ws) < VARSIZE_ANY_EXHDR(ws))
    2425             :         {
    2426           8 :             if (pg_mblen(buf) == 1)
    2427             :             {
    2428           8 :                 switch (*buf)
    2429             :                 {
    2430           4 :                     case 'A':
    2431             :                     case 'a':
    2432           4 :                         stat->weight |= 1 << 3;
    2433           4 :                         break;
    2434           4 :                     case 'B':
    2435             :                     case 'b':
    2436           4 :                         stat->weight |= 1 << 2;
    2437           4 :                         break;
    2438           0 :                     case 'C':
    2439             :                     case 'c':
    2440           0 :                         stat->weight |= 1 << 1;
    2441           0 :                         break;
    2442           0 :                     case 'D':
    2443             :                     case 'd':
    2444           0 :                         stat->weight |= 1;
    2445           0 :                         break;
    2446           0 :                     default:
    2447           0 :                         stat->weight |= 0;
    2448             :                 }
    2449           0 :             }
    2450           8 :             buf += pg_mblen(buf);
    2451             :         }
    2452             :     }
    2453             : 
    2454          56 :     while (SPI_processed > 0)
    2455             :     {
    2456             :         uint64      i;
    2457             : 
    2458        4120 :         for (i = 0; i < SPI_processed; i++)
    2459             :         {
    2460        4072 :             Datum       data = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
    2461             : 
    2462        4072 :             if (!isnull)
    2463        4072 :                 stat = ts_accum(persistentContext, stat, data);
    2464             :         }
    2465             : 
    2466          48 :         SPI_freetuptable(SPI_tuptable);
    2467          48 :         SPI_cursor_fetch(portal, true, 100);
    2468             :     }
    2469             : 
    2470           8 :     SPI_freetuptable(SPI_tuptable);
    2471           8 :     SPI_cursor_close(portal);
    2472           8 :     SPI_freeplan(plan);
    2473           8 :     pfree(query);
    2474             : 
    2475           8 :     return stat;
    2476             : }
    2477             : 
    2478             : Datum
    2479        4576 : ts_stat1(PG_FUNCTION_ARGS)
    2480             : {
    2481             :     FuncCallContext *funcctx;
    2482             :     Datum       result;
    2483             : 
    2484        4576 :     if (SRF_IS_FIRSTCALL())
    2485             :     {
    2486             :         TSVectorStat *stat;
    2487           4 :         text       *txt = PG_GETARG_TEXT_PP(0);
    2488             : 
    2489           4 :         funcctx = SRF_FIRSTCALL_INIT();
    2490           4 :         SPI_connect();
    2491           4 :         stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, NULL);
    2492           4 :         PG_FREE_IF_COPY(txt, 0);
    2493           4 :         ts_setup_firstcall(fcinfo, funcctx, stat);
    2494           4 :         SPI_finish();
    2495             :     }
    2496             : 
    2497        4576 :     funcctx = SRF_PERCALL_SETUP();
    2498        4576 :     if ((result = ts_process_call(funcctx)) != (Datum) 0)
    2499        4572 :         SRF_RETURN_NEXT(funcctx, result);
    2500           4 :     SRF_RETURN_DONE(funcctx);
    2501             : }
    2502             : 
    2503             : Datum
    2504           8 : ts_stat2(PG_FUNCTION_ARGS)
    2505             : {
    2506             :     FuncCallContext *funcctx;
    2507             :     Datum       result;
    2508             : 
    2509           8 :     if (SRF_IS_FIRSTCALL())
    2510             :     {
    2511             :         TSVectorStat *stat;
    2512           4 :         text       *txt = PG_GETARG_TEXT_PP(0);
    2513           4 :         text       *ws = PG_GETARG_TEXT_PP(1);
    2514             : 
    2515           4 :         funcctx = SRF_FIRSTCALL_INIT();
    2516           4 :         SPI_connect();
    2517           4 :         stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, ws);
    2518           4 :         PG_FREE_IF_COPY(txt, 0);
    2519           4 :         PG_FREE_IF_COPY(ws, 1);
    2520           4 :         ts_setup_firstcall(fcinfo, funcctx, stat);
    2521           4 :         SPI_finish();
    2522             :     }
    2523             : 
    2524           8 :     funcctx = SRF_PERCALL_SETUP();
    2525           8 :     if ((result = ts_process_call(funcctx)) != (Datum) 0)
    2526           4 :         SRF_RETURN_NEXT(funcctx, result);
    2527           4 :     SRF_RETURN_DONE(funcctx);
    2528             : }
    2529             : 
    2530             : 
    2531             : /*
    2532             :  * Triggers for automatic update of a tsvector column from text column(s)
    2533             :  *
    2534             :  * Trigger arguments are either
    2535             :  *      name of tsvector col, name of tsconfig to use, name(s) of text col(s)
    2536             :  *      name of tsvector col, name of regconfig col, name(s) of text col(s)
    2537             :  * ie, tsconfig can either be specified by name, or indirectly as the
    2538             :  * contents of a regconfig field in the row.  If the name is used, it must
    2539             :  * be explicitly schema-qualified.
    2540             :  */
    2541             : Datum
    2542          12 : tsvector_update_trigger_byid(PG_FUNCTION_ARGS)
    2543             : {
    2544          12 :     return tsvector_update_trigger(fcinfo, false);
    2545             : }
    2546             : 
    2547             : Datum
    2548           0 : tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS)
    2549             : {
    2550           0 :     return tsvector_update_trigger(fcinfo, true);
    2551             : }
    2552             : 
    2553             : static Datum
    2554          12 : tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
    2555             : {
    2556             :     TriggerData *trigdata;
    2557             :     Trigger    *trigger;
    2558             :     Relation    rel;
    2559          12 :     HeapTuple   rettuple = NULL;
    2560             :     int         tsvector_attr_num,
    2561             :                 i;
    2562             :     ParsedText  prs;
    2563             :     Datum       datum;
    2564             :     bool        isnull;
    2565             :     text       *txt;
    2566             :     Oid         cfgId;
    2567             :     bool        update_needed;
    2568             : 
    2569             :     /* Check call context */
    2570          12 :     if (!CALLED_AS_TRIGGER(fcinfo)) /* internal error */
    2571           0 :         elog(ERROR, "tsvector_update_trigger: not fired by trigger manager");
    2572             : 
    2573          12 :     trigdata = (TriggerData *) fcinfo->context;
    2574          12 :     if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
    2575           0 :         elog(ERROR, "tsvector_update_trigger: must be fired for row");
    2576          12 :     if (!TRIGGER_FIRED_BEFORE(trigdata->tg_event))
    2577           0 :         elog(ERROR, "tsvector_update_trigger: must be fired BEFORE event");
    2578             : 
    2579          12 :     if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
    2580             :     {
    2581           8 :         rettuple = trigdata->tg_trigtuple;
    2582           8 :         update_needed = true;
    2583             :     }
    2584           4 :     else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
    2585             :     {
    2586           4 :         rettuple = trigdata->tg_newtuple;
    2587           4 :         update_needed = false;  /* computed below */
    2588             :     }
    2589             :     else
    2590           0 :         elog(ERROR, "tsvector_update_trigger: must be fired for INSERT or UPDATE");
    2591             : 
    2592          12 :     trigger = trigdata->tg_trigger;
    2593          12 :     rel = trigdata->tg_relation;
    2594             : 
    2595          12 :     if (trigger->tgnargs < 3)
    2596           0 :         elog(ERROR, "tsvector_update_trigger: arguments must be tsvector_field, ts_config, text_field1, ...)");
    2597             : 
    2598             :     /* Find the target tsvector column */
    2599          12 :     tsvector_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
    2600          12 :     if (tsvector_attr_num == SPI_ERROR_NOATTRIBUTE)
    2601           0 :         ereport(ERROR,
    2602             :                 (errcode(ERRCODE_UNDEFINED_COLUMN),
    2603             :                  errmsg("tsvector column \"%s\" does not exist",
    2604             :                         trigger->tgargs[0])));
    2605             :     /* This will effectively reject system columns, so no separate test: */
    2606          12 :     if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, tsvector_attr_num),
    2607             :                            TSVECTOROID))
    2608           0 :         ereport(ERROR,
    2609             :                 (errcode(ERRCODE_DATATYPE_MISMATCH),
    2610             :                  errmsg("column \"%s\" is not of tsvector type",
    2611             :                         trigger->tgargs[0])));
    2612             : 
    2613             :     /* Find the configuration to use */
    2614          12 :     if (config_column)
    2615             :     {
    2616             :         int         config_attr_num;
    2617             : 
    2618           0 :         config_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[1]);
    2619           0 :         if (config_attr_num == SPI_ERROR_NOATTRIBUTE)
    2620           0 :             ereport(ERROR,
    2621             :                     (errcode(ERRCODE_UNDEFINED_COLUMN),
    2622             :                      errmsg("configuration column \"%s\" does not exist",
    2623             :                             trigger->tgargs[1])));
    2624           0 :         if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, config_attr_num),
    2625             :                                REGCONFIGOID))
    2626           0 :             ereport(ERROR,
    2627             :                     (errcode(ERRCODE_DATATYPE_MISMATCH),
    2628             :                      errmsg("column \"%s\" is not of regconfig type",
    2629             :                             trigger->tgargs[1])));
    2630             : 
    2631           0 :         datum = SPI_getbinval(rettuple, rel->rd_att, config_attr_num, &isnull);
    2632           0 :         if (isnull)
    2633           0 :             ereport(ERROR,
    2634             :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
    2635             :                      errmsg("configuration column \"%s\" must not be null",
    2636             :                             trigger->tgargs[1])));
    2637           0 :         cfgId = DatumGetObjectId(datum);
    2638             :     }
    2639             :     else
    2640             :     {
    2641             :         List       *names;
    2642             : 
    2643          12 :         names = stringToQualifiedNameList(trigger->tgargs[1]);
    2644             :         /* require a schema so that results are not search path dependent */
    2645          12 :         if (list_length(names) < 2)
    2646           0 :             ereport(ERROR,
    2647             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    2648             :                      errmsg("text search configuration name \"%s\" must be schema-qualified",
    2649             :                             trigger->tgargs[1])));
    2650          12 :         cfgId = get_ts_config_oid(names, false);
    2651             :     }
    2652             : 
    2653             :     /* initialize parse state */
    2654          12 :     prs.lenwords = 32;
    2655          12 :     prs.curwords = 0;
    2656          12 :     prs.pos = 0;
    2657          12 :     prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
    2658             : 
    2659             :     /* find all words in indexable column(s) */
    2660          24 :     for (i = 2; i < trigger->tgnargs; i++)
    2661             :     {
    2662             :         int         numattr;
    2663             : 
    2664          12 :         numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
    2665          12 :         if (numattr == SPI_ERROR_NOATTRIBUTE)
    2666           0 :             ereport(ERROR,
    2667             :                     (errcode(ERRCODE_UNDEFINED_COLUMN),
    2668             :                      errmsg("column \"%s\" does not exist",
    2669             :                             trigger->tgargs[i])));
    2670          12 :         if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, numattr), TEXTOID))
    2671           0 :             ereport(ERROR,
    2672             :                     (errcode(ERRCODE_DATATYPE_MISMATCH),
    2673             :                      errmsg("column \"%s\" is not of a character type",
    2674             :                             trigger->tgargs[i])));
    2675             : 
    2676          12 :         if (bms_is_member(numattr - FirstLowInvalidHeapAttributeNumber, trigdata->tg_updatedcols))
    2677           4 :             update_needed = true;
    2678             : 
    2679          12 :         datum = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
    2680          12 :         if (isnull)
    2681           4 :             continue;
    2682             : 
    2683           8 :         txt = DatumGetTextPP(datum);
    2684             : 
    2685           8 :         parsetext(cfgId, &prs, VARDATA_ANY(txt), VARSIZE_ANY_EXHDR(txt));
    2686             : 
    2687           8 :         if (txt != (text *) DatumGetPointer(datum))
    2688           0 :             pfree(txt);
    2689             :     }
    2690             : 
    2691          12 :     if (update_needed)
    2692             :     {
    2693             :         /* make tsvector value */
    2694          12 :         datum = TSVectorGetDatum(make_tsvector(&prs));
    2695          12 :         isnull = false;
    2696             : 
    2697             :         /* and insert it into tuple */
    2698          12 :         rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
    2699             :                                              1, &tsvector_attr_num,
    2700             :                                              &datum, &isnull);
    2701             : 
    2702          12 :         pfree(DatumGetPointer(datum));
    2703             :     }
    2704             : 
    2705          12 :     return PointerGetDatum(rettuple);
    2706             : }

Generated by: LCOV version 1.13