LCOV - code coverage report
Current view: top level - src/backend/utils/adt - tsvector_op.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19beta1 Lines: 88.9 % 1149 1022
Test Date: 2026-06-27 05:16:41 Functions: 84.9 % 53 45
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * tsvector_op.c
       4              :  *    operations over tsvector
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  *
       8              :  *
       9              :  * IDENTIFICATION
      10              :  *    src/backend/utils/adt/tsvector_op.c
      11              :  *
      12              :  *-------------------------------------------------------------------------
      13              :  */
      14              : #include "postgres.h"
      15              : 
      16              : #include <limits.h>
      17              : 
      18              : #include "access/htup_details.h"
      19              : #include "catalog/namespace.h"
      20              : #include "catalog/pg_type.h"
      21              : #include "commands/trigger.h"
      22              : #include "common/int.h"
      23              : #include "executor/spi.h"
      24              : #include "funcapi.h"
      25              : #include "lib/qunique.h"
      26              : #include "mb/pg_wchar.h"
      27              : #include "miscadmin.h"
      28              : #include "parser/parse_coerce.h"
      29              : #include "tsearch/ts_utils.h"
      30              : #include "utils/array.h"
      31              : #include "utils/builtins.h"
      32              : #include "utils/regproc.h"
      33              : #include "utils/rel.h"
      34              : 
      35              : 
      36              : typedef struct
      37              : {
      38              :     WordEntry  *arrb;
      39              :     WordEntry  *arre;
      40              :     char       *values;
      41              :     char       *operand;
      42              : } CHKVAL;
      43              : 
      44              : 
      45              : typedef struct StatEntry
      46              : {
      47              :     uint32      ndoc;           /* zero indicates that we were already here
      48              :                                  * while walking through the tree */
      49              :     uint32      nentry;
      50              :     struct StatEntry *left;
      51              :     struct StatEntry *right;
      52              :     uint32      lenlexeme;
      53              :     char        lexeme[FLEXIBLE_ARRAY_MEMBER];
      54              : } StatEntry;
      55              : 
      56              : #define STATENTRYHDRSZ  (offsetof(StatEntry, lexeme))
      57              : 
      58              : typedef struct
      59              : {
      60              :     int32       weight;
      61              : 
      62              :     uint32      maxdepth;
      63              : 
      64              :     StatEntry **stack;
      65              :     uint32      stackpos;
      66              : 
      67              :     StatEntry  *root;
      68              : } TSVectorStat;
      69              : 
      70              : 
      71              : static TSTernaryValue TS_execute_recurse(QueryItem *curitem, void *arg,
      72              :                                          uint32 flags,
      73              :                                          TSExecuteCallback chkcond);
      74              : static bool TS_execute_locations_recurse(QueryItem *curitem,
      75              :                                          void *arg,
      76              :                                          TSExecuteCallback chkcond,
      77              :                                          List **locations);
      78              : static int  tsvector_bsearch(const TSVectorData *tsv, char *lexeme, int lexeme_len);
      79              : static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
      80              : 
      81              : 
      82              : /*
      83              :  * Order: haspos, len, word, for all positions (pos, weight)
      84              :  */
      85              : static int
      86          233 : silly_cmp_tsvector(const TSVectorData *a, const TSVectorData *b)
      87              : {
      88          233 :     if (VARSIZE(a) < VARSIZE(b))
      89            0 :         return -1;
      90          233 :     else if (VARSIZE(a) > VARSIZE(b))
      91            0 :         return 1;
      92          233 :     else if (a->size < b->size)
      93            0 :         return -1;
      94          233 :     else if (a->size > b->size)
      95            0 :         return 1;
      96              :     else
      97              :     {
      98          233 :         const WordEntry *aptr = ARRPTR(a);
      99          233 :         const WordEntry *bptr = ARRPTR(b);
     100          233 :         int         i = 0;
     101              :         int         res;
     102              : 
     103              : 
     104          268 :         for (i = 0; i < a->size; i++)
     105              :         {
     106          235 :             if (aptr->haspos != bptr->haspos)
     107              :             {
     108            0 :                 return (aptr->haspos > bptr->haspos) ? -1 : 1;
     109              :             }
     110          235 :             else if ((res = tsCompareString(STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) != 0)
     111              :             {
     112          200 :                 return res;
     113              :             }
     114           35 :             else if (aptr->haspos)
     115              :             {
     116           32 :                 WordEntryPos *ap = POSDATAPTR(a, aptr);
     117           32 :                 WordEntryPos *bp = POSDATAPTR(b, bptr);
     118              :                 int         j;
     119              : 
     120           32 :                 if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
     121            0 :                     return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;
     122              : 
     123           64 :                 for (j = 0; j < POSDATALEN(a, aptr); j++)
     124              :                 {
     125           32 :                     if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
     126              :                     {
     127            0 :                         return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1;
     128              :                     }
     129           32 :                     else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp))
     130              :                     {
     131            0 :                         return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1;
     132              :                     }
     133           32 :                     ap++, bp++;
     134              :                 }
     135              :             }
     136              : 
     137           35 :             aptr++;
     138           35 :             bptr++;
     139              :         }
     140              :     }
     141              : 
     142           33 :     return 0;
     143              : }
     144              : 
     145              : #define TSVECTORCMPFUNC( type, action, ret )            \
     146              : Datum                                                   \
     147              : tsvector_##type(PG_FUNCTION_ARGS)                       \
     148              : {                                                       \
     149              :     TSVector    a = PG_GETARG_TSVECTOR(0);              \
     150              :     TSVector    b = PG_GETARG_TSVECTOR(1);              \
     151              :     int         res = silly_cmp_tsvector(a, b);         \
     152              :     PG_FREE_IF_COPY(a,0);                               \
     153              :     PG_FREE_IF_COPY(b,1);                               \
     154              :     PG_RETURN_##ret( res action 0 );                    \
     155              : }   \
     156              : /* keep compiler quiet - no extra ; */                  \
     157              : extern int no_such_variable
     158              : 
     159            0 : TSVECTORCMPFUNC(lt, <, BOOL);
     160            0 : TSVECTORCMPFUNC(le, <=, BOOL);
     161            1 : TSVECTORCMPFUNC(eq, ==, BOOL);
     162            0 : TSVECTORCMPFUNC(ge, >=, BOOL);
     163            0 : TSVECTORCMPFUNC(gt, >, BOOL);
     164            0 : TSVECTORCMPFUNC(ne, !=, BOOL);
     165          232 : TSVECTORCMPFUNC(cmp, +, INT32);
     166              : 
     167              : Datum
     168           73 : tsvector_strip(PG_FUNCTION_ARGS)
     169              : {
     170           73 :     TSVector    in = PG_GETARG_TSVECTOR(0);
     171              :     TSVector    out;
     172              :     int         i,
     173           73 :                 len = 0;
     174           73 :     WordEntry  *arrin = ARRPTR(in),
     175              :                *arrout;
     176              :     char       *cur;
     177              : 
     178          261 :     for (i = 0; i < in->size; i++)
     179          188 :         len += arrin[i].len;
     180              : 
     181           73 :     len = CALCDATASIZE(in->size, len);
     182           73 :     out = (TSVector) palloc0(len);
     183           73 :     SET_VARSIZE(out, len);
     184           73 :     out->size = in->size;
     185           73 :     arrout = ARRPTR(out);
     186           73 :     cur = STRPTR(out);
     187          261 :     for (i = 0; i < in->size; i++)
     188              :     {
     189          188 :         memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
     190          188 :         arrout[i].haspos = 0;
     191          188 :         arrout[i].len = arrin[i].len;
     192          188 :         arrout[i].pos = cur - STRPTR(out);
     193          188 :         cur += arrout[i].len;
     194              :     }
     195              : 
     196           73 :     PG_FREE_IF_COPY(in, 0);
     197           73 :     PG_RETURN_POINTER(out);
     198              : }
     199              : 
     200              : Datum
     201            7 : tsvector_length(PG_FUNCTION_ARGS)
     202              : {
     203            7 :     TSVector    in = PG_GETARG_TSVECTOR(0);
     204            7 :     int32       ret = in->size;
     205              : 
     206            7 :     PG_FREE_IF_COPY(in, 0);
     207            7 :     PG_RETURN_INT32(ret);
     208              : }
     209              : 
     210              : static int
     211           48 : parse_weight(char cw)
     212              : {
     213              :     int         w;
     214              : 
     215           48 :     switch (cw)
     216              :     {
     217           14 :         case 'A':
     218              :         case 'a':
     219           14 :             w = 3;
     220           14 :             break;
     221            4 :         case 'B':
     222              :         case 'b':
     223            4 :             w = 2;
     224            4 :             break;
     225           30 :         case 'C':
     226              :         case 'c':
     227           30 :             w = 1;
     228           30 :             break;
     229            0 :         case 'D':
     230              :         case 'd':
     231            0 :             w = 0;
     232            0 :             break;
     233            0 :         default:
     234              :             /* Avoid printing non-ASCII bytes, else we have encoding issues */
     235            0 :             if (cw >= ' ' && cw < 0x7f)
     236            0 :                 ereport(ERROR,
     237              :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     238              :                          errmsg("unrecognized weight: \"%c\"", cw)));
     239              :             else                /* use \ooo format, like charout() */
     240            0 :                 ereport(ERROR,
     241              :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     242              :                          errmsg("unrecognized weight: \"\\%03o\"",
     243              :                                 (unsigned char) cw)));
     244              :     }
     245           48 :     return w;
     246              : }
     247              : 
     248              : 
     249              : Datum
     250           10 : tsvector_setweight(PG_FUNCTION_ARGS)
     251              : {
     252           10 :     TSVector    in = PG_GETARG_TSVECTOR(0);
     253           10 :     char        cw = PG_GETARG_CHAR(1);
     254              :     TSVector    out;
     255              :     int         i,
     256              :                 j;
     257              :     WordEntry  *entry;
     258              :     WordEntryPos *p;
     259           10 :     int         w = parse_weight(cw);
     260              : 
     261           10 :     out = (TSVector) palloc(VARSIZE(in));
     262           10 :     memcpy(out, in, VARSIZE(in));
     263           10 :     entry = ARRPTR(out);
     264           10 :     i = out->size;
     265           50 :     while (i--)
     266              :     {
     267           40 :         if ((j = POSDATALEN(out, entry)) != 0)
     268              :         {
     269           40 :             p = POSDATAPTR(out, entry);
     270          140 :             while (j--)
     271              :             {
     272          100 :                 WEP_SETWEIGHT(*p, w);
     273          100 :                 p++;
     274              :             }
     275              :         }
     276           40 :         entry++;
     277              :     }
     278              : 
     279           10 :     PG_FREE_IF_COPY(in, 0);
     280           10 :     PG_RETURN_POINTER(out);
     281              : }
     282              : 
     283              : /*
     284              :  * setweight(tsin tsvector, char_weight "char", lexemes "text"[])
     285              :  *
     286              :  * Assign weight w to elements of tsin that are listed in lexemes.
     287              :  */
     288              : Datum
     289           20 : tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
     290              : {
     291           20 :     TSVector    tsin = PG_GETARG_TSVECTOR(0);
     292           20 :     char        char_weight = PG_GETARG_CHAR(1);
     293           20 :     ArrayType  *lexemes = PG_GETARG_ARRAYTYPE_P(2);
     294              : 
     295              :     TSVector    tsout;
     296              :     int         i,
     297              :                 j,
     298              :                 nlexemes,
     299              :                 weight;
     300              :     WordEntry  *entry;
     301              :     Datum      *dlexemes;
     302              :     bool       *nulls;
     303              : 
     304           20 :     weight = parse_weight(char_weight);
     305              : 
     306           20 :     tsout = (TSVector) palloc(VARSIZE(tsin));
     307           20 :     memcpy(tsout, tsin, VARSIZE(tsin));
     308           20 :     entry = ARRPTR(tsout);
     309              : 
     310           20 :     deconstruct_array_builtin(lexemes, TEXTOID, &dlexemes, &nulls, &nlexemes);
     311              : 
     312              :     /*
     313              :      * Assuming that lexemes array is significantly shorter than tsvector we
     314              :      * can iterate through lexemes performing binary search of each lexeme
     315              :      * from lexemes in tsvector.
     316              :      */
     317           60 :     for (i = 0; i < nlexemes; i++)
     318              :     {
     319              :         char       *lex;
     320              :         int         lex_len,
     321              :                     lex_pos;
     322              : 
     323              :         /* Ignore null array elements, they surely don't match */
     324           40 :         if (nulls[i])
     325            5 :             continue;
     326              : 
     327           35 :         lex = VARDATA(DatumGetPointer(dlexemes[i]));
     328           35 :         lex_len = VARSIZE(DatumGetPointer(dlexemes[i])) - VARHDRSZ;
     329           35 :         lex_pos = tsvector_bsearch(tsout, lex, lex_len);
     330              : 
     331           35 :         if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0)
     332              :         {
     333           20 :             WordEntryPos *p = POSDATAPTR(tsout, entry + lex_pos);
     334              : 
     335           65 :             while (j--)
     336              :             {
     337           45 :                 WEP_SETWEIGHT(*p, weight);
     338           45 :                 p++;
     339              :             }
     340              :         }
     341              :     }
     342              : 
     343           20 :     PG_FREE_IF_COPY(tsin, 0);
     344           20 :     PG_FREE_IF_COPY(lexemes, 2);
     345              : 
     346           20 :     PG_RETURN_POINTER(tsout);
     347              : }
     348              : 
     349              : #define compareEntry(pa, a, pb, b) \
     350              :     tsCompareString((pa) + (a)->pos, (a)->len,    \
     351              :                     (pb) + (b)->pos, (b)->len,    \
     352              :                     false)
     353              : 
     354              : /*
     355              :  * Add positions from src to dest after offsetting them by maxpos.
     356              :  * Return the number added (might be less than expected due to overflow)
     357              :  */
     358              : static int32
     359           10 : add_pos(TSVector src, WordEntry *srcptr,
     360              :         TSVector dest, WordEntry *destptr,
     361              :         int32 maxpos)
     362              : {
     363           10 :     uint16     *clen = &_POSVECPTR(dest, destptr)->npos;
     364              :     int         i;
     365           10 :     uint16      slen = POSDATALEN(src, srcptr),
     366              :                 startlen;
     367           10 :     WordEntryPos *spos = POSDATAPTR(src, srcptr),
     368           10 :                *dpos = POSDATAPTR(dest, destptr);
     369              : 
     370           10 :     if (!destptr->haspos)
     371            0 :         *clen = 0;
     372              : 
     373           10 :     startlen = *clen;
     374           10 :     for (i = 0;
     375           20 :          i < slen && *clen < MAXNUMPOS &&
     376           10 :          (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1);
     377           10 :          i++)
     378              :     {
     379           10 :         WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
     380           10 :         WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
     381           10 :         (*clen)++;
     382              :     }
     383              : 
     384           10 :     if (*clen != startlen)
     385           10 :         destptr->haspos = 1;
     386           10 :     return *clen - startlen;
     387              : }
     388              : 
     389              : /*
     390              :  * Perform binary search of given lexeme in TSVector.
     391              :  * Returns lexeme position in TSVector's entry array or -1 if lexeme wasn't
     392              :  * found.
     393              :  */
     394              : static int
     395          165 : tsvector_bsearch(const TSVectorData *tsv, char *lexeme, int lexeme_len)
     396              : {
     397          165 :     const WordEntry *arrin = ARRPTR(tsv);
     398          165 :     int         StopLow = 0,
     399          165 :                 StopHigh = tsv->size,
     400              :                 StopMiddle,
     401              :                 cmp;
     402              : 
     403          435 :     while (StopLow < StopHigh)
     404              :     {
     405          385 :         StopMiddle = (StopLow + StopHigh) / 2;
     406              : 
     407          385 :         cmp = tsCompareString(lexeme, lexeme_len,
     408          385 :                               STRPTR(tsv) + arrin[StopMiddle].pos,
     409          385 :                               arrin[StopMiddle].len,
     410              :                               false);
     411              : 
     412          385 :         if (cmp < 0)
     413          180 :             StopHigh = StopMiddle;
     414          205 :         else if (cmp > 0)
     415           90 :             StopLow = StopMiddle + 1;
     416              :         else                    /* found it */
     417          115 :             return StopMiddle;
     418              :     }
     419              : 
     420           50 :     return -1;
     421              : }
     422              : 
     423              : /*
     424              :  * qsort comparator functions
     425              :  */
     426              : 
     427              : static int
     428           65 : compare_int(const void *va, const void *vb)
     429              : {
     430           65 :     int         a = *((const int *) va);
     431           65 :     int         b = *((const int *) vb);
     432              : 
     433           65 :     return pg_cmp_s32(a, b);
     434              : }
     435              : 
     436              : static int
     437           85 : compare_text_lexemes(const void *va, const void *vb)
     438              : {
     439           85 :     Datum       a = *((const Datum *) va);
     440           85 :     Datum       b = *((const Datum *) vb);
     441           85 :     char       *alex = VARDATA_ANY(DatumGetPointer(a));
     442           85 :     int         alex_len = VARSIZE_ANY_EXHDR(DatumGetPointer(a));
     443           85 :     char       *blex = VARDATA_ANY(DatumGetPointer(b));
     444           85 :     int         blex_len = VARSIZE_ANY_EXHDR(DatumGetPointer(b));
     445              : 
     446           85 :     return tsCompareString(alex, alex_len, blex, blex_len, false);
     447              : }
     448              : 
     449              : /*
     450              :  * Internal routine to delete lexemes from TSVector by array of offsets.
     451              :  *
     452              :  * int *indices_to_delete -- array of lexeme offsets to delete (modified here!)
     453              :  * int indices_count -- size of that array
     454              :  *
     455              :  * Returns new TSVector without given lexemes along with their positions
     456              :  * and weights.
     457              :  */
     458              : static TSVector
     459           55 : tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
     460              :                            int indices_count)
     461              : {
     462              :     TSVector    tsout;
     463           55 :     WordEntry  *arrin = ARRPTR(tsv),
     464              :                *arrout;
     465           55 :     char       *data = STRPTR(tsv),
     466              :                *dataout;
     467              :     int         i,              /* index in arrin */
     468              :                 j,              /* index in arrout */
     469              :                 k,              /* index in indices_to_delete */
     470              :                 curoff;         /* index in dataout area */
     471              : 
     472              :     /*
     473              :      * Sort the filter array to simplify membership checks below.  Also, get
     474              :      * rid of any duplicate entries, so that we can assume that indices_count
     475              :      * is exactly equal to the number of lexemes that will be removed.
     476              :      */
     477           55 :     if (indices_count > 1)
     478              :     {
     479           25 :         qsort(indices_to_delete, indices_count, sizeof(int), compare_int);
     480           25 :         indices_count = qunique(indices_to_delete, indices_count, sizeof(int),
     481              :                                 compare_int);
     482              :     }
     483              : 
     484              :     /*
     485              :      * Here we overestimate tsout size, since we don't know how much space is
     486              :      * used by the deleted lexeme(s).  We will set exact size below.
     487              :      */
     488           55 :     tsout = (TSVector) palloc0(VARSIZE(tsv));
     489              : 
     490              :     /* This count must be correct because STRPTR(tsout) relies on it. */
     491           55 :     tsout->size = tsv->size - indices_count;
     492              : 
     493              :     /*
     494              :      * Copy tsv to tsout, skipping lexemes listed in indices_to_delete.
     495              :      */
     496           55 :     arrout = ARRPTR(tsout);
     497           55 :     dataout = STRPTR(tsout);
     498           55 :     curoff = 0;
     499          330 :     for (i = j = k = 0; i < tsv->size; i++)
     500              :     {
     501              :         /*
     502              :          * If current i is present in indices_to_delete, skip this lexeme.
     503              :          * Since indices_to_delete is already sorted, we only need to check
     504              :          * the current (k'th) entry.
     505              :          */
     506          275 :         if (k < indices_count && i == indices_to_delete[k])
     507              :         {
     508           80 :             k++;
     509           80 :             continue;
     510              :         }
     511              : 
     512              :         /* Copy lexeme and its positions and weights */
     513          195 :         memcpy(dataout + curoff, data + arrin[i].pos, arrin[i].len);
     514          195 :         arrout[j].haspos = arrin[i].haspos;
     515          195 :         arrout[j].len = arrin[i].len;
     516          195 :         arrout[j].pos = curoff;
     517          195 :         curoff += arrin[i].len;
     518          195 :         if (arrin[i].haspos)
     519              :         {
     520          130 :             int         len = POSDATALEN(tsv, arrin + i) * sizeof(WordEntryPos)
     521          130 :                 + sizeof(uint16);
     522              : 
     523          130 :             curoff = SHORTALIGN(curoff);
     524          130 :             memcpy(dataout + curoff,
     525          130 :                    STRPTR(tsv) + SHORTALIGN(arrin[i].pos + arrin[i].len),
     526              :                    len);
     527          130 :             curoff += len;
     528              :         }
     529              : 
     530          195 :         j++;
     531              :     }
     532              : 
     533              :     /*
     534              :      * k should now be exactly equal to indices_count. If it isn't then the
     535              :      * caller provided us with indices outside of [0, tsv->size) range and
     536              :      * estimation of tsout's size is wrong.
     537              :      */
     538              :     Assert(k == indices_count);
     539              : 
     540           55 :     SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, curoff));
     541           55 :     return tsout;
     542              : }
     543              : 
     544              : /*
     545              :  * Delete given lexeme from tsvector.
     546              :  * Implementation of user-level ts_delete(tsvector, text).
     547              :  */
     548              : Datum
     549           30 : tsvector_delete_str(PG_FUNCTION_ARGS)
     550              : {
     551           30 :     TSVector    tsin = PG_GETARG_TSVECTOR(0),
     552              :                 tsout;
     553           30 :     text       *tlexeme = PG_GETARG_TEXT_PP(1);
     554           30 :     char       *lexeme = VARDATA_ANY(tlexeme);
     555           30 :     int         lexeme_len = VARSIZE_ANY_EXHDR(tlexeme),
     556              :                 skip_index;
     557              : 
     558           30 :     if ((skip_index = tsvector_bsearch(tsin, lexeme, lexeme_len)) == -1)
     559           10 :         PG_RETURN_POINTER(tsin);
     560              : 
     561           20 :     tsout = tsvector_delete_by_indices(tsin, &skip_index, 1);
     562              : 
     563           20 :     PG_FREE_IF_COPY(tsin, 0);
     564           20 :     PG_FREE_IF_COPY(tlexeme, 1);
     565           20 :     PG_RETURN_POINTER(tsout);
     566              : }
     567              : 
     568              : /*
     569              :  * Delete given array of lexemes from tsvector.
     570              :  * Implementation of user-level ts_delete(tsvector, text[]).
     571              :  */
     572              : Datum
     573           35 : tsvector_delete_arr(PG_FUNCTION_ARGS)
     574              : {
     575           35 :     TSVector    tsin = PG_GETARG_TSVECTOR(0),
     576              :                 tsout;
     577           35 :     ArrayType  *lexemes = PG_GETARG_ARRAYTYPE_P(1);
     578              :     int         i,
     579              :                 nlex,
     580              :                 skip_count,
     581              :                *skip_indices;
     582              :     Datum      *dlexemes;
     583              :     bool       *nulls;
     584              : 
     585           35 :     deconstruct_array_builtin(lexemes, TEXTOID, &dlexemes, &nulls, &nlex);
     586              : 
     587              :     /*
     588              :      * In typical use case array of lexemes to delete is relatively small. So
     589              :      * here we optimize things for that scenario: iterate through lexarr
     590              :      * performing binary search of each lexeme from lexarr in tsvector.
     591              :      */
     592           35 :     skip_indices = palloc0(nlex * sizeof(int));
     593          140 :     for (i = skip_count = 0; i < nlex; i++)
     594              :     {
     595              :         char       *lex;
     596              :         int         lex_len,
     597              :                     lex_pos;
     598              : 
     599              :         /* Ignore null array elements, they surely don't match */
     600          105 :         if (nulls[i])
     601            5 :             continue;
     602              : 
     603          100 :         lex = VARDATA(DatumGetPointer(dlexemes[i]));
     604          100 :         lex_len = VARSIZE(DatumGetPointer(dlexemes[i])) - VARHDRSZ;
     605          100 :         lex_pos = tsvector_bsearch(tsin, lex, lex_len);
     606              : 
     607          100 :         if (lex_pos >= 0)
     608           65 :             skip_indices[skip_count++] = lex_pos;
     609              :     }
     610              : 
     611           35 :     tsout = tsvector_delete_by_indices(tsin, skip_indices, skip_count);
     612              : 
     613           35 :     pfree(skip_indices);
     614           35 :     PG_FREE_IF_COPY(tsin, 0);
     615           35 :     PG_FREE_IF_COPY(lexemes, 1);
     616              : 
     617           35 :     PG_RETURN_POINTER(tsout);
     618              : }
     619              : 
     620              : /*
     621              :  * Set-returning function: expand a tsvector into one row per lexeme, with columns:
     622              :  *     lexeme: lexeme text
     623              :  *     positions: int2 array of lexeme positions (NULL if the lexeme has none)
     624              :  *     weights: text array of one-character weights, parallel to positions (NULL likewise)
     625              :  */
     626              : Datum
     627          120 : tsvector_unnest(PG_FUNCTION_ARGS)
     628              : {
     629              :     FuncCallContext *funcctx;
     630              :     TSVector    tsin;
     631              :     /* First call only: set up the tuple descriptor and stash the input in user_fctx */
     632          120 :     if (SRF_IS_FIRSTCALL())
     633              :     {
     634              :         MemoryContext oldcontext;
     635              :         TupleDesc   tupdesc;
     636              : 
     637           20 :         funcctx = SRF_FIRSTCALL_INIT();
     638           20 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
     639              :         /* Template descriptor: (lexeme text, positions int2[], weights text[]) */
     640           20 :         tupdesc = CreateTemplateTupleDesc(3);
     641           20 :         TupleDescInitEntry(tupdesc, (AttrNumber) 1, "lexeme",
     642              :                            TEXTOID, -1, 0);
     643           20 :         TupleDescInitEntry(tupdesc, (AttrNumber) 2, "positions",
     644              :                            INT2ARRAYOID, -1, 0);
     645           20 :         TupleDescInitEntry(tupdesc, (AttrNumber) 3, "weights",
     646              :                            TEXTARRAYOID, -1, 0);
     647           20 :         if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
     648            0 :             elog(ERROR, "return type must be a row type");
     649           20 :         TupleDescFinalize(tupdesc);
     650           20 :         funcctx->tuple_desc = tupdesc;
     651              : 
     652           20 :         funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);
     653              : 
     654           20 :         MemoryContextSwitchTo(oldcontext);
     655              :     }
     656              : 
     657          120 :     funcctx = SRF_PERCALL_SETUP();
     658          120 :     tsin = (TSVector) funcctx->user_fctx;
     659              :     /* call_cntr selects the lexeme to emit; once past the last one we are done */
     660          120 :     if (funcctx->call_cntr < tsin->size)
     661              :     {
     662          100 :         WordEntry  *arrin = ARRPTR(tsin);
     663          100 :         char       *data = STRPTR(tsin);
     664              :         HeapTuple   tuple;
     665              :         int         j,
     666          100 :                     i = funcctx->call_cntr;
     667          100 :         bool        nulls[] = {false, false, false};
     668              :         Datum       values[3];
     669              : 
     670          100 :         values[0] = PointerGetDatum(cstring_to_text_with_len(data + arrin[i].pos, arrin[i].len));
     671              : 
     672          100 :         if (arrin[i].haspos)
     673              :         {
     674              :             WordEntryPosVector *posv;
     675              :             Datum      *positions;
     676              :             Datum      *weights;
     677              :             char        weight;
     678              : 
     679              :             /*
     680              :              * Internally tsvector stores position and weight in the same
     681              :              * uint16 (2 bits for weight, 14 for position). Here we split
     682              :              * them into two separate, parallel arrays.
     683              :              */
     684           60 :             posv = _POSVECPTR(tsin, arrin + i);
     685           60 :             positions = palloc(posv->npos * sizeof(Datum));
     686           60 :             weights = palloc(posv->npos * sizeof(Datum));
     687          168 :             for (j = 0; j < posv->npos; j++)
     688              :             {
     689          108 :                 positions[j] = Int16GetDatum(WEP_GETPOS(posv->pos[j]));
     690          108 :                 weight = 'D' - WEP_GETWEIGHT(posv->pos[j]);
     691          108 :                 weights[j] = PointerGetDatum(cstring_to_text_with_len(&weight,
     692              :                                                                       1));
     693              :             }
     694              : 
     695           60 :             values[1] = PointerGetDatum(construct_array_builtin(positions, posv->npos, INT2OID));
     696           60 :             values[2] = PointerGetDatum(construct_array_builtin(weights, posv->npos, TEXTOID));
     697              :         }
     698              :         else
     699              :         {
     700           40 :             nulls[1] = nulls[2] = true;
     701              :         }
     702              : 
     703          100 :         tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
     704          100 :         SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
     705              :     }
     706              :     else
     707              :     {
     708           20 :         SRF_RETURN_DONE(funcctx);
     709              :     }
     710              : }
     711              : 
     712              : /*
     713              :  * Convert a tsvector to a text array of its lexemes (position data is not included).
     714              :  */
     715              : Datum
     716           10 : tsvector_to_array(PG_FUNCTION_ARGS)
     717              : {
     718           10 :     TSVector    tsin = PG_GETARG_TSVECTOR(0);
     719           10 :     WordEntry  *arrin = ARRPTR(tsin);
     720              :     Datum      *elements;
     721              :     int         i;
     722              :     ArrayType  *array;
     723              : 
     724           10 :     elements = palloc(tsin->size * sizeof(Datum));
     725              :     /* Make one text datum per lexeme, in the order the tsvector stores them */
     726           60 :     for (i = 0; i < tsin->size; i++)
     727              :     {
     728           50 :         elements[i] = PointerGetDatum(cstring_to_text_with_len(STRPTR(tsin) + arrin[i].pos,
     729              :                                                                arrin[i].len));
     730              :     }
     731              : 
     732           10 :     array = construct_array_builtin(elements, tsin->size, TEXTOID);
     733              : 
     734           10 :     pfree(elements);
     735           10 :     PG_FREE_IF_COPY(tsin, 0);
     736           10 :     PG_RETURN_POINTER(array);
     737              : }
     738              : 
     739              : /*
     740              :  * Build a tsvector (without position data) from a text array of lexemes.
     741              :  */
     742              : Datum
     743           18 : array_to_tsvector(PG_FUNCTION_ARGS)
     744              : {
     745           18 :     ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
     746              :     TSVector    tsout;
     747              :     Datum      *dlexemes;
     748              :     WordEntry  *arrout;
     749              :     bool       *nulls;
     750              :     int         nitems,
     751              :                 i,
     752              :                 tslen,
     753           18 :                 datalen = 0;
     754              :     char       *cur;
     755              : 
     756           18 :     deconstruct_array_builtin(v, TEXTOID, &dlexemes, &nulls, &nitems);
     757              : 
     758              :     /*
     759              :      * Reject nulls and zero-length strings (maybe we should just ignore
     760              :      * them instead?)
     761              :      */
     762           95 :     for (i = 0; i < nitems; i++)
     763              :     {
     764           85 :         if (nulls[i])
     765            4 :             ereport(ERROR,
     766              :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
     767              :                      errmsg("lexeme array may not contain nulls")));
     768              : 
     769           81 :         if (VARSIZE(DatumGetPointer(dlexemes[i])) - VARHDRSZ == 0)
     770            4 :             ereport(ERROR,
     771              :                     (errcode(ERRCODE_ZERO_LENGTH_CHARACTER_STRING),
     772              :                      errmsg("lexeme array may not contain empty strings")));
     773              :     }
     774              : 
     775              :     /* Sort and de-dup, because this is required for a valid tsvector. */
     776           10 :     if (nitems > 1)
     777              :     {
     778           10 :         qsort(dlexemes, nitems, sizeof(Datum), compare_text_lexemes);
     779           10 :         nitems = qunique(dlexemes, nitems, sizeof(Datum),
     780              :                          compare_text_lexemes);
     781              :     }
     782              :     /* NOTE(review): no check here that lex_len/datalen fit the WordEntry len/pos fields -- confirm */
     783              :     /* Calculate space needed for surviving lexemes. */
     784           50 :     for (i = 0; i < nitems; i++)
     785           40 :         datalen += VARSIZE(DatumGetPointer(dlexemes[i])) - VARHDRSZ;
     786           10 :     tslen = CALCDATASIZE(nitems, datalen);
     787              : 
     788              :     /* Allocate and fill tsvector. */
     789           10 :     tsout = (TSVector) palloc0(tslen);
     790           10 :     SET_VARSIZE(tsout, tslen);
     791           10 :     tsout->size = nitems;
     792              : 
     793           10 :     arrout = ARRPTR(tsout);
     794           10 :     cur = STRPTR(tsout);
     795           50 :     for (i = 0; i < nitems; i++)
     796              :     {
     797           40 :         char       *lex = VARDATA(DatumGetPointer(dlexemes[i]));
     798           40 :         int         lex_len = VARSIZE(DatumGetPointer(dlexemes[i])) - VARHDRSZ;
     799              :         /* Lexeme bytes are packed back to back; pos is the offset from STRPTR(tsout) */
     800           40 :         memcpy(cur, lex, lex_len);
     801           40 :         arrout[i].haspos = 0;
     802           40 :         arrout[i].len = lex_len;
     803           40 :         arrout[i].pos = cur - STRPTR(tsout);
     804           40 :         cur += lex_len;
     805              :     }
     806              : 
     807           10 :     PG_FREE_IF_COPY(v, 0);
     808           10 :     PG_RETURN_POINTER(tsout);
     809              : }
     810              : 
     811              : /*
     812              :  * ts_filter(): keep only positions having one of the given weights; lexemes left without positions are dropped.
     813              :  */
     814              : Datum
     815           14 : tsvector_filter(PG_FUNCTION_ARGS)
     816              : {
     817           14 :     TSVector    tsin = PG_GETARG_TSVECTOR(0),
     818              :                 tsout;
     819           14 :     ArrayType  *weights = PG_GETARG_ARRAYTYPE_P(1);
     820           14 :     WordEntry  *arrin = ARRPTR(tsin),
     821              :                *arrout;
     822           14 :     char       *datain = STRPTR(tsin),
     823              :                *dataout;
     824              :     Datum      *dweights;
     825              :     bool       *nulls;
     826              :     int         nweights;
     827              :     int         i,
     828              :                 j;
     829           14 :     int         cur_pos = 0;
     830           14 :     char        mask = 0;
     831              : 
     832           14 :     deconstruct_array_builtin(weights, CHAROID, &dweights, &nulls, &nweights);
     833              :     /* Build a bitmask with one bit set per requested weight */
     834           32 :     for (i = 0; i < nweights; i++)
     835              :     {
     836              :         char        char_weight;
     837              : 
     838           22 :         if (nulls[i])
     839            4 :             ereport(ERROR,
     840              :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
     841              :                      errmsg("weight array may not contain nulls")));
     842              : 
     843           18 :         char_weight = DatumGetChar(dweights[i]);
     844           18 :         mask |= 1 << parse_weight(char_weight);
     845              :     }
     846              :     /* Allocate an output buffer as large as the input; its size is trimmed at the end */
     847           10 :     tsout = (TSVector) palloc0(VARSIZE(tsin));
     848           10 :     tsout->size = tsin->size;
     849           10 :     arrout = ARRPTR(tsout);
     850           10 :     dataout = STRPTR(tsout);
     851              : 
     852           90 :     for (i = j = 0; i < tsin->size; i++)
     853              :     {
     854              :         WordEntryPosVector *posvin,
     855              :                    *posvout;
     856           80 :         int         npos = 0;
     857              :         int         k;
     858              : 
     859           80 :         if (!arrin[i].haspos)
     860           25 :             continue;
     861              :         /* Matching positions are written where this lexeme's position vector would sit if kept */
     862           55 :         posvin = _POSVECPTR(tsin, arrin + i);
     863           55 :         posvout = (WordEntryPosVector *)
     864           55 :             (dataout + SHORTALIGN(cur_pos + arrin[i].len));
     865              : 
     866          110 :         for (k = 0; k < posvin->npos; k++)
     867              :         {
     868           55 :             if (mask & (1 << WEP_GETWEIGHT(posvin->pos[k])))
     869           25 :                 posvout->pos[npos++] = posvin->pos[k];
     870              :         }
     871              : 
     872              :         /* if no matching positions were found, skip this lexeme */
     873           55 :         if (!npos)
     874           30 :             continue;
     875              : 
     876           25 :         arrout[j].haspos = true;
     877           25 :         arrout[j].len = arrin[i].len;
     878           25 :         arrout[j].pos = cur_pos;
     879              : 
     880           25 :         memcpy(dataout + cur_pos, datain + arrin[i].pos, arrin[i].len);
     881           25 :         posvout->npos = npos;
     882           25 :         cur_pos += SHORTALIGN(arrin[i].len);
     883           25 :         cur_pos += POSDATALEN(tsout, arrout + j) * sizeof(WordEntryPos) +
     884              :             sizeof(uint16);
     885           25 :         j++;
     886              :     }
     887              :     /* Shrinking size moves STRPTR(tsout); slide the string data down to the new location */
     888           10 :     tsout->size = j;
     889           10 :     if (dataout != STRPTR(tsout))
     890           10 :         memmove(STRPTR(tsout), dataout, cur_pos);
     891              : 
     892           10 :     SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, cur_pos));
     893              : 
     894           10 :     PG_FREE_IF_COPY(tsin, 0);
     895           10 :     PG_RETURN_POINTER(tsout);
     896              : }
     897              : /* Concatenate two tsvectors: merge their sorted entries, offsetting in2's positions by in1's max position */
     898              : Datum
     899            9 : tsvector_concat(PG_FUNCTION_ARGS)
     900              : {
     901            9 :     TSVector    in1 = PG_GETARG_TSVECTOR(0);
     902            9 :     TSVector    in2 = PG_GETARG_TSVECTOR(1);
     903              :     TSVector    out;
     904              :     WordEntry  *ptr;
     905              :     WordEntry  *ptr1,
     906              :                *ptr2;
     907              :     WordEntryPos *p;
     908            9 :     int         maxpos = 0,
     909              :                 i,
     910              :                 j,
     911              :                 i1,
     912              :                 i2,
     913              :                 dataoff,
     914              :                 output_bytes,
     915              :                 output_size;
     916              :     char       *data,
     917              :                *data1,
     918              :                *data2;
     919              : 
     920              :     /* Get max position in in1; we'll need this to offset in2's positions */
     921            9 :     ptr = ARRPTR(in1);
     922            9 :     i = in1->size;
     923           23 :     while (i--)
     924              :     {
     925           14 :         if ((j = POSDATALEN(in1, ptr)) != 0)
     926              :         {
     927           14 :             p = POSDATAPTR(in1, ptr);
     928           28 :             while (j--)
     929              :             {
     930           14 :                 if (WEP_GETPOS(*p) > maxpos)
     931            9 :                     maxpos = WEP_GETPOS(*p);
     932           14 :                 p++;
     933              :             }
     934              :         }
     935           14 :         ptr++;
     936              :     }
     937              : 
     938            9 :     ptr1 = ARRPTR(in1);
     939            9 :     ptr2 = ARRPTR(in2);
     940            9 :     data1 = STRPTR(in1);
     941            9 :     data2 = STRPTR(in2);
     942            9 :     i1 = in1->size;
     943            9 :     i2 = in2->size;
     944              : 
     945              :     /*
     946              :      * Conservative estimate of space needed.  We might need all the data in
     947              :      * both inputs, and conceivably add a pad byte before position data for
     948              :      * each item where there was none before.
     949              :      */
     950            9 :     output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;
     951              : 
     952            9 :     out = (TSVector) palloc0(output_bytes);
     953            9 :     SET_VARSIZE(out, output_bytes);
     954              : 
     955              :     /*
     956              :      * We must make out->size valid so that STRPTR(out) is sensible.  We'll
     957              :      * collapse out any unused space at the end.
     958              :      */
     959            9 :     out->size = in1->size + in2->size;
     960              : 
     961            9 :     ptr = ARRPTR(out);
     962            9 :     data = STRPTR(out);
     963            9 :     dataoff = 0;
     964           23 :     while (i1 && i2)
     965              :     {
     966           14 :         int         cmp = compareEntry(data1, ptr1, data2, ptr2);
     967              : 
     968           14 :         if (cmp < 0)
     969              :         {                       /* in1 first */
     970            5 :             ptr->haspos = ptr1->haspos;
     971            5 :             ptr->len = ptr1->len;
     972            5 :             memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
     973            5 :             ptr->pos = dataoff;
     974            5 :             dataoff += ptr1->len;
     975            5 :             if (ptr->haspos)
     976              :             {
     977            5 :                 dataoff = SHORTALIGN(dataoff);
     978            5 :                 memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
     979            5 :                 dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
     980              :             }
     981              : 
     982            5 :             ptr++;
     983            5 :             ptr1++;
     984            5 :             i1--;
     985              :         }
     986            9 :         else if (cmp > 0)
     987              :         {                       /* in2 first */
     988            4 :             ptr->haspos = ptr2->haspos;
     989            4 :             ptr->len = ptr2->len;
     990            4 :             memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
     991            4 :             ptr->pos = dataoff;
     992            4 :             dataoff += ptr2->len;
     993            4 :             if (ptr->haspos)
     994              :             {
     995            0 :                 int         addlen = add_pos(in2, ptr2, out, ptr, maxpos);
     996              : 
     997            0 :                 if (addlen == 0)
     998            0 :                     ptr->haspos = 0;
     999              :                 else
    1000              :                 {
    1001            0 :                     dataoff = SHORTALIGN(dataoff);
    1002            0 :                     dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
    1003              :                 }
    1004              :             }
    1005              : 
    1006            4 :             ptr++;
    1007            4 :             ptr2++;
    1008            4 :             i2--;
    1009              :         }
    1010              :         else
    1011              :         {                       /* same lexeme in both inputs */
    1012            5 :             ptr->haspos = ptr1->haspos | ptr2->haspos;
    1013            5 :             ptr->len = ptr1->len;
    1014            5 :             memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
    1015            5 :             ptr->pos = dataoff;
    1016            5 :             dataoff += ptr1->len;
    1017            5 :             if (ptr->haspos)
    1018              :             {
    1019            5 :                 if (ptr1->haspos)
    1020              :                 {
    1021            5 :                     dataoff = SHORTALIGN(dataoff);
    1022            5 :                     memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
    1023            5 :                     dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
    1024            5 :                     if (ptr2->haspos)
    1025            5 :                         dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
    1026              :                 }
    1027              :                 else            /* must have ptr2->haspos */
    1028              :                 {
    1029            0 :                     int         addlen = add_pos(in2, ptr2, out, ptr, maxpos);
    1030              : 
    1031            0 :                     if (addlen == 0)
    1032            0 :                         ptr->haspos = 0;
    1033              :                     else
    1034              :                     {
    1035            0 :                         dataoff = SHORTALIGN(dataoff);
    1036            0 :                         dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
    1037              :                     }
    1038              :                 }
    1039              :             }
    1040              : 
    1041            5 :             ptr++;
    1042            5 :             ptr1++;
    1043            5 :             ptr2++;
    1044            5 :             i1--;
    1045            5 :             i2--;
    1046              :         }
    1047              :     }
    1048              :     /* Copy whatever remains of in1 unchanged */
    1049           13 :     while (i1)
    1050              :     {
    1051            4 :         ptr->haspos = ptr1->haspos;
    1052            4 :         ptr->len = ptr1->len;
    1053            4 :         memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
    1054            4 :         ptr->pos = dataoff;
    1055            4 :         dataoff += ptr1->len;
    1056            4 :         if (ptr->haspos)
    1057              :         {
    1058            4 :             dataoff = SHORTALIGN(dataoff);
    1059            4 :             memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
    1060            4 :             dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
    1061              :         }
    1062              : 
    1063            4 :         ptr++;
    1064            4 :         ptr1++;
    1065            4 :         i1--;
    1066              :     }
    1067              :     /* Copy whatever remains of in2; its positions go through add_pos() */
    1068           14 :     while (i2)
    1069              :     {
    1070            5 :         ptr->haspos = ptr2->haspos;
    1071            5 :         ptr->len = ptr2->len;
    1072            5 :         memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
    1073            5 :         ptr->pos = dataoff;
    1074            5 :         dataoff += ptr2->len;
    1075            5 :         if (ptr->haspos)
    1076              :         {
    1077            5 :             int         addlen = add_pos(in2, ptr2, out, ptr, maxpos);
    1078              : 
    1079            5 :             if (addlen == 0)
    1080            0 :                 ptr->haspos = 0;
    1081              :             else
    1082              :             {
    1083            5 :                 dataoff = SHORTALIGN(dataoff);
    1084            5 :                 dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
    1085              :             }
    1086              :         }
    1087              : 
    1088            5 :         ptr++;
    1089            5 :         ptr2++;
    1090            5 :         i2--;
    1091              :     }
    1092              : 
    1093              :     /*
    1094              :      * Instead of checking each offset individually, we check for overflow of
    1095              :      * pos fields once at the end.
    1096              :      */
    1097            9 :     if (dataoff > MAXSTRPOS)
    1098            0 :         ereport(ERROR,
    1099              :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
    1100              :                  errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS)));
    1101              : 
    1102              :     /*
    1103              :      * Adjust sizes (asserting that we didn't overrun the original estimates)
    1104              :      * and collapse out any unused array entries.
    1105              :      */
    1106            9 :     output_size = ptr - ARRPTR(out);
    1107              :     Assert(output_size <= out->size);
    1108            9 :     out->size = output_size;
    1109            9 :     if (data != STRPTR(out))
    1110            5 :         memmove(STRPTR(out), data, dataoff);
    1111            9 :     output_bytes = CALCDATASIZE(out->size, dataoff);
    1112              :     Assert(output_bytes <= VARSIZE(out));
    1113            9 :     SET_VARSIZE(out, output_bytes);
    1114              : 
    1115            9 :     PG_FREE_IF_COPY(in1, 0);
    1116            9 :     PG_FREE_IF_COPY(in2, 1);
    1117            9 :     PG_RETURN_POINTER(out);
    1118              : }
    1119              : 
    1120              : /*
    1121              :  * Compare two strings by tsvector rules: bytewise via memcmp, with the shorter string first on a tie.
    1122              :  *
    1123              :  * If prefix is true, the result is zero iff a is a prefix of b.
    1124              :  */
    1125              : int32
    1126      4195612 : tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
    1127              : {
    1128              :     int         cmp;
    1129              : 
    1130      4195612 :     if (lena == 0)
    1131              :     {
    1132           30 :         if (prefix)
    1133            0 :             cmp = 0;            /* empty string is prefix of anything */
    1134              :         else
    1135           30 :             cmp = (lenb > 0) ? -1 : 0;
    1136              :     }
    1137      4195582 :     else if (lenb == 0)
    1138              :     {
    1139            0 :         cmp = (lena > 0) ? 1 : 0;   /* lena != 0 here, per the test above */
    1140              :     }
    1141              :     else
    1142              :     {
    1143      4195582 :         cmp = memcmp(a, b, Min((unsigned int) lena, (unsigned int) lenb));
    1144              : 
    1145      4195582 :         if (prefix)
    1146              :         {
    1147        11021 :             if (cmp == 0 && lena > lenb)
    1148            0 :                 cmp = 1;        /* a is longer, so not a prefix of b */
    1149              :         }
    1150      4184561 :         else if (cmp == 0 && lena != lenb)
    1151              :         {
    1152        21675 :             cmp = (lena < lenb) ? -1 : 1;
    1153              :         }
    1154              :     }
    1155              : 
    1156      4195612 :     return cmp;
    1157              : }
    1158              : 
    1159              : /*
    1160              :  * Check weight info and/or fill 'data' with the required positions
    1161              :  */
    1162              : static TSTernaryValue
    1163        45576 : checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
    1164              :                ExecPhraseData *data)
    1165              : {
    1166        45576 :     TSTernaryValue result = TS_NO;
    1167              : 
    1168              :     Assert(data == NULL || data->npos == 0);
    1169              : 
    1170        45576 :     if (entry->haspos)
    1171              :     {
    1172              :         WordEntryPosVector *posvec;
    1173              : 
    1174              :         /*
    1175              :          * We can't use the _POSVECPTR macro here because the pointer to the
    1176              :          * tsvector's lexeme storage is already contained in chkval->values.
    1177              :          */
    1178         3160 :         posvec = (WordEntryPosVector *)
    1179         3160 :             (chkval->values + SHORTALIGN(entry->pos + entry->len));
    1180              : 
    1181         3160 :         if (val->weight && data)
    1182           40 :         {
    1183           40 :             WordEntryPos *posvec_iter = posvec->pos;
    1184              :             WordEntryPos *dptr;
    1185              : 
    1186              :             /*
    1187              :              * Filter position information by weights
    1188              :              */
    1189           40 :             dptr = data->pos = palloc_array(WordEntryPos, posvec->npos);
    1190           40 :             data->allocated = true;
    1191              : 
    1192              :             /* Is there a position with a matching weight? */
    1193           80 :             while (posvec_iter < posvec->pos + posvec->npos)
    1194              :             {
    1195              :                 /* If true, append this position to the data->pos */
    1196           40 :                 if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
    1197              :                 {
    1198           20 :                     *dptr = WEP_GETPOS(*posvec_iter);
    1199           20 :                     dptr++;
    1200              :                 }
    1201              : 
    1202           40 :                 posvec_iter++;
    1203              :             }
    1204              : 
    1205           40 :             data->npos = dptr - data->pos;
    1206              : 
    1207           40 :             if (data->npos > 0)
    1208           20 :                 result = TS_YES;
    1209              :             else
    1210              :             {
    1211           20 :                 pfree(data->pos);
    1212           20 :                 data->pos = NULL;
    1213           20 :                 data->allocated = false;
    1214              :             }
    1215              :         }
    1216         3120 :         else if (val->weight)
    1217              :         {
    1218          332 :             WordEntryPos *posvec_iter = posvec->pos;
    1219              : 
    1220              :             /* Is there a position with a matching weight? */
    1221          503 :             while (posvec_iter < posvec->pos + posvec->npos)
    1222              :             {
    1223          372 :                 if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
    1224              :                 {
    1225          201 :                     result = TS_YES;
    1226          201 :                     break;      /* no need to go further */
    1227              :                 }
    1228              : 
    1229          171 :                 posvec_iter++;
    1230              :             }
    1231              :         }
    1232         2788 :         else if (data)
    1233              :         {
    1234         1645 :             data->npos = posvec->npos;
    1235         1645 :             data->pos = posvec->pos;
    1236         1645 :             data->allocated = false;
    1237         1645 :             result = TS_YES;
    1238              :         }
    1239              :         else
    1240              :         {
    1241              :             /* simplest case: no weight check, positions not needed */
    1242         1143 :             result = TS_YES;
    1243              :         }
    1244              :     }
    1245              :     else
    1246              :     {
    1247              :         /*
    1248              :          * Position info is lacking, so if the caller requires it, we can only
    1249              :          * say that maybe there is a match.
    1250              :          *
    1251              :          * Notice, however, that we *don't* check val->weight here.
    1252              :          * Historically, stripped tsvectors are considered to match queries
    1253              :          * whether or not the query has a weight restriction; that's a little
    1254              :          * dubious but we'll preserve the behavior.
    1255              :          */
    1256        42416 :         if (data)
    1257        15385 :             result = TS_MAYBE;
    1258              :         else
    1259        27031 :             result = TS_YES;
    1260              :     }
    1261              : 
    1262        45576 :     return result;
    1263              : }
    1264              : 
    1265              : /*
    1266              :  * TS_execute callback for matching a tsquery operand to plain tsvector data
                      :  *
                      :  * checkval is really a CHKVAL describing the tsvector's WordEntry array
                      :  * (arrb .. arre), its lexeme storage (values) and the query's operand
                      :  * storage (operand).  val is the operand to look up.  data, if not NULL,
                      :  * receives the positions at which the operand matched.
                      :  *
                      :  * The entry array is binary-searched for an exact match, so it is
                      :  * presumably sorted in tsCompareString() order.  For a prefix operand
                      :  * (val->prefix), the entries starting at the search point are scanned
                      :  * as well; when positions are wanted, the position lists of all matching
                      :  * entries are merged into one palloc'd, sorted, duplicate-free array.
                      :  *
                      :  * Returns TS_NO if nothing matched, TS_YES on a definite match, or
                      :  * TS_MAYBE when checkclass_str() reports a match whose positions were
                      :  * requested but are not available.
    1267              :  */
    1268              : static TSTernaryValue
    1269       189554 : checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
    1270              : {
    1271       189554 :     CHKVAL     *chkval = (CHKVAL *) checkval;
    1272       189554 :     WordEntry  *StopLow = chkval->arrb;
    1273       189554 :     WordEntry  *StopHigh = chkval->arre;
    1274       189554 :     WordEntry  *StopMiddle = StopHigh;
    1275       189554 :     TSTernaryValue res = TS_NO;
    1276              : 
    1277              :     /* Binary search for exact match; invariant: StopLow <= val < StopHigh */
    1278      1191598 :     while (StopLow < StopHigh)
    1279              :     {
    1280              :         int         difference;
    1281              : 
    1282      1037564 :         StopMiddle = StopLow + (StopHigh - StopLow) / 2;
    1283      1037564 :         difference = tsCompareString(chkval->operand + val->distance,
    1284      1037564 :                                      val->length,
    1285      1037564 :                                      chkval->values + StopMiddle->pos,
    1286      1037564 :                                      StopMiddle->len,
    1287              :                                      false);
    1288              : 
    1289      1037564 :         if (difference == 0)
    1290              :         {
    1291              :             /* Check weight info & fill 'data' with positions */
    1292        35520 :             res = checkclass_str(chkval, StopMiddle, val, data);
    1293        35520 :             break;
    1294              :         }
    1295      1002044 :         else if (difference > 0)
    1296       565083 :             StopLow = StopMiddle + 1;
    1297              :         else
    1298       436961 :             StopHigh = StopMiddle;
    1299              :     }
    1300              : 
    1301              :     /*
    1302              :      * If it's a prefix search, we should also consider lexemes that the
    1303              :      * search term is a prefix of (which will necessarily immediately follow
    1304              :      * the place we found in the above loop).  But we can skip them if there
    1305              :      * was a definite match on the exact term AND the caller doesn't need
    1306              :      * position info.
    1307              :      */
    1308       189554 :     if (val->prefix && (res != TS_YES || data))
    1309              :     {
    1310        11040 :         WordEntryPos *allpos = NULL;
    1311        11040 :         int         npos = 0,
    1312        11040 :                     totalpos = 0;
    1313              : 
    1314              :         /*
                      :          * Adjust start position for corner case: if the search loop ran
                      :          * to completion (no exact match), StopMiddle is merely the last
                      :          * probe, so start scanning at StopHigh, where the search
                      :          * converged.  After an exact match the loop was left via "break"
                      :          * with StopLow < StopHigh, and StopMiddle already points at the
                      :          * matching entry.
                      :          */
    1315        11040 :         if (StopLow >= StopHigh)
    1316        11030 :             StopMiddle = StopHigh;
    1317              : 
    1318              :         /* we don't try to re-use any data from the initial match */
    1319        11040 :         if (data)
    1320              :         {
    1321           30 :             if (data->allocated)
    1322            0 :                 pfree(data->pos);
    1323           30 :             data->pos = NULL;
    1324           30 :             data->allocated = false;
    1325           30 :             data->npos = 0;
    1326              :         }
    1327        11040 :         res = TS_NO;
    1328              : 
    1329        21011 :         while ((res != TS_YES || data) &&
    1330        31751 :                StopMiddle < chkval->arre &&
    1331        10655 :                tsCompareString(chkval->operand + val->distance,
    1332        10655 :                                val->length,
    1333        10655 :                                chkval->values + StopMiddle->pos,
    1334        10655 :                                StopMiddle->len,
    1335              :                                true) == 0)
    1336              :         {
    1337              :             TSTernaryValue subres;
    1338              : 
    1339        10056 :             subres = checkclass_str(chkval, StopMiddle, val, data);
    1340              : 
    1341        10056 :             if (subres != TS_NO)
    1342              :             {
    1343        10006 :                 if (data)
    1344              :                 {
    1345              :                     /*
    1346              :                      * We need to join position information: unless this
                      :                      * entry lacks position info (handled first), append
                      :                      * its positions to allpos, which starts with 256
                      :                      * slots and is doubled as needed.  The combined list
                      :                      * is sorted and de-duplicated after the loop.
    1347              :                      */
    1348           35 :                     if (subres == TS_MAYBE)
    1349              :                     {
    1350              :                         /*
    1351              :                          * No position info for this match, so we must report
    1352              :                          * MAYBE overall.
    1353              :                          */
    1354            0 :                         res = TS_MAYBE;
    1355              :                         /* forget any previous positions */
    1356            0 :                         npos = 0;
    1357              :                         /* don't leak storage */
    1358            0 :                         if (allpos)
    1359            0 :                             pfree(allpos);
    1360            0 :                         break;
    1361              :                     }
    1362              : 
    1363           65 :                     while (npos + data->npos > totalpos)
    1364              :                     {
    1365           30 :                         if (totalpos == 0)
    1366              :                         {
    1367           30 :                             totalpos = 256;
    1368           30 :                             allpos = palloc_array(WordEntryPos, totalpos);
    1369              :                         }
    1370              :                         else
    1371              :                         {
    1372            0 :                             totalpos *= 2;
    1373            0 :                             allpos = repalloc_array(allpos, WordEntryPos, totalpos);
    1374              :                         }
    1375              :                     }
    1376              : 
    1377           35 :                     memcpy(allpos + npos, data->pos, sizeof(WordEntryPos) * data->npos);
    1378           35 :                     npos += data->npos;
    1379              : 
    1380              :                     /* don't leak storage from individual matches */
    1381           35 :                     if (data->allocated)
    1382           20 :                         pfree(data->pos);
    1383           35 :                     data->pos = NULL;
    1384           35 :                     data->allocated = false;
    1385              :                     /* it's important to reset data->npos before next loop */
    1386           35 :                     data->npos = 0;
    1387              :                 }
    1388              :                 else
    1389              :                 {
    1390              :                     /*
                      :                      * Don't need positions, just handle YES/MAYBE: a YES
                      :                      * always wins, while any other result is recorded
                      :                      * only if nothing has matched so far.
                      :                      */
    1391         9971 :                     if (subres == TS_YES || res == TS_NO)
    1392         9971 :                         res = subres;
    1393              :                 }
    1394              :             }
    1395              : 
    1396        10056 :             StopMiddle++;
    1397              :         }
    1398              : 
    1399        11040 :         if (data && npos > 0)
    1400              :         {
    1401              :             /*
                      :              * Sort and make unique array of found positions, then hand
                      :              * it back through *data.  It is flagged as allocated, which
                      :              * is the flag the pfree() calls above key on.
                      :              */
    1402           30 :             data->pos = allpos;
    1403           30 :             qsort(data->pos, npos, sizeof(WordEntryPos), compareWordEntryPos);
    1404           30 :             data->npos = qunique(data->pos, npos, sizeof(WordEntryPos),
    1405              :                                  compareWordEntryPos);
    1406           30 :             data->allocated = true;
    1407           30 :             res = TS_YES;
    1408              :         }
    1409              :     }
    1410              : 
    1411       189554 :     return res;
    1412              : }
    1413              : 
    1414              : /*
    1415              :  * Compute output position list for a tsquery operator in phrase mode.
    1416              :  *
    1417              :  * Merge the position lists in Ldata and Rdata as specified by "emit",
    1418              :  * returning the result list into *data.  The input position lists must be
    1419              :  * sorted and unique, and the output will be as well.
    1420              :  *
    1421              :  * data: pointer to initially-all-zeroes output struct, or NULL
    1422              :  * Ldata, Rdata: input position lists
    1423              :  * emit: bitmask of TSPO_XXX flags
    1424              :  * Loffset: offset to be added to Ldata positions before comparing/outputting
    1425              :  * Roffset: offset to be added to Rdata positions before comparing/outputting
    1426              :  * max_npos: maximum possible required size of output position array
    1427              :  *
    1428              :  * Loffset and Roffset should not be negative, else we risk trying to output
    1429              :  * negative positions, which won't fit into WordEntryPos.
    1430              :  *
    1431              :  * The result is boolean (TS_YES or TS_NO), but for the caller's convenience
    1432              :  * we return it as TSTernaryValue.
    1433              :  *
    1434              :  * Returns TS_YES if any positions were emitted to *data; or if data is NULL,
    1435              :  * returns TS_YES if any positions would have been emitted.
                      :  *
                      :  * When data is not NULL, the output array (max_npos entries) is palloc'd
                      :  * only once the first position is emitted, and data->allocated is set at
                      :  * that time.  If nothing is emitted, *data is left untouched.
    1436              :  */
    1437              : #define TSPO_L_ONLY     0x01    /* emit positions appearing only in L */
    1438              : #define TSPO_R_ONLY     0x02    /* emit positions appearing only in R */
    1439              : #define TSPO_BOTH       0x04    /* emit positions appearing in both L&R */
    1440              : 
    1441              : static TSTernaryValue
    1442        20112 : TS_phrase_output(ExecPhraseData *data,
    1443              :                  ExecPhraseData *Ldata,
    1444              :                  ExecPhraseData *Rdata,
    1445              :                  int emit,
    1446              :                  int Loffset,
    1447              :                  int Roffset,
    1448              :                  int max_npos)
    1449              : {
    1450              :     int         Lindex,
    1451              :                 Rindex;
    1452              : 
    1453              :     /* Loop until both inputs are exhausted */
    1454        20112 :     Lindex = Rindex = 0;
    1455        20904 :     while (Lindex < Ldata->npos || Rindex < Rdata->npos)
    1456              :     {
    1457              :         int         Lpos,
    1458              :                     Rpos;
    1459         1748 :         int         output_pos = 0;
    1460              : 
    1461              :         /*
    1462              :          * Fetch current values to compare.  WEP_GETPOS() is needed because
    1463              :          * ExecPhraseData->pos can point to a tsvector's WordEntryPosVector.
    1464              :          */
    1465         1748 :         if (Lindex < Ldata->npos)
    1466         1288 :             Lpos = WEP_GETPOS(Ldata->pos[Lindex]) + Loffset;
    1467              :         else
    1468              :         {
    1469              :             /*
                      :              * L array exhausted, so we're done if R_ONLY isn't set.
                      :              * Otherwise use INT_MAX as a sentinel, so that the remaining
                      :              * R positions (presumably all below INT_MAX) take the
                      :              * "Lpos > Rpos" path below.
                      :              */
    1470          460 :             if (!(emit & TSPO_R_ONLY))
    1471          113 :                 break;
    1472          347 :             Lpos = INT_MAX;
    1473              :         }
    1474         1635 :         if (Rindex < Rdata->npos)
    1475         1445 :             Rpos = WEP_GETPOS(Rdata->pos[Rindex]) + Roffset;
    1476              :         else
    1477              :         {
    1478              :             /*
                      :              * R array exhausted, so we're done if L_ONLY isn't set.
                      :              * Otherwise use INT_MAX as a sentinel, so that the remaining
                      :              * L positions (presumably all below INT_MAX) take the
                      :              * "Lpos < Rpos" path below.
                      :              */
    1479          190 :             if (!(emit & TSPO_L_ONLY))
    1480          122 :                 break;
    1481           68 :             Rpos = INT_MAX;
    1482              :         }
    1483              : 
    1484              :         /*
                      :          * Merge-join the two input lists.  output_pos stays zero unless
                      :          * the current position is selected by "emit".
                      :          */
    1485         1513 :         if (Lpos < Rpos)
    1486              :         {
    1487              :             /* Lpos is not matched in Rdata, should we output it? */
    1488          365 :             if (emit & TSPO_L_ONLY)
    1489          116 :                 output_pos = Lpos;
    1490          365 :             Lindex++;
    1491              :         }
    1492         1148 :         else if (Lpos == Rpos)
    1493              :         {
    1494              :             /* Lpos and Rpos match ... should we output it? */
    1495          621 :             if (emit & TSPO_BOTH)
    1496          553 :                 output_pos = Rpos;
    1497          621 :             Lindex++;
    1498          621 :             Rindex++;
    1499              :         }
    1500              :         else                    /* Lpos > Rpos */
    1501              :         {
    1502              :             /* Rpos is not matched in Ldata, should we output it? */
    1503          527 :             if (emit & TSPO_R_ONLY)
    1504          376 :                 output_pos = Rpos;
    1505          527 :             Rindex++;
    1506              :         }
    1507              : 
    1508         1513 :         if (output_pos > 0)
    1509              :         {
    1510         1045 :             if (data)
    1511              :             {
    1512              :                 /* Store position, first allocating output array if needed */
    1513          324 :                 if (data->pos == NULL)
    1514              :                 {
    1515          261 :                     data->pos = (WordEntryPos *)
    1516          261 :                         palloc(max_npos * sizeof(WordEntryPos));
    1517          261 :                     data->allocated = true;
    1518              :                 }
    1519          324 :                 data->pos[data->npos++] = output_pos;
    1520              :             }
    1521              :             else
    1522              :             {
    1523              :                 /*
    1524              :                  * Exact positions not needed, so return TS_YES as soon as we
    1525              :                  * know there is at least one.
    1526              :                  */
    1527          721 :                 return TS_YES;
    1528              :             }
    1529              :         }
    1530              :     }
    1531              : 
    1532        19391 :     if (data && data->npos > 0)
    1533              :     {
    1534              :         /* Let's assert we didn't overrun the array */
    1535              :         Assert(data->npos <= max_npos);
    1536          261 :         return TS_YES;
    1537              :     }
    1538        19130 :     return TS_NO;
    1539              : }
    1540              : 
    1541              : /*
    1542              :  * Execute tsquery at or below an OP_PHRASE operator.
    1543              :  *
    1544              :  * This handles tsquery execution at recursion levels where we need to care
    1545              :  * about match locations.
    1546              :  *
    1547              :  * In addition to the same arguments used for TS_execute, the caller may pass
    1548              :  * a preinitialized-to-zeroes ExecPhraseData struct, to be filled with lexeme
    1549              :  * match position info on success.  data == NULL if no position data need be
    1550              :  * returned.
    1551              :  * Note: the function assumes data != NULL for operators other than OP_PHRASE.
    1552              :  * This is OK because an outside call always starts from an OP_PHRASE node,
    1553              :  * and all internal recursion cases pass data != NULL.
    1554              :  *
    1555              :  * The detailed semantics of the match data, given that the function returned
    1556              :  * TS_YES (successful match), are:
    1557              :  *
    1558              :  * npos > 0, negate = false:
    1559              :  *   query is matched at specified position(s) (and only those positions)
    1560              :  * npos > 0, negate = true:
    1561              :  *   query is matched at all positions *except* specified position(s)
    1562              :  * npos = 0, negate = true:
    1563              :  *   query is matched at all positions
    1564              :  * npos = 0, negate = false:
    1565              :  *   disallowed (this should result in TS_NO or TS_MAYBE, as appropriate)
    1566              :  *
    1567              :  * Successful matches also return a "width" value which is the match width in
    1568              :  * lexemes, less one.  Hence, "width" is zero for simple one-lexeme matches,
    1569              :  * and is the sum of the phrase operator distances for phrase matches.  Note
    1570              :  * that when width > 0, the listed positions represent the ends of matches not
    1571              :  * the starts.  (This unintuitive rule is needed to avoid possibly generating
    1572              :  * negative positions, which wouldn't fit into the WordEntryPos arrays.)
    1573              :  *
    1574              :  * If the TSExecuteCallback function reports that an operand is present
    1575              :  * but fails to provide position(s) for it, we will return TS_MAYBE when
    1576              :  * it is possible but not certain that the query is matched.
    1577              :  *
    1578              :  * When the function returns TS_NO or TS_MAYBE, it must return npos = 0,
    1579              :  * negate = false (which is the state initialized by the caller); but the
    1580              :  * "width" output in such cases is undefined.
    1581              :  */
    1582              : static TSTernaryValue
    1583       467663 : TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
    1584              :                   TSExecuteCallback chkcond,
    1585              :                   ExecPhraseData *data)
    1586              : {
    1587              :     ExecPhraseData Ldata,
    1588              :                 Rdata;
    1589              :     TSTernaryValue lmatch,
    1590              :                 rmatch;
    1591              :     int         Loffset,
    1592              :                 Roffset,
    1593              :                 maxwidth;
    1594              : 
    1595              :     /* since this function recurses, it could be driven to stack overflow */
    1596       467663 :     check_stack_depth();
    1597              : 
    1598              :     /* ... and let's check for query cancel while we're at it */
    1599       467663 :     CHECK_FOR_INTERRUPTS();
    1600              : 
    1601       467663 :     if (curitem->type == QI_VAL)
    1602       230075 :         return chkcond(arg, (QueryOperand *) curitem, data);
    1603              : 
    1604       237588 :     switch (curitem->qoperator.oper)
    1605              :     {
    1606        80485 :         case OP_NOT:
    1607              : 
    1608              :             /*
    1609              :              * We need not touch data->width, since a NOT operation does not
    1610              :              * change the match width.
    1611              :              */
    1612        80485 :             if (flags & TS_EXEC_SKIP_NOT)
    1613              :             {
    1614              :                 /* with SKIP_NOT, report NOT as "match everywhere" */
    1615              :                 Assert(data->npos == 0 && !data->negate);
    1616            0 :                 data->negate = true;
    1617            0 :                 return TS_YES;
    1618              :             }
    1619        80485 :             switch (TS_phrase_execute(curitem + 1, arg, flags, chkcond, data))
    1620              :             {
    1621        70342 :                 case TS_NO:
    1622              :                     /* change "match nowhere" to "match everywhere" */
    1623              :                     Assert(data->npos == 0 && !data->negate);
    1624        70342 :                     data->negate = true;
    1625        70342 :                     return TS_YES;
    1626          273 :                 case TS_YES:
    1627          273 :                     if (data->npos > 0)
    1628              :                     {
    1629              :                         /* we have some positions, invert negate flag */
    1630          268 :                         data->negate = !data->negate;
    1631          268 :                         return TS_YES;
    1632              :                     }
    1633            5 :                     else if (data->negate)
    1634              :                     {
    1635              :                         /* change "match everywhere" to "match nowhere" */
    1636            5 :                         data->negate = false;
    1637            5 :                         return TS_NO;
    1638              :                     }
    1639              :                     /* Should not get here if result was TS_YES */
    1640              :                     Assert(false);
    1641            0 :                     break;
    1642         9870 :                 case TS_MAYBE:
    1643              :                     /* match positions are, and remain, uncertain */
    1644         9870 :                     return TS_MAYBE;
    1645              :             }
    1646            0 :             break;
    1647              : 
    1648       156977 :         case OP_PHRASE:
    1649              :         case OP_AND:
    1650       156977 :             memset(&Ldata, 0, sizeof(Ldata));
    1651       156977 :             memset(&Rdata, 0, sizeof(Rdata));
    1652              : 
    1653       156977 :             lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
    1654              :                                        arg, flags, chkcond, &Ldata);
    1655       156977 :             if (lmatch == TS_NO)
    1656        83795 :                 return TS_NO;
    1657              : 
    1658        73182 :             rmatch = TS_phrase_execute(curitem + 1,
    1659              :                                        arg, flags, chkcond, &Rdata);
    1660        73182 :             if (rmatch == TS_NO)
    1661        35992 :                 return TS_NO;
    1662              : 
    1663              :             /*
    1664              :              * If either operand has no position information, then we can't
    1665              :              * return reliable position data, only a MAYBE result.
    1666              :              */
    1667        37190 :             if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
    1668        17204 :                 return TS_MAYBE;
    1669              : 
    1670        19986 :             if (curitem->qoperator.oper == OP_PHRASE)
    1671              :             {
    1672              :                 /*
    1673              :                  * Compute Loffset and Roffset suitable for phrase match, and
    1674              :                  * compute overall width of whole phrase match.
    1675              :                  */
    1676        19981 :                 Loffset = curitem->qoperator.distance + Rdata.width;
    1677        19981 :                 Roffset = 0;
    1678        19981 :                 if (data)
    1679          155 :                     data->width = curitem->qoperator.distance +
    1680          155 :                         Ldata.width + Rdata.width;
    1681              :             }
    1682              :             else
    1683              :             {
    1684              :                 /*
    1685              :                  * For OP_AND, set output width and alignment like OP_OR (see
    1686              :                  * comment below)
    1687              :                  */
    1688            5 :                 maxwidth = Max(Ldata.width, Rdata.width);
    1689            5 :                 Loffset = maxwidth - Ldata.width;
    1690            5 :                 Roffset = maxwidth - Rdata.width;
    1691            5 :                 if (data)
    1692            5 :                     data->width = maxwidth;
    1693              :             }
    1694              : 
    1695        19986 :             if (Ldata.negate && Rdata.negate)
    1696              :             {
    1697              :                 /* !L & !R: treat as !(L | R) */
    1698        18958 :                 (void) TS_phrase_output(data, &Ldata, &Rdata,
    1699              :                                         TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
    1700              :                                         Loffset, Roffset,
    1701        18958 :                                         Ldata.npos + Rdata.npos);
    1702        18958 :                 if (data)
    1703            0 :                     data->negate = true;
    1704        18958 :                 return TS_YES;
    1705              :             }
    1706         1028 :             else if (Ldata.negate)
    1707              :             {
    1708              :                 /* !L & R */
    1709          309 :                 return TS_phrase_output(data, &Ldata, &Rdata,
    1710              :                                         TSPO_R_ONLY,
    1711              :                                         Loffset, Roffset,
    1712              :                                         Rdata.npos);
    1713              :             }
    1714          719 :             else if (Rdata.negate)
    1715              :             {
    1716              :                 /* L & !R */
    1717            5 :                 return TS_phrase_output(data, &Ldata, &Rdata,
    1718              :                                         TSPO_L_ONLY,
    1719              :                                         Loffset, Roffset,
    1720              :                                         Ldata.npos);
    1721              :             }
    1722              :             else
    1723              :             {
    1724              :                 /* straight AND */
    1725          714 :                 return TS_phrase_output(data, &Ldata, &Rdata,
    1726              :                                         TSPO_BOTH,
    1727              :                                         Loffset, Roffset,
    1728          714 :                                         Min(Ldata.npos, Rdata.npos));
    1729              :             }
    1730              : 
    1731          126 :         case OP_OR:
    1732          126 :             memset(&Ldata, 0, sizeof(Ldata));
    1733          126 :             memset(&Rdata, 0, sizeof(Rdata));
    1734              : 
    1735          126 :             lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
    1736              :                                        arg, flags, chkcond, &Ldata);
    1737          126 :             rmatch = TS_phrase_execute(curitem + 1,
    1738              :                                        arg, flags, chkcond, &Rdata);
    1739              : 
    1740          126 :             if (lmatch == TS_NO && rmatch == TS_NO)
    1741           10 :                 return TS_NO;
    1742              : 
    1743              :             /*
    1744              :              * If either operand has no position information, then we can't
    1745              :              * return reliable position data, only a MAYBE result.
    1746              :              */
    1747          116 :             if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
    1748            0 :                 return TS_MAYBE;
    1749              : 
    1750              :             /*
    1751              :              * Cope with undefined output width from failed submatch.  (This
    1752              :              * takes less code than trying to ensure that all failure returns
    1753              :              * set data->width to zero.)
    1754              :              */
    1755          116 :             if (lmatch == TS_NO)
    1756           15 :                 Ldata.width = 0;
    1757          116 :             if (rmatch == TS_NO)
    1758           68 :                 Rdata.width = 0;
    1759              : 
    1760              :             /*
    1761              :              * For OP_AND and OP_OR, report the width of the wider of the two
    1762              :              * inputs, and align the narrower input's positions to the right
    1763              :              * end of that width.  This rule deals at least somewhat
    1764              :              * reasonably with cases like "x <-> (y | z <-> q)".
    1765              :              */
    1766          116 :             maxwidth = Max(Ldata.width, Rdata.width);
    1767          116 :             Loffset = maxwidth - Ldata.width;
    1768          116 :             Roffset = maxwidth - Rdata.width;
    1769          116 :             data->width = maxwidth;
    1770              : 
    1771          116 :             if (Ldata.negate && Rdata.negate)
    1772              :             {
    1773              :                 /* !L | !R: treat as !(L & R) */
    1774            5 :                 (void) TS_phrase_output(data, &Ldata, &Rdata,
    1775              :                                         TSPO_BOTH,
    1776              :                                         Loffset, Roffset,
    1777            5 :                                         Min(Ldata.npos, Rdata.npos));
    1778            5 :                 data->negate = true;
    1779            5 :                 return TS_YES;
    1780              :             }
    1781          111 :             else if (Ldata.negate)
    1782              :             {
    1783              :                 /* !L | R: treat as !(L & !R) */
    1784           25 :                 (void) TS_phrase_output(data, &Ldata, &Rdata,
    1785              :                                         TSPO_L_ONLY,
    1786              :                                         Loffset, Roffset,
    1787              :                                         Ldata.npos);
    1788           25 :                 data->negate = true;
    1789           25 :                 return TS_YES;
    1790              :             }
    1791           86 :             else if (Rdata.negate)
    1792              :             {
    1793              :                 /* L | !R: treat as !(!L & R) */
    1794            5 :                 (void) TS_phrase_output(data, &Ldata, &Rdata,
    1795              :                                         TSPO_R_ONLY,
    1796              :                                         Loffset, Roffset,
    1797              :                                         Rdata.npos);
    1798            5 :                 data->negate = true;
    1799            5 :                 return TS_YES;
    1800              :             }
    1801              :             else
    1802              :             {
    1803              :                 /* straight OR */
    1804           81 :                 return TS_phrase_output(data, &Ldata, &Rdata,
    1805              :                                         TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
    1806              :                                         Loffset, Roffset,
    1807           81 :                                         Ldata.npos + Rdata.npos);
    1808              :             }
    1809              : 
    1810            0 :         default:
    1811            0 :             elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
    1812              :     }
    1813              : 
    1814              :     /* not reachable, but keep compiler quiet */
    1815            0 :     return TS_NO;
    1816              : }
    1817              : 
    1818              : 
    1819              : /*
    1820              :  * Evaluate tsquery boolean expression.
    1821              :  *
    1822              :  * curitem: current tsquery item (initially, the first one)
    1823              :  * arg: opaque value to pass through to callback function
    1824              :  * flags: bitmask of flag bits shown in ts_utils.h
    1825              :  * chkcond: callback function to check whether a primitive value is present
                         *
                         * Returns true when the recursive evaluation yields TS_YES or TS_MAYBE,
                         * and false only when it yields TS_NO.  Callers that need to tell
                         * TS_MAYBE apart from TS_YES should use TS_execute_ternary instead.
    1826              :  */
    1827              : bool
    1828       346294 : TS_execute(QueryItem *curitem, void *arg, uint32 flags,
    1829              :            TSExecuteCallback chkcond)
    1830              : {
    1831              :     /*
    1832              :      * If we get TS_MAYBE from the recursion, return true.  We could only see
    1833              :      * that result if the caller passed TS_EXEC_PHRASE_NO_POS, so there's no
    1834              :      * need to check again.
    1835              :      */
    1836       346294 :     return TS_execute_recurse(curitem, arg, flags, chkcond) != TS_NO;
    1837              : }
    1838              : 
    1839              : /*
    1840              :  * Evaluate tsquery boolean expression.
    1841              :  *
    1842              :  * This is the same as TS_execute except that TS_MAYBE is returned as-is.
                         *
                         * All four arguments are forwarded unchanged to TS_execute_recurse, and
                         * its TSTernaryValue result (TS_NO, TS_YES or TS_MAYBE) is returned
                         * without any conversion.
    1843              :  */
    1844              : TSTernaryValue
    1845        24628 : TS_execute_ternary(QueryItem *curitem, void *arg, uint32 flags,
    1846              :                    TSExecuteCallback chkcond)
    1847              : {
    1848        24628 :     return TS_execute_recurse(curitem, arg, flags, chkcond);
    1849              : }
    1850              : 
    1851              : /*
    1852              :  * TS_execute recursion for operators above any phrase operator.  Here we do
    1853              :  * not need to worry about lexeme positions.  As soon as we hit an OP_PHRASE
    1854              :  * operator, we pass it off to TS_phrase_execute which does worry.
                         *
                         * Arguments are the same as for TS_execute.  For a binary operator the
                         * left operand is found at curitem + curitem->qoperator.left and the
                         * right operand at curitem + 1; OP_NOT's single operand is at
                         * curitem + 1.
                         *
                         * Result rules, as implemented below:
                         *   QI_VAL:    whatever the callback returns (no position data requested)
                         *   OP_NOT:    TS_YES if TS_EXEC_SKIP_NOT is set; otherwise NO and YES are
                         *              swapped and MAYBE stays MAYBE
                         *   OP_AND:    NO if either side is NO; YES only if both sides are YES;
                         *              otherwise MAYBE
                         *   OP_OR:     YES if either side is YES; NO only if both sides are NO;
                         *              otherwise MAYBE
                         *   OP_PHRASE: result of TS_phrase_execute, with MAYBE turned into NO
                         *              unless TS_EXEC_PHRASE_NO_POS is set
    1855              :  */
    1856              : static TSTernaryValue
    1857       702946 : TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags,
    1858              :                    TSExecuteCallback chkcond)
    1859              : {
    1860              :     TSTernaryValue lmatch;
    1861              : 
    1862              :     /* since this function recurses, it could be driven to stack overflow */
    1863       702946 :     check_stack_depth();
    1864              : 
    1865              :     /* ... and let's check for query cancel while we're at it */
    1866       702946 :     CHECK_FOR_INTERRUPTS();
    1867              : 
                            /* Leaf node: the answer comes straight from the callback */
    1868       702946 :     if (curitem->type == QI_VAL)
    1869       282246 :         return chkcond(arg, (QueryOperand *) curitem,
    1870              :                        NULL /* don't need position info */ );
    1871              : 
    1872       420700 :     switch (curitem->qoperator.oper)
    1873              :     {
    1874       135515 :         case OP_NOT:
                                    /* With TS_EXEC_SKIP_NOT the operand is not evaluated at all */
    1875       135515 :             if (flags & TS_EXEC_SKIP_NOT)
    1876            0 :                 return TS_YES;
    1877       135515 :             switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
    1878              :             {
    1879       127862 :                 case TS_NO:
    1880       127862 :                     return TS_YES;
    1881         3264 :                 case TS_YES:
    1882         3264 :                     return TS_NO;
    1883         4389 :                 case TS_MAYBE:
    1884         4389 :                     return TS_MAYBE;
    1885              :             }
                                    /* reached only if the inner switch matched none of its cases */
    1886            0 :             break;
    1887              : 
    1888        55800 :         case OP_AND:
    1889        55800 :             lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
    1890              :                                         flags, chkcond);
                                    /* short-circuit: the right operand is not evaluated */
    1891        55800 :             if (lmatch == TS_NO)
    1892        44283 :                 return TS_NO;
    1893        11517 :             switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
    1894              :             {
    1895         6755 :                 case TS_NO:
    1896         6755 :                     return TS_NO;
    1897         2236 :                 case TS_YES:
                                            /* lmatch is TS_YES or TS_MAYBE here; pass it through */
    1898         2236 :                     return lmatch;
    1899         2526 :                 case TS_MAYBE:
    1900         2526 :                     return TS_MAYBE;
    1901              :             }
    1902            0 :             break;
    1903              : 
    1904        72668 :         case OP_OR:
    1905        72668 :             lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
    1906              :                                         flags, chkcond);
                                    /* short-circuit: the right operand is not evaluated */
    1907        72668 :             if (lmatch == TS_YES)
    1908        16144 :                 return TS_YES;
    1909        56524 :             switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
    1910              :             {
    1911        38317 :                 case TS_NO:
                                            /* lmatch is TS_NO or TS_MAYBE here; pass it through */
    1912        38317 :                     return lmatch;
    1913         4962 :                 case TS_YES:
    1914         4962 :                     return TS_YES;
    1915        13245 :                 case TS_MAYBE:
    1916        13245 :                     return TS_MAYBE;
    1917              :             }
    1918            0 :             break;
    1919              : 
    1920       156717 :         case OP_PHRASE:
    1921              : 
    1922              :             /*
    1923              :              * If we get a MAYBE result, and the caller doesn't want that,
    1924              :              * convert it to NO.  It would be more consistent, perhaps, to
    1925              :              * return the result of TS_phrase_execute() verbatim and then
    1926              :              * convert MAYBE results at the top of the recursion.  But
    1927              :              * converting at the topmost phrase operator gives results that
    1928              :              * are bug-compatible with the old implementation, so do it like
    1929              :              * this for now.
    1930              :              */
    1931       156717 :             switch (TS_phrase_execute(curitem, arg, flags, chkcond, NULL))
    1932              :             {
    1933       119916 :                 case TS_NO:
    1934       119916 :                     return TS_NO;
    1935        19602 :                 case TS_YES:
    1936        19602 :                     return TS_YES;
    1937        17199 :                 case TS_MAYBE:
    1938        17199 :                     return (flags & TS_EXEC_PHRASE_NO_POS) ? TS_MAYBE : TS_NO;
    1939              :             }
    1940            0 :             break;
    1941              : 
    1942            0 :         default:
    1943            0 :             elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
    1944              :     }
    1945              : 
    1946              :     /* not reachable, but keep compiler quiet */
    1947            0 :     return TS_NO;
    1948              : }
    1949              : 
    1950              : /*
    1951              :  * Evaluate tsquery and report locations of matching terms.
    1952              :  *
    1953              :  * This is like TS_execute except that it returns match locations not just
    1954              :  * success/failure status.  The callback function is required to provide
    1955              :  * position data (we report failure if it doesn't).
    1956              :  *
    1957              :  * On successful match, the result is a List of ExecPhraseData structs, one
    1958              :  * for each AND'ed term or phrase operator in the query.  Each struct includes
    1959              :  * a sorted array of lexeme positions matching that term.  (Recall that for
    1960              :  * phrase operators, the match includes width+1 lexemes, and the recorded
    1961              :  * position is that of the rightmost lexeme.)
    1962              :  *
    1963              :  * OR subexpressions are handled by union'ing their match locations into a
    1964              :  * single List element, which is valid since any of those locations contains
    1965              :  * a match.  However, when some of the OR'ed terms are phrase operators, we
    1966              :  * report the maximum width of any of the OR'ed terms, making such cases
    1967              :  * slightly imprecise in the conservative direction.  (For example, if the
    1968              :  * tsquery is "(A <-> B) | C", an occurrence of C in the data would be
    1969              :  * reported as though it includes the lexeme to the left of C.)
    1970              :  *
    1971              :  * Locations of NOT subexpressions are not reported.  (Obviously, there can
    1972              :  * be no successful NOT matches at top level, or the match would have failed.
    1973              :  * So this amounts to ignoring NOTs underneath ORs.)
    1974              :  *
    1975              :  * The result is NIL if no match, or if position data was not returned.
                         * Note that the recursion can also report success without passing back
                         * any locations (its OP_NOT case, and a phrase result flagged as negated),
                         * in which case the value returned here is likewise NIL.
    1976              :  *
    1977              :  * Arguments are the same as for TS_execute, although flags is currently
    1978              :  * vestigial since none of the defined bits are sensible here.
    1979              :  */
    1980              : List *
    1981          293 : TS_execute_locations(QueryItem *curitem, void *arg,
    1982              :                      uint32 flags,
    1983              :                      TSExecuteCallback chkcond)
    1984              : {
                            /*
                             * No initializer needed: TS_execute_locations_recurse stores NIL into
                             * its output argument before any path on which it returns.
                             */
    1985              :     List       *result;
    1986              : 
    1987              :     /* No flags supported, as yet */
    1988              :     Assert(flags == TS_EXEC_EMPTY);
                            /* flags is not forwarded; the recursion takes no flags argument */
    1989          293 :     if (TS_execute_locations_recurse(curitem, arg, chkcond, &result))
    1990          118 :         return result;
    1991          175 :     return NIL;
    1992              : }
    1993              : 
    1994              : /*
    1995              :  * TS_execute_locations recursion for operators above any phrase operator.
    1996              :  * OP_PHRASE subexpressions can be passed off to TS_phrase_execute.
                         *
                         * curitem: query item to evaluate
                         * arg: opaque value passed through to the callback
                         * chkcond: callback used to test a primitive value and fetch its positions
                         * locations: output list; reset to NIL on entry, and filled with
                         *      ExecPhraseData pointers when there are locations to report
                         *
                         * Returns true if the subexpression matched, false otherwise.  A true
                         * result does not guarantee a non-NIL *locations: OP_NOT and negated
                         * phrase results report success without any locations.
    1997              :  */
    1998              : static bool
    1999          839 : TS_execute_locations_recurse(QueryItem *curitem, void *arg,
    2000              :                              TSExecuteCallback chkcond,
    2001              :                              List **locations)
    2002              : {
    2003              :     bool        lmatch,
    2004              :                 rmatch;
    2005              :     List       *llocations,
    2006              :                *rlocations;
    2007              :     ExecPhraseData *data;
    2008              : 
    2009              :     /* since this function recurses, it could be driven to stack overflow */
    2010          839 :     check_stack_depth();
    2011              : 
    2012              :     /* ... and let's check for query cancel while we're at it */
    2013          839 :     CHECK_FOR_INTERRUPTS();
    2014              : 
    2015              :     /* Default locations result is empty */
    2016          839 :     *locations = NIL;
    2017              : 
    2018          839 :     if (curitem->type == QI_VAL)
    2019              :     {
                                /*
                                 * Only a TS_YES answer from the callback counts as a match here;
                                 * any other result (TS_NO or TS_MAYBE) frees the struct and
                                 * reports failure.
                                 */
    2020          359 :         data = palloc0_object(ExecPhraseData);
    2021          359 :         if (chkcond(arg, (QueryOperand *) curitem, data) == TS_YES)
    2022              :         {
    2023          184 :             *locations = list_make1(data);
    2024          184 :             return true;
    2025              :         }
    2026          175 :         pfree(data);
    2027          175 :         return false;
    2028              :     }
    2029              : 
    2030          480 :     switch (curitem->qoperator.oper)
    2031              :     {
    2032           10 :         case OP_NOT:
                                    /* Invert the operand's result; its locations are discarded */
    2033           10 :             if (!TS_execute_locations_recurse(curitem + 1, arg, chkcond,
    2034              :                                               &llocations))
    2035            0 :                 return true;    /* we don't pass back any locations */
    2036           10 :             return false;
    2037              : 
    2038          400 :         case OP_AND:
                                    /*
                                     * Both sides must match; a failing left side short-circuits
                                     * the right side.  The result is the two lists concatenated.
                                     */
    2039          400 :             if (!TS_execute_locations_recurse(curitem + curitem->qoperator.left,
    2040              :                                               arg, chkcond,
    2041              :                                               &llocations))
    2042          304 :                 return false;
    2043           96 :             if (!TS_execute_locations_recurse(curitem + 1,
    2044              :                                               arg, chkcond,
    2045              :                                               &rlocations))
    2046           41 :                 return false;
    2047           55 :             *locations = list_concat(llocations, rlocations);
    2048           55 :             return true;
    2049              : 
    2050           20 :         case OP_OR:
                                    /*
                                     * Both operands are always evaluated (no short-circuit), so
                                     * that the locations of each side are available below.
                                     */
    2051           20 :             lmatch = TS_execute_locations_recurse(curitem + curitem->qoperator.left,
    2052              :                                                   arg, chkcond,
    2053              :                                                   &llocations);
    2054           20 :             rmatch = TS_execute_locations_recurse(curitem + 1,
    2055              :                                                   arg, chkcond,
    2056              :                                                   &rlocations);
    2057           20 :             if (lmatch || rmatch)
    2058              :             {
    2059              :                 /*
    2060              :                  * We generate an AND'able location struct from each
    2061              :                  * combination of sub-matches, following the disjunctive law
    2062              :                  * (A & B) | (C & D) = (A | C) & (A | D) & (B | C) & (B | D).
    2063              :                  *
    2064              :                  * However, if either input didn't produce locations (i.e., it
    2065              :                  * failed or was a NOT), we must just return the other list.
    2066              :                  */
    2067           20 :                 if (llocations == NIL)
    2068            0 :                     *locations = rlocations;
    2069           20 :                 else if (rlocations == NIL)
    2070           10 :                     *locations = llocations;
    2071              :                 else
    2072              :                 {
    2073              :                     ListCell   *ll;
    2074              : 
                                            /* one output struct per (left, right) pair */
    2075           20 :                     foreach(ll, llocations)
    2076              :                     {
    2077           10 :                         ExecPhraseData *ldata = (ExecPhraseData *) lfirst(ll);
    2078              :                         ListCell   *lr;
    2079              : 
    2080           20 :                         foreach(lr, rlocations)
    2081              :                         {
    2082           10 :                             ExecPhraseData *rdata = (ExecPhraseData *) lfirst(lr);
    2083              : 
                                                    /*
                                                     * Merge the pair with all three emit flags set
                                                     * and both offsets zero; npos sum is passed as
                                                     * the final (size) argument.
                                                     */
    2084           10 :                             data = palloc0_object(ExecPhraseData);
    2085           10 :                             (void) TS_phrase_output(data, ldata, rdata,
    2086              :                                                     TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
    2087              :                                                     0, 0,
    2088           10 :                                                     ldata->npos + rdata->npos);
    2089              :                             /* Report the larger width, as explained above. */
    2090           10 :                             data->width = Max(ldata->width, rdata->width);
    2091           10 :                             *locations = lappend(*locations, data);
    2092              :                         }
    2093              :                     }
    2094              :                 }
    2095              : 
    2096           20 :                 return true;
    2097              :             }
    2098            0 :             return false;
    2099              : 
    2100           50 :         case OP_PHRASE:
    2101              :             /* We can hand this off to TS_phrase_execute */
                                    /*
                                     * Flags are fixed at TS_EXEC_EMPTY, and only TS_YES is accepted
                                     * as a match.  A result flagged as negated still counts as a
                                     * match but contributes no locations.
                                     * NOTE(review): 'data' is not pfree'd on the negated path;
                                     * presumably left to memory-context cleanup -- confirm.
                                     */
    2102           50 :             data = palloc0_object(ExecPhraseData);
    2103           50 :             if (TS_phrase_execute(curitem, arg, TS_EXEC_EMPTY, chkcond,
    2104              :                                   data) == TS_YES)
    2105              :             {
    2106           50 :                 if (!data->negate)
    2107           50 :                     *locations = list_make1(data);
    2108           50 :                 return true;
    2109              :             }
    2110            0 :             pfree(data);
    2111            0 :             return false;
    2112              : 
    2113            0 :         default:
    2114            0 :             elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
    2115              :     }
    2116              : 
    2117              :     /* not reachable, but keep compiler quiet */
    2118              :     return false;
    2119              : }
    2120              : 
    2121              : /*
    2122              :  * Detect whether a tsquery boolean expression requires any positive matches
    2123              :  * to values shown in the tsquery.
    2124              :  *
    2125              :  * This is needed to know whether a GIN index search requires full index scan.
    2126              :  * For example, 'x & !y' requires a match of x, so it's sufficient to scan
    2127              :  * entries for x; but 'x | !y' could match rows containing neither x nor y.
                         *
                         * curitem: root item of the (sub)expression to inspect
                         *
                         * Returns true if a positive match is required, false otherwise.  The
                         * answer is derived from the query structure alone; no callback is
                         * invoked and no data is examined.
    2128              :  */
    2129              : bool
    2130          631 : tsquery_requires_match(QueryItem *curitem)
    2131              : {
    2132              :     /* since this function recurses, it could be driven to stack overflow */
    2133          631 :     check_stack_depth();
    2134              : 
                            /* A bare operand is itself a required positive match */
    2135          631 :     if (curitem->type == QI_VAL)
    2136          301 :         return true;
    2137              : 
    2138          330 :     switch (curitem->qoperator.oper)
    2139              :     {
    2140          127 :         case OP_NOT:
    2141              : 
    2142              :             /*
    2143              :              * Assume there are no required matches underneath a NOT.  For
    2144              :              * some cases with nested NOTs, we could prove there's a required
    2145              :              * match, but it seems unlikely to be worth the trouble.
    2146              :              */
    2147          127 :             return false;
    2148              : 
    2149          153 :         case OP_PHRASE:
    2150              : 
    2151              :             /*
    2152              :              * Treat OP_PHRASE as OP_AND here
                                     * (intentional fall through into the OP_AND case)
    2153              :              */
    2154              :         case OP_AND:
    2155              :             /* If either side requires a match, we're good */
                                    /* left operand is checked first; right only if left says no */
    2156          153 :             if (tsquery_requires_match(curitem + curitem->qoperator.left))
    2157          117 :                 return true;
    2158              :             else
    2159           36 :                 return tsquery_requires_match(curitem + 1);
    2160              : 
    2161           50 :         case OP_OR:
    2162              :             /* Both sides must require a match */
    2163           50 :             if (tsquery_requires_match(curitem + curitem->qoperator.left))
    2164           50 :                 return tsquery_requires_match(curitem + 1);
    2165              :             else
    2166            0 :                 return false;
    2167              : 
    2168            0 :         default:
    2169            0 :             elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
    2170              :     }
    2171              : 
    2172              :     /* not reachable, but keep compiler quiet */
    2173              :     return false;
    2174              : }
    2175              : 
    2176              : /*
    2177              :  * boolean operations
    2178              :  */
                        /*
                         * ts_match_qv: same test as ts_match_vq, but with the two arguments given
                         * in the opposite order.  It simply calls ts_match_vq with argument 1
                         * passed first and argument 0 passed second, and returns that result.
                         */
    2179              : Datum
    2180           40 : ts_match_qv(PG_FUNCTION_ARGS)
    2181              : {
    2182           40 :     PG_RETURN_DATUM(DirectFunctionCall2(ts_match_vq,
    2183              :                                         PG_GETARG_DATUM(1),
    2184              :                                         PG_GETARG_DATUM(0)));
    2185              : }
    2186              : 
                        /*
                         * ts_match_vq: test whether a tsvector (argument 0) matches a tsquery
                         * (argument 1), returning a boolean.
                         *
                         * A query with size zero matches nothing.  Otherwise a CHKVAL is set up
                         * to describe the vector's entry array, its string area and the query's
                         * operand area, and the query is evaluated by TS_execute with no flags
                         * (TS_EXEC_EMPTY) and checkcondition_str as the callback.
                         */
    2187              : Datum
    2188       146808 : ts_match_vq(PG_FUNCTION_ARGS)
    2189              : {
    2190       146808 :     TSVector    val = PG_GETARG_TSVECTOR(0);
    2191       146808 :     TSQuery     query = PG_GETARG_TSQUERY(1);
    2192              :     CHKVAL      chkval;
    2193              :     bool        result;
    2194              : 
    2195              :     /* empty query matches nothing */
    2196       146808 :     if (!query->size)
    2197              :     {
    2198            0 :         PG_FREE_IF_COPY(val, 0);
    2199            0 :         PG_FREE_IF_COPY(query, 1);
    2200            0 :         PG_RETURN_BOOL(false);
    2201              :     }
    2202              : 
                            /* arrb/arre bracket the val->size entries that start at ARRPTR(val) */
    2203       146808 :     chkval.arrb = ARRPTR(val);
    2204       146808 :     chkval.arre = chkval.arrb + val->size;
    2205       146808 :     chkval.values = STRPTR(val);
    2206       146808 :     chkval.operand = GETOPERAND(query);
    2207       146808 :     result = TS_execute(GETQUERY(query),
    2208              :                         &chkval,
    2209              :                         TS_EXEC_EMPTY,
    2210              :                         checkcondition_str);
    2211              : 
                            /* release the argument copies, if any were made, before returning */
    2212       146808 :     PG_FREE_IF_COPY(val, 0);
    2213       146808 :     PG_FREE_IF_COPY(query, 1);
    2214       146808 :     PG_RETURN_BOOL(result);
    2215              : }
    2216              : 
                        /*
                         * ts_match_tt: match test in which neither argument is yet a
                         * tsvector/tsquery (presumably both are text, going by the _tt suffix --
                         * confirm against the catalog entry).
                         *
                         * Argument 0 is converted with to_tsvector and argument 1 with
                         * plainto_tsquery; the converted values are then compared by ts_match_vq.
                         * Both intermediate values are freed before the boolean is returned.
                         */
    2217              : Datum
    2218            0 : ts_match_tt(PG_FUNCTION_ARGS)
    2219              : {
    2220              :     TSVector    vector;
    2221              :     TSQuery     query;
    2222              :     bool        res;
    2223              : 
    2224            0 :     vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
    2225              :                                                   PG_GETARG_DATUM(0)));
    2226            0 :     query = DatumGetTSQuery(DirectFunctionCall1(plainto_tsquery,
    2227              :                                                 PG_GETARG_DATUM(1)));
    2228              : 
    2229            0 :     res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
    2230              :                                            TSVectorGetDatum(vector),
    2231              :                                            TSQueryGetDatum(query)));
    2232              : 
                            /* both values were built here, so they are freed unconditionally */
    2233            0 :     pfree(vector);
    2234            0 :     pfree(query);
    2235              : 
    2236            0 :     PG_RETURN_BOOL(res);
    2237              : }
    2238              : 
                        /*
                         * ts_match_tq: match test in which argument 1 is already a tsquery but
                         * argument 0 still has to be converted (presumably from text, going by
                         * the _tq suffix -- confirm against the catalog entry).
                         *
                         * Argument 0 is converted with to_tsvector and the result is compared
                         * against the query by ts_match_vq.
                         */
    2239              : Datum
    2240            0 : ts_match_tq(PG_FUNCTION_ARGS)
    2241              : {
    2242              :     TSVector    vector;
    2243            0 :     TSQuery     query = PG_GETARG_TSQUERY(1);
    2244              :     bool        res;
    2245              : 
    2246            0 :     vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
    2247              :                                                   PG_GETARG_DATUM(0)));
    2248              : 
    2249            0 :     res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
    2250              :                                            TSVectorGetDatum(vector),
    2251              :                                            TSQueryGetDatum(query)));
    2252              : 
                            /*
                             * The vector was built here and is always freed; the query is an
                             * argument and is freed only if fetching it made a copy.
                             */
    2253            0 :     pfree(vector);
    2254            0 :     PG_FREE_IF_COPY(query, 1);
    2255              : 
    2256            0 :     PG_RETURN_BOOL(res);
    2257              : }
    2258              : 
    2259              : /*
    2260              :  * ts_stat statistic function support
    2261              :  */
    2262              : 
    2263              : 
    2264              : /*
    2265              :  * Returns the number of positions in value 'wptr' within tsvector 'txt',
    2266              :  * that have a weight equal to one of the weights in 'weight' bitmask.
                         *
                         * Bit i of 'weight' selects positions for which WEP_GETWEIGHT yields i.
                         * The position count and array are obtained with POSDATALEN and
                         * POSDATAPTR; the one visible caller invokes this only for entries whose
                         * haspos flag is set.
    2267              :  */
    2268              : static int
    2269         5452 : check_weight(TSVector txt, WordEntry *wptr, int8 weight)
    2270              : {
    2271         5452 :     int         len = POSDATALEN(txt, wptr);
    2272         5452 :     int         num = 0;
    2273         5452 :     WordEntryPos *ptr = POSDATAPTR(txt, wptr);
    2274              : 
                            /* visit each of the 'len' positions once, counting the selected ones */
    2275        11100 :     while (len--)
    2276              :     {
    2277         5648 :         if (weight & (1 << WEP_GETWEIGHT(*ptr)))
    2278            8 :             num++;
    2279         5648 :         ptr++;
    2280              :     }
    2281         5452 :     return num;
    2282              : }
    2283              : 
                        /*
                         * compareStatWord(a, e, t): compare the lexeme stored in StatEntry 'a'
                         * (a->lexeme, a->lenlexeme) with the lexeme of WordEntry 'e', whose bytes
                         * live in tsvector 't' at STRPTR(t) + e->pos with length e->len.  The
                         * result is whatever tsCompareString returns; insertStatEntry treats zero
                         * as equality and uses the sign to choose a subtree.  The trailing
                         * 'false' argument presumably disables prefix matching -- confirm against
                         * tsCompareString's declaration.
                         */
    2284              : #define compareStatWord(a,e,t)                          \
    2285              :     tsCompareString((a)->lexeme, (a)->lenlexeme,      \
    2286              :                     STRPTR(t) + (e)->pos, (e)->len,       \
    2287              :                     false)
    2288              : 
                        /*
                         * Account for one lexeme of a tsvector in the ts_stat search tree.
                         *
                         * persistentContext: memory context in which new tree nodes are allocated
                         * stat: accumulator holding the tree root, the weight mask and maxdepth
                         * txt: tsvector being processed
                         * off: index of the lexeme's WordEntry within txt's entry array
                         *
                         * The lexeme's contribution 'n' is computed first: with no weight mask
                         * (stat->weight == 0) it is the number of positions, or 1 for an entry
                         * without positions; with a mask it is the number of positions whose
                         * weight is selected by the mask, or 0 for an entry without positions.
                         * If n is zero nothing is recorded.  Otherwise the unbalanced binary tree
                         * rooted at stat->root is searched with compareStatWord: an existing node
                         * gets ndoc incremented and n added to nentry, while a missing lexeme gets
                         * a new node (ndoc = 1, nentry = n) linked under the last node visited.
                         * stat->maxdepth is raised if this lookup went deeper than any before.
                         */
    2289              : static void
    2290       230416 : insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 off)
    2291              : {
    2292       230416 :     WordEntry  *we = ARRPTR(txt) + off;
    2293       230416 :     StatEntry  *node = stat->root,
    2294       230416 :                *pnode = NULL;
    2295              :     int         n,
    2296       230416 :                 res = 0;
    2297       230416 :     uint32      depth = 1;
    2298              : 
    2299       230416 :     if (stat->weight == 0)
    2300       115208 :         n = (we->haspos) ? POSDATALEN(txt, we) : 1;
    2301              :     else
    2302       115208 :         n = (we->haspos) ? check_weight(txt, we, stat->weight) : 0;
    2303              : 
    2304       230416 :     if (n == 0)
    2305       115204 :         return;                 /* nothing to insert */
    2306              : 
                            /*
                             * Descend until the lexeme is found or we fall off the tree.  pnode
                             * trails one step behind, and res keeps the last comparison result,
                             * so that a new node can be attached on the correct side below.
                             */
    2307      1163592 :     while (node)
    2308              :     {
    2309      1159016 :         res = compareStatWord(node, we, txt);
    2310              : 
    2311      1159016 :         if (res == 0)
    2312              :         {
    2313       110636 :             break;
    2314              :         }
    2315              :         else
    2316              :         {
    2317      1048380 :             pnode = node;
    2318      1048380 :             node = (res < 0) ? node->left : node->right;
    2319              :         }
    2320      1048380 :         depth++;
    2321              :     }
    2322              : 
                            /* depth is the level of the node found, or of the node about to be added */
    2323       115212 :     if (depth > stat->maxdepth)
    2324           84 :         stat->maxdepth = depth;
    2325              : 
    2326       115212 :     if (node == NULL)
    2327              :     {
                                /* not present yet: allocate header plus room for the lexeme bytes */
    2328         4576 :         node = MemoryContextAlloc(persistentContext, STATENTRYHDRSZ + we->len);
    2329         4576 :         node->left = node->right = NULL;
    2330         4576 :         node->ndoc = 1;
    2331         4576 :         node->nentry = n;
    2332         4576 :         node->lenlexeme = we->len;
    2333         4576 :         memcpy(node->lexeme, STRPTR(txt) + we->pos, node->lenlexeme);
    2334              : 
    2335         4576 :         if (pnode == NULL)
    2336              :         {
                                    /* the tree was empty, so this node becomes the root */
    2337            8 :             stat->root = node;
    2338              :         }
    2339              :         else
    2340              :         {
    2341         4568 :             if (res < 0)
    2342         2254 :                 pnode->left = node;
    2343              :             else
    2344         2314 :                 pnode->right = node;
    2345              :         }
    2346              :     }
    2347              :     else
    2348              :     {
                                /* lexeme already known: one more document, n more occurrences */
    2349       110636 :         node->ndoc++;
    2350       110636 :         node->nentry += n;
    2351              :     }
    2352              : }
    2353              : 
    /*
     * Recursively feed entries of "txt" to insertStatEntry() for the virtual
     * index range low .. high (high appears to be exclusive), inserting the
     * midpoint of each half before descending into that half.
     *
     * Virtual indexes are shifted down by "offset" to obtain real indexes into
     * txt; candidates that fall outside 0 .. txt->size - 1 after the shift are
     * skipped.  The midpoint of the whole range is not inserted here; ts_accum()
     * inserts it before making the top-level call.  Presumably the
     * midpoint-first order keeps the search tree from degenerating when the
     * entries arrive in sorted order -- TODO confirm.
     */
    2354              : static void
    2355       330256 : chooseNextStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt,
    2356              :                     uint32 low, uint32 high, uint32 offset)
    2357              : {
    2358              :     uint32      pos;
    2359       330256 :     uint32      middle = (low + high) >> 1;
    2360              : 
                            /* midpoint of the lower half, if that half is non-empty */
    2361       330256 :     pos = (low + middle) >> 1;
    2362       330256 :     if (low != middle && pos >= offset && pos - offset < txt->size)
    2363       113552 :         insertStatEntry(persistentContext, stat, txt, pos - offset);
                            /* midpoint of the upper half, if that half is non-empty */
    2364       330256 :     pos = (high + middle + 1) >> 1;
    2365       330256 :     if (middle + 1 != high && pos >= offset && pos - offset < txt->size)
    2366       112856 :         insertStatEntry(persistentContext, stat, txt, pos - offset);
    2367              : 
                            /* then subdivide each non-empty half in the same way */
    2368       330256 :     if (low != middle)
    2369       165128 :         chooseNextStatEntry(persistentContext, stat, txt, low, middle, offset);
    2370       330256 :     if (high != middle + 1)
    2371       161120 :         chooseNextStatEntry(persistentContext, stat, txt, middle + 1, high, offset);
    2372       330256 : }
    2373              : 
    2374              : /*
    2375              :  * This is written like a custom aggregate function, because the
    2376              :  * original plan was to do just that. Unfortunately, an aggregate function
    2377              :  * can't return a set, so that plan was abandoned. If that limitation is
    2378              :  * lifted in the future, ts_stat could be a real aggregate function so that
    2379              :  * you could use it like this:
    2380              :  *
    2381              :  *   SELECT ts_stat(vector_column) FROM vector_table;
    2382              :  *
    2383              :  *  where vector_column is a tsvector-type column in vector_table.
    2384              :  */
    2385              : 
    /*
     * Fold one tsvector datum into the running statistics.
     *
     * persistentContext: context in which the TSVectorStat (when created here)
     *      and all tree nodes are allocated.
     * stat: accumulator so far, or NULL to have one created.
     * data: the tsvector value.
     *
     * Returns the accumulator (newly allocated if stat was NULL).
     */
    2386              : static TSVectorStat *
    2387         4072 : ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
    2388              : {
    2389         4072 :     TSVector    txt = DatumGetTSVector(data);
    2390              :     uint32      i,
    2391         4072 :                 nbit = 0,
    2392              :                 offset;
    2393              : 
    2394         4072 :     if (stat == NULL)
    2395              :     {                           /* initialize on first call */
    2396            0 :         stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
    2397            0 :         stat->maxdepth = 1;
    2398              :     }
    2399              : 
    2400              :     /* simple check of correctness */
    2401         4072 :     if (txt == NULL || txt->size == 0)
    2402              :     {
                                /* release the copy if DatumGetTSVector() had to make one */
    2403           64 :         if (txt && txt != (TSVector) DatumGetPointer(data))
    2404           64 :             pfree(txt);
    2405           64 :         return stat;
    2406              :     }
    2407              : 
                            /*
                             * Count the bits needed to represent size - 1, then turn that into
                             * the smallest power of two that is >= size.  That power of two is
                             * the width of the virtual index range handed to
                             * chooseNextStatEntry(); offset centers the real entries in it.
                             */
    2408         4008 :     i = txt->size - 1;
    2409        28480 :     for (; i > 0; i >>= 1)
    2410        24472 :         nbit++;
    2411              : 
    2412         4008 :     nbit = 1 << nbit;
    2413         4008 :     offset = (nbit - txt->size) / 2;
    2414              : 
                            /* insert the middle entry first, then the midpoints of each half */
    2415         4008 :     insertStatEntry(persistentContext, stat, txt, (nbit >> 1) - offset);
    2416         4008 :     chooseNextStatEntry(persistentContext, stat, txt, 0, nbit, offset);
    2417              : 
                            /*
                             * NOTE(review): unlike the empty-vector path above, a detoasted
                             * copy of txt is not pfree'd here -- presumably it is released
                             * with the caller's memory context; confirm.
                             */
    2418         4008 :     return stat;
    2419              : }
    2420              : 
    /*
     * Prepare a set-returning-function context for walking the statistics tree.
     *
     * Stores "stat" in funcctx->user_fctx, allocates the traversal stack
     * (stat->maxdepth + 1 slots) in the multi-call memory context, positions
     * the stack on the leftmost node of the tree, and fills in the result tuple
     * descriptor and attinmeta.  Raises an error if the function's declared
     * result type is not composite.
     */
    2421              : static void
    2422            8 : ts_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx,
    2423              :                    TSVectorStat *stat)
    2424              : {
    2425              :     TupleDesc   tupdesc;
    2426              :     MemoryContext oldcontext;
    2427              :     StatEntry  *node;
    2428              : 
    2429            8 :     funcctx->user_fctx = stat;
    2430              : 
                            /* everything allocated below must survive across calls */
    2431            8 :     oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
    2432              : 
                            /* one slot per tree level, plus one spare above the deepest level */
    2433            8 :     stat->stack = palloc0_array(StatEntry *, stat->maxdepth + 1);
    2434            8 :     stat->stackpos = 0;
    2435              : 
    2436            8 :     node = stat->root;
    2437              :     /* find leftmost value */
    2438            8 :     if (node == NULL)
    2439            0 :         stat->stack[stat->stackpos] = NULL;
    2440              :     else
    2441              :         for (;;)
    2442              :         {
                                    /* push each node on the path from the root to the leftmost leaf */
    2443           32 :             stat->stack[stat->stackpos] = node;
    2444           32 :             if (node->left)
    2445              :             {
    2446           24 :                 stat->stackpos++;
    2447           24 :                 node = node->left;
    2448              :             }
    2449              :             else
    2450            8 :                 break;
    2451              :         }
    2452              :     Assert(stat->stackpos <= stat->maxdepth);
    2453              : 
    2454            8 :     if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
    2455            0 :         elog(ERROR, "return type must be a row type");
    2456            8 :     funcctx->tuple_desc = tupdesc;
    2457            8 :     funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
    2458              : 
    2459            8 :     MemoryContextSwitchTo(oldcontext);
    2460            8 : }
    2461              : 
    /*
     * Advance the in-order traversal kept in stat->stack / stat->stackpos and
     * return the next node to emit, or NULL when the tree is exhausted (or was
     * empty to begin with).
     *
     * A node whose ndoc is still non-zero has not been emitted yet;
     * ts_process_call() zeroes ndoc once it has output the node, which is how
     * this function tells "visit me" from "move on".
     */
    2462              : static StatEntry *
    2463         9152 : walkStatEntryTree(TSVectorStat *stat)
    2464              : {
    2465         9152 :     StatEntry  *node = stat->stack[stat->stackpos];
    2466              : 
    2467         9152 :     if (node == NULL)
    2468            0 :         return NULL;
    2469              : 
    2470         9152 :     if (node->ndoc != 0)
    2471              :     {
    2472              :         /* return the entry itself: its left subtree has already been visited */
    2473         2262 :         return node;
    2474              :     }
                            /*
                             * The slot just above the current one still holds the child we
                             * most recently descended into.  If that is the right child, the
                             * right subtree has been handled already.  Reading slot
                             * stackpos + 1 relies on the spare slot allocated by
                             * ts_setup_firstcall().
                             */
    2475         6890 :     else if (node->right && node->right != stat->stack[stat->stackpos + 1])
    2476              :     {
    2477              :         /* go on right sublink */
    2478         2314 :         stat->stackpos++;
    2479         2314 :         node = node->right;
    2480              : 
    2481              :         /* find leftmost value */
    2482              :         for (;;)
    2483              :         {
    2484         4544 :             stat->stack[stat->stackpos] = node;
    2485         4544 :             if (node->left)
    2486              :             {
    2487         2230 :                 stat->stackpos++;
    2488         2230 :                 node = node->left;
    2489              :             }
    2490              :             else
    2491         2314 :                 break;
    2492              :         }
    2493         2314 :         Assert(stat->stackpos <= stat->maxdepth);
    2494              :     }
    2495              :     else
    2496              :     {
    2497              :         /* left subtree, the node itself and right subtree are all done */
    2498         4576 :         if (stat->stackpos == 0)
    2499            8 :             return NULL;
    2500              : 
                                /* pop to the parent and retry from there */
    2501         4568 :         stat->stackpos--;
    2502         4568 :         return walkStatEntryTree(stat);
    2503              :     }
    2504              : 
    2505         2314 :     return node;
    2506              : }
    2507              : 
    /*
     * Produce the next result row of the statistics function.
     *
     * Fetches the next unvisited tree node and builds a three-column tuple
     * (lexeme, ndoc, nentry) from C strings via funcctx->attinmeta.  Returns the
     * tuple as a Datum, or (Datum) 0 when there are no more nodes.
     */
    2508              : static Datum
    2509         4584 : ts_process_call(FuncCallContext *funcctx)
    2510              : {
    2511              :     TSVectorStat *st;
    2512              :     StatEntry  *entry;
    2513              : 
    2514         4584 :     st = (TSVectorStat *) funcctx->user_fctx;
    2515              : 
    2516         4584 :     entry = walkStatEntryTree(st);
    2517              : 
    2518         4584 :     if (entry != NULL)
    2519              :     {
    2520              :         Datum       result;
    2521              :         char       *values[3];
    2522              :         char        ndoc[16];
    2523              :         char        nentry[16];
    2524              :         HeapTuple   tuple;
    2525              : 
                                /* the stored lexeme is not NUL-terminated, so make a terminated copy */
    2526         4576 :         values[0] = palloc(entry->lenlexeme + 1);
    2527         4576 :         memcpy(values[0], entry->lexeme, entry->lenlexeme);
    2528         4576 :         (values[0])[entry->lenlexeme] = '\0';
                                /*
                                 * NOTE(review): the counters are printed with "%d"; confirm that
                                 * the StatEntry fields are signed int (definition not shown here).
                                 */
    2529         4576 :         sprintf(ndoc, "%d", entry->ndoc);
    2530         4576 :         values[1] = ndoc;
    2531         4576 :         sprintf(nentry, "%d", entry->nentry);
    2532         4576 :         values[2] = nentry;
    2533              : 
    2534         4576 :         tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
    2535         4576 :         result = HeapTupleGetDatum(tuple);
    2536              : 
    2537         4576 :         pfree(values[0]);
    2538              : 
    2539              :         /* mark entry as already visited */
    2540         4576 :         entry->ndoc = 0;
    2541              : 
    2542         4576 :         return result;
    2543              :     }
    2544              : 
    2545            8 :     return (Datum) 0;
    2546              : }
    2547              : 
    /*
     * Run the SQL query given in "txt" through SPI and accumulate statistics
     * over the tsvector values it returns.
     *
     * persistentContext: context for the returned TSVectorStat and its tree.
     * txt: query text; it must yield exactly one column whose type is binary
     *      coercible to tsvector, otherwise an error is raised.
     * ws:  optional string of weight letters (A/B/C/D, either case) selecting
     *      which positions are counted; NULL means no weight filter.
     *
     * The caller must already be connected to SPI (see ts_stat1/ts_stat2).
     * Returns the populated accumulator.
     */
    2548              : static TSVectorStat *
    2549            8 : ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
    2550              : {
    2551            8 :     char       *query = text_to_cstring(txt);
    2552              :     TSVectorStat *stat;
    2553              :     bool        isnull;
    2554              :     Portal      portal;
    2555              :     SPIPlanPtr  plan;
    2556              : 
    2557            8 :     if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
    2558              :         /* internal error */
    2559            0 :         elog(ERROR, "SPI_prepare(\"%s\") failed", query);
    2560              : 
    2561            8 :     if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
    2562              :         /* internal error */
    2563            0 :         elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
    2564              : 
                            /* fetch the first batch so the result shape can be validated */
    2565            8 :     SPI_cursor_fetch(portal, true, 100);
    2566              : 
    2567            8 :     if (SPI_tuptable == NULL ||
    2568            8 :         SPI_tuptable->tupdesc->natts != 1 ||
    2569            8 :         !IsBinaryCoercible(SPI_gettypeid(SPI_tuptable->tupdesc, 1),
    2570              :                            TSVECTOROID))
    2571            0 :         ereport(ERROR,
    2572              :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    2573              :                  errmsg("ts_stat query must return one tsvector column")));
    2574              : 
    2575            8 :     stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
    2576            8 :     stat->maxdepth = 1;
    2577              : 
    2578            8 :     if (ws)
    2579              :     {
    2580              :         char       *buf;
    2581              :         const char *end;
    2582              : 
                                /*
                                 * Translate the weight letters into a bitmask: A sets bit 3, B
                                 * bit 2, C bit 1 and D bit 0.  Multibyte characters and any other
                                 * single-byte character are ignored.
                                 */
    2583            4 :         buf = VARDATA_ANY(ws);
    2584            4 :         end = buf + VARSIZE_ANY_EXHDR(ws);
    2585           12 :         while (buf < end)
    2586              :         {
    2587            8 :             int         len = pg_mblen_range(buf, end);
    2588              : 
    2589            8 :             if (len == 1)
    2590              :             {
    2591            8 :                 switch (*buf)
    2592              :                 {
    2593            4 :                     case 'A':
    2594              :                     case 'a':
    2595            4 :                         stat->weight |= 1 << 3;
    2596            4 :                         break;
    2597            4 :                     case 'B':
    2598              :                     case 'b':
    2599            4 :                         stat->weight |= 1 << 2;
    2600            4 :                         break;
    2601            0 :                     case 'C':
    2602              :                     case 'c':
    2603            0 :                         stat->weight |= 1 << 1;
    2604            0 :                         break;
    2605            0 :                     case 'D':
    2606              :                     case 'd':
    2607            0 :                         stat->weight |= 1;
    2608            0 :                         break;
    2609            0 :                     default:
                                            /* unrecognized letter: deliberately a no-op */
    2610            0 :                         stat->weight |= 0;
    2611              :                 }
    2612              :             }
    2613            8 :             buf += len;
    2614              :         }
    2615              :     }
    2616              : 
                            /* consume the cursor batch by batch until a fetch returns no rows */
    2617           56 :     while (SPI_processed > 0)
    2618              :     {
    2619              :         uint64      i;
    2620              : 
    2621         4120 :         for (i = 0; i < SPI_processed; i++)
    2622              :         {
    2623         4072 :             Datum       data = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
    2624              : 
                                    /* NULL tsvectors are skipped */
    2625         4072 :             if (!isnull)
    2626         4072 :                 stat = ts_accum(persistentContext, stat, data);
    2627              :         }
    2628              : 
    2629           48 :         SPI_freetuptable(SPI_tuptable);
    2630           48 :         SPI_cursor_fetch(portal, true, 100);
    2631              :     }
    2632              : 
                            /* free the final (empty) batch and the SPI objects created above */
    2633            8 :     SPI_freetuptable(SPI_tuptable);
    2634            8 :     SPI_cursor_close(portal);
    2635            8 :     SPI_freeplan(plan);
    2636            8 :     pfree(query);
    2637              : 
    2638            8 :     return stat;
    2639              : }
    2640              : 
    /*
     * Set-returning SQL-callable function: one-argument form (query text only,
     * no weight filter).
     *
     * On the first call it connects to SPI, gathers the statistics with
     * ts_stat_sql() into the multi-call memory context, prepares the tree walk
     * and disconnects from SPI.  Every call, including the first, then returns
     * one row from ts_process_call() until the tree is exhausted.
     */
    2641              : Datum
    2642         4576 : ts_stat1(PG_FUNCTION_ARGS)
    2643              : {
    2644              :     FuncCallContext *funcctx;
    2645              :     Datum       result;
    2646              : 
    2647         4576 :     if (SRF_IS_FIRSTCALL())
    2648              :     {
    2649              :         TSVectorStat *stat;
    2650            4 :         text       *txt = PG_GETARG_TEXT_PP(0);
    2651              : 
    2652            4 :         funcctx = SRF_FIRSTCALL_INIT();
    2653            4 :         SPI_connect();
                                /* NULL weight string: count all positions */
    2654            4 :         stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, NULL);
    2655            4 :         PG_FREE_IF_COPY(txt, 0);
    2656            4 :         ts_setup_firstcall(fcinfo, funcctx, stat);
    2657            4 :         SPI_finish();
    2658              :     }
    2659              : 
    2660         4576 :     funcctx = SRF_PERCALL_SETUP();
                            /* (Datum) 0 from ts_process_call() signals the end of the result set */
    2661         4576 :     if ((result = ts_process_call(funcctx)) != (Datum) 0)
    2662         4572 :         SRF_RETURN_NEXT(funcctx, result);
    2663            4 :     SRF_RETURN_DONE(funcctx);
    2664              : }
    2665              : 
    /*
     * Set-returning SQL-callable function: two-argument form (query text plus
     * a string of weight letters).
     *
     * Identical to ts_stat1() except that the second argument is passed to
     * ts_stat_sql() as the weight filter.
     */
    2666              : Datum
    2667            8 : ts_stat2(PG_FUNCTION_ARGS)
    2668              : {
    2669              :     FuncCallContext *funcctx;
    2670              :     Datum       result;
    2671              : 
    2672            8 :     if (SRF_IS_FIRSTCALL())
    2673              :     {
    2674              :         TSVectorStat *stat;
    2675            4 :         text       *txt = PG_GETARG_TEXT_PP(0);
    2676            4 :         text       *ws = PG_GETARG_TEXT_PP(1);
    2677              : 
    2678            4 :         funcctx = SRF_FIRSTCALL_INIT();
    2679            4 :         SPI_connect();
    2680            4 :         stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, ws);
    2681            4 :         PG_FREE_IF_COPY(txt, 0);
    2682            4 :         PG_FREE_IF_COPY(ws, 1);
    2683            4 :         ts_setup_firstcall(fcinfo, funcctx, stat);
    2684            4 :         SPI_finish();
    2685              :     }
    2686              : 
    2687            8 :     funcctx = SRF_PERCALL_SETUP();
                            /* (Datum) 0 from ts_process_call() signals the end of the result set */
    2688            8 :     if ((result = ts_process_call(funcctx)) != (Datum) 0)
    2689            4 :         SRF_RETURN_NEXT(funcctx, result);
    2690            4 :     SRF_RETURN_DONE(funcctx);
    2691              : }
    2692              : 
    2693              : 
    2694              : /*
    2695              :  * Triggers for automatic update of a tsvector column from text column(s)
    2696              :  *
    2697              :  * Trigger arguments are either
    2698              :  *      name of tsvector col, name of tsconfig to use, name(s) of text col(s)
    2699              :  *      name of tsvector col, name of regconfig col, name(s) of text col(s)
    2700              :  * ie, tsconfig can either be specified by name, or indirectly as the
    2701              :  * contents of a regconfig field in the row.  If the name is used, it must
    2702              :  * be explicitly schema-qualified.
    2703              :  */
    /*
     * Trigger entry point for the variant whose second trigger argument is a
     * text search configuration name; delegates to tsvector_update_trigger()
     * with config_column = false.
     */
    2704              : Datum
    2705           12 : tsvector_update_trigger_byid(PG_FUNCTION_ARGS)
    2706              : {
    2707           12 :     return tsvector_update_trigger(fcinfo, false);
    2708              : }
    2709              : 
    /*
     * Trigger entry point for the variant whose second trigger argument names
     * a regconfig column of the row; delegates to tsvector_update_trigger()
     * with config_column = true.
     */
    2710              : Datum
    2711            0 : tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS)
    2712              : {
    2713            0 :     return tsvector_update_trigger(fcinfo, true);
    2714              : }
    2715              : 
    /*
     * Common implementation of the tsvector update triggers.
     *
     * Trigger arguments: tgargs[0] names the tsvector column to fill;
     * tgargs[1] names either a text search configuration (config_column false;
     * the name must be schema-qualified) or a regconfig column of the row
     * (config_column true); tgargs[2..] name the text columns to index.
     *
     * Must be fired BEFORE INSERT or UPDATE, FOR EACH ROW; anything else raises
     * an error.  On INSERT the tsvector is always computed.  On UPDATE it is
     * recomputed only if at least one of the listed text columns appears in
     * tg_updatedcols.
     *
     * Returns the tuple to store (a modified copy when the tsvector was
     * recomputed, otherwise the incoming tuple) as a pointer Datum.
     */
    2716              : static Datum
    2717           12 : tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
    2718              : {
    2719              :     TriggerData *trigdata;
    2720              :     Trigger    *trigger;
    2721              :     Relation    rel;
    2722           12 :     HeapTuple   rettuple = NULL;
    2723              :     int         tsvector_attr_num,
    2724              :                 i;
    2725              :     ParsedText  prs;
    2726              :     Datum       datum;
    2727              :     bool        isnull;
    2728              :     text       *txt;
    2729              :     Oid         cfgId;
    2730              :     bool        update_needed;
    2731              : 
    2732              :     /* Check call context */
    2733           12 :     if (!CALLED_AS_TRIGGER(fcinfo)) /* internal error */
    2734            0 :         elog(ERROR, "tsvector_update_trigger: not fired by trigger manager");
    2735              : 
    2736           12 :     trigdata = (TriggerData *) fcinfo->context;
    2737           12 :     if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
    2738            0 :         elog(ERROR, "tsvector_update_trigger: must be fired for row");
    2739           12 :     if (!TRIGGER_FIRED_BEFORE(trigdata->tg_event))
    2740            0 :         elog(ERROR, "tsvector_update_trigger: must be fired BEFORE event");
    2741              : 
                            /* pick the tuple that is about to be stored */
    2742           12 :     if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
    2743              :     {
    2744            8 :         rettuple = trigdata->tg_trigtuple;
    2745            8 :         update_needed = true;
    2746              :     }
    2747            4 :     else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
    2748              :     {
    2749            4 :         rettuple = trigdata->tg_newtuple;
    2750            4 :         update_needed = false;  /* computed below */
    2751              :     }
    2752              :     else
    2753            0 :         elog(ERROR, "tsvector_update_trigger: must be fired for INSERT or UPDATE");
    2754              : 
    2755           12 :     trigger = trigdata->tg_trigger;
    2756           12 :     rel = trigdata->tg_relation;
    2757              : 
                            /*
                             * Need at least: tsvector column, configuration, one text column.
                             * NOTE(review): the message below ends with an unmatched ')'.
                             */
    2758           12 :     if (trigger->tgnargs < 3)
    2759            0 :         elog(ERROR, "tsvector_update_trigger: arguments must be tsvector_field, ts_config, text_field1, ...)");
    2760              : 
    2761              :     /* Find the target tsvector column */
    2762           12 :     tsvector_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
    2763           12 :     if (tsvector_attr_num == SPI_ERROR_NOATTRIBUTE)
    2764            0 :         ereport(ERROR,
    2765              :                 (errcode(ERRCODE_UNDEFINED_COLUMN),
    2766              :                  errmsg("tsvector column \"%s\" does not exist",
    2767              :                         trigger->tgargs[0])));
    2768              :     /* This will effectively reject system columns, so no separate test: */
    2769           12 :     if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, tsvector_attr_num),
    2770              :                            TSVECTOROID))
    2771            0 :         ereport(ERROR,
    2772              :                 (errcode(ERRCODE_DATATYPE_MISMATCH),
    2773              :                  errmsg("column \"%s\" is not of tsvector type",
    2774              :                         trigger->tgargs[0])));
    2775              : 
    2776              :     /* Find the configuration to use */
    2777           12 :     if (config_column)
    2778              :     {
    2779              :         int         config_attr_num;
    2780              : 
                                /* the configuration OID is read from a regconfig column of the row */
    2781            0 :         config_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[1]);
    2782            0 :         if (config_attr_num == SPI_ERROR_NOATTRIBUTE)
    2783            0 :             ereport(ERROR,
    2784              :                     (errcode(ERRCODE_UNDEFINED_COLUMN),
    2785              :                      errmsg("configuration column \"%s\" does not exist",
    2786              :                             trigger->tgargs[1])));
    2787            0 :         if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, config_attr_num),
    2788              :                                REGCONFIGOID))
    2789            0 :             ereport(ERROR,
    2790              :                     (errcode(ERRCODE_DATATYPE_MISMATCH),
    2791              :                      errmsg("column \"%s\" is not of regconfig type",
    2792              :                             trigger->tgargs[1])));
    2793              : 
    2794            0 :         datum = SPI_getbinval(rettuple, rel->rd_att, config_attr_num, &isnull);
    2795            0 :         if (isnull)
    2796            0 :             ereport(ERROR,
    2797              :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
    2798              :                      errmsg("configuration column \"%s\" must not be null",
    2799              :                             trigger->tgargs[1])));
    2800            0 :         cfgId = DatumGetObjectId(datum);
    2801              :     }
    2802              :     else
    2803              :     {
    2804              :         List       *names;
    2805              : 
                                /* the configuration is given by name in the trigger arguments */
    2806           12 :         names = stringToQualifiedNameList(trigger->tgargs[1], NULL);
    2807              :         /* require a schema so that results are not search path dependent */
    2808           12 :         if (list_length(names) < 2)
    2809            0 :             ereport(ERROR,
    2810              :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    2811              :                      errmsg("text search configuration name \"%s\" must be schema-qualified",
    2812              :                             trigger->tgargs[1])));
    2813           12 :         cfgId = get_ts_config_oid(names, false);
    2814              :     }
    2815              : 
    2816              :     /* initialize parse state */
    2817           12 :     prs.lenwords = 32;
    2818           12 :     prs.curwords = 0;
    2819           12 :     prs.pos = 0;
    2820           12 :     prs.words = palloc_array(ParsedWord, prs.lenwords);
    2821              : 
    2822              :     /* find all words in indexable column(s) */
    2823           24 :     for (i = 2; i < trigger->tgnargs; i++)
    2824              :     {
    2825              :         int         numattr;
    2826              : 
    2827           12 :         numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
    2828           12 :         if (numattr == SPI_ERROR_NOATTRIBUTE)
    2829            0 :             ereport(ERROR,
    2830              :                     (errcode(ERRCODE_UNDEFINED_COLUMN),
    2831              :                      errmsg("column \"%s\" does not exist",
    2832              :                             trigger->tgargs[i])));
    2833           12 :         if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, numattr), TEXTOID))
    2834            0 :             ereport(ERROR,
    2835              :                     (errcode(ERRCODE_DATATYPE_MISMATCH),
    2836              :                      errmsg("column \"%s\" is not of a character type",
    2837              :                             trigger->tgargs[i])));
    2838              : 
                                /* an UPDATE that touched any source column forces a recompute */
    2839           12 :         if (bms_is_member(numattr - FirstLowInvalidHeapAttributeNumber, trigdata->tg_updatedcols))
    2840            4 :             update_needed = true;
    2841              : 
                                /* NULL source columns contribute no words */
    2842           12 :         datum = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
    2843           12 :         if (isnull)
    2844            4 :             continue;
    2845              : 
    2846            8 :         txt = DatumGetTextPP(datum);
    2847              : 
    2848            8 :         parsetext(cfgId, &prs, VARDATA_ANY(txt), VARSIZE_ANY_EXHDR(txt));
    2849              : 
                                /* free the detoasted copy, if one was made */
    2850            8 :         if (txt != (text *) DatumGetPointer(datum))
    2851            0 :             pfree(txt);
    2852              :     }
    2853              : 
    2854           12 :     if (update_needed)
    2855              :     {
    2856              :         /* make tsvector value */
    2857           12 :         datum = TSVectorGetDatum(make_tsvector(&prs));
    2858           12 :         isnull = false;
    2859              : 
    2860              :         /* and insert it into tuple */
    2861           12 :         rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
    2862              :                                              1, &tsvector_attr_num,
    2863              :                                              &datum, &isnull);
    2864              : 
                                /* the freshly built tsvector is freed once the new tuple exists */
    2865           12 :         pfree(DatumGetPointer(datum));
    2866              :     }
    2867              : 
    2868           12 :     return PointerGetDatum(rettuple);
    2869              : }
        

Generated by: LCOV version 2.0-1