LCOV - code coverage report
Current view: top level - src/backend/utils/adt - tsvector_op.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 87.2 % 1173 1023
Test Date: 2026-03-12 03:15:11 Functions: 84.6 % 52 44
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * tsvector_op.c
       4              :  *    operations over tsvector
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  *
       8              :  *
       9              :  * IDENTIFICATION
      10              :  *    src/backend/utils/adt/tsvector_op.c
      11              :  *
      12              :  *-------------------------------------------------------------------------
      13              :  */
      14              : #include "postgres.h"
      15              : 
      16              : #include <limits.h>
      17              : 
      18              : #include "access/htup_details.h"
      19              : #include "catalog/namespace.h"
      20              : #include "catalog/pg_type.h"
      21              : #include "commands/trigger.h"
      22              : #include "common/int.h"
      23              : #include "executor/spi.h"
      24              : #include "funcapi.h"
      25              : #include "lib/qunique.h"
      26              : #include "mb/pg_wchar.h"
      27              : #include "miscadmin.h"
      28              : #include "parser/parse_coerce.h"
      29              : #include "tsearch/ts_utils.h"
      30              : #include "utils/array.h"
      31              : #include "utils/builtins.h"
      32              : #include "utils/regproc.h"
      33              : #include "utils/rel.h"
      34              : 
      35              : 
/*
 * CHKVAL bundles two WordEntry pointers and two char pointers.
 *
 * NOTE(review): no user of this struct is visible in this part of the file;
 * the per-field notes below are inferred from the field names and should be
 * confirmed against the code that fills the struct in.
 */
typedef struct
{
    WordEntry  *arrb;           /* presumably first entry of a WordEntry array */
    WordEntry  *arre;           /* presumably the end of that array */
    char       *values;         /* presumably lexeme bytes the entries refer to */
    char       *operand;        /* presumably operand text to match -- confirm */
} CHKVAL;
      43              : 
      44              : 
/*
 * StatEntry is a tree node with left/right child links.  The lexeme bytes
 * are stored inline in the trailing flexible array member, so the node is
 * variable-sized; lenlexeme presumably holds the number of bytes stored in
 * lexeme[] (confirm against the allocation site).
 */
typedef struct StatEntry
{
    uint32      ndoc;           /* zero indicates that we were already here
                                 * while walking through the tree */
    uint32      nentry;
    struct StatEntry *left;     /* left child link */
    struct StatEntry *right;    /* right child link */
    uint32      lenlexeme;
    char        lexeme[FLEXIBLE_ARRAY_MEMBER];
} StatEntry;
      55              : 
/* Size of the fixed-length part of StatEntry, i.e. the offset of lexeme[]. */
#define STATENTRYHDRSZ  (offsetof(StatEntry, lexeme))
      57              : 
/*
 * TSVectorStat groups a StatEntry tree root with a StatEntry pointer stack
 * and a few scalar fields.
 *
 * NOTE(review): the code that uses this struct is not visible in this part
 * of the file; the per-field notes are inferred from names -- confirm.
 */
typedef struct
{
    int32       weight;         /* presumably a weight filter -- confirm */

    uint32      maxdepth;       /* presumably the deepest tree level seen */

    StatEntry **stack;          /* array of StatEntry pointers */
    uint32      stackpos;       /* presumably current index into stack[] */

    StatEntry  *root;           /* root of the StatEntry tree */
} TSVectorStat;
      69              : 
      70              : 
/* Forward declarations of file-local (static) functions. */
static TSTernaryValue TS_execute_recurse(QueryItem *curitem, void *arg,
                                         uint32 flags,
                                         TSExecuteCallback chkcond);
static bool TS_execute_locations_recurse(QueryItem *curitem,
                                         void *arg,
                                         TSExecuteCallback chkcond,
                                         List **locations);
static int  tsvector_bsearch(const TSVectorData *tsv, char *lexeme, int lexeme_len);
static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
      80              : 
      81              : 
      82              : /*
      83              :  * Order: haspos, len, word, for all positions (pos, weight)
      84              :  */
      85              : static int
      86          175 : silly_cmp_tsvector(const TSVectorData *a, const TSVectorData *b)
      87              : {
      88          175 :     if (VARSIZE(a) < VARSIZE(b))
      89            0 :         return -1;
      90          175 :     else if (VARSIZE(a) > VARSIZE(b))
      91            0 :         return 1;
      92          175 :     else if (a->size < b->size)
      93            0 :         return -1;
      94          175 :     else if (a->size > b->size)
      95            0 :         return 1;
      96              :     else
      97              :     {
      98          175 :         const WordEntry *aptr = ARRPTR(a);
      99          175 :         const WordEntry *bptr = ARRPTR(b);
     100          175 :         int         i = 0;
     101              :         int         res;
     102              : 
     103              : 
     104          202 :         for (i = 0; i < a->size; i++)
     105              :         {
     106          177 :             if (aptr->haspos != bptr->haspos)
     107              :             {
     108            0 :                 return (aptr->haspos > bptr->haspos) ? -1 : 1;
     109              :             }
     110          177 :             else if ((res = tsCompareString(STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) != 0)
     111              :             {
     112          150 :                 return res;
     113              :             }
     114           27 :             else if (aptr->haspos)
     115              :             {
     116           24 :                 WordEntryPos *ap = POSDATAPTR(a, aptr);
     117           24 :                 WordEntryPos *bp = POSDATAPTR(b, bptr);
     118              :                 int         j;
     119              : 
     120           24 :                 if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
     121            0 :                     return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;
     122              : 
     123           48 :                 for (j = 0; j < POSDATALEN(a, aptr); j++)
     124              :                 {
     125           24 :                     if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
     126              :                     {
     127            0 :                         return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1;
     128              :                     }
     129           24 :                     else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp))
     130              :                     {
     131            0 :                         return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1;
     132              :                     }
     133           24 :                     ap++, bp++;
     134              :                 }
     135              :             }
     136              : 
     137           27 :             aptr++;
     138           27 :             bptr++;
     139              :         }
     140              :     }
     141              : 
     142           25 :     return 0;
     143              : }
     144              : 
/*
 * TSVECTORCMPFUNC(type, action, ret) expands to the definition of a function
 * named tsvector_<type>.  The generated function fetches two tsvector
 * arguments, compares them with silly_cmp_tsvector(), releases any copies
 * made while fetching the arguments, and returns "res action 0" through
 * PG_RETURN_<ret>.
 *
 * The trailing "extern int no_such_variable" declaration only exists to
 * absorb the semicolon written after each macro invocation.
 */
#define TSVECTORCMPFUNC( type, action, ret )            \
Datum                                                   \
tsvector_##type(PG_FUNCTION_ARGS)                       \
{                                                       \
    TSVector    a = PG_GETARG_TSVECTOR(0);              \
    TSVector    b = PG_GETARG_TSVECTOR(1);              \
    int         res = silly_cmp_tsvector(a, b);         \
    PG_FREE_IF_COPY(a,0);                               \
    PG_FREE_IF_COPY(b,1);                               \
    PG_RETURN_##ret( res action 0 );                    \
}   \
/* keep compiler quiet - no extra ; */                  \
extern int no_such_variable

/* Boolean operators: the comparison result is tested against zero. */
TSVECTORCMPFUNC(lt, <, BOOL);
TSVECTORCMPFUNC(le, <=, BOOL);
TSVECTORCMPFUNC(eq, ==, BOOL);
TSVECTORCMPFUNC(ge, >=, BOOL);
TSVECTORCMPFUNC(gt, >, BOOL);
TSVECTORCMPFUNC(ne, !=, BOOL);
/* Three-way compare: "res + 0" passes the raw result through unchanged. */
TSVECTORCMPFUNC(cmp, +, INT32);
     166              : 
     167              : Datum
     168           45 : tsvector_strip(PG_FUNCTION_ARGS)
     169              : {
     170           45 :     TSVector    in = PG_GETARG_TSVECTOR(0);
     171              :     TSVector    out;
     172              :     int         i,
     173           45 :                 len = 0;
     174           45 :     WordEntry  *arrin = ARRPTR(in),
     175              :                *arrout;
     176              :     char       *cur;
     177              : 
     178          159 :     for (i = 0; i < in->size; i++)
     179          114 :         len += arrin[i].len;
     180              : 
     181           45 :     len = CALCDATASIZE(in->size, len);
     182           45 :     out = (TSVector) palloc0(len);
     183           45 :     SET_VARSIZE(out, len);
     184           45 :     out->size = in->size;
     185           45 :     arrout = ARRPTR(out);
     186           45 :     cur = STRPTR(out);
     187          159 :     for (i = 0; i < in->size; i++)
     188              :     {
     189          114 :         memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
     190          114 :         arrout[i].haspos = 0;
     191          114 :         arrout[i].len = arrin[i].len;
     192          114 :         arrout[i].pos = cur - STRPTR(out);
     193          114 :         cur += arrout[i].len;
     194              :     }
     195              : 
     196           45 :     PG_FREE_IF_COPY(in, 0);
     197           45 :     PG_RETURN_POINTER(out);
     198              : }
     199              : 
     200              : Datum
     201            5 : tsvector_length(PG_FUNCTION_ARGS)
     202              : {
     203            5 :     TSVector    in = PG_GETARG_TSVECTOR(0);
     204            5 :     int32       ret = in->size;
     205              : 
     206            5 :     PG_FREE_IF_COPY(in, 0);
     207            5 :     PG_RETURN_INT32(ret);
     208              : }
     209              : 
     210              : Datum
     211            6 : tsvector_setweight(PG_FUNCTION_ARGS)
     212              : {
     213            6 :     TSVector    in = PG_GETARG_TSVECTOR(0);
     214            6 :     char        cw = PG_GETARG_CHAR(1);
     215              :     TSVector    out;
     216              :     int         i,
     217              :                 j;
     218              :     WordEntry  *entry;
     219              :     WordEntryPos *p;
     220            6 :     int         w = 0;
     221              : 
     222            6 :     switch (cw)
     223              :     {
     224            0 :         case 'A':
     225              :         case 'a':
     226            0 :             w = 3;
     227            0 :             break;
     228            0 :         case 'B':
     229              :         case 'b':
     230            0 :             w = 2;
     231            0 :             break;
     232            6 :         case 'C':
     233              :         case 'c':
     234            6 :             w = 1;
     235            6 :             break;
     236            0 :         case 'D':
     237              :         case 'd':
     238            0 :             w = 0;
     239            0 :             break;
     240            0 :         default:
     241              :             /* internal error */
     242            0 :             elog(ERROR, "unrecognized weight: %d", cw);
     243              :     }
     244              : 
     245            6 :     out = (TSVector) palloc(VARSIZE(in));
     246            6 :     memcpy(out, in, VARSIZE(in));
     247            6 :     entry = ARRPTR(out);
     248            6 :     i = out->size;
     249           30 :     while (i--)
     250              :     {
     251           24 :         if ((j = POSDATALEN(out, entry)) != 0)
     252              :         {
     253           24 :             p = POSDATAPTR(out, entry);
     254           84 :             while (j--)
     255              :             {
     256           60 :                 WEP_SETWEIGHT(*p, w);
     257           60 :                 p++;
     258              :             }
     259              :         }
     260           24 :         entry++;
     261              :     }
     262              : 
     263            6 :     PG_FREE_IF_COPY(in, 0);
     264            6 :     PG_RETURN_POINTER(out);
     265              : }
     266              : 
     267              : /*
     268              :  * setweight(tsin tsvector, char_weight "char", lexemes "text"[])
     269              :  *
     270              :  * Assign weight w to elements of tsin that are listed in lexemes.
     271              :  */
     272              : Datum
     273           12 : tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
     274              : {
     275           12 :     TSVector    tsin = PG_GETARG_TSVECTOR(0);
     276           12 :     char        char_weight = PG_GETARG_CHAR(1);
     277           12 :     ArrayType  *lexemes = PG_GETARG_ARRAYTYPE_P(2);
     278              : 
     279              :     TSVector    tsout;
     280              :     int         i,
     281              :                 j,
     282              :                 nlexemes,
     283              :                 weight;
     284              :     WordEntry  *entry;
     285              :     Datum      *dlexemes;
     286              :     bool       *nulls;
     287              : 
     288           12 :     switch (char_weight)
     289              :     {
     290            0 :         case 'A':
     291              :         case 'a':
     292            0 :             weight = 3;
     293            0 :             break;
     294            0 :         case 'B':
     295              :         case 'b':
     296            0 :             weight = 2;
     297            0 :             break;
     298           12 :         case 'C':
     299              :         case 'c':
     300           12 :             weight = 1;
     301           12 :             break;
     302            0 :         case 'D':
     303              :         case 'd':
     304            0 :             weight = 0;
     305            0 :             break;
     306            0 :         default:
     307              :             /* internal error */
     308            0 :             elog(ERROR, "unrecognized weight: %c", char_weight);
     309              :     }
     310              : 
     311           12 :     tsout = (TSVector) palloc(VARSIZE(tsin));
     312           12 :     memcpy(tsout, tsin, VARSIZE(tsin));
     313           12 :     entry = ARRPTR(tsout);
     314              : 
     315           12 :     deconstruct_array_builtin(lexemes, TEXTOID, &dlexemes, &nulls, &nlexemes);
     316              : 
     317              :     /*
     318              :      * Assuming that lexemes array is significantly shorter than tsvector we
     319              :      * can iterate through lexemes performing binary search of each lexeme
     320              :      * from lexemes in tsvector.
     321              :      */
     322           36 :     for (i = 0; i < nlexemes; i++)
     323              :     {
     324              :         char       *lex;
     325              :         int         lex_len,
     326              :                     lex_pos;
     327              : 
     328              :         /* Ignore null array elements, they surely don't match */
     329           24 :         if (nulls[i])
     330            3 :             continue;
     331              : 
     332           21 :         lex = VARDATA(DatumGetPointer(dlexemes[i]));
     333           21 :         lex_len = VARSIZE(DatumGetPointer(dlexemes[i])) - VARHDRSZ;
     334           21 :         lex_pos = tsvector_bsearch(tsout, lex, lex_len);
     335              : 
     336           21 :         if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0)
     337              :         {
     338           12 :             WordEntryPos *p = POSDATAPTR(tsout, entry + lex_pos);
     339              : 
     340           39 :             while (j--)
     341              :             {
     342           27 :                 WEP_SETWEIGHT(*p, weight);
     343           27 :                 p++;
     344              :             }
     345              :         }
     346              :     }
     347              : 
     348           12 :     PG_FREE_IF_COPY(tsin, 0);
     349           12 :     PG_FREE_IF_COPY(lexemes, 2);
     350              : 
     351           12 :     PG_RETURN_POINTER(tsout);
     352              : }
     353              : 
/*
 * compareEntry(pa, a, pb, b): compare the lexeme of WordEntry "a" (whose
 * bytes start at pa + a->pos, length a->len) with the lexeme of WordEntry
 * "b" (bytes at pb + b->pos, length b->len) using tsCompareString().
 * The final "false" argument is presumably the prefix-match flag -- confirm
 * against tsCompareString's declaration.
 */
#define compareEntry(pa, a, pb, b) \
    tsCompareString((pa) + (a)->pos, (a)->len,    \
                    (pb) + (b)->pos, (b)->len,    \
                    false)
     358              : 
     359              : /*
     360              :  * Add positions from src to dest after offsetting them by maxpos.
     361              :  * Return the number added (might be less than expected due to overflow)
     362              :  */
     363              : static int32
     364            6 : add_pos(TSVector src, WordEntry *srcptr,
     365              :         TSVector dest, WordEntry *destptr,
     366              :         int32 maxpos)
     367              : {
     368            6 :     uint16     *clen = &_POSVECPTR(dest, destptr)->npos;
     369              :     int         i;
     370            6 :     uint16      slen = POSDATALEN(src, srcptr),
     371              :                 startlen;
     372            6 :     WordEntryPos *spos = POSDATAPTR(src, srcptr),
     373            6 :                *dpos = POSDATAPTR(dest, destptr);
     374              : 
     375            6 :     if (!destptr->haspos)
     376            0 :         *clen = 0;
     377              : 
     378            6 :     startlen = *clen;
     379            6 :     for (i = 0;
     380           12 :          i < slen && *clen < MAXNUMPOS &&
     381            6 :          (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1);
     382            6 :          i++)
     383              :     {
     384            6 :         WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
     385            6 :         WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
     386            6 :         (*clen)++;
     387              :     }
     388              : 
     389            6 :     if (*clen != startlen)
     390            6 :         destptr->haspos = 1;
     391            6 :     return *clen - startlen;
     392              : }
     393              : 
     394              : /*
     395              :  * Perform binary search of given lexeme in TSVector.
     396              :  * Returns lexeme position in TSVector's entry array or -1 if lexeme wasn't
     397              :  * found.
     398              :  */
     399              : static int
     400           99 : tsvector_bsearch(const TSVectorData *tsv, char *lexeme, int lexeme_len)
     401              : {
     402           99 :     const WordEntry *arrin = ARRPTR(tsv);
     403           99 :     int         StopLow = 0,
     404           99 :                 StopHigh = tsv->size,
     405              :                 StopMiddle,
     406              :                 cmp;
     407              : 
     408          261 :     while (StopLow < StopHigh)
     409              :     {
     410          231 :         StopMiddle = (StopLow + StopHigh) / 2;
     411              : 
     412          231 :         cmp = tsCompareString(lexeme, lexeme_len,
     413          231 :                               STRPTR(tsv) + arrin[StopMiddle].pos,
     414          231 :                               arrin[StopMiddle].len,
     415              :                               false);
     416              : 
     417          231 :         if (cmp < 0)
     418          108 :             StopHigh = StopMiddle;
     419          123 :         else if (cmp > 0)
     420           54 :             StopLow = StopMiddle + 1;
     421              :         else                    /* found it */
     422           69 :             return StopMiddle;
     423              :     }
     424              : 
     425           30 :     return -1;
     426              : }
     427              : 
     428              : /*
     429              :  * qsort comparator functions
     430              :  */
     431              : 
     432              : static int
     433           39 : compare_int(const void *va, const void *vb)
     434              : {
     435           39 :     int         a = *((const int *) va);
     436           39 :     int         b = *((const int *) vb);
     437              : 
     438           39 :     return pg_cmp_s32(a, b);
     439              : }
     440              : 
     441              : static int
     442           51 : compare_text_lexemes(const void *va, const void *vb)
     443              : {
     444           51 :     Datum       a = *((const Datum *) va);
     445           51 :     Datum       b = *((const Datum *) vb);
     446           51 :     char       *alex = VARDATA_ANY(DatumGetPointer(a));
     447           51 :     int         alex_len = VARSIZE_ANY_EXHDR(DatumGetPointer(a));
     448           51 :     char       *blex = VARDATA_ANY(DatumGetPointer(b));
     449           51 :     int         blex_len = VARSIZE_ANY_EXHDR(DatumGetPointer(b));
     450              : 
     451           51 :     return tsCompareString(alex, alex_len, blex, blex_len, false);
     452              : }
     453              : 
     454              : /*
     455              :  * Internal routine to delete lexemes from TSVector by array of offsets.
     456              :  *
     457              :  * int *indices_to_delete -- array of lexeme offsets to delete (modified here!)
     458              :  * int indices_count -- size of that array
     459              :  *
     460              :  * Returns new TSVector without given lexemes along with their positions
     461              :  * and weights.
     462              :  */
     463              : static TSVector
     464           33 : tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
     465              :                            int indices_count)
     466              : {
     467              :     TSVector    tsout;
     468           33 :     WordEntry  *arrin = ARRPTR(tsv),
     469              :                *arrout;
     470           33 :     char       *data = STRPTR(tsv),
     471              :                *dataout;
     472              :     int         i,              /* index in arrin */
     473              :                 j,              /* index in arrout */
     474              :                 k,              /* index in indices_to_delete */
     475              :                 curoff;         /* index in dataout area */
     476              : 
     477              :     /*
     478              :      * Sort the filter array to simplify membership checks below.  Also, get
     479              :      * rid of any duplicate entries, so that we can assume that indices_count
     480              :      * is exactly equal to the number of lexemes that will be removed.
     481              :      */
     482           33 :     if (indices_count > 1)
     483              :     {
     484           15 :         qsort(indices_to_delete, indices_count, sizeof(int), compare_int);
     485           15 :         indices_count = qunique(indices_to_delete, indices_count, sizeof(int),
     486              :                                 compare_int);
     487              :     }
     488              : 
     489              :     /*
     490              :      * Here we overestimate tsout size, since we don't know how much space is
     491              :      * used by the deleted lexeme(s).  We will set exact size below.
     492              :      */
     493           33 :     tsout = (TSVector) palloc0(VARSIZE(tsv));
     494              : 
     495              :     /* This count must be correct because STRPTR(tsout) relies on it. */
     496           33 :     tsout->size = tsv->size - indices_count;
     497              : 
     498              :     /*
     499              :      * Copy tsv to tsout, skipping lexemes listed in indices_to_delete.
     500              :      */
     501           33 :     arrout = ARRPTR(tsout);
     502           33 :     dataout = STRPTR(tsout);
     503           33 :     curoff = 0;
     504          198 :     for (i = j = k = 0; i < tsv->size; i++)
     505              :     {
     506              :         /*
     507              :          * If current i is present in indices_to_delete, skip this lexeme.
     508              :          * Since indices_to_delete is already sorted, we only need to check
     509              :          * the current (k'th) entry.
     510              :          */
     511          165 :         if (k < indices_count && i == indices_to_delete[k])
     512              :         {
     513           48 :             k++;
     514           48 :             continue;
     515              :         }
     516              : 
     517              :         /* Copy lexeme and its positions and weights */
     518          117 :         memcpy(dataout + curoff, data + arrin[i].pos, arrin[i].len);
     519          117 :         arrout[j].haspos = arrin[i].haspos;
     520          117 :         arrout[j].len = arrin[i].len;
     521          117 :         arrout[j].pos = curoff;
     522          117 :         curoff += arrin[i].len;
     523          117 :         if (arrin[i].haspos)
     524              :         {
     525           78 :             int         len = POSDATALEN(tsv, arrin + i) * sizeof(WordEntryPos)
     526           78 :                 + sizeof(uint16);
     527              : 
     528           78 :             curoff = SHORTALIGN(curoff);
     529           78 :             memcpy(dataout + curoff,
     530           78 :                    STRPTR(tsv) + SHORTALIGN(arrin[i].pos + arrin[i].len),
     531              :                    len);
     532           78 :             curoff += len;
     533              :         }
     534              : 
     535          117 :         j++;
     536              :     }
     537              : 
     538              :     /*
     539              :      * k should now be exactly equal to indices_count. If it isn't then the
     540              :      * caller provided us with indices outside of [0, tsv->size) range and
     541              :      * estimation of tsout's size is wrong.
     542              :      */
     543              :     Assert(k == indices_count);
     544              : 
     545           33 :     SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, curoff));
     546           33 :     return tsout;
     547              : }
     548              : 
     549              : /*
     550              :  * Delete given lexeme from tsvector.
     551              :  * Implementation of user-level ts_delete(tsvector, text).
     552              :  */
     553              : Datum
     554           18 : tsvector_delete_str(PG_FUNCTION_ARGS)
     555              : {
     556           18 :     TSVector    tsin = PG_GETARG_TSVECTOR(0),
     557              :                 tsout;
     558           18 :     text       *tlexeme = PG_GETARG_TEXT_PP(1);
     559           18 :     char       *lexeme = VARDATA_ANY(tlexeme);
     560           18 :     int         lexeme_len = VARSIZE_ANY_EXHDR(tlexeme),
     561              :                 skip_index;
     562              : 
     563           18 :     if ((skip_index = tsvector_bsearch(tsin, lexeme, lexeme_len)) == -1)
     564            6 :         PG_RETURN_POINTER(tsin);
     565              : 
     566           12 :     tsout = tsvector_delete_by_indices(tsin, &skip_index, 1);
     567              : 
     568           12 :     PG_FREE_IF_COPY(tsin, 0);
     569           12 :     PG_FREE_IF_COPY(tlexeme, 1);
     570           12 :     PG_RETURN_POINTER(tsout);
     571              : }
     572              : 
     573              : /*
     574              :  * Delete given array of lexemes from tsvector.
     575              :  * Implementation of user-level ts_delete(tsvector, text[]).
     576              :  */
     577              : Datum
     578           21 : tsvector_delete_arr(PG_FUNCTION_ARGS)
     579              : {
     580           21 :     TSVector    tsin = PG_GETARG_TSVECTOR(0),
     581              :                 tsout;
     582           21 :     ArrayType  *lexemes = PG_GETARG_ARRAYTYPE_P(1);
     583              :     int         i,
     584              :                 nlex,
     585              :                 skip_count,
     586              :                *skip_indices;
     587              :     Datum      *dlexemes;
     588              :     bool       *nulls;
     589              : 
     590           21 :     deconstruct_array_builtin(lexemes, TEXTOID, &dlexemes, &nulls, &nlex);
     591              : 
     592              :     /*
     593              :      * In typical use case array of lexemes to delete is relatively small. So
     594              :      * here we optimize things for that scenario: iterate through lexarr
     595              :      * performing binary search of each lexeme from lexarr in tsvector.
     596              :      */
     597           21 :     skip_indices = palloc0(nlex * sizeof(int));
     598           84 :     for (i = skip_count = 0; i < nlex; i++)
     599              :     {
     600              :         char       *lex;
     601              :         int         lex_len,
     602              :                     lex_pos;
     603              : 
     604              :         /* Ignore null array elements, they surely don't match */
     605           63 :         if (nulls[i])
     606            3 :             continue;
     607              : 
     608           60 :         lex = VARDATA(DatumGetPointer(dlexemes[i]));
     609           60 :         lex_len = VARSIZE(DatumGetPointer(dlexemes[i])) - VARHDRSZ;
     610           60 :         lex_pos = tsvector_bsearch(tsin, lex, lex_len);
     611              : 
     612           60 :         if (lex_pos >= 0)
     613           39 :             skip_indices[skip_count++] = lex_pos;
     614              :     }
     615              : 
     616           21 :     tsout = tsvector_delete_by_indices(tsin, skip_indices, skip_count);
     617              : 
     618           21 :     pfree(skip_indices);
     619           21 :     PG_FREE_IF_COPY(tsin, 0);
     620           21 :     PG_FREE_IF_COPY(lexemes, 1);
     621              : 
     622           21 :     PG_RETURN_POINTER(tsout);
     623              : }
     624              : 
     625              : /*
     626              :  * Expand tsvector as table with following columns:
     627              :  *     lexeme: lexeme text
     628              :  *     positions: integer array of lexeme positions
     629              :  *     weights: char array of weights corresponding to positions
     630              :  */
     631              : Datum
     632           90 : tsvector_unnest(PG_FUNCTION_ARGS)
     633              : {
     634              :     FuncCallContext *funcctx;
     635              :     TSVector    tsin;
     636              : 
     637           90 :     if (SRF_IS_FIRSTCALL())
     638              :     {
     639              :         MemoryContext oldcontext;
     640              :         TupleDesc   tupdesc;
     641              : 
     642           15 :         funcctx = SRF_FIRSTCALL_INIT();
     643           15 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
     644              : 
     645           15 :         tupdesc = CreateTemplateTupleDesc(3);
     646           15 :         TupleDescInitEntry(tupdesc, (AttrNumber) 1, "lexeme",
     647              :                            TEXTOID, -1, 0);
     648           15 :         TupleDescInitEntry(tupdesc, (AttrNumber) 2, "positions",
     649              :                            INT2ARRAYOID, -1, 0);
     650           15 :         TupleDescInitEntry(tupdesc, (AttrNumber) 3, "weights",
     651              :                            TEXTARRAYOID, -1, 0);
     652           15 :         if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
     653            0 :             elog(ERROR, "return type must be a row type");
     654           15 :         funcctx->tuple_desc = tupdesc;
     655              : 
     656           15 :         funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);
     657              : 
     658           15 :         MemoryContextSwitchTo(oldcontext);
     659              :     }
     660              : 
     661           90 :     funcctx = SRF_PERCALL_SETUP();
     662           90 :     tsin = (TSVector) funcctx->user_fctx;
     663              : 
     664           90 :     if (funcctx->call_cntr < tsin->size)
     665              :     {
     666           75 :         WordEntry  *arrin = ARRPTR(tsin);
     667           75 :         char       *data = STRPTR(tsin);
     668              :         HeapTuple   tuple;
     669              :         int         j,
     670           75 :                     i = funcctx->call_cntr;
     671           75 :         bool        nulls[] = {false, false, false};
     672              :         Datum       values[3];
     673              : 
     674           75 :         values[0] = PointerGetDatum(cstring_to_text_with_len(data + arrin[i].pos, arrin[i].len));
     675              : 
     676           75 :         if (arrin[i].haspos)
     677              :         {
     678              :             WordEntryPosVector *posv;
     679              :             Datum      *positions;
     680              :             Datum      *weights;
     681              :             char        weight;
     682              : 
     683              :             /*
     684              :              * Internally tsvector stores position and weight in the same
     685              :              * uint16 (2 bits for weight, 14 for position). Here we extract
     686              :              * that in two separate arrays.
     687              :              */
     688           45 :             posv = _POSVECPTR(tsin, arrin + i);
     689           45 :             positions = palloc(posv->npos * sizeof(Datum));
     690           45 :             weights = palloc(posv->npos * sizeof(Datum));
     691          126 :             for (j = 0; j < posv->npos; j++)
     692              :             {
     693           81 :                 positions[j] = Int16GetDatum(WEP_GETPOS(posv->pos[j]));
     694           81 :                 weight = 'D' - WEP_GETWEIGHT(posv->pos[j]);
     695           81 :                 weights[j] = PointerGetDatum(cstring_to_text_with_len(&weight,
     696              :                                                                       1));
     697              :             }
     698              : 
     699           45 :             values[1] = PointerGetDatum(construct_array_builtin(positions, posv->npos, INT2OID));
     700           45 :             values[2] = PointerGetDatum(construct_array_builtin(weights, posv->npos, TEXTOID));
     701              :         }
     702              :         else
     703              :         {
     704           30 :             nulls[1] = nulls[2] = true;
     705              :         }
     706              : 
     707           75 :         tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
     708           75 :         SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
     709              :     }
     710              :     else
     711              :     {
     712           15 :         SRF_RETURN_DONE(funcctx);
     713              :     }
     714              : }
     715              : 
     716              : /*
     717              :  * Convert tsvector to array of lexemes.
     718              :  */
     719              : Datum
     720            6 : tsvector_to_array(PG_FUNCTION_ARGS)
     721              : {
     722            6 :     TSVector    tsin = PG_GETARG_TSVECTOR(0);
     723            6 :     WordEntry  *arrin = ARRPTR(tsin);
     724              :     Datum      *elements;
     725              :     int         i;
     726              :     ArrayType  *array;
     727              : 
     728            6 :     elements = palloc(tsin->size * sizeof(Datum));
     729              : 
     730           36 :     for (i = 0; i < tsin->size; i++)
     731              :     {
     732           30 :         elements[i] = PointerGetDatum(cstring_to_text_with_len(STRPTR(tsin) + arrin[i].pos,
     733           30 :                                                                arrin[i].len));
     734              :     }
     735              : 
     736            6 :     array = construct_array_builtin(elements, tsin->size, TEXTOID);
     737              : 
     738            6 :     pfree(elements);
     739            6 :     PG_FREE_IF_COPY(tsin, 0);
     740            6 :     PG_RETURN_POINTER(array);
     741              : }
     742              : 
     743              : /*
     744              :  * Build tsvector from array of lexemes.
     745              :  */
     746              : Datum
     747           12 : array_to_tsvector(PG_FUNCTION_ARGS)
     748              : {
     749           12 :     ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
     750              :     TSVector    tsout;
     751              :     Datum      *dlexemes;
     752              :     WordEntry  *arrout;
     753              :     bool       *nulls;
     754              :     int         nitems,
     755              :                 i,
     756              :                 tslen,
     757           12 :                 datalen = 0;
     758              :     char       *cur;
     759              : 
     760           12 :     deconstruct_array_builtin(v, TEXTOID, &dlexemes, &nulls, &nitems);
     761              : 
     762              :     /*
     763              :      * Reject nulls and zero length strings (maybe we should just ignore them,
     764              :      * instead?)
     765              :      */
     766           63 :     for (i = 0; i < nitems; i++)
     767              :     {
     768           57 :         if (nulls[i])
     769            3 :             ereport(ERROR,
     770              :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
     771              :                      errmsg("lexeme array may not contain nulls")));
     772              : 
     773           54 :         if (VARSIZE(DatumGetPointer(dlexemes[i])) - VARHDRSZ == 0)
     774            3 :             ereport(ERROR,
     775              :                     (errcode(ERRCODE_ZERO_LENGTH_CHARACTER_STRING),
     776              :                      errmsg("lexeme array may not contain empty strings")));
     777              :     }
     778              : 
     779              :     /* Sort and de-dup, because this is required for a valid tsvector. */
     780            6 :     if (nitems > 1)
     781              :     {
     782            6 :         qsort(dlexemes, nitems, sizeof(Datum), compare_text_lexemes);
     783            6 :         nitems = qunique(dlexemes, nitems, sizeof(Datum),
     784              :                          compare_text_lexemes);
     785              :     }
     786              : 
     787              :     /* Calculate space needed for surviving lexemes. */
     788           30 :     for (i = 0; i < nitems; i++)
     789           24 :         datalen += VARSIZE(DatumGetPointer(dlexemes[i])) - VARHDRSZ;
     790            6 :     tslen = CALCDATASIZE(nitems, datalen);
     791              : 
     792              :     /* Allocate and fill tsvector. */
     793            6 :     tsout = (TSVector) palloc0(tslen);
     794            6 :     SET_VARSIZE(tsout, tslen);
     795            6 :     tsout->size = nitems;
     796              : 
     797            6 :     arrout = ARRPTR(tsout);
     798            6 :     cur = STRPTR(tsout);
     799           30 :     for (i = 0; i < nitems; i++)
     800              :     {
     801           24 :         char       *lex = VARDATA(DatumGetPointer(dlexemes[i]));
     802           24 :         int         lex_len = VARSIZE(DatumGetPointer(dlexemes[i])) - VARHDRSZ;
     803              : 
     804           24 :         memcpy(cur, lex, lex_len);
     805           24 :         arrout[i].haspos = 0;
     806           24 :         arrout[i].len = lex_len;
     807           24 :         arrout[i].pos = cur - STRPTR(tsout);
     808           24 :         cur += lex_len;
     809              :     }
     810              : 
     811            6 :     PG_FREE_IF_COPY(v, 0);
     812            6 :     PG_RETURN_POINTER(tsout);
     813              : }
     814              : 
     815              : /*
     816              :  * ts_filter(): keep only lexemes with given weights in tsvector.
     817              :  */
     818              : Datum
     819            9 : tsvector_filter(PG_FUNCTION_ARGS)
     820              : {
     821            9 :     TSVector    tsin = PG_GETARG_TSVECTOR(0),
     822              :                 tsout;
     823            9 :     ArrayType  *weights = PG_GETARG_ARRAYTYPE_P(1);
     824            9 :     WordEntry  *arrin = ARRPTR(tsin),
     825              :                *arrout;
     826            9 :     char       *datain = STRPTR(tsin),
     827              :                *dataout;
     828              :     Datum      *dweights;
     829              :     bool       *nulls;
     830              :     int         nweights;
     831              :     int         i,
     832              :                 j;
     833            9 :     int         cur_pos = 0;
     834            9 :     char        mask = 0;
     835              : 
     836            9 :     deconstruct_array_builtin(weights, CHAROID, &dweights, &nulls, &nweights);
     837              : 
     838           21 :     for (i = 0; i < nweights; i++)
     839              :     {
     840              :         char        char_weight;
     841              : 
     842           15 :         if (nulls[i])
     843            3 :             ereport(ERROR,
     844              :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
     845              :                      errmsg("weight array may not contain nulls")));
     846              : 
     847           12 :         char_weight = DatumGetChar(dweights[i]);
     848           12 :         switch (char_weight)
     849              :         {
     850            9 :             case 'A':
     851              :             case 'a':
     852            9 :                 mask = mask | 8;
     853            9 :                 break;
     854            3 :             case 'B':
     855              :             case 'b':
     856            3 :                 mask = mask | 4;
     857            3 :                 break;
     858            0 :             case 'C':
     859              :             case 'c':
     860            0 :                 mask = mask | 2;
     861            0 :                 break;
     862            0 :             case 'D':
     863              :             case 'd':
     864            0 :                 mask = mask | 1;
     865            0 :                 break;
     866            0 :             default:
     867            0 :                 ereport(ERROR,
     868              :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     869              :                          errmsg("unrecognized weight: \"%c\"", char_weight)));
     870              :         }
     871              :     }
     872              : 
     873            6 :     tsout = (TSVector) palloc0(VARSIZE(tsin));
     874            6 :     tsout->size = tsin->size;
     875            6 :     arrout = ARRPTR(tsout);
     876            6 :     dataout = STRPTR(tsout);
     877              : 
     878           54 :     for (i = j = 0; i < tsin->size; i++)
     879              :     {
     880              :         WordEntryPosVector *posvin,
     881              :                    *posvout;
     882           48 :         int         npos = 0;
     883              :         int         k;
     884              : 
     885           48 :         if (!arrin[i].haspos)
     886           15 :             continue;
     887              : 
     888           33 :         posvin = _POSVECPTR(tsin, arrin + i);
     889           33 :         posvout = (WordEntryPosVector *)
     890           33 :             (dataout + SHORTALIGN(cur_pos + arrin[i].len));
     891              : 
     892           66 :         for (k = 0; k < posvin->npos; k++)
     893              :         {
     894           33 :             if (mask & (1 << WEP_GETWEIGHT(posvin->pos[k])))
     895           15 :                 posvout->pos[npos++] = posvin->pos[k];
     896              :         }
     897              : 
     898              :         /* if no satisfactory positions found, skip lexeme */
     899           33 :         if (!npos)
     900           18 :             continue;
     901              : 
     902           15 :         arrout[j].haspos = true;
     903           15 :         arrout[j].len = arrin[i].len;
     904           15 :         arrout[j].pos = cur_pos;
     905              : 
     906           15 :         memcpy(dataout + cur_pos, datain + arrin[i].pos, arrin[i].len);
     907           15 :         posvout->npos = npos;
     908           15 :         cur_pos += SHORTALIGN(arrin[i].len);
     909           15 :         cur_pos += POSDATALEN(tsout, arrout + j) * sizeof(WordEntryPos) +
     910              :             sizeof(uint16);
     911           15 :         j++;
     912              :     }
     913              : 
     914            6 :     tsout->size = j;
     915            6 :     if (dataout != STRPTR(tsout))
     916            6 :         memmove(STRPTR(tsout), dataout, cur_pos);
     917              : 
     918            6 :     SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, cur_pos));
     919              : 
     920            6 :     PG_FREE_IF_COPY(tsin, 0);
     921            6 :     PG_RETURN_POINTER(tsout);
     922              : }
     923              : 
     924              : Datum
     925            6 : tsvector_concat(PG_FUNCTION_ARGS)
     926              : {
     927            6 :     TSVector    in1 = PG_GETARG_TSVECTOR(0);
     928            6 :     TSVector    in2 = PG_GETARG_TSVECTOR(1);
     929              :     TSVector    out;
     930              :     WordEntry  *ptr;
     931              :     WordEntry  *ptr1,
     932              :                *ptr2;
     933              :     WordEntryPos *p;
     934            6 :     int         maxpos = 0,
     935              :                 i,
     936              :                 j,
     937              :                 i1,
     938              :                 i2,
     939              :                 dataoff,
     940              :                 output_bytes,
     941              :                 output_size;
     942              :     char       *data,
     943              :                *data1,
     944              :                *data2;
     945              : 
     946              :     /* Get max position in in1; we'll need this to offset in2's positions */
     947            6 :     ptr = ARRPTR(in1);
     948            6 :     i = in1->size;
     949           15 :     while (i--)
     950              :     {
     951            9 :         if ((j = POSDATALEN(in1, ptr)) != 0)
     952              :         {
     953            9 :             p = POSDATAPTR(in1, ptr);
     954           18 :             while (j--)
     955              :             {
     956            9 :                 if (WEP_GETPOS(*p) > maxpos)
     957            6 :                     maxpos = WEP_GETPOS(*p);
     958            9 :                 p++;
     959              :             }
     960              :         }
     961            9 :         ptr++;
     962              :     }
     963              : 
     964            6 :     ptr1 = ARRPTR(in1);
     965            6 :     ptr2 = ARRPTR(in2);
     966            6 :     data1 = STRPTR(in1);
     967            6 :     data2 = STRPTR(in2);
     968            6 :     i1 = in1->size;
     969            6 :     i2 = in2->size;
     970              : 
     971              :     /*
     972              :      * Conservative estimate of space needed.  We might need all the data in
     973              :      * both inputs, and conceivably add a pad byte before position data for
     974              :      * each item where there was none before.
     975              :      */
     976            6 :     output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;
     977              : 
     978            6 :     out = (TSVector) palloc0(output_bytes);
     979            6 :     SET_VARSIZE(out, output_bytes);
     980              : 
     981              :     /*
     982              :      * We must make out->size valid so that STRPTR(out) is sensible.  We'll
     983              :      * collapse out any unused space at the end.
     984              :      */
     985            6 :     out->size = in1->size + in2->size;
     986              : 
     987            6 :     ptr = ARRPTR(out);
     988            6 :     data = STRPTR(out);
     989            6 :     dataoff = 0;
     990           15 :     while (i1 && i2)
     991              :     {
     992            9 :         int         cmp = compareEntry(data1, ptr1, data2, ptr2);
     993              : 
     994            9 :         if (cmp < 0)
     995              :         {                       /* in1 first */
     996            3 :             ptr->haspos = ptr1->haspos;
     997            3 :             ptr->len = ptr1->len;
     998            3 :             memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
     999            3 :             ptr->pos = dataoff;
    1000            3 :             dataoff += ptr1->len;
    1001            3 :             if (ptr->haspos)
    1002              :             {
    1003            3 :                 dataoff = SHORTALIGN(dataoff);
    1004            3 :                 memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
    1005            3 :                 dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
    1006              :             }
    1007              : 
    1008            3 :             ptr++;
    1009            3 :             ptr1++;
    1010            3 :             i1--;
    1011              :         }
    1012            6 :         else if (cmp > 0)
    1013              :         {                       /* in2 first */
    1014            3 :             ptr->haspos = ptr2->haspos;
    1015            3 :             ptr->len = ptr2->len;
    1016            3 :             memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
    1017            3 :             ptr->pos = dataoff;
    1018            3 :             dataoff += ptr2->len;
    1019            3 :             if (ptr->haspos)
    1020              :             {
    1021            0 :                 int         addlen = add_pos(in2, ptr2, out, ptr, maxpos);
    1022              : 
    1023            0 :                 if (addlen == 0)
    1024            0 :                     ptr->haspos = 0;
    1025              :                 else
    1026              :                 {
    1027            0 :                     dataoff = SHORTALIGN(dataoff);
    1028            0 :                     dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
    1029              :                 }
    1030              :             }
    1031              : 
    1032            3 :             ptr++;
    1033            3 :             ptr2++;
    1034            3 :             i2--;
    1035              :         }
    1036              :         else
    1037              :         {
    1038            3 :             ptr->haspos = ptr1->haspos | ptr2->haspos;
    1039            3 :             ptr->len = ptr1->len;
    1040            3 :             memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
    1041            3 :             ptr->pos = dataoff;
    1042            3 :             dataoff += ptr1->len;
    1043            3 :             if (ptr->haspos)
    1044              :             {
    1045            3 :                 if (ptr1->haspos)
    1046              :                 {
    1047            3 :                     dataoff = SHORTALIGN(dataoff);
    1048            3 :                     memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
    1049            3 :                     dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
    1050            3 :                     if (ptr2->haspos)
    1051            3 :                         dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
    1052              :                 }
    1053              :                 else            /* must have ptr2->haspos */
    1054              :                 {
    1055            0 :                     int         addlen = add_pos(in2, ptr2, out, ptr, maxpos);
    1056              : 
    1057            0 :                     if (addlen == 0)
    1058            0 :                         ptr->haspos = 0;
    1059              :                     else
    1060              :                     {
    1061            0 :                         dataoff = SHORTALIGN(dataoff);
    1062            0 :                         dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
    1063              :                     }
    1064              :                 }
    1065              :             }
    1066              : 
    1067            3 :             ptr++;
    1068            3 :             ptr1++;
    1069            3 :             ptr2++;
    1070            3 :             i1--;
    1071            3 :             i2--;
    1072              :         }
    1073              :     }
    1074              : 
    1075            9 :     while (i1)
    1076              :     {
    1077            3 :         ptr->haspos = ptr1->haspos;
    1078            3 :         ptr->len = ptr1->len;
    1079            3 :         memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
    1080            3 :         ptr->pos = dataoff;
    1081            3 :         dataoff += ptr1->len;
    1082            3 :         if (ptr->haspos)
    1083              :         {
    1084            3 :             dataoff = SHORTALIGN(dataoff);
    1085            3 :             memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
    1086            3 :             dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
    1087              :         }
    1088              : 
    1089            3 :         ptr++;
    1090            3 :         ptr1++;
    1091            3 :         i1--;
    1092              :     }
    1093              : 
    1094            9 :     while (i2)
    1095              :     {
    1096            3 :         ptr->haspos = ptr2->haspos;
    1097            3 :         ptr->len = ptr2->len;
    1098            3 :         memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
    1099            3 :         ptr->pos = dataoff;
    1100            3 :         dataoff += ptr2->len;
    1101            3 :         if (ptr->haspos)
    1102              :         {
    1103            3 :             int         addlen = add_pos(in2, ptr2, out, ptr, maxpos);
    1104              : 
    1105            3 :             if (addlen == 0)
    1106            0 :                 ptr->haspos = 0;
    1107              :             else
    1108              :             {
    1109            3 :                 dataoff = SHORTALIGN(dataoff);
    1110            3 :                 dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
    1111              :             }
    1112              :         }
    1113              : 
    1114            3 :         ptr++;
    1115            3 :         ptr2++;
    1116            3 :         i2--;
    1117              :     }
    1118              : 
    1119              :     /*
    1120              :      * Instead of checking each offset individually, we check for overflow of
    1121              :      * pos fields once at the end.
    1122              :      */
    1123            6 :     if (dataoff > MAXSTRPOS)
    1124            0 :         ereport(ERROR,
    1125              :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
    1126              :                  errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS)));
    1127              : 
    1128              :     /*
    1129              :      * Adjust sizes (asserting that we didn't overrun the original estimates)
    1130              :      * and collapse out any unused array entries.
    1131              :      */
    1132            6 :     output_size = ptr - ARRPTR(out);
    1133              :     Assert(output_size <= out->size);
    1134            6 :     out->size = output_size;
    1135            6 :     if (data != STRPTR(out))
    1136            3 :         memmove(STRPTR(out), data, dataoff);
    1137            6 :     output_bytes = CALCDATASIZE(out->size, dataoff);
    1138              :     Assert(output_bytes <= VARSIZE(out));
    1139            6 :     SET_VARSIZE(out, output_bytes);
    1140              : 
    1141            6 :     PG_FREE_IF_COPY(in1, 0);
    1142            6 :     PG_FREE_IF_COPY(in2, 1);
    1143            6 :     PG_RETURN_POINTER(out);
    1144              : }
    1145              : 
    1146              : /*
    1147              :  * Compare two strings by tsvector rules.
    1148              :  *
    1149              :  * if prefix = true then it returns zero value iff b has prefix a
    1150              :  */
    1151              : int32
    1152      3141093 : tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
    1153              : {
    1154              :     int         cmp;
    1155              : 
    1156      3141093 :     if (lena == 0)
    1157              :     {
    1158           18 :         if (prefix)
    1159            0 :             cmp = 0;            /* empty string is prefix of anything */
    1160              :         else
    1161           18 :             cmp = (lenb > 0) ? -1 : 0;
    1162              :     }
    1163      3141075 :     else if (lenb == 0)
    1164              :     {
    1165            0 :         cmp = (lena > 0) ? 1 : 0;
    1166              :     }
    1167              :     else
    1168              :     {
    1169      3141075 :         cmp = memcmp(a, b, Min((unsigned int) lena, (unsigned int) lenb));
    1170              : 
    1171      3141075 :         if (prefix)
    1172              :         {
    1173         8229 :             if (cmp == 0 && lena > lenb)
    1174            0 :                 cmp = 1;        /* a is longer, so not a prefix of b */
    1175              :         }
    1176      3132846 :         else if (cmp == 0 && lena != lenb)
    1177              :         {
    1178        16134 :             cmp = (lena < lenb) ? -1 : 1;
    1179              :         }
    1180              :     }
    1181              : 
    1182      3141093 :     return cmp;
    1183              : }
    1184              : 
    1185              : /*
    1186              :  * Check weight info or/and fill 'data' with the required positions
    1187              :  */
    1188              : static TSTernaryValue
    1189        34041 : checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
    1190              :                ExecPhraseData *data)
    1191              : {
    1192        34041 :     TSTernaryValue result = TS_NO;
    1193              : 
    1194              :     Assert(data == NULL || data->npos == 0);
    1195              : 
    1196        34041 :     if (entry->haspos)
    1197              :     {
    1198              :         WordEntryPosVector *posvec;
    1199              : 
    1200              :         /*
    1201              :          * We can't use the _POSVECPTR macro here because the pointer to the
    1202              :          * tsvector's lexeme storage is already contained in chkval->values.
    1203              :          */
    1204         2244 :         posvec = (WordEntryPosVector *)
    1205         2244 :             (chkval->values + SHORTALIGN(entry->pos + entry->len));
    1206              : 
    1207         2244 :         if (val->weight && data)
    1208           24 :         {
    1209           24 :             WordEntryPos *posvec_iter = posvec->pos;
    1210              :             WordEntryPos *dptr;
    1211              : 
    1212              :             /*
    1213              :              * Filter position information by weights
    1214              :              */
    1215           24 :             dptr = data->pos = palloc_array(WordEntryPos, posvec->npos);
    1216           24 :             data->allocated = true;
    1217              : 
    1218              :             /* Is there a position with a matching weight? */
    1219           48 :             while (posvec_iter < posvec->pos + posvec->npos)
    1220              :             {
    1221              :                 /* If true, append this position to the data->pos */
    1222           24 :                 if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
    1223              :                 {
    1224           12 :                     *dptr = WEP_GETPOS(*posvec_iter);
    1225           12 :                     dptr++;
    1226              :                 }
    1227              : 
    1228           24 :                 posvec_iter++;
    1229              :             }
    1230              : 
    1231           24 :             data->npos = dptr - data->pos;
    1232              : 
    1233           24 :             if (data->npos > 0)
    1234           12 :                 result = TS_YES;
    1235              :             else
    1236              :             {
    1237           12 :                 pfree(data->pos);
    1238           12 :                 data->pos = NULL;
    1239           12 :                 data->allocated = false;
    1240              :             }
    1241              :         }
    1242         2220 :         else if (val->weight)
    1243              :         {
    1244          228 :             WordEntryPos *posvec_iter = posvec->pos;
    1245              : 
    1246              :             /* Is there a position with a matching weight? */
    1247          345 :             while (posvec_iter < posvec->pos + posvec->npos)
    1248              :             {
    1249          252 :                 if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
    1250              :                 {
    1251          135 :                     result = TS_YES;
    1252          135 :                     break;      /* no need to go further */
    1253              :                 }
    1254              : 
    1255          117 :                 posvec_iter++;
    1256              :             }
    1257              :         }
    1258         1992 :         else if (data)
    1259              :         {
    1260         1137 :             data->npos = posvec->npos;
    1261         1137 :             data->pos = posvec->pos;
    1262         1137 :             data->allocated = false;
    1263         1137 :             result = TS_YES;
    1264              :         }
    1265              :         else
    1266              :         {
    1267              :             /* simplest case: no weight check, positions not needed */
    1268          855 :             result = TS_YES;
    1269              :         }
    1270              :     }
    1271              :     else
    1272              :     {
    1273              :         /*
    1274              :          * Position info is lacking, so if the caller requires it, we can only
    1275              :          * say that maybe there is a match.
    1276              :          *
    1277              :          * Notice, however, that we *don't* check val->weight here.
    1278              :          * Historically, stripped tsvectors are considered to match queries
    1279              :          * whether or not the query has a weight restriction; that's a little
    1280              :          * dubious but we'll preserve the behavior.
    1281              :          */
    1282        31797 :         if (data)
    1283        11529 :             result = TS_MAYBE;
    1284              :         else
    1285        20268 :             result = TS_YES;
    1286              :     }
    1287              : 
    1288        34041 :     return result;
    1289              : }
    1290              : 
    1291              : /*
    1292              :  * TS_execute callback for matching a tsquery operand to plain tsvector data
    1293              :  */
    1294              : static TSTernaryValue
    1295       142011 : checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
    1296              : {
    1297       142011 :     CHKVAL     *chkval = (CHKVAL *) checkval;
    1298       142011 :     WordEntry  *StopLow = chkval->arrb;
    1299       142011 :     WordEntry  *StopHigh = chkval->arre;
    1300       142011 :     WordEntry  *StopMiddle = StopHigh;
    1301       142011 :     TSTernaryValue res = TS_NO;
    1302              : 
    1303              :     /* Loop invariant: StopLow <= val < StopHigh */
    1304       893403 :     while (StopLow < StopHigh)
    1305              :     {
    1306              :         int         difference;
    1307              : 
    1308       777915 :         StopMiddle = StopLow + (StopHigh - StopLow) / 2;
    1309       777915 :         difference = tsCompareString(chkval->operand + val->distance,
    1310       777915 :                                      val->length,
    1311       777915 :                                      chkval->values + StopMiddle->pos,
    1312       777915 :                                      StopMiddle->len,
    1313              :                                      false);
    1314              : 
    1315       777915 :         if (difference == 0)
    1316              :         {
    1317              :             /* Check weight info & fill 'data' with positions */
    1318        26523 :             res = checkclass_str(chkval, StopMiddle, val, data);
    1319        26523 :             break;
    1320              :         }
    1321       751392 :         else if (difference > 0)
    1322       423756 :             StopLow = StopMiddle + 1;
    1323              :         else
    1324       327636 :             StopHigh = StopMiddle;
    1325              :     }
    1326              : 
    1327              :     /*
    1328              :      * If it's a prefix search, we should also consider lexemes that the
    1329              :      * search term is a prefix of (which will necessarily immediately follow
    1330              :      * the place we found in the above loop).  But we can skip them if there
    1331              :      * was a definite match on the exact term AND the caller doesn't need
    1332              :      * position info.
    1333              :      */
    1334       142011 :     if (val->prefix && (res != TS_YES || data))
    1335              :     {
    1336         8262 :         WordEntryPos *allpos = NULL;
    1337         8262 :         int         npos = 0,
    1338         8262 :                     totalpos = 0;
    1339              : 
    1340              :         /* adjust start position for corner case */
    1341         8262 :         if (StopLow >= StopHigh)
    1342         8256 :             StopMiddle = StopHigh;
    1343              : 
    1344              :         /* we don't try to re-use any data from the initial match */
    1345         8262 :         if (data)
    1346              :         {
    1347           18 :             if (data->allocated)
    1348            0 :                 pfree(data->pos);
    1349           18 :             data->pos = NULL;
    1350           18 :             data->allocated = false;
    1351           18 :             data->npos = 0;
    1352              :         }
    1353         8262 :         res = TS_NO;
    1354              : 
    1355        15729 :         while ((res != TS_YES || data) &&
    1356        23745 :                StopMiddle < chkval->arre &&
    1357         7965 :                tsCompareString(chkval->operand + val->distance,
    1358         7965 :                                val->length,
    1359         7965 :                                chkval->values + StopMiddle->pos,
    1360         7965 :                                StopMiddle->len,
    1361              :                                true) == 0)
    1362              :         {
    1363              :             TSTernaryValue subres;
    1364              : 
    1365         7518 :             subres = checkclass_str(chkval, StopMiddle, val, data);
    1366              : 
    1367         7518 :             if (subres != TS_NO)
    1368              :             {
    1369         7488 :                 if (data)
    1370              :                 {
    1371              :                     /*
    1372              :                      * We need to join position information
    1373              :                      */
    1374           21 :                     if (subres == TS_MAYBE)
    1375              :                     {
    1376              :                         /*
    1377              :                          * No position info for this match, so we must report
    1378              :                          * MAYBE overall.
    1379              :                          */
    1380            0 :                         res = TS_MAYBE;
    1381              :                         /* forget any previous positions */
    1382            0 :                         npos = 0;
    1383              :                         /* don't leak storage */
    1384            0 :                         if (allpos)
    1385            0 :                             pfree(allpos);
    1386            0 :                         break;
    1387              :                     }
    1388              : 
    1389           39 :                     while (npos + data->npos > totalpos)
    1390              :                     {
    1391           18 :                         if (totalpos == 0)
    1392              :                         {
    1393           18 :                             totalpos = 256;
    1394           18 :                             allpos = palloc_array(WordEntryPos, totalpos);
    1395              :                         }
    1396              :                         else
    1397              :                         {
    1398            0 :                             totalpos *= 2;
    1399            0 :                             allpos = repalloc_array(allpos, WordEntryPos, totalpos);
    1400              :                         }
    1401              :                     }
    1402              : 
    1403           21 :                     memcpy(allpos + npos, data->pos, sizeof(WordEntryPos) * data->npos);
    1404           21 :                     npos += data->npos;
    1405              : 
    1406              :                     /* don't leak storage from individual matches */
    1407           21 :                     if (data->allocated)
    1408           12 :                         pfree(data->pos);
    1409           21 :                     data->pos = NULL;
    1410           21 :                     data->allocated = false;
    1411              :                     /* it's important to reset data->npos before next loop */
    1412           21 :                     data->npos = 0;
    1413              :                 }
    1414              :                 else
    1415              :                 {
    1416              :                     /* Don't need positions, just handle YES/MAYBE */
    1417         7467 :                     if (subres == TS_YES || res == TS_NO)
    1418         7467 :                         res = subres;
    1419              :                 }
    1420              :             }
    1421              : 
    1422         7518 :             StopMiddle++;
    1423              :         }
    1424              : 
    1425         8262 :         if (data && npos > 0)
    1426              :         {
    1427              :             /* Sort and make unique array of found positions */
    1428           18 :             data->pos = allpos;
    1429           18 :             qsort(data->pos, npos, sizeof(WordEntryPos), compareWordEntryPos);
    1430           18 :             data->npos = qunique(data->pos, npos, sizeof(WordEntryPos),
    1431              :                                  compareWordEntryPos);
    1432           18 :             data->allocated = true;
    1433           18 :             res = TS_YES;
    1434              :         }
    1435              :     }
    1436              : 
    1437       142011 :     return res;
    1438              : }
    1439              : 
    1440              : /*
    1441              :  * Compute output position list for a tsquery operator in phrase mode.
    1442              :  *
    1443              :  * Merge the position lists in Ldata and Rdata as specified by "emit",
    1444              :  * returning the result list into *data.  The input position lists must be
    1445              :  * sorted and unique, and the output will be as well.
    1446              :  *
    1447              :  * data: pointer to initially-all-zeroes output struct, or NULL
    1448              :  * Ldata, Rdata: input position lists
    1449              :  * emit: bitmask of TSPO_XXX flags
    1450              :  * Loffset: offset to be added to Ldata positions before comparing/outputting
    1451              :  * Roffset: offset to be added to Rdata positions before comparing/outputting
    1452              :  * max_npos: maximum possible required size of output position array
    1453              :  *
    1454              :  * Loffset and Roffset should not be negative, else we risk trying to output
    1455              :  * negative positions, which won't fit into WordEntryPos.
    1456              :  *
    1457              :  * The result is boolean (TS_YES or TS_NO), but for the caller's convenience
    1458              :  * we return it as TSTernaryValue.
    1459              :  *
    1460              :  * Returns TS_YES if any positions were emitted to *data; or if data is NULL,
    1461              :  * returns TS_YES if any positions would have been emitted.
    1462              :  */
    1463              : #define TSPO_L_ONLY     0x01    /* emit positions appearing only in L */
    1464              : #define TSPO_R_ONLY     0x02    /* emit positions appearing only in R */
    1465              : #define TSPO_BOTH       0x04    /* emit positions appearing in both L&R */
    1466              : 
    1467              : static TSTernaryValue
    1468        14982 : TS_phrase_output(ExecPhraseData *data,
    1469              :                  ExecPhraseData *Ldata,
    1470              :                  ExecPhraseData *Rdata,
    1471              :                  int emit,
    1472              :                  int Loffset,
    1473              :                  int Roffset,
    1474              :                  int max_npos)
    1475              : {
    1476              :     int         Lindex,
    1477              :                 Rindex;
    1478              : 
    1479              :     /* Loop until both inputs are exhausted */
    1480        14982 :     Lindex = Rindex = 0;
    1481        15498 :     while (Lindex < Ldata->npos || Rindex < Rdata->npos)
    1482              :     {
    1483              :         int         Lpos,
    1484              :                     Rpos;
    1485         1167 :         int         output_pos = 0;
    1486              : 
    1487              :         /*
    1488              :          * Fetch current values to compare.  WEP_GETPOS() is needed because
    1489              :          * ExecPhraseData->data can point to a tsvector's WordEntryPosVector.
    1490              :          */
    1491         1167 :         if (Lindex < Ldata->npos)
    1492          843 :             Lpos = WEP_GETPOS(Ldata->pos[Lindex]) + Loffset;
    1493              :         else
    1494              :         {
    1495              :             /* L array exhausted, so we're done if R_ONLY isn't set */
    1496          324 :             if (!(emit & TSPO_R_ONLY))
    1497           75 :                 break;
    1498          249 :             Lpos = INT_MAX;
    1499              :         }
    1500         1092 :         if (Rindex < Rdata->npos)
    1501          969 :             Rpos = WEP_GETPOS(Rdata->pos[Rindex]) + Roffset;
    1502              :         else
    1503              :         {
    1504              :             /* R array exhausted, so we're done if L_ONLY isn't set */
    1505          123 :             if (!(emit & TSPO_L_ONLY))
    1506           81 :                 break;
    1507           42 :             Rpos = INT_MAX;
    1508              :         }
    1509              : 
    1510              :         /* Merge-join the two input lists */
    1511         1011 :         if (Lpos < Rpos)
    1512              :         {
    1513              :             /* Lpos is not matched in Rdata, should we output it? */
    1514          243 :             if (emit & TSPO_L_ONLY)
    1515           72 :                 output_pos = Lpos;
    1516          243 :             Lindex++;
    1517              :         }
    1518          768 :         else if (Lpos == Rpos)
    1519              :         {
    1520              :             /* Lpos and Rpos match ... should we output it? */
    1521          399 :             if (emit & TSPO_BOTH)
    1522          351 :                 output_pos = Rpos;
    1523          399 :             Lindex++;
    1524          399 :             Rindex++;
    1525              :         }
    1526              :         else                    /* Lpos > Rpos */
    1527              :         {
    1528              :             /* Rpos is not matched in Ldata, should we output it? */
    1529          369 :             if (emit & TSPO_R_ONLY)
    1530          270 :                 output_pos = Rpos;
    1531          369 :             Rindex++;
    1532              :         }
    1533              : 
    1534         1011 :         if (output_pos > 0)
    1535              :         {
    1536          693 :             if (data)
    1537              :             {
    1538              :                 /* Store position, first allocating output array if needed */
    1539          198 :                 if (data->pos == NULL)
    1540              :                 {
    1541          159 :                     data->pos = (WordEntryPos *)
    1542          159 :                         palloc(max_npos * sizeof(WordEntryPos));
    1543          159 :                     data->allocated = true;
    1544              :                 }
    1545          198 :                 data->pos[data->npos++] = output_pos;
    1546              :             }
    1547              :             else
    1548              :             {
    1549              :                 /*
    1550              :                  * Exact positions not needed, so return TS_YES as soon as we
    1551              :                  * know there is at least one.
    1552              :                  */
    1553          495 :                 return TS_YES;
    1554              :             }
    1555              :         }
    1556              :     }
    1557              : 
    1558        14487 :     if (data && data->npos > 0)
    1559              :     {
    1560              :         /* Let's assert we didn't overrun the array */
    1561              :         Assert(data->npos <= max_npos);
    1562          159 :         return TS_YES;
    1563              :     }
    1564        14328 :     return TS_NO;
    1565              : }
    1566              : 
    1567              : /*
    1568              :  * Execute tsquery at or below an OP_PHRASE operator.
    1569              :  *
    1570              :  * This handles tsquery execution at recursion levels where we need to care
    1571              :  * about match locations.
    1572              :  *
    1573              :  * In addition to the same arguments used for TS_execute, the caller may pass
    1574              :  * a preinitialized-to-zeroes ExecPhraseData struct, to be filled with lexeme
    1575              :  * match position info on success.  data == NULL if no position data need be
    1576              :  * returned.
    1577              :  * Note: the function assumes data != NULL for operators other than OP_PHRASE.
    1578              :  * This is OK because an outside call always starts from an OP_PHRASE node,
    1579              :  * and all internal recursion cases pass data != NULL.
    1580              :  *
    1581              :  * The detailed semantics of the match data, given that the function returned
    1582              :  * TS_YES (successful match), are:
    1583              :  *
    1584              :  * npos > 0, negate = false:
    1585              :  *   query is matched at specified position(s) (and only those positions)
    1586              :  * npos > 0, negate = true:
    1587              :  *   query is matched at all positions *except* specified position(s)
    1588              :  * npos = 0, negate = true:
    1589              :  *   query is matched at all positions
    1590              :  * npos = 0, negate = false:
    1591              :  *   disallowed (this should result in TS_NO or TS_MAYBE, as appropriate)
    1592              :  *
    1593              :  * Successful matches also return a "width" value which is the match width in
    1594              :  * lexemes, less one.  Hence, "width" is zero for simple one-lexeme matches,
    1595              :  * and is the sum of the phrase operator distances for phrase matches.  Note
    1596              :  * that when width > 0, the listed positions represent the ends of matches not
    1597              :  * the starts.  (This unintuitive rule is needed to avoid possibly generating
    1598              :  * negative positions, which wouldn't fit into the WordEntryPos arrays.)
    1599              :  *
    1600              :  * If the TSExecuteCallback function reports that an operand is present
    1601              :  * but fails to provide position(s) for it, we will return TS_MAYBE when
    1602              :  * it is possible but not certain that the query is matched.
    1603              :  *
    1604              :  * When the function returns TS_NO or TS_MAYBE, it must return npos = 0,
    1605              :  * negate = false (which is the state initialized by the caller); but the
    1606              :  * "width" output in such cases is undefined.
    1607              :  */
    1608              : static TSTernaryValue
    1609       351910 : TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
    1610              :                   TSExecuteCallback chkcond,
    1611              :                   ExecPhraseData *data)
    1612              : {
    1613              :     ExecPhraseData Ldata,
    1614              :                 Rdata;
    1615              :     TSTernaryValue lmatch,
    1616              :                 rmatch;
    1617              :     int         Loffset,
    1618              :                 Roffset,
    1619              :                 maxwidth;
    1620              : 
    1621              :     /* since this function recurses, it could be driven to stack overflow */
    1622       351910 :     check_stack_depth();
    1623              : 
    1624              :     /* ... and let's check for query cancel while we're at it */
    1625       351910 :     CHECK_FOR_INTERRUPTS();
    1626              : 
    1627       351910 :     if (curitem->type == QI_VAL)
    1628       173118 :         return chkcond(arg, (QueryOperand *) curitem, data);
    1629              : 
    1630       178792 :     switch (curitem->qoperator.oper)
    1631              :     {
    1632        60549 :         case OP_NOT:
    1633              : 
    1634              :             /*
    1635              :              * We need not touch data->width, since a NOT operation does not
    1636              :              * change the match width.
    1637              :              */
    1638        60549 :             if (flags & TS_EXEC_SKIP_NOT)
    1639              :             {
    1640              :                 /* with SKIP_NOT, report NOT as "match everywhere" */
    1641              :                 Assert(data->npos == 0 && !data->negate);
    1642            0 :                 data->negate = true;
    1643            0 :                 return TS_YES;
    1644              :             }
    1645        60549 :             switch (TS_phrase_execute(curitem + 1, arg, flags, chkcond, data))
    1646              :             {
    1647        52936 :                 case TS_NO:
    1648              :                     /* change "match nowhere" to "match everywhere" */
    1649              :                     Assert(data->npos == 0 && !data->negate);
    1650        52936 :                     data->negate = true;
    1651        52936 :                     return TS_YES;
    1652          195 :                 case TS_YES:
    1653          195 :                     if (data->npos > 0)
    1654              :                     {
    1655              :                         /* we have some positions, invert negate flag */
    1656          192 :                         data->negate = !data->negate;
    1657          192 :                         return TS_YES;
    1658              :                     }
    1659            3 :                     else if (data->negate)
    1660              :                     {
    1661              :                         /* change "match everywhere" to "match nowhere" */
    1662            3 :                         data->negate = false;
    1663            3 :                         return TS_NO;
    1664              :                     }
    1665              :                     /* Should not get here if result was TS_YES */
    1666              :                     Assert(false);
    1667            0 :                     break;
    1668         7418 :                 case TS_MAYBE:
    1669              :                     /* match positions are, and remain, uncertain */
    1670         7418 :                     return TS_MAYBE;
    1671              :             }
    1672            0 :             break;
    1673              : 
    1674       118165 :         case OP_PHRASE:
    1675              :         case OP_AND:
    1676       118165 :             memset(&Ldata, 0, sizeof(Ldata));
    1677       118165 :             memset(&Rdata, 0, sizeof(Rdata));
    1678              : 
    1679       118165 :             lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
    1680              :                                        arg, flags, chkcond, &Ldata);
    1681       118165 :             if (lmatch == TS_NO)
    1682        63164 :                 return TS_NO;
    1683              : 
    1684        55001 :             rmatch = TS_phrase_execute(curitem + 1,
    1685              :                                        arg, flags, chkcond, &Rdata);
    1686        55001 :             if (rmatch == TS_NO)
    1687        27183 :                 return TS_NO;
    1688              : 
    1689              :             /*
    1690              :              * If either operand has no position information, then we can't
    1691              :              * return reliable position data, only a MAYBE result.
    1692              :              */
    1693        27818 :             if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
    1694        12914 :                 return TS_MAYBE;
    1695              : 
    1696        14904 :             if (curitem->qoperator.oper == OP_PHRASE)
    1697              :             {
    1698              :                 /*
    1699              :                  * Compute Loffset and Roffset suitable for phrase match, and
    1700              :                  * compute overall width of whole phrase match.
    1701              :                  */
    1702        14901 :                 Loffset = curitem->qoperator.distance + Rdata.width;
    1703        14901 :                 Roffset = 0;
    1704        14901 :                 if (data)
    1705           93 :                     data->width = curitem->qoperator.distance +
    1706           93 :                         Ldata.width + Rdata.width;
    1707              :             }
    1708              :             else
    1709              :             {
    1710              :                 /*
    1711              :                  * For OP_AND, set output width and alignment like OP_OR (see
    1712              :                  * comment below)
    1713              :                  */
    1714            3 :                 maxwidth = Max(Ldata.width, Rdata.width);
    1715            3 :                 Loffset = maxwidth - Ldata.width;
    1716            3 :                 Roffset = maxwidth - Rdata.width;
    1717            3 :                 if (data)
    1718            3 :                     data->width = maxwidth;
    1719              :             }
    1720              : 
    1721        14904 :             if (Ldata.negate && Rdata.negate)
    1722              :             {
    1723              :                 /* !L & !R: treat as !(L | R) */
    1724        14217 :                 (void) TS_phrase_output(data, &Ldata, &Rdata,
    1725              :                                         TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
    1726              :                                         Loffset, Roffset,
    1727        14217 :                                         Ldata.npos + Rdata.npos);
    1728        14217 :                 if (data)
    1729            0 :                     data->negate = true;
    1730        14217 :                 return TS_YES;
    1731              :             }
    1732          687 :             else if (Ldata.negate)
    1733              :             {
    1734              :                 /* !L & R */
    1735          225 :                 return TS_phrase_output(data, &Ldata, &Rdata,
    1736              :                                         TSPO_R_ONLY,
    1737              :                                         Loffset, Roffset,
    1738              :                                         Rdata.npos);
    1739              :             }
    1740          462 :             else if (Rdata.negate)
    1741              :             {
    1742              :                 /* L & !R */
    1743            3 :                 return TS_phrase_output(data, &Ldata, &Rdata,
    1744              :                                         TSPO_L_ONLY,
    1745              :                                         Loffset, Roffset,
    1746              :                                         Ldata.npos);
    1747              :             }
    1748              :             else
    1749              :             {
    1750              :                 /* straight AND */
    1751          459 :                 return TS_phrase_output(data, &Ldata, &Rdata,
    1752              :                                         TSPO_BOTH,
    1753              :                                         Loffset, Roffset,
    1754          459 :                                         Min(Ldata.npos, Rdata.npos));
    1755              :             }
    1756              : 
    1757           78 :         case OP_OR:
    1758           78 :             memset(&Ldata, 0, sizeof(Ldata));
    1759           78 :             memset(&Rdata, 0, sizeof(Rdata));
    1760              : 
    1761           78 :             lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
    1762              :                                        arg, flags, chkcond, &Ldata);
    1763           78 :             rmatch = TS_phrase_execute(curitem + 1,
    1764              :                                        arg, flags, chkcond, &Rdata);
    1765              : 
    1766           78 :             if (lmatch == TS_NO && rmatch == TS_NO)
    1767            6 :                 return TS_NO;
    1768              : 
    1769              :             /*
    1770              :              * If either operand has no position information, then we can't
    1771              :              * return reliable position data, only a MAYBE result.
    1772              :              */
    1773           72 :             if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
    1774            0 :                 return TS_MAYBE;
    1775              : 
    1776              :             /*
    1777              :              * Cope with undefined output width from failed submatch.  (This
    1778              :              * takes less code than trying to ensure that all failure returns
    1779              :              * set data->width to zero.)
    1780              :              */
    1781           72 :             if (lmatch == TS_NO)
    1782            9 :                 Ldata.width = 0;
    1783           72 :             if (rmatch == TS_NO)
    1784           42 :                 Rdata.width = 0;
    1785              : 
    1786              :             /*
    1787              :              * For OP_AND and OP_OR, report the width of the wider of the two
    1788              :              * inputs, and align the narrower input's positions to the right
    1789              :              * end of that width.  This rule deals at least somewhat
    1790              :              * reasonably with cases like "x <-> (y | z <-> q)".
    1791              :              */
    1792           72 :             maxwidth = Max(Ldata.width, Rdata.width);
    1793           72 :             Loffset = maxwidth - Ldata.width;
    1794           72 :             Roffset = maxwidth - Rdata.width;
    1795           72 :             data->width = maxwidth;
    1796              : 
    1797           72 :             if (Ldata.negate && Rdata.negate)
    1798              :             {
    1799              :                 /* !L | !R: treat as !(L & R) */
    1800            3 :                 (void) TS_phrase_output(data, &Ldata, &Rdata,
    1801              :                                         TSPO_BOTH,
    1802              :                                         Loffset, Roffset,
    1803            3 :                                         Min(Ldata.npos, Rdata.npos));
    1804            3 :                 data->negate = true;
    1805            3 :                 return TS_YES;
    1806              :             }
    1807           69 :             else if (Ldata.negate)
    1808              :             {
    1809              :                 /* !L | R: treat as !(L & !R) */
    1810           15 :                 (void) TS_phrase_output(data, &Ldata, &Rdata,
    1811              :                                         TSPO_L_ONLY,
    1812              :                                         Loffset, Roffset,
    1813              :                                         Ldata.npos);
    1814           15 :                 data->negate = true;
    1815           15 :                 return TS_YES;
    1816              :             }
    1817           54 :             else if (Rdata.negate)
    1818              :             {
    1819              :                 /* L | !R: treat as !(!L & R) */
    1820            3 :                 (void) TS_phrase_output(data, &Ldata, &Rdata,
    1821              :                                         TSPO_R_ONLY,
    1822              :                                         Loffset, Roffset,
    1823              :                                         Rdata.npos);
    1824            3 :                 data->negate = true;
    1825            3 :                 return TS_YES;
    1826              :             }
    1827              :             else
    1828              :             {
    1829              :                 /* straight OR */
    1830           51 :                 return TS_phrase_output(data, &Ldata, &Rdata,
    1831              :                                         TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
    1832              :                                         Loffset, Roffset,
    1833           51 :                                         Ldata.npos + Rdata.npos);
    1834              :             }
    1835              : 
    1836            0 :         default:
    1837            0 :             elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
    1838              :     }
    1839              : 
    1840              :     /* not reachable, but keep compiler quiet */
    1841            0 :     return TS_NO;
    1842              : }
    1843              : 
    1844              : 
    1845              : /*
    1846              :  * Evaluate tsquery boolean expression.
    1847              :  *
    1848              :  * curitem: current tsquery item (initially, the first one)
    1849              :  * arg: opaque value to pass through to callback function
    1850              :  * flags: bitmask of flag bits shown in ts_utils.h
    1851              :  * chkcond: callback function to check whether a primitive value is present
    1852              :  */
    1853              : bool
    1854       260429 : TS_execute(QueryItem *curitem, void *arg, uint32 flags,
    1855              :            TSExecuteCallback chkcond)
    1856              : {
    1857              :     /*
    1858              :      * If we get TS_MAYBE from the recursion, return true.  We could only see
    1859              :      * that result if the caller passed TS_EXEC_PHRASE_NO_POS, so there's no
    1860              :      * need to check again.
    1861              :      */
    1862       260429 :     return TS_execute_recurse(curitem, arg, flags, chkcond) != TS_NO;
    1863              : }
    1864              : 
    1865              : /*
    1866              :  * Evaluate tsquery boolean expression.
    1867              :  *
    1868              :  * This is the same as TS_execute except that TS_MAYBE is returned as-is.
    1869              :  */
    1870              : TSTernaryValue
    1871        18471 : TS_execute_ternary(QueryItem *curitem, void *arg, uint32 flags,
    1872              :                    TSExecuteCallback chkcond)
    1873              : {
    1874        18471 :     return TS_execute_recurse(curitem, arg, flags, chkcond);
    1875              : }
    1876              : 
    1877              : /*
    1878              :  * TS_execute recursion for operators above any phrase operator.  Here we do
    1879              :  * not need to worry about lexeme positions.  As soon as we hit an OP_PHRASE
    1880              :  * operator, we pass it off to TS_phrase_execute which does worry.
    1881              :  */
    1882              : static TSTernaryValue
    1883       527822 : TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags,
    1884              :                    TSExecuteCallback chkcond)
    1885              : {
    1886              :     TSTernaryValue lmatch;
    1887              : 
    1888              :     /* since this function recurses, it could be driven to stack overflow */
    1889       527822 :     check_stack_depth();
    1890              : 
    1891              :     /* ... and let's check for query cancel while we're at it */
    1892       527822 :     CHECK_FOR_INTERRUPTS();
    1893              : 
    1894       527822 :     if (curitem->type == QI_VAL)
    1895       211872 :         return chkcond(arg, (QueryOperand *) curitem,
    1896              :                        NULL /* don't need position info */ );
    1897              : 
    1898       315950 :     switch (curitem->qoperator.oper)
    1899              :     {
    1900       101616 :         case OP_NOT:
    1901       101616 :             if (flags & TS_EXEC_SKIP_NOT)
    1902            0 :                 return TS_YES;
    1903       101616 :             switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
    1904              :             {
    1905        95865 :                 case TS_NO:
    1906        95865 :                     return TS_YES;
    1907         2445 :                 case TS_YES:
    1908         2445 :                     return TS_NO;
    1909         3306 :                 case TS_MAYBE:
    1910         3306 :                     return TS_MAYBE;
    1911              :             }
    1912            0 :             break;
    1913              : 
    1914        41854 :         case OP_AND:
    1915        41854 :             lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
    1916              :                                         flags, chkcond);
    1917        41854 :             if (lmatch == TS_NO)
    1918        33254 :                 return TS_NO;
    1919         8600 :             switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
    1920              :             {
    1921         5062 :                 case TS_NO:
    1922         5062 :                     return TS_NO;
    1923         1650 :                 case TS_YES:
    1924         1650 :                     return lmatch;
    1925         1888 :                 case TS_MAYBE:
    1926         1888 :                     return TS_MAYBE;
    1927              :             }
    1928            0 :             break;
    1929              : 
    1930        54471 :         case OP_OR:
    1931        54471 :             lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
    1932              :                                         flags, chkcond);
    1933        54471 :             if (lmatch == TS_YES)
    1934        12090 :                 return TS_YES;
    1935        42381 :             switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
    1936              :             {
    1937        28752 :                 case TS_NO:
    1938        28752 :                     return lmatch;
    1939         3708 :                 case TS_YES:
    1940         3708 :                     return TS_YES;
    1941         9921 :                 case TS_MAYBE:
    1942         9921 :                     return TS_MAYBE;
    1943              :             }
    1944            0 :             break;
    1945              : 
    1946       118009 :         case OP_PHRASE:
    1947              : 
    1948              :             /*
    1949              :              * If we get a MAYBE result, and the caller doesn't want that,
    1950              :              * convert it to NO.  It would be more consistent, perhaps, to
    1951              :              * return the result of TS_phrase_execute() verbatim and then
    1952              :              * convert MAYBE results at the top of the recursion.  But
    1953              :              * converting at the topmost phrase operator gives results that
    1954              :              * are bug-compatible with the old implementation, so do it like
    1955              :              * this for now.
    1956              :              */
    1957       118009 :             switch (TS_phrase_execute(curitem, arg, flags, chkcond, NULL))
    1958              :             {
    1959        90443 :                 case TS_NO:
    1960        90443 :                     return TS_NO;
    1961        14655 :                 case TS_YES:
    1962        14655 :                     return TS_YES;
    1963        12911 :                 case TS_MAYBE:
    1964        12911 :                     return (flags & TS_EXEC_PHRASE_NO_POS) ? TS_MAYBE : TS_NO;
    1965              :             }
    1966            0 :             break;
    1967              : 
    1968            0 :         default:
    1969            0 :             elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
    1970              :     }
    1971              : 
    1972              :     /* not reachable, but keep compiler quiet */
    1973            0 :     return TS_NO;
    1974              : }
    1975              : 
    1976              : /*
    1977              :  * Evaluate tsquery and report locations of matching terms.
    1978              :  *
    1979              :  * This is like TS_execute except that it returns match locations not just
    1980              :  * success/failure status.  The callback function is required to provide
    1981              :  * position data (we report failure if it doesn't).
    1982              :  *
    1983              :  * On successful match, the result is a List of ExecPhraseData structs, one
    1984              :  * for each AND'ed term or phrase operator in the query.  Each struct includes
    1985              :  * a sorted array of lexeme positions matching that term.  (Recall that for
    1986              :  * phrase operators, the match includes width+1 lexemes, and the recorded
    1987              :  * position is that of the rightmost lexeme.)
    1988              :  *
    1989              :  * OR subexpressions are handled by union'ing their match locations into a
    1990              :  * single List element, which is valid since any of those locations contains
    1991              :  * a match.  However, when some of the OR'ed terms are phrase operators, we
    1992              :  * report the maximum width of any of the OR'ed terms, making such cases
    1993              :  * slightly imprecise in the conservative direction.  (For example, if the
    1994              :  * tsquery is "(A <-> B) | C", an occurrence of C in the data would be
    1995              :  * reported as though it includes the lexeme to the left of C.)
    1996              :  *
    1997              :  * Locations of NOT subexpressions are not reported.  (Obviously, there can
    1998              :  * be no successful NOT matches at top level, or the match would have failed.
    1999              :  * So this amounts to ignoring NOTs underneath ORs.)
    2000              :  *
    2001              :  * The result is NIL if no match, or if position data was not returned.
    2002              :  *
    2003              :  * Arguments are the same as for TS_execute, although flags is currently
    2004              :  * vestigial since none of the defined bits are sensible here.
    2005              :  */
    2006              : List *
    2007          181 : TS_execute_locations(QueryItem *curitem, void *arg,
    2008              :                      uint32 flags,
    2009              :                      TSExecuteCallback chkcond)
    2010              : {
    2011              :     List       *result;
    2012              : 
    2013              :     /* No flags supported, as yet */
    2014              :     Assert(flags == TS_EXEC_EMPTY);
    2015          181 :     if (TS_execute_locations_recurse(curitem, arg, chkcond, &result))
    2016           64 :         return result;
    2017          117 :     return NIL;
    2018              : }
    2019              : 
    2020              : /*
    2021              :  * TS_execute_locations recursion for operators above any phrase operator.
    2022              :  * OP_PHRASE subexpressions can be passed off to TS_phrase_execute.
    2023              :  */
    2024              : static bool
    2025          535 : TS_execute_locations_recurse(QueryItem *curitem, void *arg,
    2026              :                              TSExecuteCallback chkcond,
    2027              :                              List **locations)
    2028              : {
    2029              :     bool        lmatch,
    2030              :                 rmatch;
    2031              :     List       *llocations,
    2032              :                *rlocations;
    2033              :     ExecPhraseData *data;
    2034              : 
    2035              :     /* since this function recurses, it could be driven to stack overflow */
    2036          535 :     check_stack_depth();
    2037              : 
    2038              :     /* ... and let's check for query cancel while we're at it */
    2039          535 :     CHECK_FOR_INTERRUPTS();
    2040              : 
    2041              :     /* Default locations result is empty */
    2042          535 :     *locations = NIL;
    2043              : 
    2044          535 :     if (curitem->type == QI_VAL)
    2045              :     {
    2046          223 :         data = palloc0_object(ExecPhraseData);
    2047          223 :         if (chkcond(arg, (QueryOperand *) curitem, data) == TS_YES)
    2048              :         {
    2049          106 :             *locations = list_make1(data);
    2050          106 :             return true;
    2051              :         }
    2052          117 :         pfree(data);
    2053          117 :         return false;
    2054              :     }
    2055              : 
    2056          312 :     switch (curitem->qoperator.oper)
    2057              :     {
    2058            6 :         case OP_NOT:
    2059            6 :             if (!TS_execute_locations_recurse(curitem + 1, arg, chkcond,
    2060              :                                               &llocations))
    2061            0 :                 return true;    /* we don't pass back any locations */
    2062            6 :             return false;
    2063              : 
    2064          264 :         case OP_AND:
    2065          264 :             if (!TS_execute_locations_recurse(curitem + curitem->qoperator.left,
    2066              :                                               arg, chkcond,
    2067              :                                               &llocations))
    2068          204 :                 return false;
    2069           60 :             if (!TS_execute_locations_recurse(curitem + 1,
    2070              :                                               arg, chkcond,
    2071              :                                               &rlocations))
    2072           27 :                 return false;
    2073           33 :             *locations = list_concat(llocations, rlocations);
    2074           33 :             return true;
    2075              : 
    2076           12 :         case OP_OR:
    2077           12 :             lmatch = TS_execute_locations_recurse(curitem + curitem->qoperator.left,
    2078              :                                                   arg, chkcond,
    2079              :                                                   &llocations);
    2080           12 :             rmatch = TS_execute_locations_recurse(curitem + 1,
    2081              :                                                   arg, chkcond,
    2082              :                                                   &rlocations);
    2083           12 :             if (lmatch || rmatch)
    2084              :             {
    2085              :                 /*
    2086              :                  * We generate an AND'able location struct from each
    2087              :                  * combination of sub-matches, following the distributive law
    2088              :                  * (A & B) | (C & D) = (A | C) & (A | D) & (B | C) & (B | D).
    2089              :                  *
    2090              :                  * However, if either input didn't produce locations (i.e., it
    2091              :                  * failed or was a NOT), we must just return the other list.
    2092              :                  */
    2093           12 :                 if (llocations == NIL)
    2094            0 :                     *locations = rlocations;
    2095           12 :                 else if (rlocations == NIL)
    2096            6 :                     *locations = llocations;
    2097              :                 else
    2098              :                 {
    2099              :                     ListCell   *ll;
    2100              : 
    2101           12 :                     foreach(ll, llocations)
    2102              :                     {
    2103            6 :                         ExecPhraseData *ldata = (ExecPhraseData *) lfirst(ll);
    2104              :                         ListCell   *lr;
    2105              : 
    2106           12 :                         foreach(lr, rlocations)
    2107              :                         {
    2108            6 :                             ExecPhraseData *rdata = (ExecPhraseData *) lfirst(lr);
    2109              : 
    2110            6 :                             data = palloc0_object(ExecPhraseData);
    2111            6 :                             (void) TS_phrase_output(data, ldata, rdata,
    2112              :                                                     TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
    2113              :                                                     0, 0,
    2114            6 :                                                     ldata->npos + rdata->npos);
    2115              :                             /* Report the larger width, as explained above. */
    2116            6 :                             data->width = Max(ldata->width, rdata->width);
    2117            6 :                             *locations = lappend(*locations, data);
    2118              :                         }
    2119              :                     }
    2120              :                 }
    2121              : 
    2122           12 :                 return true;
    2123              :             }
    2124            0 :             return false;
    2125              : 
    2126           30 :         case OP_PHRASE:
    2127              :             /* We can hand this off to TS_phrase_execute */
    2128           30 :             data = palloc0_object(ExecPhraseData);
    2129           30 :             if (TS_phrase_execute(curitem, arg, TS_EXEC_EMPTY, chkcond,
    2130              :                                   data) == TS_YES)
    2131              :             {
    2132           30 :                 if (!data->negate)
    2133           30 :                     *locations = list_make1(data);
    2134           30 :                 return true;
    2135              :             }
    2136            0 :             pfree(data);
    2137            0 :             return false;
    2138              : 
    2139            0 :         default:
    2140            0 :             elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
    2141              :     }
    2142              : 
    2143              :     /* not reachable, but keep compiler quiet */
    2144              :     return false;
    2145              : }
    2146              : 
    2147              : /*
    2148              :  * Detect whether a tsquery boolean expression requires any positive matches
    2149              :  * to values shown in the tsquery.
    2150              :  *
    2151              :  * This is needed to know whether a GIN index search requires full index scan.
    2152              :  * For example, 'x & !y' requires a match of x, so it's sufficient to scan
    2153              :  * entries for x; but 'x | !y' could match rows containing neither x nor y.
    2154              :  */
    2155              : bool
    2156          417 : tsquery_requires_match(QueryItem *curitem)
    2157              : {
    2158              :     /* since this function recurses, it could be driven to stack overflow */
    2159          417 :     check_stack_depth();
    2160              : 
    2161          417 :     if (curitem->type == QI_VAL)
    2162          198 :         return true;
    2163              : 
    2164          219 :     switch (curitem->qoperator.oper)
    2165              :     {
    2166           84 :         case OP_NOT:
    2167              : 
    2168              :             /*
    2169              :              * Assume there are no required matches underneath a NOT.  For
    2170              :              * some cases with nested NOTs, we could prove there's a required
    2171              :              * match, but it seems unlikely to be worth the trouble.
    2172              :              */
    2173           84 :             return false;
    2174              : 
    2175          102 :         case OP_PHRASE:
    2176              : 
    2177              :             /*
    2178              :              * Treat OP_PHRASE as OP_AND here
    2179              :              */
    2180              :         case OP_AND:
    2181              :             /* If either side requires a match, we're good */
    2182          102 :             if (tsquery_requires_match(curitem + curitem->qoperator.left))
    2183           78 :                 return true;
    2184              :             else
    2185           24 :                 return tsquery_requires_match(curitem + 1);
    2186              : 
    2187           33 :         case OP_OR:
    2188              :             /* Both sides must require a match */
    2189           33 :             if (tsquery_requires_match(curitem + curitem->qoperator.left))
    2190           33 :                 return tsquery_requires_match(curitem + 1);
    2191              :             else
    2192            0 :                 return false;
    2193              : 
    2194            0 :         default:
    2195            0 :             elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
    2196              :     }
    2197              : 
    2198              :     /* not reachable, but keep compiler quiet */
    2199              :     return false;
    2200              : }
    2201              : 
    2202              : /*
    2203              :  * boolean operations
    2204              :  */
    2205              : Datum
    2206           30 : ts_match_qv(PG_FUNCTION_ARGS)
    2207              : {
    2208           30 :     PG_RETURN_DATUM(DirectFunctionCall2(ts_match_vq,
    2209              :                                         PG_GETARG_DATUM(1),
    2210              :                                         PG_GETARG_DATUM(0)));
    2211              : }
    2212              : 
    2213              : Datum
    2214       110040 : ts_match_vq(PG_FUNCTION_ARGS)
    2215              : {
    2216       110040 :     TSVector    val = PG_GETARG_TSVECTOR(0);
    2217       110040 :     TSQuery     query = PG_GETARG_TSQUERY(1);
    2218              :     CHKVAL      chkval;
    2219              :     bool        result;
    2220              : 
    2221              :     /* empty query matches nothing */
    2222       110040 :     if (!query->size)
    2223              :     {
    2224            0 :         PG_FREE_IF_COPY(val, 0);
    2225            0 :         PG_FREE_IF_COPY(query, 1);
    2226            0 :         PG_RETURN_BOOL(false);
    2227              :     }
    2228              : 
    2229       110040 :     chkval.arrb = ARRPTR(val);
    2230       110040 :     chkval.arre = chkval.arrb + val->size;
    2231       110040 :     chkval.values = STRPTR(val);
    2232       110040 :     chkval.operand = GETOPERAND(query);
    2233       110040 :     result = TS_execute(GETQUERY(query),
    2234              :                         &chkval,
    2235              :                         TS_EXEC_EMPTY,
    2236              :                         checkcondition_str);
    2237              : 
    2238       110040 :     PG_FREE_IF_COPY(val, 0);
    2239       110040 :     PG_FREE_IF_COPY(query, 1);
    2240       110040 :     PG_RETURN_BOOL(result);
    2241              : }
    2242              : 
    2243              : Datum
    2244            0 : ts_match_tt(PG_FUNCTION_ARGS)
    2245              : {
    2246              :     TSVector    vector;
    2247              :     TSQuery     query;
    2248              :     bool        res;
    2249              : 
    2250            0 :     vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
    2251              :                                                   PG_GETARG_DATUM(0)));
    2252            0 :     query = DatumGetTSQuery(DirectFunctionCall1(plainto_tsquery,
    2253              :                                                 PG_GETARG_DATUM(1)));
    2254              : 
    2255            0 :     res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
    2256              :                                            TSVectorGetDatum(vector),
    2257              :                                            TSQueryGetDatum(query)));
    2258              : 
    2259            0 :     pfree(vector);
    2260            0 :     pfree(query);
    2261              : 
    2262            0 :     PG_RETURN_BOOL(res);
    2263              : }
    2264              : 
    2265              : Datum
    2266            0 : ts_match_tq(PG_FUNCTION_ARGS)
    2267              : {
    2268              :     TSVector    vector;
    2269            0 :     TSQuery     query = PG_GETARG_TSQUERY(1);
    2270              :     bool        res;
    2271              : 
    2272            0 :     vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
    2273              :                                                   PG_GETARG_DATUM(0)));
    2274              : 
    2275            0 :     res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
    2276              :                                            TSVectorGetDatum(vector),
    2277              :                                            TSQueryGetDatum(query)));
    2278              : 
    2279            0 :     pfree(vector);
    2280            0 :     PG_FREE_IF_COPY(query, 1);
    2281              : 
    2282            0 :     PG_RETURN_BOOL(res);
    2283              : }
    2284              : 
    2285              : /*
    2286              :  * ts_stat statistic function support
    2287              :  */
    2288              : 
    2289              : 
    2290              : /*
    2291              :  * Returns the number of positions in value 'wptr' within tsvector 'txt',
    2292              :  * that have a weight equal to one of the weights in 'weight' bitmask.
    2293              :  */
    2294              : static int
    2295         4089 : check_weight(TSVector txt, WordEntry *wptr, int8 weight)
    2296              : {
    2297         4089 :     int         len = POSDATALEN(txt, wptr);
    2298         4089 :     int         num = 0;
    2299         4089 :     WordEntryPos *ptr = POSDATAPTR(txt, wptr);
    2300              : 
    2301         8325 :     while (len--)
    2302              :     {
    2303         4236 :         if (weight & (1 << WEP_GETWEIGHT(*ptr)))
    2304            6 :             num++;
    2305         4236 :         ptr++;
    2306              :     }
    2307         4089 :     return num;
    2308              : }
    2309              : 
    2310              : #define compareStatWord(a,e,t)                          \
    2311              :     tsCompareString((a)->lexeme, (a)->lenlexeme,      \
    2312              :                     STRPTR(t) + (e)->pos, (e)->len,       \
    2313              :                     false)
    2314              : 
    2315              : static void
    2316       172812 : insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 off)
    2317              : {
    2318       172812 :     WordEntry  *we = ARRPTR(txt) + off;
    2319       172812 :     StatEntry  *node = stat->root,
    2320       172812 :                *pnode = NULL;
    2321              :     int         n,
    2322       172812 :                 res = 0;
    2323       172812 :     uint32      depth = 1;
    2324              : 
    2325       172812 :     if (stat->weight == 0)
    2326        86406 :         n = (we->haspos) ? POSDATALEN(txt, we) : 1;
    2327              :     else
    2328        86406 :         n = (we->haspos) ? check_weight(txt, we, stat->weight) : 0;
    2329              : 
    2330       172812 :     if (n == 0)
    2331        86403 :         return;                 /* nothing to insert */
    2332              : 
    2333       872697 :     while (node)
    2334              :     {
    2335       869265 :         res = compareStatWord(node, we, txt);
    2336              : 
    2337       869265 :         if (res == 0)
    2338              :         {
    2339        82977 :             break;
    2340              :         }
    2341              :         else
    2342              :         {
    2343       786288 :             pnode = node;
    2344       786288 :             node = (res < 0) ? node->left : node->right;
    2345              :         }
    2346       786288 :         depth++;
    2347              :     }
    2348              : 
    2349        86409 :     if (depth > stat->maxdepth)
    2350           63 :         stat->maxdepth = depth;
    2351              : 
    2352        86409 :     if (node == NULL)
    2353              :     {
    2354         3432 :         node = MemoryContextAlloc(persistentContext, STATENTRYHDRSZ + we->len);
    2355         3432 :         node->left = node->right = NULL;
    2356         3432 :         node->ndoc = 1;
    2357         3432 :         node->nentry = n;
    2358         3432 :         node->lenlexeme = we->len;
    2359         3432 :         memcpy(node->lexeme, STRPTR(txt) + we->pos, node->lenlexeme);
    2360              : 
    2361         3432 :         if (pnode == NULL)
    2362              :         {
    2363            6 :             stat->root = node;
    2364              :         }
    2365              :         else
    2366              :         {
    2367         3426 :             if (res < 0)
    2368         1690 :                 pnode->left = node;
    2369              :             else
    2370         1736 :                 pnode->right = node;
    2371              :         }
    2372              :     }
    2373              :     else
    2374              :     {
    2375        82977 :         node->ndoc++;
    2376        82977 :         node->nentry += n;
    2377              :     }
    2378              : }
    2379              : 
    2380              : static void
    2381       247692 : chooseNextStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt,
    2382              :                     uint32 low, uint32 high, uint32 offset)
    2383              : {
    2384              :     uint32      pos;
    2385       247692 :     uint32      middle = (low + high) >> 1;
    2386              : 
    2387       247692 :     pos = (low + middle) >> 1;
    2388       247692 :     if (low != middle && pos >= offset && pos - offset < txt->size)
    2389        85164 :         insertStatEntry(persistentContext, stat, txt, pos - offset);
    2390       247692 :     pos = (high + middle + 1) >> 1;
    2391       247692 :     if (middle + 1 != high && pos >= offset && pos - offset < txt->size)
    2392        84642 :         insertStatEntry(persistentContext, stat, txt, pos - offset);
    2393              : 
    2394       247692 :     if (low != middle)
    2395       123846 :         chooseNextStatEntry(persistentContext, stat, txt, low, middle, offset);
    2396       247692 :     if (high != middle + 1)
    2397       120840 :         chooseNextStatEntry(persistentContext, stat, txt, middle + 1, high, offset);
    2398       247692 : }
    2399              : 
    2400              : /*
    2401              :  * This is written like a custom aggregate function, because the
    2402              :  * original plan was to do just that. Unfortunately, an aggregate function
    2403              :  * can't return a set, so that plan was abandoned. If that limitation is
    2404              :  * lifted in the future, ts_stat could be a real aggregate function so that
    2405              :  * you could use it like this:
    2406              :  *
    2407              :  *   SELECT ts_stat(vector_column) FROM vector_table;
    2408              :  *
    2409              :  *  where vector_column is a tsvector-type column in vector_table.
    2410              :  */
    2411              : 
    2412              : static TSVectorStat *
    2413         3054 : ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
    2414              : {
    2415         3054 :     TSVector    txt = DatumGetTSVector(data);
    2416              :     uint32      i,
    2417         3054 :                 nbit = 0,
    2418              :                 offset;
    2419              : 
    2420         3054 :     if (stat == NULL)
    2421              :     {                           /* Init in first */
    2422            0 :         stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
    2423            0 :         stat->maxdepth = 1;
    2424              :     }
    2425              : 
    2426              :     /* simple check of correctness */
    2427         3054 :     if (txt == NULL || txt->size == 0)
    2428              :     {
    2429           48 :         if (txt && txt != (TSVector) DatumGetPointer(data))
    2430           48 :             pfree(txt);
    2431           48 :         return stat;
    2432              :     }
    2433              : 
    2434         3006 :     i = txt->size - 1;
    2435        21360 :     for (; i > 0; i >>= 1)
    2436        18354 :         nbit++;
    2437              : 
    2438         3006 :     nbit = 1 << nbit;
    2439         3006 :     offset = (nbit - txt->size) / 2;
    2440              : 
    2441         3006 :     insertStatEntry(persistentContext, stat, txt, (nbit >> 1) - offset);
    2442         3006 :     chooseNextStatEntry(persistentContext, stat, txt, 0, nbit, offset);
    2443              : 
    2444         3006 :     return stat;
    2445              : }
    2446              : 
    2447              : static void
    2448            6 : ts_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx,
    2449              :                    TSVectorStat *stat)
    2450              : {
    2451              :     TupleDesc   tupdesc;
    2452              :     MemoryContext oldcontext;
    2453              :     StatEntry  *node;
    2454              : 
    2455            6 :     funcctx->user_fctx = stat;
    2456              : 
    2457            6 :     oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
    2458              : 
    2459            6 :     stat->stack = palloc0_array(StatEntry *, stat->maxdepth + 1);
    2460            6 :     stat->stackpos = 0;
    2461              : 
    2462            6 :     node = stat->root;
    2463              :     /* find leftmost value */
    2464            6 :     if (node == NULL)
    2465            0 :         stat->stack[stat->stackpos] = NULL;
    2466              :     else
    2467              :         for (;;)
    2468              :         {
    2469           24 :             stat->stack[stat->stackpos] = node;
    2470           24 :             if (node->left)
    2471              :             {
    2472           18 :                 stat->stackpos++;
    2473           18 :                 node = node->left;
    2474              :             }
    2475              :             else
    2476            6 :                 break;
    2477              :         }
    2478              :     Assert(stat->stackpos <= stat->maxdepth);
    2479              : 
    2480            6 :     if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
    2481            0 :         elog(ERROR, "return type must be a row type");
    2482            6 :     funcctx->tuple_desc = tupdesc;
    2483            6 :     funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
    2484              : 
    2485            6 :     MemoryContextSwitchTo(oldcontext);
    2486            6 : }
    2487              : 
    2488              : static StatEntry *
    2489         6864 : walkStatEntryTree(TSVectorStat *stat)
    2490              : {
    2491         6864 :     StatEntry  *node = stat->stack[stat->stackpos];
    2492              : 
    2493         6864 :     if (node == NULL)
    2494            0 :         return NULL;
    2495              : 
    2496         6864 :     if (node->ndoc != 0)
    2497              :     {
    2498              :         /* return entry itself: we already was at left sublink */
    2499         1696 :         return node;
    2500              :     }
    2501         5168 :     else if (node->right && node->right != stat->stack[stat->stackpos + 1])
    2502              :     {
    2503              :         /* go on right sublink */
    2504         1736 :         stat->stackpos++;
    2505         1736 :         node = node->right;
    2506              : 
    2507              :         /* find most-left value */
    2508              :         for (;;)
    2509              :         {
    2510         3408 :             stat->stack[stat->stackpos] = node;
    2511         3408 :             if (node->left)
    2512              :             {
    2513         1672 :                 stat->stackpos++;
    2514         1672 :                 node = node->left;
    2515              :             }
    2516              :             else
    2517         1736 :                 break;
    2518              :         }
    2519         1736 :         Assert(stat->stackpos <= stat->maxdepth);
    2520              :     }
    2521              :     else
    2522              :     {
    2523              :         /* we already return all left subtree, itself and  right subtree */
    2524         3432 :         if (stat->stackpos == 0)
    2525            6 :             return NULL;
    2526              : 
    2527         3426 :         stat->stackpos--;
    2528         3426 :         return walkStatEntryTree(stat);
    2529              :     }
    2530              : 
    2531         1736 :     return node;
    2532              : }
    2533              : 
    2534              : static Datum
    2535         3438 : ts_process_call(FuncCallContext *funcctx)
    2536              : {
    2537              :     TSVectorStat *st;
    2538              :     StatEntry  *entry;
    2539              : 
    2540         3438 :     st = (TSVectorStat *) funcctx->user_fctx;
    2541              : 
    2542         3438 :     entry = walkStatEntryTree(st);
    2543              : 
    2544         3438 :     if (entry != NULL)
    2545              :     {
    2546              :         Datum       result;
    2547              :         char       *values[3];
    2548              :         char        ndoc[16];
    2549              :         char        nentry[16];
    2550              :         HeapTuple   tuple;
    2551              : 
    2552         3432 :         values[0] = palloc(entry->lenlexeme + 1);
    2553         3432 :         memcpy(values[0], entry->lexeme, entry->lenlexeme);
    2554         3432 :         (values[0])[entry->lenlexeme] = '\0';
    2555         3432 :         sprintf(ndoc, "%d", entry->ndoc);
    2556         3432 :         values[1] = ndoc;
    2557         3432 :         sprintf(nentry, "%d", entry->nentry);
    2558         3432 :         values[2] = nentry;
    2559              : 
    2560         3432 :         tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
    2561         3432 :         result = HeapTupleGetDatum(tuple);
    2562              : 
    2563         3432 :         pfree(values[0]);
    2564              : 
    2565              :         /* mark entry as already visited */
    2566         3432 :         entry->ndoc = 0;
    2567              : 
    2568         3432 :         return result;
    2569              :     }
    2570              : 
    2571            6 :     return (Datum) 0;
    2572              : }
    2573              : 
    2574              : static TSVectorStat *
    2575            6 : ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
    2576              : {
    2577            6 :     char       *query = text_to_cstring(txt);
    2578              :     TSVectorStat *stat;
    2579              :     bool        isnull;
    2580              :     Portal      portal;
    2581              :     SPIPlanPtr  plan;
    2582              : 
    2583            6 :     if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
    2584              :         /* internal error */
    2585            0 :         elog(ERROR, "SPI_prepare(\"%s\") failed", query);
    2586              : 
    2587            6 :     if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
    2588              :         /* internal error */
    2589            0 :         elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
    2590              : 
    2591            6 :     SPI_cursor_fetch(portal, true, 100);
    2592              : 
    2593            6 :     if (SPI_tuptable == NULL ||
    2594            6 :         SPI_tuptable->tupdesc->natts != 1 ||
    2595            6 :         !IsBinaryCoercible(SPI_gettypeid(SPI_tuptable->tupdesc, 1),
    2596              :                            TSVECTOROID))
    2597            0 :         ereport(ERROR,
    2598              :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    2599              :                  errmsg("ts_stat query must return one tsvector column")));
    2600              : 
    2601            6 :     stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
    2602            6 :     stat->maxdepth = 1;
    2603              : 
    2604            6 :     if (ws)
    2605              :     {
    2606              :         char       *buf;
    2607              :         const char *end;
    2608              : 
    2609            3 :         buf = VARDATA_ANY(ws);
    2610            3 :         end = buf + VARSIZE_ANY_EXHDR(ws);
    2611            9 :         while (buf < end)
    2612              :         {
    2613            6 :             int         len = pg_mblen_range(buf, end);
    2614              : 
    2615            6 :             if (len == 1)
    2616              :             {
    2617            6 :                 switch (*buf)
    2618              :                 {
    2619            3 :                     case 'A':
    2620              :                     case 'a':
    2621            3 :                         stat->weight |= 1 << 3;
    2622            3 :                         break;
    2623            3 :                     case 'B':
    2624              :                     case 'b':
    2625            3 :                         stat->weight |= 1 << 2;
    2626            3 :                         break;
    2627            0 :                     case 'C':
    2628              :                     case 'c':
    2629            0 :                         stat->weight |= 1 << 1;
    2630            0 :                         break;
    2631            0 :                     case 'D':
    2632              :                     case 'd':
    2633            0 :                         stat->weight |= 1;
    2634            0 :                         break;
    2635            0 :                     default:
    2636            0 :                         stat->weight |= 0;
    2637              :                 }
    2638              :             }
    2639            6 :             buf += len;
    2640              :         }
    2641              :     }
    2642              : 
    2643           42 :     while (SPI_processed > 0)
    2644              :     {
    2645              :         uint64      i;
    2646              : 
    2647         3090 :         for (i = 0; i < SPI_processed; i++)
    2648              :         {
    2649         3054 :             Datum       data = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
    2650              : 
    2651         3054 :             if (!isnull)
    2652         3054 :                 stat = ts_accum(persistentContext, stat, data);
    2653              :         }
    2654              : 
    2655           36 :         SPI_freetuptable(SPI_tuptable);
    2656           36 :         SPI_cursor_fetch(portal, true, 100);
    2657              :     }
    2658              : 
    2659            6 :     SPI_freetuptable(SPI_tuptable);
    2660            6 :     SPI_cursor_close(portal);
    2661            6 :     SPI_freeplan(plan);
    2662            6 :     pfree(query);
    2663              : 
    2664            6 :     return stat;
    2665              : }
    2666              : 
    2667              : Datum
    2668         3432 : ts_stat1(PG_FUNCTION_ARGS)
    2669              : {
    2670              :     FuncCallContext *funcctx;
    2671              :     Datum       result;
    2672              : 
    2673         3432 :     if (SRF_IS_FIRSTCALL())
    2674              :     {
    2675              :         TSVectorStat *stat;
    2676            3 :         text       *txt = PG_GETARG_TEXT_PP(0);
    2677              : 
    2678            3 :         funcctx = SRF_FIRSTCALL_INIT();
    2679            3 :         SPI_connect();
    2680            3 :         stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, NULL);
    2681            3 :         PG_FREE_IF_COPY(txt, 0);
    2682            3 :         ts_setup_firstcall(fcinfo, funcctx, stat);
    2683            3 :         SPI_finish();
    2684              :     }
    2685              : 
    2686         3432 :     funcctx = SRF_PERCALL_SETUP();
    2687         3432 :     if ((result = ts_process_call(funcctx)) != (Datum) 0)
    2688         3429 :         SRF_RETURN_NEXT(funcctx, result);
    2689            3 :     SRF_RETURN_DONE(funcctx);
    2690              : }
    2691              : 
    2692              : Datum
    2693            6 : ts_stat2(PG_FUNCTION_ARGS)
    2694              : {
    2695              :     FuncCallContext *funcctx;
    2696              :     Datum       result;
    2697              : 
    2698            6 :     if (SRF_IS_FIRSTCALL())
    2699              :     {
    2700              :         TSVectorStat *stat;
    2701            3 :         text       *txt = PG_GETARG_TEXT_PP(0);
    2702            3 :         text       *ws = PG_GETARG_TEXT_PP(1);
    2703              : 
    2704            3 :         funcctx = SRF_FIRSTCALL_INIT();
    2705            3 :         SPI_connect();
    2706            3 :         stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, ws);
    2707            3 :         PG_FREE_IF_COPY(txt, 0);
    2708            3 :         PG_FREE_IF_COPY(ws, 1);
    2709            3 :         ts_setup_firstcall(fcinfo, funcctx, stat);
    2710            3 :         SPI_finish();
    2711              :     }
    2712              : 
    2713            6 :     funcctx = SRF_PERCALL_SETUP();
    2714            6 :     if ((result = ts_process_call(funcctx)) != (Datum) 0)
    2715            3 :         SRF_RETURN_NEXT(funcctx, result);
    2716            3 :     SRF_RETURN_DONE(funcctx);
    2717              : }
    2718              : 
    2719              : 
    2720              : /*
    2721              :  * Triggers for automatic update of a tsvector column from text column(s)
    2722              :  *
    2723              :  * Trigger arguments are either
    2724              :  *      name of tsvector col, name of tsconfig to use, name(s) of text col(s)
    2725              :  *      name of tsvector col, name of regconfig col, name(s) of text col(s)
    2726              :  * ie, tsconfig can either be specified by name, or indirectly as the
    2727              :  * contents of a regconfig field in the row.  If the name is used, it must
    2728              :  * be explicitly schema-qualified.
    2729              :  */
    2730              : Datum
    2731            9 : tsvector_update_trigger_byid(PG_FUNCTION_ARGS)
    2732              : {
    2733            9 :     return tsvector_update_trigger(fcinfo, false);
    2734              : }
    2735              : 
    2736              : Datum
    2737            0 : tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS)
    2738              : {
    2739            0 :     return tsvector_update_trigger(fcinfo, true);
    2740              : }
    2741              : 
    2742              : static Datum
    2743            9 : tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
    2744              : {
    2745              :     TriggerData *trigdata;
    2746              :     Trigger    *trigger;
    2747              :     Relation    rel;
    2748            9 :     HeapTuple   rettuple = NULL;
    2749              :     int         tsvector_attr_num,
    2750              :                 i;
    2751              :     ParsedText  prs;
    2752              :     Datum       datum;
    2753              :     bool        isnull;
    2754              :     text       *txt;
    2755              :     Oid         cfgId;
    2756              :     bool        update_needed;
    2757              : 
    2758              :     /* Check call context */
    2759            9 :     if (!CALLED_AS_TRIGGER(fcinfo)) /* internal error */
    2760            0 :         elog(ERROR, "tsvector_update_trigger: not fired by trigger manager");
    2761              : 
    2762            9 :     trigdata = (TriggerData *) fcinfo->context;
    2763            9 :     if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
    2764            0 :         elog(ERROR, "tsvector_update_trigger: must be fired for row");
    2765            9 :     if (!TRIGGER_FIRED_BEFORE(trigdata->tg_event))
    2766            0 :         elog(ERROR, "tsvector_update_trigger: must be fired BEFORE event");
    2767              : 
    2768            9 :     if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
    2769              :     {
    2770            6 :         rettuple = trigdata->tg_trigtuple;
    2771            6 :         update_needed = true;
    2772              :     }
    2773            3 :     else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
    2774              :     {
    2775            3 :         rettuple = trigdata->tg_newtuple;
    2776            3 :         update_needed = false;  /* computed below */
    2777              :     }
    2778              :     else
    2779            0 :         elog(ERROR, "tsvector_update_trigger: must be fired for INSERT or UPDATE");
    2780              : 
    2781            9 :     trigger = trigdata->tg_trigger;
    2782            9 :     rel = trigdata->tg_relation;
    2783              : 
    2784            9 :     if (trigger->tgnargs < 3)
    2785            0 :         elog(ERROR, "tsvector_update_trigger: arguments must be tsvector_field, ts_config, text_field1, ...)");
    2786              : 
    2787              :     /* Find the target tsvector column */
    2788            9 :     tsvector_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
    2789            9 :     if (tsvector_attr_num == SPI_ERROR_NOATTRIBUTE)
    2790            0 :         ereport(ERROR,
    2791              :                 (errcode(ERRCODE_UNDEFINED_COLUMN),
    2792              :                  errmsg("tsvector column \"%s\" does not exist",
    2793              :                         trigger->tgargs[0])));
    2794              :     /* This will effectively reject system columns, so no separate test: */
    2795            9 :     if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, tsvector_attr_num),
    2796              :                            TSVECTOROID))
    2797            0 :         ereport(ERROR,
    2798              :                 (errcode(ERRCODE_DATATYPE_MISMATCH),
    2799              :                  errmsg("column \"%s\" is not of tsvector type",
    2800              :                         trigger->tgargs[0])));
    2801              : 
    2802              :     /* Find the configuration to use */
    2803            9 :     if (config_column)
    2804              :     {
    2805              :         int         config_attr_num;
    2806              : 
    2807            0 :         config_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[1]);
    2808            0 :         if (config_attr_num == SPI_ERROR_NOATTRIBUTE)
    2809            0 :             ereport(ERROR,
    2810              :                     (errcode(ERRCODE_UNDEFINED_COLUMN),
    2811              :                      errmsg("configuration column \"%s\" does not exist",
    2812              :                             trigger->tgargs[1])));
    2813            0 :         if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, config_attr_num),
    2814              :                                REGCONFIGOID))
    2815            0 :             ereport(ERROR,
    2816              :                     (errcode(ERRCODE_DATATYPE_MISMATCH),
    2817              :                      errmsg("column \"%s\" is not of regconfig type",
    2818              :                             trigger->tgargs[1])));
    2819              : 
    2820            0 :         datum = SPI_getbinval(rettuple, rel->rd_att, config_attr_num, &isnull);
    2821            0 :         if (isnull)
    2822            0 :             ereport(ERROR,
    2823              :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
    2824              :                      errmsg("configuration column \"%s\" must not be null",
    2825              :                             trigger->tgargs[1])));
    2826            0 :         cfgId = DatumGetObjectId(datum);
    2827              :     }
    2828              :     else
    2829              :     {
    2830              :         List       *names;
    2831              : 
    2832            9 :         names = stringToQualifiedNameList(trigger->tgargs[1], NULL);
    2833              :         /* require a schema so that results are not search path dependent */
    2834            9 :         if (list_length(names) < 2)
    2835            0 :             ereport(ERROR,
    2836              :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    2837              :                      errmsg("text search configuration name \"%s\" must be schema-qualified",
    2838              :                             trigger->tgargs[1])));
    2839            9 :         cfgId = get_ts_config_oid(names, false);
    2840              :     }
    2841              : 
    2842              :     /* initialize parse state */
    2843            9 :     prs.lenwords = 32;
    2844            9 :     prs.curwords = 0;
    2845            9 :     prs.pos = 0;
    2846            9 :     prs.words = palloc_array(ParsedWord, prs.lenwords);
    2847              : 
    2848              :     /* find all words in indexable column(s) */
    2849           18 :     for (i = 2; i < trigger->tgnargs; i++)
    2850              :     {
    2851              :         int         numattr;
    2852              : 
    2853            9 :         numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
    2854            9 :         if (numattr == SPI_ERROR_NOATTRIBUTE)
    2855            0 :             ereport(ERROR,
    2856              :                     (errcode(ERRCODE_UNDEFINED_COLUMN),
    2857              :                      errmsg("column \"%s\" does not exist",
    2858              :                             trigger->tgargs[i])));
    2859            9 :         if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, numattr), TEXTOID))
    2860            0 :             ereport(ERROR,
    2861              :                     (errcode(ERRCODE_DATATYPE_MISMATCH),
    2862              :                      errmsg("column \"%s\" is not of a character type",
    2863              :                             trigger->tgargs[i])));
    2864              : 
    2865            9 :         if (bms_is_member(numattr - FirstLowInvalidHeapAttributeNumber, trigdata->tg_updatedcols))
    2866            3 :             update_needed = true;
    2867              : 
    2868            9 :         datum = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
    2869            9 :         if (isnull)
    2870            3 :             continue;
    2871              : 
    2872            6 :         txt = DatumGetTextPP(datum);
    2873              : 
    2874            6 :         parsetext(cfgId, &prs, VARDATA_ANY(txt), VARSIZE_ANY_EXHDR(txt));
    2875              : 
    2876            6 :         if (txt != (text *) DatumGetPointer(datum))
    2877            0 :             pfree(txt);
    2878              :     }
    2879              : 
    2880            9 :     if (update_needed)
    2881              :     {
    2882              :         /* make tsvector value */
    2883            9 :         datum = TSVectorGetDatum(make_tsvector(&prs));
    2884            9 :         isnull = false;
    2885              : 
    2886              :         /* and insert it into tuple */
    2887            9 :         rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
    2888              :                                              1, &tsvector_attr_num,
    2889              :                                              &datum, &isnull);
    2890              : 
    2891            9 :         pfree(DatumGetPointer(datum));
    2892              :     }
    2893              : 
    2894            9 :     return PointerGetDatum(rettuple);
    2895              : }
        

Generated by: LCOV version 2.0-1