LCOV - code coverage report
Current view: top level - src/backend/utils/adt - tsvector_op.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 1014 1174 86.4 %
Date: 2023-12-11 16:10:55 Functions: 43 52 82.7 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * tsvector_op.c
       4             :  *    operations over tsvector
       5             :  *
       6             :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       7             :  *
       8             :  *
       9             :  * IDENTIFICATION
      10             :  *    src/backend/utils/adt/tsvector_op.c
      11             :  *
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : #include "postgres.h"
      15             : 
      16             : #include <limits.h>
      17             : 
      18             : #include "access/htup_details.h"
      19             : #include "catalog/namespace.h"
      20             : #include "catalog/pg_type.h"
      21             : #include "commands/trigger.h"
      22             : #include "executor/spi.h"
      23             : #include "funcapi.h"
      24             : #include "lib/qunique.h"
      25             : #include "mb/pg_wchar.h"
      26             : #include "miscadmin.h"
      27             : #include "parser/parse_coerce.h"
      28             : #include "tsearch/ts_utils.h"
      29             : #include "utils/array.h"
      30             : #include "utils/builtins.h"
      31             : #include "utils/lsyscache.h"
      32             : #include "utils/regproc.h"
      33             : #include "utils/rel.h"
      34             : 
      35             : 
      36             : typedef struct
      37             : {
      38             :     WordEntry  *arrb;
      39             :     WordEntry  *arre;
      40             :     char       *values;
      41             :     char       *operand;
      42             : } CHKVAL;
      43             : 
      44             : 
      45             : typedef struct StatEntry
      46             : {
      47             :     uint32      ndoc;           /* zero indicates that we were already here
      48             :                                  * while walking through the tree */
      49             :     uint32      nentry;
      50             :     struct StatEntry *left;
      51             :     struct StatEntry *right;
      52             :     uint32      lenlexeme;
      53             :     char        lexeme[FLEXIBLE_ARRAY_MEMBER];
      54             : } StatEntry;
      55             : 
      56             : #define STATENTRYHDRSZ  (offsetof(StatEntry, lexeme))
      57             : 
      58             : typedef struct
      59             : {
      60             :     int32       weight;
      61             : 
      62             :     uint32      maxdepth;
      63             : 
      64             :     StatEntry **stack;
      65             :     uint32      stackpos;
      66             : 
      67             :     StatEntry  *root;
      68             : } TSVectorStat;
      69             : 
      70             : 
      71             : static TSTernaryValue TS_execute_recurse(QueryItem *curitem, void *arg,
      72             :                                          uint32 flags,
      73             :                                          TSExecuteCallback chkcond);
      74             : static bool TS_execute_locations_recurse(QueryItem *curitem,
      75             :                                          void *arg,
      76             :                                          TSExecuteCallback chkcond,
      77             :                                          List **locations);
      78             : static int  tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len);
      79             : static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
      80             : 
      81             : 
      82             : /*
      83             :  * Order: haspos, len, word, for all positions (pos, weight)
      84             :  */
      85             : static int
      86           2 : silly_cmp_tsvector(const TSVector a, const TSVector b)
      87             : {
      88           2 :     if (VARSIZE(a) < VARSIZE(b))
      89           0 :         return -1;
      90           2 :     else if (VARSIZE(a) > VARSIZE(b))
      91           0 :         return 1;
      92           2 :     else if (a->size < b->size)
      93           0 :         return -1;
      94           2 :     else if (a->size > b->size)
      95           0 :         return 1;
      96             :     else
      97             :     {
      98           2 :         WordEntry  *aptr = ARRPTR(a);
      99           2 :         WordEntry  *bptr = ARRPTR(b);
     100           2 :         int         i = 0;
     101             :         int         res;
     102             : 
     103             : 
     104           8 :         for (i = 0; i < a->size; i++)
     105             :         {
     106           6 :             if (aptr->haspos != bptr->haspos)
     107             :             {
     108           0 :                 return (aptr->haspos > bptr->haspos) ? -1 : 1;
     109             :             }
     110           6 :             else if ((res = tsCompareString(STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) != 0)
     111             :             {
     112           0 :                 return res;
     113             :             }
     114           6 :             else if (aptr->haspos)
     115             :             {
     116           0 :                 WordEntryPos *ap = POSDATAPTR(a, aptr);
     117           0 :                 WordEntryPos *bp = POSDATAPTR(b, bptr);
     118             :                 int         j;
     119             : 
     120           0 :                 if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
     121           0 :                     return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;
     122             : 
     123           0 :                 for (j = 0; j < POSDATALEN(a, aptr); j++)
     124             :                 {
     125           0 :                     if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
     126             :                     {
     127           0 :                         return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1;
     128             :                     }
     129           0 :                     else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp))
     130             :                     {
     131           0 :                         return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1;
     132             :                     }
     133           0 :                     ap++, bp++;
     134             :                 }
     135             :             }
     136             : 
     137           6 :             aptr++;
     138           6 :             bptr++;
     139             :         }
     140             :     }
     141             : 
     142           2 :     return 0;
     143             : }
     144             : 
     145             : #define TSVECTORCMPFUNC( type, action, ret )            \
     146             : Datum                                                   \
     147             : tsvector_##type(PG_FUNCTION_ARGS)                       \
     148             : {                                                       \
     149             :     TSVector    a = PG_GETARG_TSVECTOR(0);              \
     150             :     TSVector    b = PG_GETARG_TSVECTOR(1);              \
     151             :     int         res = silly_cmp_tsvector(a, b);         \
     152             :     PG_FREE_IF_COPY(a,0);                               \
     153             :     PG_FREE_IF_COPY(b,1);                               \
     154             :     PG_RETURN_##ret( res action 0 );                    \
     155             : }   \
     156             : /* keep compiler quiet - no extra ; */                  \
     157             : extern int no_such_variable
     158             : 
     159           0 : TSVECTORCMPFUNC(lt, <, BOOL);
     160           0 : TSVECTORCMPFUNC(le, <=, BOOL);
     161           2 : TSVECTORCMPFUNC(eq, ==, BOOL);
     162           0 : TSVECTORCMPFUNC(ge, >=, BOOL);
     163           0 : TSVECTORCMPFUNC(gt, >, BOOL);
     164           0 : TSVECTORCMPFUNC(ne, !=, BOOL);
     165           0 : TSVECTORCMPFUNC(cmp, +, INT32);
     166             : 
     167             : Datum
     168          90 : tsvector_strip(PG_FUNCTION_ARGS)
     169             : {
     170          90 :     TSVector    in = PG_GETARG_TSVECTOR(0);
     171             :     TSVector    out;
     172             :     int         i,
     173          90 :                 len = 0;
     174          90 :     WordEntry  *arrin = ARRPTR(in),
     175             :                *arrout;
     176             :     char       *cur;
     177             : 
     178         318 :     for (i = 0; i < in->size; i++)
     179         228 :         len += arrin[i].len;
     180             : 
     181          90 :     len = CALCDATASIZE(in->size, len);
     182          90 :     out = (TSVector) palloc0(len);
     183          90 :     SET_VARSIZE(out, len);
     184          90 :     out->size = in->size;
     185          90 :     arrout = ARRPTR(out);
     186          90 :     cur = STRPTR(out);
     187         318 :     for (i = 0; i < in->size; i++)
     188             :     {
     189         228 :         memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
     190         228 :         arrout[i].haspos = 0;
     191         228 :         arrout[i].len = arrin[i].len;
     192         228 :         arrout[i].pos = cur - STRPTR(out);
     193         228 :         cur += arrout[i].len;
     194             :     }
     195             : 
     196          90 :     PG_FREE_IF_COPY(in, 0);
     197          90 :     PG_RETURN_POINTER(out);
     198             : }
     199             : 
     200             : Datum
     201          10 : tsvector_length(PG_FUNCTION_ARGS)
     202             : {
     203          10 :     TSVector    in = PG_GETARG_TSVECTOR(0);
     204          10 :     int32       ret = in->size;
     205             : 
     206          10 :     PG_FREE_IF_COPY(in, 0);
     207          10 :     PG_RETURN_INT32(ret);
     208             : }
     209             : 
     210             : Datum
     211          12 : tsvector_setweight(PG_FUNCTION_ARGS)
     212             : {
     213          12 :     TSVector    in = PG_GETARG_TSVECTOR(0);
     214          12 :     char        cw = PG_GETARG_CHAR(1);
     215             :     TSVector    out;
     216             :     int         i,
     217             :                 j;
     218             :     WordEntry  *entry;
     219             :     WordEntryPos *p;
     220          12 :     int         w = 0;
     221             : 
     222          12 :     switch (cw)
     223             :     {
     224           0 :         case 'A':
     225             :         case 'a':
     226           0 :             w = 3;
     227           0 :             break;
     228           0 :         case 'B':
     229             :         case 'b':
     230           0 :             w = 2;
     231           0 :             break;
     232          12 :         case 'C':
     233             :         case 'c':
     234          12 :             w = 1;
     235          12 :             break;
     236           0 :         case 'D':
     237             :         case 'd':
     238           0 :             w = 0;
     239           0 :             break;
     240           0 :         default:
     241             :             /* internal error */
     242           0 :             elog(ERROR, "unrecognized weight: %d", cw);
     243             :     }
     244             : 
     245          12 :     out = (TSVector) palloc(VARSIZE(in));
     246          12 :     memcpy(out, in, VARSIZE(in));
     247          12 :     entry = ARRPTR(out);
     248          12 :     i = out->size;
     249          60 :     while (i--)
     250             :     {
     251          48 :         if ((j = POSDATALEN(out, entry)) != 0)
     252             :         {
     253          48 :             p = POSDATAPTR(out, entry);
     254         168 :             while (j--)
     255             :             {
     256         120 :                 WEP_SETWEIGHT(*p, w);
     257         120 :                 p++;
     258             :             }
     259             :         }
     260          48 :         entry++;
     261             :     }
     262             : 
     263          12 :     PG_FREE_IF_COPY(in, 0);
     264          12 :     PG_RETURN_POINTER(out);
     265             : }
     266             : 
     267             : /*
     268             :  * setweight(tsin tsvector, char_weight "char", lexemes "text"[])
     269             :  *
     270             :  * Assign weight w to elements of tsin that are listed in lexemes.
     271             :  */
     272             : Datum
     273          24 : tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
     274             : {
     275          24 :     TSVector    tsin = PG_GETARG_TSVECTOR(0);
     276          24 :     char        char_weight = PG_GETARG_CHAR(1);
     277          24 :     ArrayType  *lexemes = PG_GETARG_ARRAYTYPE_P(2);
     278             : 
     279             :     TSVector    tsout;
     280             :     int         i,
     281             :                 j,
     282             :                 nlexemes,
     283             :                 weight;
     284             :     WordEntry  *entry;
     285             :     Datum      *dlexemes;
     286             :     bool       *nulls;
     287             : 
     288          24 :     switch (char_weight)
     289             :     {
     290           0 :         case 'A':
     291             :         case 'a':
     292           0 :             weight = 3;
     293           0 :             break;
     294           0 :         case 'B':
     295             :         case 'b':
     296           0 :             weight = 2;
     297           0 :             break;
     298          24 :         case 'C':
     299             :         case 'c':
     300          24 :             weight = 1;
     301          24 :             break;
     302           0 :         case 'D':
     303             :         case 'd':
     304           0 :             weight = 0;
     305           0 :             break;
     306           0 :         default:
     307             :             /* internal error */
     308           0 :             elog(ERROR, "unrecognized weight: %c", char_weight);
     309             :     }
     310             : 
     311          24 :     tsout = (TSVector) palloc(VARSIZE(tsin));
     312          24 :     memcpy(tsout, tsin, VARSIZE(tsin));
     313          24 :     entry = ARRPTR(tsout);
     314             : 
     315          24 :     deconstruct_array_builtin(lexemes, TEXTOID, &dlexemes, &nulls, &nlexemes);
     316             : 
     317             :     /*
     318             :      * Assuming that lexemes array is significantly shorter than tsvector we
     319             :      * can iterate through lexemes performing binary search of each lexeme
     320             :      * from lexemes in tsvector.
     321             :      */
     322          72 :     for (i = 0; i < nlexemes; i++)
     323             :     {
     324             :         char       *lex;
     325             :         int         lex_len,
     326             :                     lex_pos;
     327             : 
     328             :         /* Ignore null array elements, they surely don't match */
     329          48 :         if (nulls[i])
     330           6 :             continue;
     331             : 
     332          42 :         lex = VARDATA(dlexemes[i]);
     333          42 :         lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
     334          42 :         lex_pos = tsvector_bsearch(tsout, lex, lex_len);
     335             : 
     336          42 :         if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0)
     337             :         {
     338          24 :             WordEntryPos *p = POSDATAPTR(tsout, entry + lex_pos);
     339             : 
     340          78 :             while (j--)
     341             :             {
     342          54 :                 WEP_SETWEIGHT(*p, weight);
     343          54 :                 p++;
     344             :             }
     345             :         }
     346             :     }
     347             : 
     348          24 :     PG_FREE_IF_COPY(tsin, 0);
     349          24 :     PG_FREE_IF_COPY(lexemes, 2);
     350             : 
     351          24 :     PG_RETURN_POINTER(tsout);
     352             : }
     353             : 
     354             : #define compareEntry(pa, a, pb, b) \
     355             :     tsCompareString((pa) + (a)->pos, (a)->len,    \
     356             :                     (pb) + (b)->pos, (b)->len,    \
     357             :                     false)
     358             : 
     359             : /*
     360             :  * Add positions from src to dest after offsetting them by maxpos.
     361             :  * Return the number added (might be less than expected due to overflow)
     362             :  */
     363             : static int32
     364          12 : add_pos(TSVector src, WordEntry *srcptr,
     365             :         TSVector dest, WordEntry *destptr,
     366             :         int32 maxpos)
     367             : {
     368          12 :     uint16     *clen = &_POSVECPTR(dest, destptr)->npos;
     369             :     int         i;
     370          12 :     uint16      slen = POSDATALEN(src, srcptr),
     371             :                 startlen;
     372          12 :     WordEntryPos *spos = POSDATAPTR(src, srcptr),
     373          12 :                *dpos = POSDATAPTR(dest, destptr);
     374             : 
     375          12 :     if (!destptr->haspos)
     376           0 :         *clen = 0;
     377             : 
     378          12 :     startlen = *clen;
     379          12 :     for (i = 0;
     380          24 :          i < slen && *clen < MAXNUMPOS &&
     381          12 :          (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1);
     382          12 :          i++)
     383             :     {
     384          12 :         WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
     385          12 :         WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
     386          12 :         (*clen)++;
     387             :     }
     388             : 
     389          12 :     if (*clen != startlen)
     390          12 :         destptr->haspos = 1;
     391          12 :     return *clen - startlen;
     392             : }
     393             : 
     394             : /*
     395             :  * Perform binary search of given lexeme in TSVector.
     396             :  * Returns lexeme position in TSVector's entry array or -1 if lexeme wasn't
     397             :  * found.
     398             :  */
     399             : static int
     400         198 : tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len)
     401             : {
     402         198 :     WordEntry  *arrin = ARRPTR(tsv);
     403         198 :     int         StopLow = 0,
     404         198 :                 StopHigh = tsv->size,
     405             :                 StopMiddle,
     406             :                 cmp;
     407             : 
     408         522 :     while (StopLow < StopHigh)
     409             :     {
     410         462 :         StopMiddle = (StopLow + StopHigh) / 2;
     411             : 
     412         462 :         cmp = tsCompareString(lexeme, lexeme_len,
     413         462 :                               STRPTR(tsv) + arrin[StopMiddle].pos,
     414         462 :                               arrin[StopMiddle].len,
     415             :                               false);
     416             : 
     417         462 :         if (cmp < 0)
     418         216 :             StopHigh = StopMiddle;
     419         246 :         else if (cmp > 0)
     420         108 :             StopLow = StopMiddle + 1;
     421             :         else                    /* found it */
     422         138 :             return StopMiddle;
     423             :     }
     424             : 
     425          60 :     return -1;
     426             : }
     427             : 
     428             : /*
     429             :  * qsort comparator functions
     430             :  */
     431             : 
     432             : static int
     433          78 : compare_int(const void *va, const void *vb)
     434             : {
     435          78 :     int         a = *((const int *) va);
     436          78 :     int         b = *((const int *) vb);
     437             : 
     438          78 :     if (a == b)
     439          12 :         return 0;
     440          66 :     return (a > b) ? 1 : -1;
     441             : }
     442             : 
     443             : static int
     444         102 : compare_text_lexemes(const void *va, const void *vb)
     445             : {
     446         102 :     Datum       a = *((const Datum *) va);
     447         102 :     Datum       b = *((const Datum *) vb);
     448         102 :     char       *alex = VARDATA_ANY(a);
     449         102 :     int         alex_len = VARSIZE_ANY_EXHDR(a);
     450         102 :     char       *blex = VARDATA_ANY(b);
     451         102 :     int         blex_len = VARSIZE_ANY_EXHDR(b);
     452             : 
     453         102 :     return tsCompareString(alex, alex_len, blex, blex_len, false);
     454             : }
     455             : 
     456             : /*
     457             :  * Internal routine to delete lexemes from TSVector by array of offsets.
     458             :  *
     459             :  * int *indices_to_delete -- array of lexeme offsets to delete (modified here!)
     460             :  * int indices_count -- size of that array
     461             :  *
     462             :  * Returns new TSVector without given lexemes along with their positions
     463             :  * and weights.
     464             :  */
     465             : static TSVector
     466          66 : tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
     467             :                            int indices_count)
     468             : {
     469             :     TSVector    tsout;
     470          66 :     WordEntry  *arrin = ARRPTR(tsv),
     471             :                *arrout;
     472          66 :     char       *data = STRPTR(tsv),
     473             :                *dataout;
     474             :     int         i,              /* index in arrin */
     475             :                 j,              /* index in arrout */
     476             :                 k,              /* index in indices_to_delete */
     477             :                 curoff;         /* index in dataout area */
     478             : 
     479             :     /*
     480             :      * Sort the filter array to simplify membership checks below.  Also, get
     481             :      * rid of any duplicate entries, so that we can assume that indices_count
     482             :      * is exactly equal to the number of lexemes that will be removed.
     483             :      */
     484          66 :     if (indices_count > 1)
     485             :     {
     486          30 :         qsort(indices_to_delete, indices_count, sizeof(int), compare_int);
     487          30 :         indices_count = qunique(indices_to_delete, indices_count, sizeof(int),
     488             :                                 compare_int);
     489             :     }
     490             : 
     491             :     /*
     492             :      * Here we overestimate tsout size, since we don't know how much space is
     493             :      * used by the deleted lexeme(s).  We will set exact size below.
     494             :      */
     495          66 :     tsout = (TSVector) palloc0(VARSIZE(tsv));
     496             : 
     497             :     /* This count must be correct because STRPTR(tsout) relies on it. */
     498          66 :     tsout->size = tsv->size - indices_count;
     499             : 
     500             :     /*
     501             :      * Copy tsv to tsout, skipping lexemes listed in indices_to_delete.
     502             :      */
     503          66 :     arrout = ARRPTR(tsout);
     504          66 :     dataout = STRPTR(tsout);
     505          66 :     curoff = 0;
     506         396 :     for (i = j = k = 0; i < tsv->size; i++)
     507             :     {
     508             :         /*
     509             :          * If current i is present in indices_to_delete, skip this lexeme.
     510             :          * Since indices_to_delete is already sorted, we only need to check
     511             :          * the current (k'th) entry.
     512             :          */
     513         330 :         if (k < indices_count && i == indices_to_delete[k])
     514             :         {
     515          96 :             k++;
     516          96 :             continue;
     517             :         }
     518             : 
     519             :         /* Copy lexeme and its positions and weights */
     520         234 :         memcpy(dataout + curoff, data + arrin[i].pos, arrin[i].len);
     521         234 :         arrout[j].haspos = arrin[i].haspos;
     522         234 :         arrout[j].len = arrin[i].len;
     523         234 :         arrout[j].pos = curoff;
     524         234 :         curoff += arrin[i].len;
     525         234 :         if (arrin[i].haspos)
     526             :         {
     527         156 :             int         len = POSDATALEN(tsv, arrin + i) * sizeof(WordEntryPos)
     528         156 :                 + sizeof(uint16);
     529             : 
     530         156 :             curoff = SHORTALIGN(curoff);
     531         156 :             memcpy(dataout + curoff,
     532         156 :                    STRPTR(tsv) + SHORTALIGN(arrin[i].pos + arrin[i].len),
     533             :                    len);
     534         156 :             curoff += len;
     535             :         }
     536             : 
     537         234 :         j++;
     538             :     }
     539             : 
     540             :     /*
     541             :      * k should now be exactly equal to indices_count. If it isn't then the
     542             :      * caller provided us with indices outside of [0, tsv->size) range and
     543             :      * estimation of tsout's size is wrong.
     544             :      */
     545             :     Assert(k == indices_count);
     546             : 
     547          66 :     SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, curoff));
     548          66 :     return tsout;
     549             : }
     550             : 
     551             : /*
     552             :  * Delete given lexeme from tsvector.
     553             :  * Implementation of user-level ts_delete(tsvector, text).
     554             :  */
     555             : Datum
     556          36 : tsvector_delete_str(PG_FUNCTION_ARGS)
     557             : {
     558          36 :     TSVector    tsin = PG_GETARG_TSVECTOR(0),
     559             :                 tsout;
     560          36 :     text       *tlexeme = PG_GETARG_TEXT_PP(1);
     561          36 :     char       *lexeme = VARDATA_ANY(tlexeme);
     562          36 :     int         lexeme_len = VARSIZE_ANY_EXHDR(tlexeme),
     563             :                 skip_index;
     564             : 
     565          36 :     if ((skip_index = tsvector_bsearch(tsin, lexeme, lexeme_len)) == -1)
     566          12 :         PG_RETURN_POINTER(tsin);
     567             : 
     568          24 :     tsout = tsvector_delete_by_indices(tsin, &skip_index, 1);
     569             : 
     570          24 :     PG_FREE_IF_COPY(tsin, 0);
     571          24 :     PG_FREE_IF_COPY(tlexeme, 1);
     572          24 :     PG_RETURN_POINTER(tsout);
     573             : }
     574             : 
     575             : /*
     576             :  * Delete given array of lexemes from tsvector.
     577             :  * Implementation of user-level ts_delete(tsvector, text[]).
     578             :  */
     579             : Datum
     580          42 : tsvector_delete_arr(PG_FUNCTION_ARGS)
     581             : {
     582          42 :     TSVector    tsin = PG_GETARG_TSVECTOR(0),
     583             :                 tsout;
     584          42 :     ArrayType  *lexemes = PG_GETARG_ARRAYTYPE_P(1);
     585             :     int         i,
     586             :                 nlex,
     587             :                 skip_count,
     588             :                *skip_indices;
     589             :     Datum      *dlexemes;
     590             :     bool       *nulls;
     591             : 
     592          42 :     deconstruct_array_builtin(lexemes, TEXTOID, &dlexemes, &nulls, &nlex);
     593             : 
     594             :     /*
     595             :      * In typical use case array of lexemes to delete is relatively small. So
     596             :      * here we optimize things for that scenario: iterate through lexarr
     597             :      * performing binary search of each lexeme from lexarr in tsvector.
     598             :      */
     599          42 :     skip_indices = palloc0(nlex * sizeof(int));
     600         168 :     for (i = skip_count = 0; i < nlex; i++)
     601             :     {
     602             :         char       *lex;
     603             :         int         lex_len,
     604             :                     lex_pos;
     605             : 
     606             :         /* Ignore null array elements, they surely don't match */
     607         126 :         if (nulls[i])
     608           6 :             continue;
     609             : 
     610         120 :         lex = VARDATA(dlexemes[i]);
     611         120 :         lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
     612         120 :         lex_pos = tsvector_bsearch(tsin, lex, lex_len);
     613             : 
     614         120 :         if (lex_pos >= 0)
     615          78 :             skip_indices[skip_count++] = lex_pos;
     616             :     }
     617             : 
     618          42 :     tsout = tsvector_delete_by_indices(tsin, skip_indices, skip_count);
     619             : 
     620          42 :     pfree(skip_indices);
     621          42 :     PG_FREE_IF_COPY(tsin, 0);
     622          42 :     PG_FREE_IF_COPY(lexemes, 1);
     623             : 
     624          42 :     PG_RETURN_POINTER(tsout);
     625             : }
     626             : 
     627             : /*
     628             :  * Expand tsvector as table with following columns:
     629             :  *     lexeme: lexeme text
     630             :  *     positions: integer array of lexeme positions
     631             :  *     weights: char array of weights corresponding to positions
                        *
                        * This is a set-returning function.  The first call builds the
                        * three-column tuple descriptor and saves a copy of the input tsvector
                        * in the multi-call memory context; every call then emits one row for
                        * the lexeme whose index equals funcctx->call_cntr, until all
                        * tsin->size lexemes have been returned.
                        *
                        * For a lexeme without position data, "positions" and "weights" are
                        * both returned as NULL.  Each weight is emitted as a one-character
                        * text value (the "weights" column is declared as TEXTARRAYOID).
     632             :  */
     633             : Datum
     634         180 : tsvector_unnest(PG_FUNCTION_ARGS)
     635             : {
     636             :     FuncCallContext *funcctx;
     637             :     TSVector    tsin;
     638             : 
     639         180 :     if (SRF_IS_FIRSTCALL())
     640             :     {
     641             :         MemoryContext oldcontext;
     642             :         TupleDesc   tupdesc;
     643             : 
                               /* One-time setup, done inside the multi-call memory context. */
     644          30 :         funcctx = SRF_FIRSTCALL_INIT();
     645          30 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
     646             : 
     647          30 :         tupdesc = CreateTemplateTupleDesc(3);
     648          30 :         TupleDescInitEntry(tupdesc, (AttrNumber) 1, "lexeme",
     649             :                            TEXTOID, -1, 0);
     650          30 :         TupleDescInitEntry(tupdesc, (AttrNumber) 2, "positions",
     651             :                            INT2ARRAYOID, -1, 0);
     652          30 :         TupleDescInitEntry(tupdesc, (AttrNumber) 3, "weights",
     653             :                            TEXTARRAYOID, -1, 0);
     654          30 :         if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
     655           0 :             elog(ERROR, "return type must be a row type");
     656          30 :         funcctx->tuple_desc = tupdesc;
     657             : 
                               /* Keep a private copy of the argument for use by the later calls. */
     658          30 :         funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);
     659             : 
     660          30 :         MemoryContextSwitchTo(oldcontext);
     661             :     }
     662             : 
     663         180 :     funcctx = SRF_PERCALL_SETUP();
     664         180 :     tsin = (TSVector) funcctx->user_fctx;
     665             : 
                           /* call_cntr doubles as the index of the lexeme to emit on this call. */
     666         180 :     if (funcctx->call_cntr < tsin->size)
     667             :     {
     668         150 :         WordEntry  *arrin = ARRPTR(tsin);
     669         150 :         char       *data = STRPTR(tsin);
     670             :         HeapTuple   tuple;
     671             :         int         j,
     672         150 :                     i = funcctx->call_cntr;
     673         150 :         bool        nulls[] = {false, false, false};
     674             :         Datum       values[3];
     675             : 
     676         150 :         values[0] = PointerGetDatum(cstring_to_text_with_len(data + arrin[i].pos, arrin[i].len));
     677             : 
     678         150 :         if (arrin[i].haspos)
     679             :         {
     680             :             WordEntryPosVector *posv;
     681             :             Datum      *positions;
     682             :             Datum      *weights;
     683             :             char        weight;
     684             : 
     685             :             /*
     686             :              * Internally tsvector stores position and weight in the same
     687             :              * uint16 (2 bits for weight, 14 for position). Here we extract
     688             :              * that in two separate arrays.
     689             :              */
     690          90 :             posv = _POSVECPTR(tsin, arrin + i);
     691          90 :             positions = palloc(posv->npos * sizeof(Datum));
     692          90 :             weights = palloc(posv->npos * sizeof(Datum));
     693         252 :             for (j = 0; j < posv->npos; j++)
     694             :             {
     695         162 :                 positions[j] = Int16GetDatum(WEP_GETPOS(posv->pos[j]));
                                       /* A stored weight value w is reported as the letter 'D' - w. */
     696         162 :                 weight = 'D' - WEP_GETWEIGHT(posv->pos[j]);
     697         162 :                 weights[j] = PointerGetDatum(cstring_to_text_with_len(&weight,
     698             :                                                                       1));
     699             :             }
     700             : 
     701          90 :             values[1] = PointerGetDatum(construct_array_builtin(positions, posv->npos, INT2OID));
     702          90 :             values[2] = PointerGetDatum(construct_array_builtin(weights, posv->npos, TEXTOID));
     703             :         }
     704             :         else
     705             :         {
                                   /* No position data: both array columns are returned as NULL. */
     706          60 :             nulls[1] = nulls[2] = true;
     707             :         }
     708             : 
     709         150 :         tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
     710         150 :         SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
     711             :     }
     712             :     else
     713             :     {
     714          30 :         SRF_RETURN_DONE(funcctx);
     715             :     }
     716             : }
     717             : 
     718             : /*
     719             :  * Convert tsvector to array of lexemes.
                        *
                        * The result is a text array holding one element per lexeme, in the
                        * order the entries are stored in the tsvector.  Only the lexeme text
                        * is copied; position and weight data are not part of the result.
     720             :  */
     721             : Datum
     722          12 : tsvector_to_array(PG_FUNCTION_ARGS)
     723             : {
     724          12 :     TSVector    tsin = PG_GETARG_TSVECTOR(0);
     725          12 :     WordEntry  *arrin = ARRPTR(tsin);
     726             :     Datum      *elements;
     727             :     int         i;
     728             :     ArrayType  *array;
     729             : 
     730          12 :     elements = palloc(tsin->size * sizeof(Datum));
     731             : 
                           /* Each entry's pos/len locate its text inside the string area. */
     732          72 :     for (i = 0; i < tsin->size; i++)
     733             :     {
     734          60 :         elements[i] = PointerGetDatum(cstring_to_text_with_len(STRPTR(tsin) + arrin[i].pos,
     735          60 :                                                                arrin[i].len));
     736             :     }
     737             : 
     738          12 :     array = construct_array_builtin(elements, tsin->size, TEXTOID);
     739             : 
     740          12 :     pfree(elements);
     741          12 :     PG_FREE_IF_COPY(tsin, 0);
     742          12 :     PG_RETURN_POINTER(array);
     743             : }
     744             : 
     745             : /*
     746             :  * Build tsvector from array of lexemes.
                        *
                        * The argument is deconstructed as a text array.  A null element or an
                        * empty string raises an error.  The remaining lexemes are sorted and
                        * de-duplicated with compare_text_lexemes, then packed back to back
                        * into the result; every output entry has haspos = 0, i.e. the result
                        * carries no position data.
                        *
                        * NOTE(review): nothing in this function bounds an individual lexeme
                        * length or the accumulated datalen before they are stored into the
                        * WordEntry len/pos fields, whereas tsvector_concat() checks its data
                        * length against MAXSTRPOS.  Confirm whether over-long input needs to
                        * be rejected here as well.
     747             :  */
     748             : Datum
     749          24 : array_to_tsvector(PG_FUNCTION_ARGS)
     750             : {
     751          24 :     ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
     752             :     TSVector    tsout;
     753             :     Datum      *dlexemes;
     754             :     WordEntry  *arrout;
     755             :     bool       *nulls;
     756             :     int         nitems,
     757             :                 i,
     758             :                 tslen,
     759          24 :                 datalen = 0;
     760             :     char       *cur;
     761             : 
     762          24 :     deconstruct_array_builtin(v, TEXTOID, &dlexemes, &nulls, &nitems);
     763             : 
     764             :     /*
     765             :      * Reject nulls and zero length strings (maybe we should just ignore them,
     766             :      * instead?)
     767             :      */
     768         126 :     for (i = 0; i < nitems; i++)
     769             :     {
     770         114 :         if (nulls[i])
     771           6 :             ereport(ERROR,
     772             :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
     773             :                      errmsg("lexeme array may not contain nulls")));
     774             : 
     775         108 :         if (VARSIZE(dlexemes[i]) - VARHDRSZ == 0)
     776           6 :             ereport(ERROR,
     777             :                     (errcode(ERRCODE_ZERO_LENGTH_CHARACTER_STRING),
     778             :                      errmsg("lexeme array may not contain empty strings")));
     779             :     }
     780             : 
     781             :     /* Sort and de-dup, because this is required for a valid tsvector. */
     782          12 :     if (nitems > 1)
     783             :     {
     784          12 :         qsort(dlexemes, nitems, sizeof(Datum), compare_text_lexemes);
     785          12 :         nitems = qunique(dlexemes, nitems, sizeof(Datum),
     786             :                          compare_text_lexemes);
     787             :     }
     788             : 
     789             :     /* Calculate space needed for surviving lexemes. */
     790          60 :     for (i = 0; i < nitems; i++)
     791          48 :         datalen += VARSIZE(dlexemes[i]) - VARHDRSZ;
     792          12 :     tslen = CALCDATASIZE(nitems, datalen);
     793             : 
     794             :     /* Allocate and fill tsvector. */
     795          12 :     tsout = (TSVector) palloc0(tslen);
     796          12 :     SET_VARSIZE(tsout, tslen);
     797          12 :     tsout->size = nitems;
     798             : 
     799          12 :     arrout = ARRPTR(tsout);
     800          12 :     cur = STRPTR(tsout);
     801          60 :     for (i = 0; i < nitems; i++)
     802             :     {
     803          48 :         char       *lex = VARDATA(dlexemes[i]);
     804          48 :         int         lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
     805             : 
                               /* Lexeme bytes are stored contiguously; pos is the offset from STRPTR. */
     806          48 :         memcpy(cur, lex, lex_len);
     807          48 :         arrout[i].haspos = 0;
     808          48 :         arrout[i].len = lex_len;
     809          48 :         arrout[i].pos = cur - STRPTR(tsout);
     810          48 :         cur += lex_len;
     811             :     }
     812             : 
     813          12 :     PG_FREE_IF_COPY(v, 0);
     814          12 :     PG_RETURN_POINTER(tsout);
     815             : }
     816             : 
     817             : /*
     818             :  * ts_filter(): keep only lexemes with given weights in tsvector.
                        *
                        * The second argument is a char array of weight letters.  'A'/'a',
                        * 'B'/'b', 'C'/'c' and 'D'/'d' set mask bits 8, 4, 2 and 1; a null
                        * element or any other character raises an error.
                        *
                        * A position is kept when the mask has bit (1 << its weight) set.  A
                        * lexeme is dropped from the result when it has no position data at
                        * all, or when none of its positions passes the mask.
     819             :  */
     820             : Datum
     821          18 : tsvector_filter(PG_FUNCTION_ARGS)
     822             : {
     823          18 :     TSVector    tsin = PG_GETARG_TSVECTOR(0),
     824             :                 tsout;
     825          18 :     ArrayType  *weights = PG_GETARG_ARRAYTYPE_P(1);
     826          18 :     WordEntry  *arrin = ARRPTR(tsin),
     827             :                *arrout;
     828          18 :     char       *datain = STRPTR(tsin),
     829             :                *dataout;
     830             :     Datum      *dweights;
     831             :     bool       *nulls;
     832             :     int         nweights;
     833             :     int         i,
     834             :                 j;
     835          18 :     int         cur_pos = 0;
     836          18 :     char        mask = 0;
     837             : 
     838          18 :     deconstruct_array_builtin(weights, CHAROID, &dweights, &nulls, &nweights);
     839             : 
                           /* Fold the requested weight letters into a bitmask. */
     840          42 :     for (i = 0; i < nweights; i++)
     841             :     {
     842             :         char        char_weight;
     843             : 
     844          30 :         if (nulls[i])
     845           6 :             ereport(ERROR,
     846             :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
     847             :                      errmsg("weight array may not contain nulls")));
     848             : 
     849          24 :         char_weight = DatumGetChar(dweights[i]);
     850          24 :         switch (char_weight)
     851             :         {
     852          18 :             case 'A':
     853             :             case 'a':
     854          18 :                 mask = mask | 8;
     855          18 :                 break;
     856           6 :             case 'B':
     857             :             case 'b':
     858           6 :                 mask = mask | 4;
     859           6 :                 break;
     860           0 :             case 'C':
     861             :             case 'c':
     862           0 :                 mask = mask | 2;
     863           0 :                 break;
     864           0 :             case 'D':
     865             :             case 'd':
     866           0 :                 mask = mask | 1;
     867           0 :                 break;
     868           0 :             default:
     869           0 :                 ereport(ERROR,
     870             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     871             :                          errmsg("unrecognized weight: \"%c\"", char_weight)));
     872             :         }
     873             :     }
     874             : 
                           /*
                            * The output buffer is allocated at the input's size.  size is set
                            * provisionally to the input's entry count and dataout is taken from
                            * STRPTR(tsout) at that size; the real count is stored at the end and
                            * the data is moved if that changes STRPTR(tsout).
                            */
     875          12 :     tsout = (TSVector) palloc0(VARSIZE(tsin));
     876          12 :     tsout->size = tsin->size;
     877          12 :     arrout = ARRPTR(tsout);
     878          12 :     dataout = STRPTR(tsout);
     879             : 
                           /* i walks the input entries, j counts the entries kept in the output. */
     880         108 :     for (i = j = 0; i < tsin->size; i++)
     881             :     {
     882             :         WordEntryPosVector *posvin,
     883             :                    *posvout;
     884          96 :         int         npos = 0;
     885             :         int         k;
     886             : 
     887          96 :         if (!arrin[i].haspos)
     888          30 :             continue;
     889             : 
                               /*
                                * Matching positions are written where this lexeme's position
                                * vector would live if it is kept.  If nothing matches, cur_pos and
                                * j are left untouched, so the next candidate reuses the space.
                                */
     890          66 :         posvin = _POSVECPTR(tsin, arrin + i);
     891          66 :         posvout = (WordEntryPosVector *)
     892          66 :             (dataout + SHORTALIGN(cur_pos + arrin[i].len));
     893             : 
     894         132 :         for (k = 0; k < posvin->npos; k++)
     895             :         {
     896          66 :             if (mask & (1 << WEP_GETWEIGHT(posvin->pos[k])))
     897          30 :                 posvout->pos[npos++] = posvin->pos[k];
     898             :         }
     899             : 
     900             :         /* if no satisfactory positions found, skip lexeme */
     901          66 :         if (!npos)
     902          36 :             continue;
     903             : 
     904          30 :         arrout[j].haspos = true;
     905          30 :         arrout[j].len = arrin[i].len;
     906          30 :         arrout[j].pos = cur_pos;
     907             : 
     908          30 :         memcpy(dataout + cur_pos, datain + arrin[i].pos, arrin[i].len);
     909          30 :         posvout->npos = npos;
                               /* Advance past the aligned lexeme text, the npos field and the positions. */
     910          30 :         cur_pos += SHORTALIGN(arrin[i].len);
     911          30 :         cur_pos += POSDATALEN(tsout, arrout + j) * sizeof(WordEntryPos) +
     912             :             sizeof(uint16);
     913          30 :         j++;
     914             :     }
     915             : 
     916          12 :     tsout->size = j;
     917          12 :     if (dataout != STRPTR(tsout))
     918          12 :         memmove(STRPTR(tsout), dataout, cur_pos);
     919             : 
     920          12 :     SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, cur_pos));
     921             : 
     922          12 :     PG_FREE_IF_COPY(tsin, 0);
     923          12 :     PG_RETURN_POINTER(tsout);
     924             : }
     925             : 
     926             : Datum
     927          12 : tsvector_concat(PG_FUNCTION_ARGS)
     928             : {
                           /*
                            * Concatenate two tsvectors.
                            *
                            * The entry arrays of in1 and in2 are merged in compareEntry() order.
                            * Entries coming from in1 are copied together with their position
                            * vectors unchanged.  Positions coming from in2 are added through
                            * add_pos(), which is given maxpos, the largest position found in in1.
                            * A lexeme present in both inputs yields a single output entry whose
                            * in2 positions follow the ones copied from in1.
                            *
                            * An error is raised if the resulting data area is longer than
                            * MAXSTRPOS bytes.
                            */
     929          12 :     TSVector    in1 = PG_GETARG_TSVECTOR(0);
     930          12 :     TSVector    in2 = PG_GETARG_TSVECTOR(1);
     931             :     TSVector    out;
     932             :     WordEntry  *ptr;
     933             :     WordEntry  *ptr1,
     934             :                *ptr2;
     935             :     WordEntryPos *p;
     936          12 :     int         maxpos = 0,
     937             :                 i,
     938             :                 j,
     939             :                 i1,
     940             :                 i2,
     941             :                 dataoff,
     942             :                 output_bytes,
     943             :                 output_size;
     944             :     char       *data,
     945             :                *data1,
     946             :                *data2;
     947             : 
     948             :     /* Get max position in in1; we'll need this to offset in2's positions */
     949          12 :     ptr = ARRPTR(in1);
     950          12 :     i = in1->size;
     951          30 :     while (i--)
     952             :     {
     953          18 :         if ((j = POSDATALEN(in1, ptr)) != 0)
     954             :         {
     955          18 :             p = POSDATAPTR(in1, ptr);
     956          36 :             while (j--)
     957             :             {
     958          18 :                 if (WEP_GETPOS(*p) > maxpos)
     959          12 :                     maxpos = WEP_GETPOS(*p);
     960          18 :                 p++;
     961             :             }
     962             :         }
     963          18 :         ptr++;
     964             :     }
     965             : 
                           /* i1 and i2 count the entries still to be consumed from each input. */
     966          12 :     ptr1 = ARRPTR(in1);
     967          12 :     ptr2 = ARRPTR(in2);
     968          12 :     data1 = STRPTR(in1);
     969          12 :     data2 = STRPTR(in2);
     970          12 :     i1 = in1->size;
     971          12 :     i2 = in2->size;
     972             : 
     973             :     /*
     974             :      * Conservative estimate of space needed.  We might need all the data in
     975             :      * both inputs, and conceivably add a pad byte before position data for
     976             :      * each item where there was none before.
     977             :      */
     978          12 :     output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;
     979             : 
     980          12 :     out = (TSVector) palloc0(output_bytes);
     981          12 :     SET_VARSIZE(out, output_bytes);
     982             : 
     983             :     /*
     984             :      * We must make out->size valid so that STRPTR(out) is sensible.  We'll
     985             :      * collapse out any unused space at the end.
     986             :      */
     987          12 :     out->size = in1->size + in2->size;
     988             : 
     989          12 :     ptr = ARRPTR(out);
     990          12 :     data = STRPTR(out);
     991          12 :     dataoff = 0;
                           /* Merge while both inputs still have entries; dataoff is the write offset in data. */
     992          30 :     while (i1 && i2)
     993             :     {
     994          18 :         int         cmp = compareEntry(data1, ptr1, data2, ptr2);
     995             : 
     996          18 :         if (cmp < 0)
     997             :         {                       /* in1 first */
     998           6 :             ptr->haspos = ptr1->haspos;
     999           6 :             ptr->len = ptr1->len;
    1000           6 :             memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
    1001           6 :             ptr->pos = dataoff;
    1002           6 :             dataoff += ptr1->len;
    1003           6 :             if (ptr->haspos)
    1004             :             {
                                       /* Copy in1's position vector as is: the uint16 count plus the positions. */
    1005           6 :                 dataoff = SHORTALIGN(dataoff);
    1006           6 :                 memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
    1007           6 :                 dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
    1008             :             }
    1009             : 
    1010           6 :             ptr++;
    1011           6 :             ptr1++;
    1012           6 :             i1--;
    1013             :         }
    1014          12 :         else if (cmp > 0)
    1015             :         {                       /* in2 first */
    1016           6 :             ptr->haspos = ptr2->haspos;
    1017           6 :             ptr->len = ptr2->len;
    1018           6 :             memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
    1019           6 :             ptr->pos = dataoff;
    1020           6 :             dataoff += ptr2->len;
    1021           6 :             if (ptr->haspos)
    1022             :             {
                                       /*
                                        * The return value of add_pos() is used as the number of
                                        * positions written; when it is zero the entry is stored
                                        * without position data.
                                        */
    1023           0 :                 int         addlen = add_pos(in2, ptr2, out, ptr, maxpos);
    1024             : 
    1025           0 :                 if (addlen == 0)
    1026           0 :                     ptr->haspos = 0;
    1027             :                 else
    1028             :                 {
    1029           0 :                     dataoff = SHORTALIGN(dataoff);
    1030           0 :                     dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
    1031             :                 }
    1032             :             }
    1033             : 
    1034           6 :             ptr++;
    1035           6 :             ptr2++;
    1036           6 :             i2--;
    1037             :         }
    1038             :         else
    1039             :         {
                                   /* Same lexeme in both inputs: emit one entry, text taken from in1. */
    1040           6 :             ptr->haspos = ptr1->haspos | ptr2->haspos;
    1041           6 :             ptr->len = ptr1->len;
    1042           6 :             memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
    1043           6 :             ptr->pos = dataoff;
    1044           6 :             dataoff += ptr1->len;
    1045           6 :             if (ptr->haspos)
    1046             :             {
    1047           6 :                 if (ptr1->haspos)
    1048             :                 {
    1049           6 :                     dataoff = SHORTALIGN(dataoff);
    1050           6 :                     memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
    1051           6 :                     dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
                                           /*
                                            * in2's positions are appended to the vector just copied,
                                            * so only the added positions are counted here; the
                                            * uint16 count field was already accounted for above.
                                            */
    1052           6 :                     if (ptr2->haspos)
    1053           6 :                         dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
    1054             :                 }
    1055             :                 else            /* must have ptr2->haspos */
    1056             :                 {
    1057           0 :                     int         addlen = add_pos(in2, ptr2, out, ptr, maxpos);
    1058             : 
    1059           0 :                     if (addlen == 0)
    1060           0 :                         ptr->haspos = 0;
    1061             :                     else
    1062             :                     {
    1063           0 :                         dataoff = SHORTALIGN(dataoff);
    1064           0 :                         dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
    1065             :                     }
    1066             :                 }
    1067             :             }
    1068             : 
    1069           6 :             ptr++;
    1070           6 :             ptr1++;
    1071           6 :             ptr2++;
    1072           6 :             i1--;
    1073           6 :             i2--;
    1074             :         }
    1075             :     }
    1076             : 
                           /* Copy whatever is left of in1 (same handling as the "in1 first" case). */
    1077          18 :     while (i1)
    1078             :     {
    1079           6 :         ptr->haspos = ptr1->haspos;
    1080           6 :         ptr->len = ptr1->len;
    1081           6 :         memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
    1082           6 :         ptr->pos = dataoff;
    1083           6 :         dataoff += ptr1->len;
    1084           6 :         if (ptr->haspos)
    1085             :         {
    1086           6 :             dataoff = SHORTALIGN(dataoff);
    1087           6 :             memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
    1088           6 :             dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
    1089             :         }
    1090             : 
    1091           6 :         ptr++;
    1092           6 :         ptr1++;
    1093           6 :         i1--;
    1094             :     }
    1095             : 
                           /* Copy whatever is left of in2 (same handling as the "in2 first" case). */
    1096          18 :     while (i2)
    1097             :     {
    1098           6 :         ptr->haspos = ptr2->haspos;
    1099           6 :         ptr->len = ptr2->len;
    1100           6 :         memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
    1101           6 :         ptr->pos = dataoff;
    1102           6 :         dataoff += ptr2->len;
    1103           6 :         if (ptr->haspos)
    1104             :         {
    1105           6 :             int         addlen = add_pos(in2, ptr2, out, ptr, maxpos);
    1106             : 
    1107           6 :             if (addlen == 0)
    1108           0 :                 ptr->haspos = 0;
    1109             :             else
    1110             :             {
    1111           6 :                 dataoff = SHORTALIGN(dataoff);
    1112           6 :                 dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
    1113             :             }
    1114             :         }
    1115             : 
    1116           6 :         ptr++;
    1117           6 :         ptr2++;
    1118           6 :         i2--;
    1119             :     }
    1120             : 
    1121             :     /*
    1122             :      * Instead of checking each offset individually, we check for overflow of
    1123             :      * pos fields once at the end.
    1124             :      */
    1125          12 :     if (dataoff > MAXSTRPOS)
    1126           0 :         ereport(ERROR,
    1127             :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
    1128             :                  errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS)));
    1129             : 
    1130             :     /*
    1131             :      * Adjust sizes (asserting that we didn't overrun the original estimates)
    1132             :      * and collapse out any unused array entries.
    1133             :      */
    1134          12 :     output_size = ptr - ARRPTR(out);
    1135             :     Assert(output_size <= out->size);
    1136          12 :     out->size = output_size;
                           /* Shrinking size may change STRPTR(out); if so, move the data to its new start. */
    1137          12 :     if (data != STRPTR(out))
    1138           6 :         memmove(STRPTR(out), data, dataoff);
    1139          12 :     output_bytes = CALCDATASIZE(out->size, dataoff);
    1140             :     Assert(output_bytes <= VARSIZE(out));
    1141          12 :     SET_VARSIZE(out, output_bytes);
    1142             : 
    1143          12 :     PG_FREE_IF_COPY(in1, 0);
    1144          12 :     PG_FREE_IF_COPY(in2, 1);
    1145          12 :     PG_RETURN_POINTER(out);
    1146             : }
    1147             : 
    1148             : /*
    1149             :  * Compare two strings by tsvector rules.
    1150             :  *
    1151             :  * if prefix = true then it returns zero value iff b has prefix a
                        *
                        * a/lena and b/lenb are byte strings given by pointer and length.  The
                        * result is negative, zero or positive.
                        *
                        * With prefix = false, the bytes are compared with memcmp() over the
                        * shorter of the two lengths; if those bytes are equal, the shorter
                        * string sorts first (-1 or 1 is returned).
                        *
                        * With prefix = true, an empty a matches any b.  Otherwise zero is
                        * returned when memcmp() finds the common bytes equal and a is not
                        * longer than b, and 1 when they are equal but a is longer than b.
                        * When memcmp() reports a difference, its result is returned
                        * unchanged.
                        *
                        * In either mode, a non-empty a compared against an empty b is treated
                        * as greater.
    1152             :  */
    1153             : int32
    1154     6215474 : tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
    1155             : {
    1156             :     int         cmp;
    1157             : 
    1158     6215474 :     if (lena == 0)
    1159             :     {
    1160          36 :         if (prefix)
    1161           0 :             cmp = 0;            /* empty string is prefix of anything */
    1162             :         else
    1163          36 :             cmp = (lenb > 0) ? -1 : 0;
    1164             :     }
    1165     6215438 :     else if (lenb == 0)
    1166             :     {
                               /* lena is already known to be nonzero on this branch. */
    1167           0 :         cmp = (lena > 0) ? 1 : 0;
    1168             :     }
    1169             :     else
    1170             :     {
                               /* Compare only the bytes both strings have. */
    1171     6215438 :         cmp = memcmp(a, b, Min((unsigned int) lena, (unsigned int) lenb));
    1172             : 
    1173     6215438 :         if (prefix)
    1174             :         {
    1175       16458 :             if (cmp == 0 && lena > lenb)
    1176           0 :                 cmp = 1;        /* a is longer, so not a prefix of b */
    1177             :         }
    1178     6198980 :         else if (cmp == 0 && lena != lenb)
    1179             :         {
                                   /* Common bytes are equal: the shorter string sorts first. */
    1180       32270 :             cmp = (lena < lenb) ? -1 : 1;
    1181             :         }
    1182             :     }
    1183             : 
    1184     6215474 :     return cmp;
    1185             : }
    1186             : 
    1187             : /*
    1188             :  * Check weight info or/and fill 'data' with the required positions
    1189             :  */
    1190             : static TSTernaryValue
    1191       68082 : checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
    1192             :                ExecPhraseData *data)
    1193             : {
    1194       68082 :     TSTernaryValue result = TS_NO;
    1195             : 
    1196             :     Assert(data == NULL || data->npos == 0);
    1197             : 
    1198       68082 :     if (entry->haspos)
    1199             :     {
    1200             :         WordEntryPosVector *posvec;
    1201             : 
    1202             :         /*
    1203             :          * We can't use the _POSVECPTR macro here because the pointer to the
    1204             :          * tsvector's lexeme storage is already contained in chkval->values.
    1205             :          */
    1206        4488 :         posvec = (WordEntryPosVector *)
    1207        4488 :             (chkval->values + SHORTALIGN(entry->pos + entry->len));
    1208             : 
    1209        4488 :         if (val->weight && data)
    1210          48 :         {
    1211          48 :             WordEntryPos *posvec_iter = posvec->pos;
    1212             :             WordEntryPos *dptr;
    1213             : 
    1214             :             /*
    1215             :              * Filter position information by weights
    1216             :              */
    1217          48 :             dptr = data->pos = palloc(sizeof(WordEntryPos) * posvec->npos);
    1218          48 :             data->allocated = true;
    1219             : 
    1220             :             /* Is there a position with a matching weight? */
    1221          96 :             while (posvec_iter < posvec->pos + posvec->npos)
    1222             :             {
    1223             :                 /* If true, append this position to the data->pos */
    1224          48 :                 if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
    1225             :                 {
    1226          24 :                     *dptr = WEP_GETPOS(*posvec_iter);
    1227          24 :                     dptr++;
    1228             :                 }
    1229             : 
    1230          48 :                 posvec_iter++;
    1231             :             }
    1232             : 
    1233          48 :             data->npos = dptr - data->pos;
    1234             : 
    1235          48 :             if (data->npos > 0)
    1236          24 :                 result = TS_YES;
    1237             :             else
    1238             :             {
    1239          24 :                 pfree(data->pos);
    1240          24 :                 data->pos = NULL;
    1241          24 :                 data->allocated = false;
    1242             :             }
    1243             :         }
    1244        4440 :         else if (val->weight)
    1245             :         {
    1246         456 :             WordEntryPos *posvec_iter = posvec->pos;
    1247             : 
    1248             :             /* Is there a position with a matching weight? */
    1249         690 :             while (posvec_iter < posvec->pos + posvec->npos)
    1250             :             {
    1251         504 :                 if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
    1252             :                 {
    1253         270 :                     result = TS_YES;
    1254         270 :                     break;      /* no need to go further */
    1255             :                 }
    1256             : 
    1257         234 :                 posvec_iter++;
    1258             :             }
    1259             :         }
    1260        3984 :         else if (data)
    1261             :         {
    1262        2274 :             data->npos = posvec->npos;
    1263        2274 :             data->pos = posvec->pos;
    1264        2274 :             data->allocated = false;
    1265        2274 :             result = TS_YES;
    1266             :         }
    1267             :         else
    1268             :         {
    1269             :             /* simplest case: no weight check, positions not needed */
    1270        1710 :             result = TS_YES;
    1271             :         }
    1272             :     }
    1273             :     else
    1274             :     {
    1275             :         /*
    1276             :          * Position info is lacking, so if the caller requires it, we can only
    1277             :          * say that maybe there is a match.
    1278             :          *
    1279             :          * Notice, however, that we *don't* check val->weight here.
    1280             :          * Historically, stripped tsvectors are considered to match queries
    1281             :          * whether or not the query has a weight restriction; that's a little
    1282             :          * dubious but we'll preserve the behavior.
    1283             :          */
    1284       63594 :         if (data)
    1285       23058 :             result = TS_MAYBE;
    1286             :         else
    1287       40536 :             result = TS_YES;
    1288             :     }
    1289             : 
    1290       68082 :     return result;
    1291             : }
    1292             : 
    1293             : /*
    1294             :  * TS_execute callback for matching a tsquery operand to plain tsvector data
    1295             :  */
    1296             : static TSTernaryValue
    1297      284022 : checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
    1298             : {
    1299      284022 :     CHKVAL     *chkval = (CHKVAL *) checkval;
    1300      284022 :     WordEntry  *StopLow = chkval->arrb;
    1301      284022 :     WordEntry  *StopHigh = chkval->arre;
    1302      284022 :     WordEntry  *StopMiddle = StopHigh;
    1303      284022 :     TSTernaryValue res = TS_NO;
    1304             : 
    1305             :     /* Loop invariant: StopLow <= val < StopHigh */
    1306     1786806 :     while (StopLow < StopHigh)
    1307             :     {
    1308             :         int         difference;
    1309             : 
    1310     1555830 :         StopMiddle = StopLow + (StopHigh - StopLow) / 2;
    1311     1555830 :         difference = tsCompareString(chkval->operand + val->distance,
    1312     1555830 :                                      val->length,
    1313     1555830 :                                      chkval->values + StopMiddle->pos,
    1314     1555830 :                                      StopMiddle->len,
    1315             :                                      false);
    1316             : 
    1317     1555830 :         if (difference == 0)
    1318             :         {
    1319             :             /* Check weight info & fill 'data' with positions */
    1320       53046 :             res = checkclass_str(chkval, StopMiddle, val, data);
    1321       53046 :             break;
    1322             :         }
    1323     1502784 :         else if (difference > 0)
    1324      847512 :             StopLow = StopMiddle + 1;
    1325             :         else
    1326      655272 :             StopHigh = StopMiddle;
    1327             :     }
    1328             : 
    1329             :     /*
    1330             :      * If it's a prefix search, we should also consider lexemes that the
    1331             :      * search term is a prefix of (which will necessarily immediately follow
    1332             :      * the place we found in the above loop).  But we can skip them if there
    1333             :      * was a definite match on the exact term AND the caller doesn't need
    1334             :      * position info.
    1335             :      */
    1336      284022 :     if (val->prefix && (res != TS_YES || data))
    1337             :     {
    1338       16524 :         WordEntryPos *allpos = NULL;
    1339       16524 :         int         npos = 0,
    1340       16524 :                     totalpos = 0;
    1341             : 
    1342             :         /* no exact match found: start the prefix scan at the insertion point */
    1343       16524 :         if (StopLow >= StopHigh)
    1344       16512 :             StopMiddle = StopHigh;
    1345             : 
    1346             :         /* we don't try to re-use any data from the initial match */
    1347       16524 :         if (data)
    1348             :         {
    1349          36 :             if (data->allocated)
    1350           0 :                 pfree(data->pos);
    1351          36 :             data->pos = NULL;
    1352          36 :             data->allocated = false;
    1353          36 :             data->npos = 0;
    1354             :         }
    1355       16524 :         res = TS_NO;
    1356             : 
    1357       31560 :         while ((res != TS_YES || data) &&
    1358       32556 :                StopMiddle < chkval->arre &&
    1359       15930 :                tsCompareString(chkval->operand + val->distance,
    1360       15930 :                                val->length,
    1361       15930 :                                chkval->values + StopMiddle->pos,
    1362       15930 :                                StopMiddle->len,
    1363             :                                true) == 0)
    1364             :         {
    1365             :             TSTernaryValue subres;
    1366             : 
    1367       15036 :             subres = checkclass_str(chkval, StopMiddle, val, data);
    1368             : 
    1369       15036 :             if (subres != TS_NO)
    1370             :             {
    1371       14976 :                 if (data)
    1372             :                 {
    1373             :                     /*
    1374             :                      * We need to join position information
    1375             :                      */
    1376          42 :                     if (subres == TS_MAYBE)
    1377             :                     {
    1378             :                         /*
    1379             :                          * No position info for this match, so we must report
    1380             :                          * MAYBE overall.
    1381             :                          */
    1382           0 :                         res = TS_MAYBE;
    1383             :                         /* forget any previous positions */
    1384           0 :                         npos = 0;
    1385             :                         /* don't leak storage */
    1386           0 :                         if (allpos)
    1387           0 :                             pfree(allpos);
    1388           0 :                         break;
    1389             :                     }
    1390             : 
    1391          78 :                     while (npos + data->npos > totalpos)
    1392             :                     {
    1393          36 :                         if (totalpos == 0)
    1394             :                         {
    1395          36 :                             totalpos = 256;
    1396          36 :                             allpos = palloc(sizeof(WordEntryPos) * totalpos);
    1397             :                         }
    1398             :                         else
    1399             :                         {
    1400           0 :                             totalpos *= 2;
    1401           0 :                             allpos = repalloc(allpos, sizeof(WordEntryPos) * totalpos);
    1402             :                         }
    1403             :                     }
    1404             : 
    1405          42 :                     memcpy(allpos + npos, data->pos, sizeof(WordEntryPos) * data->npos);
    1406          42 :                     npos += data->npos;
    1407             : 
    1408             :                     /* don't leak storage from individual matches */
    1409          42 :                     if (data->allocated)
    1410          24 :                         pfree(data->pos);
    1411          42 :                     data->pos = NULL;
    1412          42 :                     data->allocated = false;
    1413             :                     /* it's important to reset data->npos before next loop */
    1414          42 :                     data->npos = 0;
    1415             :                 }
    1416             :                 else
    1417             :                 {
    1418             :                     /* Don't need positions, just handle YES/MAYBE */
    1419       14934 :                     if (subres == TS_YES || res == TS_NO)
    1420       14934 :                         res = subres;
    1421             :                 }
    1422             :             }
    1423             : 
    1424       15036 :             StopMiddle++;
    1425             :         }
    1426             : 
    1427       16524 :         if (data && npos > 0)
    1428             :         {
    1429             :             /* Sort and make unique array of found positions */
    1430          36 :             data->pos = allpos;
    1431          36 :             qsort(data->pos, npos, sizeof(WordEntryPos), compareWordEntryPos);
    1432          36 :             data->npos = qunique(data->pos, npos, sizeof(WordEntryPos),
    1433             :                                  compareWordEntryPos);
    1434          36 :             data->allocated = true;
    1435          36 :             res = TS_YES;
    1436             :         }
    1437             :     }
    1438             : 
    1439      284022 :     return res;
    1440             : }
    1441             : 
    1442             : /*
    1443             :  * Compute output position list for a tsquery operator in phrase mode.
    1444             :  *
    1445             :  * Merge the position lists in Ldata and Rdata as specified by "emit",
    1446             :  * returning the result list into *data.  The input position lists must be
    1447             :  * sorted and unique, and the output will be as well.
    1448             :  *
    1449             :  * data: pointer to initially-all-zeroes output struct, or NULL
    1450             :  * Ldata, Rdata: input position lists
    1451             :  * emit: bitmask of TSPO_XXX flags
    1452             :  * Loffset: offset to be added to Ldata positions before comparing/outputting
    1453             :  * Roffset: offset to be added to Rdata positions before comparing/outputting
    1454             :  * max_npos: maximum possible required size of output position array
    1455             :  *
    1456             :  * Loffset and Roffset should not be negative, else we risk trying to output
    1457             :  * negative positions, which won't fit into WordEntryPos.
    1458             :  *
    1459             :  * The result is boolean (TS_YES or TS_NO), but for the caller's convenience
    1460             :  * we return it as TSTernaryValue.
    1461             :  *
    1462             :  * Returns TS_YES if any positions were emitted to *data; or if data is NULL,
    1463             :  * returns TS_YES if any positions would have been emitted.
    1464             :  */
    1465             : #define TSPO_L_ONLY     0x01    /* emit positions appearing only in L */
    1466             : #define TSPO_R_ONLY     0x02    /* emit positions appearing only in R */
    1467             : #define TSPO_BOTH       0x04    /* emit positions appearing in both L&R */
    1468             : 
    1469             : static TSTernaryValue
    1470       29972 : TS_phrase_output(ExecPhraseData *data,
    1471             :                  ExecPhraseData *Ldata,
    1472             :                  ExecPhraseData *Rdata,
    1473             :                  int emit,
    1474             :                  int Loffset,
    1475             :                  int Roffset,
    1476             :                  int max_npos)
    1477             : {
    1478             :     int         Lindex,
    1479             :                 Rindex;
    1480             : 
    1481             :     /* Loop until both inputs are exhausted */
    1482       29972 :     Lindex = Rindex = 0;
    1483       31004 :     while (Lindex < Ldata->npos || Rindex < Rdata->npos)
    1484             :     {
    1485             :         int         Lpos,
    1486             :                     Rpos;
    1487        2334 :         int         output_pos = 0;
    1488             : 
    1489             :         /*
    1490             :          * Fetch current values to compare.  WEP_GETPOS() is needed because
    1491             :          * ExecPhraseData->pos can point into a tsvector's WordEntryPosVector.
    1492             :          */
    1493        2334 :         if (Lindex < Ldata->npos)
    1494        1686 :             Lpos = WEP_GETPOS(Ldata->pos[Lindex]) + Loffset;
    1495             :         else
    1496             :         {
    1497             :             /* L array exhausted, so we're done if R_ONLY isn't set */
    1498         648 :             if (!(emit & TSPO_R_ONLY))
    1499         150 :                 break;
    1500         498 :             Lpos = INT_MAX;
    1501             :         }
    1502        2184 :         if (Rindex < Rdata->npos)
    1503        1938 :             Rpos = WEP_GETPOS(Rdata->pos[Rindex]) + Roffset;
    1504             :         else
    1505             :         {
    1506             :             /* R array exhausted, so we're done if L_ONLY isn't set */
    1507         246 :             if (!(emit & TSPO_L_ONLY))
    1508         162 :                 break;
    1509          84 :             Rpos = INT_MAX;
    1510             :         }
    1511             : 
    1512             :         /* Merge-join the two input lists */
    1513        2022 :         if (Lpos < Rpos)
    1514             :         {
    1515             :             /* Lpos is not matched in Rdata, should we output it? */
    1516         486 :             if (emit & TSPO_L_ONLY)
    1517         144 :                 output_pos = Lpos;
    1518         486 :             Lindex++;
    1519             :         }
    1520        1536 :         else if (Lpos == Rpos)
    1521             :         {
    1522             :             /* Lpos and Rpos match ... should we output it? */
    1523         798 :             if (emit & TSPO_BOTH)
    1524         702 :                 output_pos = Rpos;
    1525         798 :             Lindex++;
    1526         798 :             Rindex++;
    1527             :         }
    1528             :         else                    /* Lpos > Rpos */
    1529             :         {
    1530             :             /* Rpos is not matched in Ldata, should we output it? */
    1531         738 :             if (emit & TSPO_R_ONLY)
    1532         540 :                 output_pos = Rpos;
    1533         738 :             Rindex++;
    1534             :         }
    1535             : 
    1536        2022 :         if (output_pos > 0)
    1537             :         {
    1538        1386 :             if (data)
    1539             :             {
    1540             :                 /* Store position, first allocating output array if needed */
    1541         396 :                 if (data->pos == NULL)
    1542             :                 {
    1543         318 :                     data->pos = (WordEntryPos *)
    1544         318 :                         palloc(max_npos * sizeof(WordEntryPos));
    1545         318 :                     data->allocated = true;
    1546             :                 }
    1547         396 :                 data->pos[data->npos++] = output_pos;
    1548             :             }
    1549             :             else
    1550             :             {
    1551             :                 /*
    1552             :                  * Exact positions not needed, so return TS_YES as soon as we
    1553             :                  * know there is at least one.
    1554             :                  */
    1555         990 :                 return TS_YES;
    1556             :             }
    1557             :         }
    1558             :     }
    1559             : 
    1560       28982 :     if (data && data->npos > 0)
    1561             :     {
    1562             :         /* Let's assert we didn't overrun the array */
    1563             :         Assert(data->npos <= max_npos);
    1564         318 :         return TS_YES;
    1565             :     }
    1566       28664 :     return TS_NO;
    1567             : }
    1568             : 
    1569             : /*
    1570             :  * Execute tsquery at or below an OP_PHRASE operator.
    1571             :  *
    1572             :  * This handles tsquery execution at recursion levels where we need to care
    1573             :  * about match locations.
    1574             :  *
    1575             :  * In addition to the same arguments used for TS_execute, the caller may pass
    1576             :  * a preinitialized-to-zeroes ExecPhraseData struct, to be filled with lexeme
    1577             :  * match position info on success.  data == NULL if no position data need be
    1578             :  * returned.
    1579             :  * Note: the function assumes data != NULL for operators other than OP_PHRASE.
    1580             :  * This is OK because an outside call always starts from an OP_PHRASE node,
    1581             :  * and all internal recursion cases pass data != NULL.
    1582             :  *
    1583             :  * The detailed semantics of the match data, given that the function returned
    1584             :  * TS_YES (successful match), are:
    1585             :  *
    1586             :  * npos > 0, negate = false:
    1587             :  *   query is matched at specified position(s) (and only those positions)
    1588             :  * npos > 0, negate = true:
    1589             :  *   query is matched at all positions *except* specified position(s)
    1590             :  * npos = 0, negate = true:
    1591             :  *   query is matched at all positions
    1592             :  * npos = 0, negate = false:
    1593             :  *   disallowed (this should result in TS_NO or TS_MAYBE, as appropriate)
    1594             :  *
    1595             :  * Successful matches also return a "width" value which is the match width in
    1596             :  * lexemes, less one.  Hence, "width" is zero for simple one-lexeme matches,
    1597             :  * and is the sum of the phrase operator distances for phrase matches.  Note
    1598             :  * that when width > 0, the listed positions represent the ends of matches not
    1599             :  * the starts.  (This unintuitive rule is needed to avoid possibly generating
    1600             :  * negative positions, which wouldn't fit into the WordEntryPos arrays.)
    1601             :  *
    1602             :  * If the TSExecuteCallback function reports that an operand is present
    1603             :  * but fails to provide position(s) for it, we will return TS_MAYBE when
    1604             :  * it is possible but not certain that the query is matched.
    1605             :  *
    1606             :  * When the function returns TS_NO or TS_MAYBE, it must return npos = 0,
    1607             :  * negate = false (which is the state initialized by the caller); but the
    1608             :  * "width" output in such cases is undefined.
    1609             :  */
    1610             : static TSTernaryValue
    1611      701340 : TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
    1612             :                   TSExecuteCallback chkcond,
    1613             :                   ExecPhraseData *data)
    1614             : {
    1615             :     ExecPhraseData Ldata,
    1616             :                 Rdata;
    1617             :     TSTernaryValue lmatch,
    1618             :                 rmatch;
    1619             :     int         Loffset,
    1620             :                 Roffset,
    1621             :                 maxwidth;
    1622             : 
    1623             :     /* since this function recurses, it could be driven to stack overflow */
    1624      701340 :     check_stack_depth();
    1625             : 
    1626             :     /* ... and let's check for query cancel while we're at it */
    1627      701340 :     CHECK_FOR_INTERRUPTS();
    1628             : 
    1629      701340 :     if (curitem->type == QI_VAL)
    1630      344972 :         return chkcond(arg, (QueryOperand *) curitem, data);
    1631             : 
    1632      356368 :     switch (curitem->qoperator.oper)
    1633             :     {
    1634      120748 :         case OP_NOT:
    1635             : 
    1636             :             /*
    1637             :              * We need not touch data->width, since a NOT operation does not
    1638             :              * change the match width.
    1639             :              */
    1640      120748 :             if (flags & TS_EXEC_SKIP_NOT)
    1641             :             {
    1642             :                 /* with SKIP_NOT, report NOT as "match everywhere" */
    1643             :                 Assert(data->npos == 0 && !data->negate);
    1644           0 :                 data->negate = true;
    1645           0 :                 return TS_YES;
    1646             :             }
    1647      120748 :             switch (TS_phrase_execute(curitem + 1, arg, flags, chkcond, data))
    1648             :             {
    1649      105568 :                 case TS_NO:
    1650             :                     /* change "match nowhere" to "match everywhere" */
    1651             :                     Assert(data->npos == 0 && !data->negate);
    1652      105568 :                     data->negate = true;
    1653      105568 :                     return TS_YES;
    1654         390 :                 case TS_YES:
    1655         390 :                     if (data->npos > 0)
    1656             :                     {
    1657             :                         /* we have some positions, invert negate flag */
    1658         384 :                         data->negate = !data->negate;
    1659         384 :                         return TS_YES;
    1660             :                     }
    1661           6 :                     else if (data->negate)
    1662             :                     {
    1663             :                         /* change "match everywhere" to "match nowhere" */
    1664           6 :                         data->negate = false;
    1665           6 :                         return TS_NO;
    1666             :                     }
    1667             :                     /* Should not get here if result was TS_YES */
    1668             :                     Assert(false);
    1669           0 :                     break;
    1670       14790 :                 case TS_MAYBE:
    1671             :                     /* match positions are, and remain, uncertain */
    1672       14790 :                     return TS_MAYBE;
    1673             :             }
    1674           0 :             break;
    1675             : 
    1676      235464 :         case OP_PHRASE:
    1677             :         case OP_AND:
    1678      235464 :             memset(&Ldata, 0, sizeof(Ldata));
    1679      235464 :             memset(&Rdata, 0, sizeof(Rdata));
    1680             : 
    1681      235464 :             lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
    1682             :                                        arg, flags, chkcond, &Ldata);
    1683      235464 :             if (lmatch == TS_NO)
    1684      125860 :                 return TS_NO;
    1685             : 
    1686      109604 :             rmatch = TS_phrase_execute(curitem + 1,
    1687             :                                        arg, flags, chkcond, &Rdata);
    1688      109604 :             if (rmatch == TS_NO)
    1689       54030 :                 return TS_NO;
    1690             : 
    1691             :             /*
    1692             :              * If either operand has no position information, then we can't
    1693             :              * return reliable position data, only a MAYBE result.
    1694             :              */
    1695       55574 :             if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
    1696       25758 :                 return TS_MAYBE;
    1697             : 
    1698       29816 :             if (curitem->qoperator.oper == OP_PHRASE)
    1699             :             {
    1700             :                 /*
    1701             :                  * Compute Loffset and Roffset suitable for phrase match, and
    1702             :                  * compute overall width of whole phrase match.
    1703             :                  */
    1704       29810 :                 Loffset = curitem->qoperator.distance + Rdata.width;
    1705       29810 :                 Roffset = 0;
    1706       29810 :                 if (data)
    1707         186 :                     data->width = curitem->qoperator.distance +
    1708         186 :                         Ldata.width + Rdata.width;
    1709             :             }
    1710             :             else
    1711             :             {
    1712             :                 /*
    1713             :                  * For OP_AND, set output width and alignment like OP_OR (see
    1714             :                  * comment below)
    1715             :                  */
    1716           6 :                 maxwidth = Max(Ldata.width, Rdata.width);
    1717           6 :                 Loffset = maxwidth - Ldata.width;
    1718           6 :                 Roffset = maxwidth - Rdata.width;
    1719           6 :                 if (data)
    1720           6 :                     data->width = maxwidth;
    1721             :             }
    1722             : 
    1723       29816 :             if (Ldata.negate && Rdata.negate)
    1724             :             {
    1725             :                 /* !L & !R: treat as !(L | R) */
    1726       28442 :                 (void) TS_phrase_output(data, &Ldata, &Rdata,
    1727             :                                         TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
    1728             :                                         Loffset, Roffset,
    1729       28442 :                                         Ldata.npos + Rdata.npos);
    1730       28442 :                 if (data)
    1731           0 :                     data->negate = true;
    1732       28442 :                 return TS_YES;
    1733             :             }
    1734        1374 :             else if (Ldata.negate)
    1735             :             {
    1736             :                 /* !L & R */
    1737         450 :                 return TS_phrase_output(data, &Ldata, &Rdata,
    1738             :                                         TSPO_R_ONLY,
    1739             :                                         Loffset, Roffset,
    1740             :                                         Rdata.npos);
    1741             :             }
    1742         924 :             else if (Rdata.negate)
    1743             :             {
    1744             :                 /* L & !R */
    1745           6 :                 return TS_phrase_output(data, &Ldata, &Rdata,
    1746             :                                         TSPO_L_ONLY,
    1747             :                                         Loffset, Roffset,
    1748             :                                         Ldata.npos);
    1749             :             }
    1750             :             else
    1751             :             {
    1752             :                 /* straight AND */
    1753         918 :                 return TS_phrase_output(data, &Ldata, &Rdata,
    1754             :                                         TSPO_BOTH,
    1755             :                                         Loffset, Roffset,
    1756         918 :                                         Min(Ldata.npos, Rdata.npos));
    1757             :             }
    1758             : 
    1759         156 :         case OP_OR:
    1760         156 :             memset(&Ldata, 0, sizeof(Ldata));
    1761         156 :             memset(&Rdata, 0, sizeof(Rdata));
    1762             : 
    1763         156 :             lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
    1764             :                                        arg, flags, chkcond, &Ldata);
    1765         156 :             rmatch = TS_phrase_execute(curitem + 1,
    1766             :                                        arg, flags, chkcond, &Rdata);
    1767             : 
    1768         156 :             if (lmatch == TS_NO && rmatch == TS_NO)
    1769          12 :                 return TS_NO;
    1770             : 
    1771             :             /*
    1772             :              * If either operand has no position information, then we can't
    1773             :              * return reliable position data, only a MAYBE result.
    1774             :              */
    1775         144 :             if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
    1776           0 :                 return TS_MAYBE;
    1777             : 
    1778             :             /*
    1779             :              * Cope with undefined output width from failed submatch.  (This
    1780             :              * takes less code than trying to ensure that all failure returns
    1781             :              * set data->width to zero.)
    1782             :              */
    1783         144 :             if (lmatch == TS_NO)
    1784          18 :                 Ldata.width = 0;
    1785         144 :             if (rmatch == TS_NO)
    1786          84 :                 Rdata.width = 0;
    1787             : 
    1788             :             /*
    1789             :              * For OP_AND and OP_OR, report the width of the wider of the two
    1790             :              * inputs, and align the narrower input's positions to the right
    1791             :              * end of that width.  This rule deals at least somewhat
    1792             :              * reasonably with cases like "x <-> (y | z <-> q)".
    1793             :              */
    1794         144 :             maxwidth = Max(Ldata.width, Rdata.width);
    1795         144 :             Loffset = maxwidth - Ldata.width;
    1796         144 :             Roffset = maxwidth - Rdata.width;
    1797         144 :             data->width = maxwidth;
    1798             : 
    1799         144 :             if (Ldata.negate && Rdata.negate)
    1800             :             {
    1801             :                 /* !L | !R: treat as !(L & R) */
    1802           6 :                 (void) TS_phrase_output(data, &Ldata, &Rdata,
    1803             :                                         TSPO_BOTH,
    1804             :                                         Loffset, Roffset,
    1805           6 :                                         Min(Ldata.npos, Rdata.npos));
    1806           6 :                 data->negate = true;
    1807           6 :                 return TS_YES;
    1808             :             }
    1809         138 :             else if (Ldata.negate)
    1810             :             {
    1811             :                 /* !L | R: treat as !(L & !R) */
    1812          30 :                 (void) TS_phrase_output(data, &Ldata, &Rdata,
    1813             :                                         TSPO_L_ONLY,
    1814             :                                         Loffset, Roffset,
    1815             :                                         Ldata.npos);
    1816          30 :                 data->negate = true;
    1817          30 :                 return TS_YES;
    1818             :             }
    1819         108 :             else if (Rdata.negate)
    1820             :             {
    1821             :                 /* L | !R: treat as !(!L & R) */
    1822           6 :                 (void) TS_phrase_output(data, &Ldata, &Rdata,
    1823             :                                         TSPO_R_ONLY,
    1824             :                                         Loffset, Roffset,
    1825             :                                         Rdata.npos);
    1826           6 :                 data->negate = true;
    1827           6 :                 return TS_YES;
    1828             :             }
    1829             :             else
    1830             :             {
    1831             :                 /* straight OR */
    1832         102 :                 return TS_phrase_output(data, &Ldata, &Rdata,
    1833             :                                         TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
    1834             :                                         Loffset, Roffset,
    1835         102 :                                         Ldata.npos + Rdata.npos);
    1836             :             }
    1837             : 
    1838           0 :         default:
    1839           0 :             elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
    1840             :     }
    1841             : 
    1842             :     /* not reachable, but keep compiler quiet */
    1843           0 :     return TS_NO;
    1844             : }
    1845             : 
    1846             : 
    1847             : /*
    1848             :  * Evaluate tsquery boolean expression.
    1849             :  *
    1850             :  * curitem: current tsquery item (initially, the first one)
    1851             :  * arg: opaque value to pass through to callback function
    1852             :  * flags: bitmask of flag bits shown in ts_utils.h
    1853             :  * chkcond: callback function to check whether a primitive value is present
                        *
                        * Returns true unless the recursive evaluation yields TS_NO; both TS_YES
                        * and TS_MAYBE are therefore reported to the caller as a match.
    1854             :  */
    1855             : bool
    1856      519660 : TS_execute(QueryItem *curitem, void *arg, uint32 flags,
    1857             :            TSExecuteCallback chkcond)
    1858             : {
    1859             :     /*
    1860             :      * If we get TS_MAYBE from the recursion, return true.  We could only see
    1861             :      * that result if the caller passed TS_EXEC_PHRASE_NO_POS, so there's no
    1862             :      * need to check again.
    1863             :      */
    1864      519660 :     return TS_execute_recurse(curitem, arg, flags, chkcond) != TS_NO;
    1865             : }
    1866             : 
    1867             : /*
    1868             :  * Evaluate tsquery boolean expression.
    1869             :  *
    1870             :  * This is the same as TS_execute except that TS_MAYBE is returned as-is.
                        *
                        * All four arguments are forwarded unchanged to TS_execute_recurse, and
                        * its TSTernaryValue result is returned without any conversion.
    1871             :  */
    1872             : TSTernaryValue
    1873       36942 : TS_execute_ternary(QueryItem *curitem, void *arg, uint32 flags,
    1874             :                    TSExecuteCallback chkcond)
    1875             : {
    1876       36942 :     return TS_execute_recurse(curitem, arg, flags, chkcond);
    1877             : }
    1878             : 
    1879             : /*
    1880             :  * TS_execute recursion for operators above any phrase operator.  Here we do
    1881             :  * not need to worry about lexeme positions.  As soon as we hit an OP_PHRASE
    1882             :  * operator, we pass it off to TS_phrase_execute which does worry.
                        *
                        * The result is three-valued (TS_NO, TS_YES or TS_MAYBE).  For a binary
                        * operator the left operand is found at curitem + qoperator.left and the
                        * right operand at curitem + 1; the single operand of OP_NOT is at
                        * curitem + 1.
    1883             :  */
    1884             : static TSTernaryValue
    1885     1054078 : TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags,
    1886             :                    TSExecuteCallback chkcond)
    1887             : {
    1888             :     TSTernaryValue lmatch;
    1889             : 
    1890             :     /* since this function recurses, it could be driven to stack overflow */
    1891     1054078 :     check_stack_depth();
    1892             : 
    1893             :     /* ... and let's check for query cancel while we're at it */
    1894     1054078 :     CHECK_FOR_INTERRUPTS();
    1895             : 
                           /* A primitive value: the callback's answer is the result, verbatim. */
    1896     1054078 :     if (curitem->type == QI_VAL)
    1897      423322 :         return chkcond(arg, (QueryOperand *) curitem,
    1898             :                        NULL /* don't need position info */ );
    1899             : 
    1900      630756 :     switch (curitem->qoperator.oper)
    1901             :     {
    1902      203244 :         case OP_NOT:
                                   /*
                                    * With TS_EXEC_SKIP_NOT set, the operand is not evaluated at
                                    * all and the NOT is reported as satisfied.  Otherwise YES and
                                    * NO are swapped and MAYBE stays MAYBE.
                                    */
    1903      203244 :             if (flags & TS_EXEC_SKIP_NOT)
    1904           0 :                 return TS_YES;
    1905      203244 :             switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
    1906             :             {
    1907      191752 :                 case TS_NO:
    1908      191752 :                     return TS_YES;
    1909        4890 :                 case TS_YES:
    1910        4890 :                     return TS_NO;
    1911        6602 :                 case TS_MAYBE:
    1912        6602 :                     return TS_MAYBE;
    1913             :             }
                                   /* reached only if the result matched none of the cases above */
    1914           0 :             break;
    1915             : 
    1916       83492 :         case OP_AND:
                                   /*
                                    * A definite NO on the left decides the result without looking
                                    * at the right operand.  Otherwise: right NO gives NO, right
                                    * YES gives whatever the left produced (YES or MAYBE), and
                                    * right MAYBE gives MAYBE.
                                    */
    1917       83492 :             lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
    1918             :                                         flags, chkcond);
    1919       83492 :             if (lmatch == TS_NO)
    1920       66308 :                 return TS_NO;
    1921       17184 :             switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
    1922             :             {
    1923       10112 :                 case TS_NO:
    1924       10112 :                     return TS_NO;
    1925        3300 :                 case TS_YES:
    1926        3300 :                     return lmatch;
    1927        3772 :                 case TS_MAYBE:
    1928        3772 :                     return TS_MAYBE;
    1929             :             }
                                   /* reached only if the result matched none of the cases above */
    1930           0 :             break;
    1931             : 
    1932      108868 :         case OP_OR:
                                   /*
                                    * A definite YES on the left decides the result without looking
                                    * at the right operand.  Otherwise: right NO gives whatever the
                                    * left produced (NO or MAYBE), right YES gives YES, and right
                                    * MAYBE gives MAYBE.
                                    */
    1933      108868 :             lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
    1934             :                                         flags, chkcond);
    1935      108868 :             if (lmatch == TS_YES)
    1936       24180 :                 return TS_YES;
    1937       84688 :             switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
    1938             :             {
    1939       57426 :                 case TS_NO:
    1940       57426 :                     return lmatch;
    1941        7416 :                 case TS_YES:
    1942        7416 :                     return TS_YES;
    1943       19846 :                 case TS_MAYBE:
    1944       19846 :                     return TS_MAYBE;
    1945             :             }
                                   /* reached only if the result matched none of the cases above */
    1946           0 :             break;
    1947             : 
    1948      235152 :         case OP_PHRASE:
    1949             : 
    1950             :             /*
    1951             :              * If we get a MAYBE result, and the caller doesn't want that,
    1952             :              * convert it to NO.  It would be more consistent, perhaps, to
    1953             :              * return the result of TS_phrase_execute() verbatim and then
    1954             :              * convert MAYBE results at the top of the recursion.  But
    1955             :              * converting at the topmost phrase operator gives results that
    1956             :              * are bug-compatible with the old implementation, so do it like
    1957             :              * this for now.
    1958             :              */
                                   /* NULL: no ExecPhraseData output is requested from the phrase code */
    1959      235152 :             switch (TS_phrase_execute(curitem, arg, flags, chkcond, NULL))
    1960             :             {
    1961      180082 :                 case TS_NO:
    1962      180082 :                     return TS_NO;
    1963       29318 :                 case TS_YES:
    1964       29318 :                     return TS_YES;
    1965       25752 :                 case TS_MAYBE:
    1966       25752 :                     return (flags & TS_EXEC_PHRASE_NO_POS) ? TS_MAYBE : TS_NO;
    1967             :             }
                                   /* reached only if the result matched none of the cases above */
    1968           0 :             break;
    1969             : 
    1970           0 :         default:
    1971           0 :             elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
    1972             :     }
    1973             : 
    1974             :     /* not reachable, but keep compiler quiet */
    1975           0 :     return TS_NO;
    1976             : }
    1977             : 
    1978             : /*
    1979             :  * Evaluate tsquery and report locations of matching terms.
    1980             :  *
    1981             :  * This is like TS_execute except that it returns match locations not just
    1982             :  * success/failure status.  The callback function is required to provide
    1983             :  * position data (we report failure if it doesn't).
    1984             :  *
    1985             :  * On successful match, the result is a List of ExecPhraseData structs, one
    1986             :  * for each AND'ed term or phrase operator in the query.  Each struct includes
    1987             :  * a sorted array of lexeme positions matching that term.  (Recall that for
    1988             :  * phrase operators, the match includes width+1 lexemes, and the recorded
    1989             :  * position is that of the rightmost lexeme.)
    1990             :  *
    1991             :  * OR subexpressions are handled by union'ing their match locations into a
    1992             :  * single List element, which is valid since any of those locations contains
    1993             :  * a match.  However, when some of the OR'ed terms are phrase operators, we
    1994             :  * report the maximum width of any of the OR'ed terms, making such cases
    1995             :  * slightly imprecise in the conservative direction.  (For example, if the
    1996             :  * tsquery is "(A <-> B) | C", an occurrence of C in the data would be
    1997             :  * reported as though it includes the lexeme to the left of C.)
    1998             :  *
    1999             :  * Locations of NOT subexpressions are not reported.  (Obviously, there can
    2000             :  * be no successful NOT matches at top level, or the match would have failed.
    2001             :  * So this amounts to ignoring NOTs underneath ORs.)
    2002             :  *
    2003             :  * The result is NIL if no match, or if position data was not returned.
    2004             :  *
    2005             :  * Arguments are the same as for TS_execute, although flags is currently
    2006             :  * vestigial since none of the defined bits are sensible here.
    2007             :  */
    2008             : List *
    2009         362 : TS_execute_locations(QueryItem *curitem, void *arg,
    2010             :                      uint32 flags,
    2011             :                      TSExecuteCallback chkcond)
    2012             : {
    2013             :     List       *result;
    2014             : 
    2015             :     /* No flags supported, as yet */
                           /* flags is only checked here; it is not passed on to the recursion */
    2016             :     Assert(flags == TS_EXEC_EMPTY);
                           /* the recursion stores its location list through &result */
    2017         362 :     if (TS_execute_locations_recurse(curitem, arg, chkcond, &result))
    2018         128 :         return result;
                           /* no match: report NIL regardless of what was left in result */
    2019         234 :     return NIL;
    2020             : }
    2021             : 
    2022             : /*
    2023             :  * TS_execute_locations recursion for operators above any phrase operator.
    2024             :  * OP_PHRASE subexpressions can be passed off to TS_phrase_execute.
                        *
                        * Returns true if the subexpression at curitem matches, false otherwise.
                        * *locations is reset to NIL on entry and is filled with a List of
                        * ExecPhraseData only on some successful paths: a true result with
                        * *locations still NIL is possible (a satisfied NOT, or a phrase match
                        * whose output data is marked "negate").
    2025             :  */
    2026             : static bool
    2027        1070 : TS_execute_locations_recurse(QueryItem *curitem, void *arg,
    2028             :                              TSExecuteCallback chkcond,
    2029             :                              List **locations)
    2030             : {
    2031             :     bool        lmatch,
    2032             :                 rmatch;
    2033             :     List       *llocations,
    2034             :                *rlocations;
    2035             :     ExecPhraseData *data;
    2036             : 
    2037             :     /* since this function recurses, it could be driven to stack overflow */
    2038        1070 :     check_stack_depth();
    2039             : 
    2040             :     /* ... and let's check for query cancel while we're at it */
    2041        1070 :     CHECK_FOR_INTERRUPTS();
    2042             : 
    2043             :     /* Default locations result is empty */
    2044        1070 :     *locations = NIL;
    2045             : 
    2046        1070 :     if (curitem->type == QI_VAL)
    2047             :     {
                               /*
                                * Hand the callback a zeroed struct to fill in.  Only a definite
                                * TS_YES counts as a match here; on any other answer the struct is
                                * freed and failure is reported.
                                */
    2048         446 :         data = palloc0_object(ExecPhraseData);
    2049         446 :         if (chkcond(arg, (QueryOperand *) curitem, data) == TS_YES)
    2050             :         {
    2051         212 :             *locations = list_make1(data);
    2052         212 :             return true;
    2053             :         }
    2054         234 :         pfree(data);
    2055         234 :         return false;
    2056             :     }
    2057             : 
    2058         624 :     switch (curitem->qoperator.oper)
    2059             :     {
    2060          12 :         case OP_NOT:
                                   /*
                                    * The NOT succeeds exactly when its operand fails.  Whatever the
                                    * operand stored in llocations is not propagated; *locations
                                    * keeps the NIL assigned above.
                                    */
    2061          12 :             if (!TS_execute_locations_recurse(curitem + 1, arg, chkcond,
    2062             :                                               &llocations))
    2063           0 :                 return true;    /* we don't pass back any locations */
    2064          12 :             return false;
    2065             : 
    2066         528 :         case OP_AND:
                                   /*
                                    * Both operands must match; the right one is not evaluated when
                                    * the left one fails.  The result list is the left list followed
                                    * by the right list.
                                    */
    2067         528 :             if (!TS_execute_locations_recurse(curitem + curitem->qoperator.left,
    2068             :                                               arg, chkcond,
    2069             :                                               &llocations))
    2070         408 :                 return false;
    2071         120 :             if (!TS_execute_locations_recurse(curitem + 1,
    2072             :                                               arg, chkcond,
    2073             :                                               &rlocations))
    2074          54 :                 return false;
    2075          66 :             *locations = list_concat(llocations, rlocations);
    2076          66 :             return true;
    2077             : 
    2078          24 :         case OP_OR:
                                   /*
                                    * Both operands are always evaluated (no short-circuit), so
                                    * that the locations of each side are available for merging.
                                    */
    2079          24 :             lmatch = TS_execute_locations_recurse(curitem + curitem->qoperator.left,
    2080             :                                                   arg, chkcond,
    2081             :                                                   &llocations);
    2082          24 :             rmatch = TS_execute_locations_recurse(curitem + 1,
    2083             :                                                   arg, chkcond,
    2084             :                                                   &rlocations);
    2085          24 :             if (lmatch || rmatch)
    2086             :             {
    2087             :                 /*
    2088             :                  * We generate an AND'able location struct from each
    2089             :                  * combination of sub-matches, following the disjunctive law
    2090             :                  * (A & B) | (C & D) = (A | C) & (A | D) & (B | C) & (B | D).
    2091             :                  *
    2092             :                  * However, if either input didn't produce locations (i.e., it
    2093             :                  * failed or was a NOT), we must just return the other list.
    2094             :                  */
    2095          24 :                 if (llocations == NIL)
    2096           0 :                     *locations = rlocations;
    2097          24 :                 else if (rlocations == NIL)
    2098          12 :                     *locations = llocations;
    2099             :                 else
    2100             :                 {
    2101             :                     ListCell   *ll;
    2102             : 
                                           /* one merged struct per (left element, right element) pair */
    2103          24 :                     foreach(ll, llocations)
    2104             :                     {
    2105          12 :                         ExecPhraseData *ldata = (ExecPhraseData *) lfirst(ll);
    2106             :                         ListCell   *lr;
    2107             : 
    2108          24 :                         foreach(lr, rlocations)
    2109             :                         {
    2110          12 :                             ExecPhraseData *rdata = (ExecPhraseData *) lfirst(lr);
    2111             : 
                                                   /*
                                                    * Merge with zero offsets on both sides; the last
                                                    * argument is the sum of both inputs' position
                                                    * counts.
                                                    */
    2112          12 :                             data = palloc0_object(ExecPhraseData);
    2113          12 :                             (void) TS_phrase_output(data, ldata, rdata,
    2114             :                                                     TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
    2115             :                                                     0, 0,
    2116          12 :                                                     ldata->npos + rdata->npos);
    2117             :                             /* Report the larger width, as explained above. */
    2118          12 :                             data->width = Max(ldata->width, rdata->width);
    2119          12 :                             *locations = lappend(*locations, data);
    2120             :                         }
    2121             :                     }
    2122             :                 }
    2123             : 
    2124          24 :                 return true;
    2125             :             }
    2126           0 :             return false;
    2127             : 
    2128          60 :         case OP_PHRASE:
    2129             :             /* We can hand this off to TS_phrase_execute */
                                   /*
                                    * Only a definite TS_YES counts as a match.  A match whose data
                                    * is marked negate still returns true, but contributes no
                                    * locations.
                                    */
    2130          60 :             data = palloc0_object(ExecPhraseData);
    2131          60 :             if (TS_phrase_execute(curitem, arg, TS_EXEC_EMPTY, chkcond,
    2132             :                                   data) == TS_YES)
    2133             :             {
    2134          60 :                 if (!data->negate)
    2135          60 :                     *locations = list_make1(data);
    2136          60 :                 return true;
    2137             :             }
    2138           0 :             pfree(data);
    2139           0 :             return false;
    2140             : 
    2141           0 :         default:
    2142           0 :             elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
    2143             :     }
    2144             : 
    2145             :     /* not reachable, but keep compiler quiet */
    2146             :     return false;
    2147             : }
    2148             : 
    2149             : /*
    2150             :  * Detect whether a tsquery boolean expression requires any positive matches
    2151             :  * to values shown in the tsquery.
    2152             :  *
    2153             :  * This is needed to know whether a GIN index search requires full index scan.
    2154             :  * For example, 'x & !y' requires a match of x, so it's sufficient to scan
    2155             :  * entries for x; but 'x | !y' could match rows containing neither x nor y.
                        *
                        * Returns true if a positive match is required, false otherwise.  The
                        * answer depends only on the shape of the query tree starting at curitem.
    2156             :  */
    2157             : bool
    2158         834 : tsquery_requires_match(QueryItem *curitem)
    2159             : {
    2160             :     /* since this function recurses, it could be driven to stack overflow */
    2161         834 :     check_stack_depth();
    2162             : 
                           /* a bare value always requires a match of that value */
    2163         834 :     if (curitem->type == QI_VAL)
    2164         396 :         return true;
    2165             : 
    2166         438 :     switch (curitem->qoperator.oper)
    2167             :     {
    2168         168 :         case OP_NOT:
    2169             : 
    2170             :             /*
    2171             :              * Assume there are no required matches underneath a NOT.  For
    2172             :              * some cases with nested NOTs, we could prove there's a required
    2173             :              * match, but it seems unlikely to be worth the trouble.
    2174             :              */
    2175         168 :             return false;
    2176             : 
    2177         204 :         case OP_PHRASE:
    2178             : 
    2179             :             /*
    2180             :              * Treat OP_PHRASE as OP_AND here
    2181             :              */
                                   /* deliberate fall through into the OP_AND case */
    2182             :         case OP_AND:
    2183             :             /* If either side requires a match, we're good */
    2184         204 :             if (tsquery_requires_match(curitem + curitem->qoperator.left))
    2185         156 :                 return true;
    2186             :             else
    2187          48 :                 return tsquery_requires_match(curitem + 1);
    2188             : 
    2189          66 :         case OP_OR:
    2190             :             /* Both sides must require a match */
    2191          66 :             if (tsquery_requires_match(curitem + curitem->qoperator.left))
    2192          66 :                 return tsquery_requires_match(curitem + 1);
    2193             :             else
    2194           0 :                 return false;
    2195             : 
    2196           0 :         default:
    2197           0 :             elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
    2198             :     }
    2199             : 
    2200             :     /* not reachable, but keep compiler quiet */
    2201             :     return false;
    2202             : }
    2203             : 
    2204             : /*
    2205             :  * boolean operations
    2206             :  */
                       /*
                        * ts_match_qv: same test as ts_match_vq, but with the two arguments given
                        * in the opposite order.  It simply calls ts_match_vq with argument 1
                        * first and argument 0 second and returns that Datum.
                        */
    2207             : Datum
    2208          60 : ts_match_qv(PG_FUNCTION_ARGS)
    2209             : {
    2210          60 :     PG_RETURN_DATUM(DirectFunctionCall2(ts_match_vq,
    2211             :                                         PG_GETARG_DATUM(1),
    2212             :                                         PG_GETARG_DATUM(0)));
    2213             : }
    2214             : 
                       /*
                        * ts_match_vq: test whether a tsvector (argument 0) satisfies a tsquery
                        * (argument 1), returning a boolean Datum.
                        *
                        * A query whose size field is zero yields false.  Otherwise the query is
                        * evaluated by TS_execute with TS_EXEC_EMPTY flags, using
                        * checkcondition_str as the callback and a CHKVAL built from ARRPTR(val),
                        * val->size, STRPTR(val) and GETOPERAND(query) as its opaque argument.
                        */
    2215             : Datum
    2216      220080 : ts_match_vq(PG_FUNCTION_ARGS)
    2217             : {
    2218      220080 :     TSVector    val = PG_GETARG_TSVECTOR(0);
    2219      220080 :     TSQuery     query = PG_GETARG_TSQUERY(1);
    2220             :     CHKVAL      chkval;
    2221             :     bool        result;
    2222             : 
    2223             :     /* empty query matches nothing */
    2224      220080 :     if (!query->size)
    2225             :     {
    2226           0 :         PG_FREE_IF_COPY(val, 0);
    2227           0 :         PG_FREE_IF_COPY(query, 1);
    2228           0 :         PG_RETURN_BOOL(false);
    2229             :     }
    2230             : 
                           /* arrb/arre delimit the vector's WordEntry array: [arrb, arrb + size) */
    2231      220080 :     chkval.arrb = ARRPTR(val);
    2232      220080 :     chkval.arre = chkval.arrb + val->size;
    2233      220080 :     chkval.values = STRPTR(val);
    2234      220080 :     chkval.operand = GETOPERAND(query);
    2235      220080 :     result = TS_execute(GETQUERY(query),
    2236             :                         &chkval,
    2237             :                         TS_EXEC_EMPTY,
    2238             :                         checkcondition_str);
    2239             : 
                           /* release the argument copies, if any were made, before returning */
    2240      220080 :     PG_FREE_IF_COPY(val, 0);
    2241      220080 :     PG_FREE_IF_COPY(query, 1);
    2242      220080 :     PG_RETURN_BOOL(result);
    2243             : }
    2244             : 
                       /*
                        * ts_match_tt: match test where neither argument is a tsvector/tsquery
                        * yet.  Argument 0 is converted with the one-argument to_tsvector and
                        * argument 1 with the one-argument plainto_tsquery; the two results are
                        * then passed to ts_match_vq and freed before the boolean is returned.
                        */
    2245             : Datum
    2246           0 : ts_match_tt(PG_FUNCTION_ARGS)
    2247             : {
    2248             :     TSVector    vector;
    2249             :     TSQuery     query;
    2250             :     bool        res;
    2251             : 
    2252           0 :     vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
    2253             :                                                   PG_GETARG_DATUM(0)));
    2254           0 :     query = DatumGetTSQuery(DirectFunctionCall1(plainto_tsquery,
    2255             :                                                 PG_GETARG_DATUM(1)));
    2256             : 
    2257           0 :     res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
    2258             :                                            TSVectorGetDatum(vector),
    2259             :                                            TSQueryGetDatum(query)));
    2260             : 
                           /* both values were produced above, so they are freed unconditionally */
    2261           0 :     pfree(vector);
    2262           0 :     pfree(query);
    2263             : 
    2264           0 :     PG_RETURN_BOOL(res);
    2265             : }
    2266             : 
                       /*
                        * ts_match_tq: match test where argument 1 already is a tsquery but
                        * argument 0 still has to be converted with the one-argument to_tsvector.
                        * The converted vector and the query are passed to ts_match_vq.
                        */
    2267             : Datum
    2268           0 : ts_match_tq(PG_FUNCTION_ARGS)
    2269             : {
    2270             :     TSVector    vector;
    2271           0 :     TSQuery     query = PG_GETARG_TSQUERY(1);
    2272             :     bool        res;
    2273             : 
    2274           0 :     vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
    2275             :                                                   PG_GETARG_DATUM(0)));
    2276             : 
    2277           0 :     res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
    2278             :                                            TSVectorGetDatum(vector),
    2279             :                                            TSQueryGetDatum(query)));
    2280             : 
                           /* vector was built here; query is an argument and is freed only if copied */
    2281           0 :     pfree(vector);
    2282           0 :     PG_FREE_IF_COPY(query, 1);
    2283             : 
    2284           0 :     PG_RETURN_BOOL(res);
    2285             : }
    2286             : 
    2287             : /*
    2288             :  * ts_stat statistic function support
    2289             :  */
    2290             : 
    2291             : 
    2292             : /*
    2293             :  * Returns the number of positions in value 'wptr' within tsvector 'txt',
    2294             :  * that have a weight equal to one of the weights in 'weight' bitmask.
                        *
                        * Bit number k of 'weight' selects positions whose WEP_GETWEIGHT value
                        * is k.  The POSDATALEN/POSDATAPTR position data of 'wptr' is read
                        * unconditionally; the caller visible here (insertStatEntry) only calls
                        * this when wptr->haspos is set.
    2295             :  */
    2296             : static int
    2297        8178 : check_weight(TSVector txt, WordEntry *wptr, int8 weight)
    2298             : {
    2299        8178 :     int         len = POSDATALEN(txt, wptr);
    2300        8178 :     int         num = 0;
    2301        8178 :     WordEntryPos *ptr = POSDATAPTR(txt, wptr);
    2302             : 
                           /* walk all len positions, counting those whose weight bit is set */
    2303       16650 :     while (len--)
    2304             :     {
    2305        8472 :         if (weight & (1 << WEP_GETWEIGHT(*ptr)))
    2306          12 :             num++;
    2307        8472 :         ptr++;
    2308             :     }
    2309        8178 :     return num;
    2310             : }
    2311             : 
                       /*
                        * compareStatWord(a, e, t): compare the lexeme held in a (a->lexeme,
                        * a->lenlexeme) with the one that entry e designates inside t, i.e. the
                        * e->len bytes at STRPTR(t) + e->pos.  The result is whatever
                        * tsCompareString returns; insertStatEntry treats 0 as "equal" and uses
                        * the sign to pick a subtree.
                        * NOTE(review): the trailing "false" is presumably tsCompareString's
                        * prefix-match flag -- confirm against its declaration.
                        */
    2312             : #define compareStatWord(a,e,t)                          \
    2313             :     tsCompareString((a)->lexeme, (a)->lenlexeme,      \
    2314             :                     STRPTR(t) + (e)->pos, (e)->len,       \
    2315             :                     false)
    2316             : 
                       /*
                        * Record one lexeme of tsvector txt (the WordEntry at index off) in the
                        * binary tree rooted at stat->root.
                        *
                        * The entry's count n is computed first.  With stat->weight == 0 it is the
                        * entry's number of positions, or 1 when the entry has no position data.
                        * Otherwise it is the number of positions accepted by check_weight, or 0
                        * when the entry has no position data.  Entries with n == 0 are ignored.
                        *
                        * If the tree already holds a node for the lexeme, its ndoc is incremented
                        * and its nentry is increased by n.  Otherwise a new node (ndoc = 1,
                        * nentry = n) is allocated in persistentContext and linked below the last
                        * node visited.  stat->maxdepth is raised when the search went deeper than
                        * any previous one.
                        */
    2317             : static void
    2318      345624 : insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 off)
    2319             : {
    2320      345624 :     WordEntry  *we = ARRPTR(txt) + off;
    2321      345624 :     StatEntry  *node = stat->root,
    2322      345624 :                *pnode = NULL;
    2323             :     int         n,
    2324      345624 :                 res = 0;
    2325      345624 :     uint32      depth = 1;
    2326             : 
    2327      345624 :     if (stat->weight == 0)
    2328      172812 :         n = (we->haspos) ? POSDATALEN(txt, we) : 1;
    2329             :     else
    2330      172812 :         n = (we->haspos) ? check_weight(txt, we, stat->weight) : 0;
    2331             : 
    2332      345624 :     if (n == 0)
    2333      172806 :         return;                 /* nothing to insert */
    2334             : 
                           /*
                            * Descend from the root until the lexeme is found (node != NULL on
                            * exit) or we fall off the tree (node == NULL, pnode is the would-be
                            * parent and res tells which side to attach to).
                            */
    2335     1745394 :     while (node)
    2336             :     {
    2337     1738530 :         res = compareStatWord(node, we, txt);
    2338             : 
    2339     1738530 :         if (res == 0)
    2340             :         {
    2341      165954 :             break;
    2342             :         }
    2343             :         else
    2344             :         {
    2345     1572576 :             pnode = node;
    2346     1572576 :             node = (res < 0) ? node->left : node->right;
    2347             :         }
    2348     1572576 :         depth++;
    2349             :     }
    2350             : 
    2351      172818 :     if (depth > stat->maxdepth)
    2352         126 :         stat->maxdepth = depth;
    2353             : 
    2354      172818 :     if (node == NULL)
    2355             :     {
                               /* not found: the node is sized as header plus the lexeme's bytes */
    2356        6864 :         node = MemoryContextAlloc(persistentContext, STATENTRYHDRSZ + we->len);
    2357        6864 :         node->left = node->right = NULL;
    2358        6864 :         node->ndoc = 1;
    2359        6864 :         node->nentry = n;
    2360        6864 :         node->lenlexeme = we->len;
    2361        6864 :         memcpy(node->lexeme, STRPTR(txt) + we->pos, node->lenlexeme);
    2362             : 
                               /* pnode == NULL means the loop never ran, i.e. the tree was empty */
    2363        6864 :         if (pnode == NULL)
    2364             :         {
    2365          12 :             stat->root = node;
    2366             :         }
    2367             :         else
    2368             :         {
    2369        6852 :             if (res < 0)
    2370        3380 :                 pnode->left = node;
    2371             :             else
    2372        3472 :                 pnode->right = node;
    2373             :         }
    2374             :     }
    2375             :     else
    2376             :     {
                               /* found: fold this occurrence into the existing node's counters */
    2377      165954 :         node->ndoc++;
    2378      165954 :         node->nentry += n;
    2379             :     }
    2380             : }
    2381             : 
                       /*
                        * chooseNextStatEntry
                        *
                        * Recursively bisect the position range [low, high] and, at every level,
                        * hand the midpoint of the lower half and the midpoint of the upper half
                        * to insertStatEntry().
                        *
                        * persistentContext, stat, txt: passed through unchanged to
                        *      insertStatEntry() and to the recursive calls.
                        * low, high: bounds of the range still to be subdivided.
                        * offset: subtracted from each chosen position before it is used; a
                        *      position is skipped when it is below offset or when the shifted
                        *      value is not below txt->size.
                        *
                        * NOTE(review): the midpoint-first order presumably keeps the tree built
                        * by insertStatEntry() reasonably balanced even though the positions are
                        * visited from a sorted array -- confirm against insertStatEntry().
                        */
    2382             : static void
    2383      495384 : chooseNextStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt,
    2384             :                     uint32 low, uint32 high, uint32 offset)
    2385             : {
    2386             :     uint32      pos;
    2387      495384 :     uint32      middle = (low + high) >> 1;
    2388             : 
                           /*
                            * Midpoint of the lower half [low, middle].  The "pos >= offset" test
                            * keeps the unsigned subtraction "pos - offset" from wrapping around.
                            */
    2389      495384 :     pos = (low + middle) >> 1;
    2390      495384 :     if (low != middle && pos >= offset && pos - offset < txt->size)
    2391      170328 :         insertStatEntry(persistentContext, stat, txt, pos - offset);
                           /* midpoint of the upper half [middle + 1, high], same bounds checks */
    2392      495384 :     pos = (high + middle + 1) >> 1;
    2393      495384 :     if (middle + 1 != high && pos >= offset && pos - offset < txt->size)
    2394      169284 :         insertStatEntry(persistentContext, stat, txt, pos - offset);
    2395             : 
                           /* recurse only into halves that still span more than one position */
    2396      495384 :     if (low != middle)
    2397      247692 :         chooseNextStatEntry(persistentContext, stat, txt, low, middle, offset);
    2398      495384 :     if (high != middle + 1)
    2399      241680 :         chooseNextStatEntry(persistentContext, stat, txt, middle + 1, high, offset);
    2400      495384 : }
    2401             : 
    2402             : /*
    2403             :  * This is written like a custom aggregate function, because the
    2404             :  * original plan was to do just that. Unfortunately, an aggregate function
    2405             :  * can't return a set, so that plan was abandoned. If that limitation is
    2406             :  * lifted in the future, ts_stat could be a real aggregate function so that
    2407             :  * you could use it like this:
    2408             :  *
    2409             :  *   SELECT ts_stat(vector_column) FROM vector_table;
    2410             :  *
    2411             :  *  where vector_column is a tsvector-type column in vector_table.
    2412             :  */
    2413             : 
                       /*
                        * ts_accum
                        *
                        * Fold one tsvector value into the running statistics.
                        *
                        * persistentContext: memory context used when a new TSVectorStat has to
                        *      be created here; also passed on to insertStatEntry() and
                        *      chooseNextStatEntry().
                        * stat: statistics accumulated so far, or NULL to have a zeroed
                        *      TSVectorStat (with maxdepth = 1) allocated.
                        * data: Datum converted with DatumGetTSVector().
                        *
                        * Returns stat (the newly allocated one if the argument was NULL).
                        */
    2414             : static TSVectorStat *
    2415        6108 : ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
    2416             : {
    2417        6108 :     TSVector    txt = DatumGetTSVector(data);
    2418             :     uint32      i,
    2419        6108 :                 nbit = 0,
    2420             :                 offset;
    2421             : 
    2422        6108 :     if (stat == NULL)
    2423             :     {                           /* first call: create the state */
    2424           0 :         stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
    2425           0 :         stat->maxdepth = 1;
    2426             :     }
    2427             : 
    2428             :     /* simple check of correctness */
    2429        6108 :     if (txt == NULL || txt->size == 0)
    2430             :     {
                               /* free txt only when it is a separate copy of the datum */
    2431          96 :         if (txt && txt != (TSVector) DatumGetPointer(data))
    2432          96 :             pfree(txt);
    2433          96 :         return stat;
    2434             :     }
    2435             : 
                           /* count the bits needed to represent the highest position, size - 1 */
    2436        6012 :     i = txt->size - 1;
    2437       42720 :     for (; i > 0; i >>= 1)
    2438       36708 :         nbit++;
    2439             : 
                           /* nbit becomes the smallest power of two that is >= txt->size */
    2440        6012 :     nbit = 1 << nbit;
                           /* shift that places the txt->size real positions in the middle of [0, nbit) */
    2441        6012 :     offset = (nbit - txt->size) / 2;
    2442             : 
                           /*
                            * Insert the middle position first, then let chooseNextStatEntry()
                            * visit the midpoints of successively smaller halves.
                            *
                            * NOTE: unlike the early-return path above, txt is not pfree'd on this
                            * path.
                            */
    2443        6012 :     insertStatEntry(persistentContext, stat, txt, (nbit >> 1) - offset);
    2444        6012 :     chooseNextStatEntry(persistentContext, stat, txt, 0, nbit, offset);
    2445             : 
    2446        6012 :     return stat;
    2447             : }
    2448             : 
                       /*
                        * ts_setup_firstcall
                        *
                        * Prepare funcctx so that the accumulated statistics can be returned one
                        * row per call.
                        *
                        * Stores stat in funcctx->user_fctx, allocates the traversal stack and
                        * positions it on the leftmost node of the tree, and records the result
                        * tuple descriptor and its AttInMetadata in funcctx.  Everything allocated
                        * here is allocated while funcctx->multi_call_memory_ctx is the current
                        * memory context; the previous context is restored before returning.
                        *
                        * Raises an ERROR when the function's result type is not composite.
                        */
    2449             : static void
    2450          12 : ts_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx,
    2451             :                    TSVectorStat *stat)
    2452             : {
    2453             :     TupleDesc   tupdesc;
    2454             :     MemoryContext oldcontext;
    2455             :     StatEntry  *node;
    2456             : 
    2457          12 :     funcctx->user_fctx = (void *) stat;
    2458             : 
    2459          12 :     oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
    2460             : 
                           /*
                            * One slot more than maxdepth, all zeroed.  Note that
                            * walkStatEntryTree() reads stack[stackpos + 1], i.e. the slot above
                            * the current top.
                            */
    2461          12 :     stat->stack = palloc0(sizeof(StatEntry *) * (stat->maxdepth + 1));
    2462          12 :     stat->stackpos = 0;
    2463             : 
    2464          12 :     node = stat->root;
    2465             :     /* find leftmost value */
    2466          12 :     if (node == NULL)
    2467           0 :         stat->stack[stat->stackpos] = NULL;
    2468             :     else
    2469             :         for (;;)
    2470             :         {
                                   /* push every node met on the way down the left links */
    2471          48 :             stat->stack[stat->stackpos] = node;
    2472          48 :             if (node->left)
    2473             :             {
    2474          36 :                 stat->stackpos++;
    2475          36 :                 node = node->left;
    2476             :             }
    2477             :             else
    2478          12 :                 break;
    2479             :         }
    2480             :     Assert(stat->stackpos <= stat->maxdepth);
    2481             : 
    2482          12 :     if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
    2483           0 :         elog(ERROR, "return type must be a row type");
    2484          12 :     funcctx->tuple_desc = tupdesc;
    2485          12 :     funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
    2486             : 
    2487          12 :     MemoryContextSwitchTo(oldcontext);
    2488          12 : }
    2489             : 
                       /*
                        * walkStatEntryTree
                        *
                        * Advance the traversal kept in stat->stack / stat->stackpos and return
                        * the next node to report, or NULL when nothing is left (or the stack top
                        * is NULL).
                        *
                        * Nodes are produced in left subtree, node, right subtree order.  A node
                        * at the top of the stack whose ndoc is nonzero has not been reported yet
                        * and is returned as is; ts_process_call() sets ndoc to 0 once it has
                        * emitted a node, so ndoc == 0 serves as the "already returned" marker.
                        */
    2490             : static StatEntry *
    2491       13728 : walkStatEntryTree(TSVectorStat *stat)
    2492             : {
    2493       13728 :     StatEntry  *node = stat->stack[stat->stackpos];
    2494             : 
    2495       13728 :     if (node == NULL)
    2496           0 :         return NULL;
    2497             : 
    2498       13728 :     if (node->ndoc != 0)
    2499             :     {
    2500             :         /* return the entry itself: its left subtree has already been visited */
    2501        3392 :         return node;
    2502             :     }
                           /*
                            * Popping only decrements stackpos, so the slot above the top still
                            * holds the node most recently pushed at that depth.  If that is our
                            * right child, the right subtree has been walked already.
                            */
    2503       10336 :     else if (node->right && node->right != stat->stack[stat->stackpos + 1])
    2504             :     {
    2505             :         /* go on right sublink */
    2506        3472 :         stat->stackpos++;
    2507        3472 :         node = node->right;
    2508             : 
    2509             :         /* find most-left value */
    2510             :         for (;;)
    2511             :         {
    2512        6816 :             stat->stack[stat->stackpos] = node;
    2513        6816 :             if (node->left)
    2514             :             {
    2515        3344 :                 stat->stackpos++;
    2516        3344 :                 node = node->left;
    2517             :             }
    2518             :             else
    2519        3472 :                 break;
    2520             :         }
    2521        3472 :         Assert(stat->stackpos <= stat->maxdepth);
    2522             :     }
    2523             :     else
    2524             :     {
    2525             :         /* left subtree, the node itself and right subtree have all been returned */
    2526        6864 :         if (stat->stackpos == 0)
    2527          12 :             return NULL;
    2528             : 
                               /* pop this node and continue with its parent */
    2529        6852 :         stat->stackpos--;
    2530        6852 :         return walkStatEntryTree(stat);
    2531             :     }
    2532             : 
                           /* leftmost node of the right subtree we just descended into */
    2533        3472 :     return node;
    2534             : }
    2535             : 
                       /*
                        * ts_process_call
                        *
                        * Produce the next result row from the statistics tree stored in
                        * funcctx->user_fctx.
                        *
                        * Returns a tuple Datum with three columns built from C strings (the
                        * lexeme, ndoc and nentry), or (Datum) 0 when walkStatEntryTree() has no
                        * more entries.  The returned entry's ndoc is reset to 0 to mark it as
                        * visited, so the tree cannot be walked a second time with its counts
                        * intact.
                        */
    2536             : static Datum
    2537        6876 : ts_process_call(FuncCallContext *funcctx)
    2538             : {
    2539             :     TSVectorStat *st;
    2540             :     StatEntry  *entry;
    2541             : 
    2542        6876 :     st = (TSVectorStat *) funcctx->user_fctx;
    2543             : 
    2544        6876 :     entry = walkStatEntryTree(st);
    2545             : 
    2546        6876 :     if (entry != NULL)
    2547             :     {
    2548             :         Datum       result;
    2549             :         char       *values[3];
    2550             :         char        ndoc[16];
    2551             :         char        nentry[16];
    2552             :         HeapTuple   tuple;
    2553             : 
                               /* the stored lexeme is length-counted; make a NUL-terminated copy */
    2554        6864 :         values[0] = palloc(entry->lenlexeme + 1);
    2555        6864 :         memcpy(values[0], entry->lexeme, entry->lenlexeme);
    2556        6864 :         (values[0])[entry->lenlexeme] = '\0';
                               /*
                                * NOTE(review): both counters are printed with %d into 16-byte
                                * buffers; confirm that this matches the declared type of
                                * StatEntry.ndoc / StatEntry.nentry.
                                */
    2557        6864 :         sprintf(ndoc, "%d", entry->ndoc);
    2558        6864 :         values[1] = ndoc;
    2559        6864 :         sprintf(nentry, "%d", entry->nentry);
    2560        6864 :         values[2] = nentry;
    2561             : 
    2562        6864 :         tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
    2563        6864 :         result = HeapTupleGetDatum(tuple);
    2564             : 
    2565        6864 :         pfree(values[0]);
    2566             : 
    2567             :         /* mark entry as already visited */
    2568        6864 :         entry->ndoc = 0;
    2569             : 
    2570        6864 :         return result;
    2571             :     }
    2572             : 
    2573          12 :     return (Datum) 0;
    2574             : }
    2575             : 
                       /*
                        * ts_stat_sql
                        *
                        * Run the query contained in txt through SPI and accumulate statistics
                        * over the values it returns.
                        *
                        * persistentContext: memory context in which the TSVectorStat is
                        *      allocated; also handed to ts_accum() for every row.
                        * txt: text of the query.  Its result must have exactly one column whose
                        *      type is binary-coercible to tsvector, otherwise an ERROR is raised.
                        * ws: optional string of weight letters, or NULL.  A/a, B/b, C/c and D/d
                        *      set bits 3, 2, 1 and 0 of stat->weight respectively; any other
                        *      character leaves the mask unchanged.
                        *
                        * Rows are fetched through a cursor in batches of 100; NULL values are
                        * skipped.  Returns the TSVectorStat.
                        *
                        * The callers in this file invoke SPI_connect() before and SPI_finish()
                        * after calling this function.
                        */
    2576             : static TSVectorStat *
    2577          12 : ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
    2578             : {
    2579          12 :     char       *query = text_to_cstring(txt);
    2580             :     TSVectorStat *stat;
    2581             :     bool        isnull;
    2582             :     Portal      portal;
    2583             :     SPIPlanPtr  plan;
    2584             : 
    2585          12 :     if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
    2586             :         /* internal error */
    2587           0 :         elog(ERROR, "SPI_prepare(\"%s\") failed", query);
    2588             : 
    2589          12 :     if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
    2590             :         /* internal error */
    2591           0 :         elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
    2592             : 
                           /* fetch the first batch; its tuple table is also used to check the result shape */
    2593          12 :     SPI_cursor_fetch(portal, true, 100);
    2594             : 
    2595          12 :     if (SPI_tuptable == NULL ||
    2596          12 :         SPI_tuptable->tupdesc->natts != 1 ||
    2597          12 :         !IsBinaryCoercible(SPI_gettypeid(SPI_tuptable->tupdesc, 1),
    2598             :                            TSVECTOROID))
    2599           0 :         ereport(ERROR,
    2600             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    2601             :                  errmsg("ts_stat query must return one tsvector column")));
    2602             : 
    2603          12 :     stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
    2604          12 :     stat->maxdepth = 1;
    2605             : 
    2606          12 :     if (ws)
    2607             :     {
    2608             :         char       *buf;
    2609             : 
                               /* walk ws one (possibly multibyte) character at a time */
    2610           6 :         buf = VARDATA_ANY(ws);
    2611          18 :         while (buf - VARDATA_ANY(ws) < VARSIZE_ANY_EXHDR(ws))
    2612             :         {
                                   /* only characters that pg_mblen() reports as one byte long are examined */
    2613          12 :             if (pg_mblen(buf) == 1)
    2614             :             {
    2615          12 :                 switch (*buf)
    2616             :                 {
    2617           6 :                     case 'A':
    2618             :                     case 'a':
    2619           6 :                         stat->weight |= 1 << 3;
    2620           6 :                         break;
    2621           6 :                     case 'B':
    2622             :                     case 'b':
    2623           6 :                         stat->weight |= 1 << 2;
    2624           6 :                         break;
    2625           0 :                     case 'C':
    2626             :                     case 'c':
    2627           0 :                         stat->weight |= 1 << 1;
    2628           0 :                         break;
    2629           0 :                     case 'D':
    2630             :                     case 'd':
    2631           0 :                         stat->weight |= 1;
    2632           0 :                         break;
    2633           0 :                     default:
                                           /* OR-ing with 0 is a no-op: unrecognized letters are ignored */
    2634           0 :                         stat->weight |= 0;
    2635             :                 }
    2636           0 :             }
    2637          12 :             buf += pg_mblen(buf);
    2638             :         }
    2639             :     }
    2640             : 
                           /* consume the cursor batch by batch until a fetch returns no rows */
    2641          84 :     while (SPI_processed > 0)
    2642             :     {
    2643             :         uint64      i;
    2644             : 
    2645        6180 :         for (i = 0; i < SPI_processed; i++)
    2646             :         {
    2647        6108 :             Datum       data = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
    2648             : 
    2649        6108 :             if (!isnull)
    2650        6108 :                 stat = ts_accum(persistentContext, stat, data);
    2651             :         }
    2652             : 
    2653          72 :         SPI_freetuptable(SPI_tuptable);
    2654          72 :         SPI_cursor_fetch(portal, true, 100);
    2655             :     }
    2656             : 
                           /* release the tuple table of the final (empty) fetch, then the cursor, plan and query string */
    2657          12 :     SPI_freetuptable(SPI_tuptable);
    2658          12 :     SPI_cursor_close(portal);
    2659          12 :     SPI_freeplan(plan);
    2660          12 :     pfree(query);
    2661             : 
    2662          12 :     return stat;
    2663             : }
    2664             : 
                       /*
                        * ts_stat1
                        *
                        * Set-returning function taking one text argument, the query to run.
                        *
                        * On the first call the query is executed via ts_stat_sql() with no
                        * weight string (ws = NULL), inside an SPI_connect()/SPI_finish() pair,
                        * and the resulting statistics are attached to the function context by
                        * ts_setup_firstcall().  Every call, including the first, then returns
                        * the next row from ts_process_call(), or finishes the set when that
                        * returns (Datum) 0.
                        */
    2665             : Datum
    2666        6864 : ts_stat1(PG_FUNCTION_ARGS)
    2667             : {
    2668             :     FuncCallContext *funcctx;
    2669             :     Datum       result;
    2670             : 
    2671        6864 :     if (SRF_IS_FIRSTCALL())
    2672             :     {
    2673             :         TSVectorStat *stat;
    2674           6 :         text       *txt = PG_GETARG_TEXT_PP(0);
    2675             : 
    2676           6 :         funcctx = SRF_FIRSTCALL_INIT();
    2677           6 :         SPI_connect();
                               /* the statistics are built in the multi-call context so they outlive this call */
    2678           6 :         stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, NULL);
    2679           6 :         PG_FREE_IF_COPY(txt, 0);
    2680           6 :         ts_setup_firstcall(fcinfo, funcctx, stat);
    2681           6 :         SPI_finish();
    2682             :     }
    2683             : 
    2684        6864 :     funcctx = SRF_PERCALL_SETUP();
    2685        6864 :     if ((result = ts_process_call(funcctx)) != (Datum) 0)
    2686        6858 :         SRF_RETURN_NEXT(funcctx, result);
    2687           6 :     SRF_RETURN_DONE(funcctx);
    2688             : }
    2689             : 
                       /*
                        * ts_stat2
                        *
                        * Set-returning function taking two text arguments: the query to run and
                        * a string of weight letters.
                        *
                        * Works like ts_stat1() except that the second argument is passed to
                        * ts_stat_sql() as ws, where it is turned into the weight mask stored in
                        * the TSVectorStat.
                        */
    2690             : Datum
    2691          12 : ts_stat2(PG_FUNCTION_ARGS)
    2692             : {
    2693             :     FuncCallContext *funcctx;
    2694             :     Datum       result;
    2695             : 
    2696          12 :     if (SRF_IS_FIRSTCALL())
    2697             :     {
    2698             :         TSVectorStat *stat;
    2699           6 :         text       *txt = PG_GETARG_TEXT_PP(0);
    2700           6 :         text       *ws = PG_GETARG_TEXT_PP(1);
    2701             : 
    2702           6 :         funcctx = SRF_FIRSTCALL_INIT();
    2703           6 :         SPI_connect();
                               /* the statistics are built in the multi-call context so they outlive this call */
    2704           6 :         stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, ws);
    2705           6 :         PG_FREE_IF_COPY(txt, 0);
    2706           6 :         PG_FREE_IF_COPY(ws, 1);
    2707           6 :         ts_setup_firstcall(fcinfo, funcctx, stat);
    2708           6 :         SPI_finish();
    2709             :     }
    2710             : 
    2711          12 :     funcctx = SRF_PERCALL_SETUP();
    2712          12 :     if ((result = ts_process_call(funcctx)) != (Datum) 0)
    2713           6 :         SRF_RETURN_NEXT(funcctx, result);
    2714           6 :     SRF_RETURN_DONE(funcctx);
    2715             : }
    2716             : 
    2717             : 
    2718             : /*
    2719             :  * Triggers for automatic update of a tsvector column from text column(s)
    2720             :  *
    2721             :  * Trigger arguments are either
    2722             :  *      name of tsvector col, name of tsconfig to use, name(s) of text col(s)
    2723             :  *      name of tsvector col, name of regconfig col, name(s) of text col(s)
    2724             :  * ie, tsconfig can either be specified by name, or indirectly as the
    2725             :  * contents of a regconfig field in the row.  If the name is used, it must
    2726             :  * be explicitly schema-qualified.
    2727             :  */
    2728             : Datum
    2729          18 : tsvector_update_trigger_byid(PG_FUNCTION_ARGS)
    2730             : {
                           /*
                            * config_column = false: the second trigger argument is taken as the
                            * (schema-qualified) name of the text search configuration.
                            */
    2731          18 :     return tsvector_update_trigger(fcinfo, false);
    2732             : }
    2733             : 
    2734             : Datum
    2735           0 : tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS)
    2736             : {
                           /*
                            * config_column = true: the second trigger argument names a regconfig
                            * column of the row, whose value selects the configuration.
                            */
    2737           0 :     return tsvector_update_trigger(fcinfo, true);
    2738             : }
    2739             : 
                       /*
                        * tsvector_update_trigger
                        *
                        * Shared implementation behind tsvector_update_trigger_byid() and
                        * tsvector_update_trigger_bycolumn().
                        *
                        * config_column: when true, trigger argument 1 names a column that must
                        *      be binary-coercible to regconfig and non-null in the row; when
                        *      false, trigger argument 1 is parsed as a qualified name that must
                        *      include a schema.
                        *
                        * Trigger argument 0 names the target column (binary-coercible to
                        * tsvector); arguments 2.. name source columns (binary-coercible to text).
                        *
                        * Returns the row to store as a pointer Datum: a modified copy with the
                        * tsvector column replaced when an update is needed, otherwise the
                        * incoming tuple.
                        *
                        * Raises an ERROR when not called as a row-level BEFORE INSERT/UPDATE
                        * trigger, when fewer than three arguments are given, or when a named
                        * column is missing or has the wrong type.
                        */
    2740             : static Datum
    2741          18 : tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
    2742             : {
    2743             :     TriggerData *trigdata;
    2744             :     Trigger    *trigger;
    2745             :     Relation    rel;
    2746          18 :     HeapTuple   rettuple = NULL;
    2747             :     int         tsvector_attr_num,
    2748             :                 i;
    2749             :     ParsedText  prs;
    2750             :     Datum       datum;
    2751             :     bool        isnull;
    2752             :     text       *txt;
    2753             :     Oid         cfgId;
    2754             :     bool        update_needed;
    2755             : 
    2756             :     /* Check call context */
    2757          18 :     if (!CALLED_AS_TRIGGER(fcinfo)) /* internal error */
    2758           0 :         elog(ERROR, "tsvector_update_trigger: not fired by trigger manager");
    2759             : 
    2760          18 :     trigdata = (TriggerData *) fcinfo->context;
    2761          18 :     if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
    2762           0 :         elog(ERROR, "tsvector_update_trigger: must be fired for row");
    2763          18 :     if (!TRIGGER_FIRED_BEFORE(trigdata->tg_event))
    2764           0 :         elog(ERROR, "tsvector_update_trigger: must be fired BEFORE event");
    2765             : 
                           /* pick the tuple to work on; an INSERT always needs the tsvector computed */
    2766          18 :     if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
    2767             :     {
    2768          12 :         rettuple = trigdata->tg_trigtuple;
    2769          12 :         update_needed = true;
    2770             :     }
    2771           6 :     else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
    2772             :     {
    2773           6 :         rettuple = trigdata->tg_newtuple;
    2774           6 :         update_needed = false;  /* computed below */
    2775             :     }
    2776             :     else
    2777           0 :         elog(ERROR, "tsvector_update_trigger: must be fired for INSERT or UPDATE");
    2778             : 
    2779          18 :     trigger = trigdata->tg_trigger;
    2780          18 :     rel = trigdata->tg_relation;
    2781             : 
    2782          18 :     if (trigger->tgnargs < 3)
    2783           0 :         elog(ERROR, "tsvector_update_trigger: arguments must be tsvector_field, ts_config, text_field1, ...)");
    2784             : 
    2785             :     /* Find the target tsvector column */
    2786          18 :     tsvector_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
    2787          18 :     if (tsvector_attr_num == SPI_ERROR_NOATTRIBUTE)
    2788           0 :         ereport(ERROR,
    2789             :                 (errcode(ERRCODE_UNDEFINED_COLUMN),
    2790             :                  errmsg("tsvector column \"%s\" does not exist",
    2791             :                         trigger->tgargs[0])));
    2792             :     /* This will effectively reject system columns, so no separate test: */
    2793          18 :     if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, tsvector_attr_num),
    2794             :                            TSVECTOROID))
    2795           0 :         ereport(ERROR,
    2796             :                 (errcode(ERRCODE_DATATYPE_MISMATCH),
    2797             :                  errmsg("column \"%s\" is not of tsvector type",
    2798             :                         trigger->tgargs[0])));
    2799             : 
    2800             :     /* Find the configuration to use */
    2801          18 :     if (config_column)
    2802             :     {
    2803             :         int         config_attr_num;
    2804             : 
                               /* the configuration OID is read from a regconfig column of the row */
    2805           0 :         config_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[1]);
    2806           0 :         if (config_attr_num == SPI_ERROR_NOATTRIBUTE)
    2807           0 :             ereport(ERROR,
    2808             :                     (errcode(ERRCODE_UNDEFINED_COLUMN),
    2809             :                      errmsg("configuration column \"%s\" does not exist",
    2810             :                             trigger->tgargs[1])));
    2811           0 :         if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, config_attr_num),
    2812             :                                REGCONFIGOID))
    2813           0 :             ereport(ERROR,
    2814             :                     (errcode(ERRCODE_DATATYPE_MISMATCH),
    2815             :                      errmsg("column \"%s\" is not of regconfig type",
    2816             :                             trigger->tgargs[1])));
    2817             : 
    2818           0 :         datum = SPI_getbinval(rettuple, rel->rd_att, config_attr_num, &isnull);
    2819           0 :         if (isnull)
    2820           0 :             ereport(ERROR,
    2821             :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
    2822             :                      errmsg("configuration column \"%s\" must not be null",
    2823             :                             trigger->tgargs[1])));
    2824           0 :         cfgId = DatumGetObjectId(datum);
    2825             :     }
    2826             :     else
    2827             :     {
    2828             :         List       *names;
    2829             : 
                               /* the configuration is looked up by the name given in the trigger argument */
    2830          18 :         names = stringToQualifiedNameList(trigger->tgargs[1], NULL);
    2831             :         /* require a schema so that results are not search path dependent */
    2832          18 :         if (list_length(names) < 2)
    2833           0 :             ereport(ERROR,
    2834             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    2835             :                      errmsg("text search configuration name \"%s\" must be schema-qualified",
    2836             :                             trigger->tgargs[1])));
    2837          18 :         cfgId = get_ts_config_oid(names, false);
    2838             :     }
    2839             : 
    2840             :     /* initialize parse state */
    2841          18 :     prs.lenwords = 32;
    2842          18 :     prs.curwords = 0;
    2843          18 :     prs.pos = 0;
    2844          18 :     prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
    2845             : 
    2846             :     /* find all words in indexable column(s) */
    2847          36 :     for (i = 2; i < trigger->tgnargs; i++)
    2848             :     {
    2849             :         int         numattr;
    2850             : 
    2851          18 :         numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
    2852          18 :         if (numattr == SPI_ERROR_NOATTRIBUTE)
    2853           0 :             ereport(ERROR,
    2854             :                     (errcode(ERRCODE_UNDEFINED_COLUMN),
    2855             :                      errmsg("column \"%s\" does not exist",
    2856             :                             trigger->tgargs[i])));
    2857          18 :         if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, numattr), TEXTOID))
    2858           0 :             ereport(ERROR,
    2859             :                     (errcode(ERRCODE_DATATYPE_MISMATCH),
    2860             :                      errmsg("column \"%s\" is not of a character type",
    2861             :                             trigger->tgargs[i])));
    2862             : 
                               /*
                                * An update is needed as soon as one of the source columns is a
                                * member of tg_updatedcols; the attribute number is offset by
                                * FirstLowInvalidHeapAttributeNumber for the bitmap lookup.
                                */
    2863          18 :         if (bms_is_member(numattr - FirstLowInvalidHeapAttributeNumber, trigdata->tg_updatedcols))
    2864           6 :             update_needed = true;
    2865             : 
                               /* a NULL source column contributes no words */
    2866          18 :         datum = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
    2867          18 :         if (isnull)
    2868           6 :             continue;
    2869             : 
    2870          12 :         txt = DatumGetTextPP(datum);
    2871             : 
    2872          12 :         parsetext(cfgId, &prs, VARDATA_ANY(txt), VARSIZE_ANY_EXHDR(txt));
    2873             : 
                               /* free txt only when DatumGetTextPP() returned a separate copy */
    2874          12 :         if (txt != (text *) DatumGetPointer(datum))
    2875           0 :             pfree(txt);
    2876             :     }
    2877             : 
    2878          18 :     if (update_needed)
    2879             :     {
    2880             :         /* make tsvector value */
    2881          18 :         datum = TSVectorGetDatum(make_tsvector(&prs));
    2882          18 :         isnull = false;
    2883             : 
    2884             :         /* and insert it into tuple */
    2885          18 :         rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
    2886             :                                              1, &tsvector_attr_num,
    2887             :                                              &datum, &isnull);
    2888             : 
                               /*
                                * NOTE(review): the tsvector is freed right after the tuple is
                                * built, so heap_modify_tuple_by_cols() presumably copies the
                                * value into the new tuple -- confirm.
                                */
    2889          18 :         pfree(DatumGetPointer(datum));
    2890             :     }
    2891             : 
    2892          18 :     return PointerGetDatum(rettuple);
    2893             : }

Generated by: LCOV version 1.14