LCOV - code coverage report
Current view: top level - src/backend/utils/adt - tsvector_op.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13beta1 Lines: 941 1089 86.4 %
Date: 2020-06-03 11:07:14 Functions: 40 49 81.6 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * tsvector_op.c
       4             :  *    operations over tsvector
       5             :  *
       6             :  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
       7             :  *
       8             :  *
       9             :  * IDENTIFICATION
      10             :  *    src/backend/utils/adt/tsvector_op.c
      11             :  *
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : #include "postgres.h"
      15             : 
      16             : #include <limits.h>
      17             : 
      18             : #include "access/htup_details.h"
      19             : #include "catalog/namespace.h"
      20             : #include "catalog/pg_type.h"
      21             : #include "commands/trigger.h"
      22             : #include "executor/spi.h"
      23             : #include "funcapi.h"
      24             : #include "lib/qunique.h"
      25             : #include "mb/pg_wchar.h"
      26             : #include "miscadmin.h"
      27             : #include "parser/parse_coerce.h"
      28             : #include "tsearch/ts_utils.h"
      29             : #include "utils/array.h"
      30             : #include "utils/builtins.h"
      31             : #include "utils/lsyscache.h"
      32             : #include "utils/regproc.h"
      33             : #include "utils/rel.h"
      34             : 
      35             : 
      36             : typedef struct
      37             : {
      38             :     WordEntry  *arrb;
      39             :     WordEntry  *arre;
      40             :     char       *values;
      41             :     char       *operand;
      42             : } CHKVAL;
      43             : 
      44             : 
      45             : typedef struct StatEntry
      46             : {
      47             :     uint32      ndoc;           /* zero indicates that we were already here
      48             :                                  * while walking through the tree */
      49             :     uint32      nentry;
      50             :     struct StatEntry *left;
      51             :     struct StatEntry *right;
      52             :     uint32      lenlexeme;
      53             :     char        lexeme[FLEXIBLE_ARRAY_MEMBER];
      54             : } StatEntry;
      55             : 
      56             : #define STATENTRYHDRSZ  (offsetof(StatEntry, lexeme))
      57             : 
      58             : typedef struct
      59             : {
      60             :     int32       weight;
      61             : 
      62             :     uint32      maxdepth;
      63             : 
      64             :     StatEntry **stack;
      65             :     uint32      stackpos;
      66             : 
      67             :     StatEntry  *root;
      68             : } TSVectorStat;
      69             : 
      70             : /* TS_execute requires ternary logic to handle NOT with phrase matches */
      71             : typedef enum
      72             : {
      73             :     TS_NO,                      /* definitely no match */
      74             :     TS_YES,                     /* definitely does match */
      75             :     TS_MAYBE                    /* can't verify match for lack of pos data */
      76             : } TSTernaryValue;
      77             : 
      78             : 
      79             : static TSTernaryValue TS_execute_recurse(QueryItem *curitem, void *arg,
      80             :                                          uint32 flags,
      81             :                                          TSExecuteCallback chkcond);
      82             : static int  tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len);
      83             : static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
      84             : 
      85             : 
      86             : /*
      87             :  * Order: haspos, len, word, for all positions (pos, weight)
      88             :  */
      89             : static int
      90           2 : silly_cmp_tsvector(const TSVector a, const TSVector b)
      91             : {
      92           2 :     if (VARSIZE(a) < VARSIZE(b))
      93           0 :         return -1;
      94           2 :     else if (VARSIZE(a) > VARSIZE(b))
      95           0 :         return 1;
      96           2 :     else if (a->size < b->size)
      97           0 :         return -1;
      98           2 :     else if (a->size > b->size)
      99           0 :         return 1;
     100             :     else
     101             :     {
     102           2 :         WordEntry  *aptr = ARRPTR(a);
     103           2 :         WordEntry  *bptr = ARRPTR(b);
     104           2 :         int         i = 0;
     105             :         int         res;
     106             : 
     107             : 
     108           8 :         for (i = 0; i < a->size; i++)
     109             :         {
     110           6 :             if (aptr->haspos != bptr->haspos)
     111             :             {
     112           0 :                 return (aptr->haspos > bptr->haspos) ? -1 : 1;
     113             :             }
     114           6 :             else if ((res = tsCompareString(STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) != 0)
     115             :             {
     116           0 :                 return res;
     117             :             }
     118           6 :             else if (aptr->haspos)
     119             :             {
     120           0 :                 WordEntryPos *ap = POSDATAPTR(a, aptr);
     121           0 :                 WordEntryPos *bp = POSDATAPTR(b, bptr);
     122             :                 int         j;
     123             : 
     124           0 :                 if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
     125           0 :                     return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;
     126             : 
     127           0 :                 for (j = 0; j < POSDATALEN(a, aptr); j++)
     128             :                 {
     129           0 :                     if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
     130             :                     {
     131           0 :                         return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1;
     132             :                     }
     133           0 :                     else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp))
     134             :                     {
     135           0 :                         return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1;
     136             :                     }
     137           0 :                     ap++, bp++;
     138             :                 }
     139             :             }
     140             : 
     141           6 :             aptr++;
     142           6 :             bptr++;
     143             :         }
     144             :     }
     145             : 
     146           2 :     return 0;
     147             : }
     148             : 
     149             : #define TSVECTORCMPFUNC( type, action, ret )            \
     150             : Datum                                                   \
     151             : tsvector_##type(PG_FUNCTION_ARGS)                       \
     152             : {                                                       \
     153             :     TSVector    a = PG_GETARG_TSVECTOR(0);              \
     154             :     TSVector    b = PG_GETARG_TSVECTOR(1);              \
     155             :     int         res = silly_cmp_tsvector(a, b);         \
     156             :     PG_FREE_IF_COPY(a,0);                               \
     157             :     PG_FREE_IF_COPY(b,1);                               \
     158             :     PG_RETURN_##ret( res action 0 );                    \
     159             : }   \
     160             : /* keep compiler quiet - no extra ; */                  \
     161             : extern int no_such_variable
     162             : 
     163           0 : TSVECTORCMPFUNC(lt, <, BOOL);
     164           0 : TSVECTORCMPFUNC(le, <=, BOOL);
     165           2 : TSVECTORCMPFUNC(eq, ==, BOOL);
     166           0 : TSVECTORCMPFUNC(ge, >=, BOOL);
     167           0 : TSVECTORCMPFUNC(gt, >, BOOL);
     168           0 : TSVECTORCMPFUNC(ne, !=, BOOL);
     169           0 : TSVECTORCMPFUNC(cmp, +, INT32);
     170             : 
     171             : Datum
     172          44 : tsvector_strip(PG_FUNCTION_ARGS)
     173             : {
     174          44 :     TSVector    in = PG_GETARG_TSVECTOR(0);
     175             :     TSVector    out;
     176             :     int         i,
     177          44 :                 len = 0;
     178          44 :     WordEntry  *arrin = ARRPTR(in),
     179             :                *arrout;
     180             :     char       *cur;
     181             : 
     182         180 :     for (i = 0; i < in->size; i++)
     183         136 :         len += arrin[i].len;
     184             : 
     185          44 :     len = CALCDATASIZE(in->size, len);
     186          44 :     out = (TSVector) palloc0(len);
     187          44 :     SET_VARSIZE(out, len);
     188          44 :     out->size = in->size;
     189          44 :     arrout = ARRPTR(out);
     190          44 :     cur = STRPTR(out);
     191         180 :     for (i = 0; i < in->size; i++)
     192             :     {
     193         136 :         memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
     194         136 :         arrout[i].haspos = 0;
     195         136 :         arrout[i].len = arrin[i].len;
     196         136 :         arrout[i].pos = cur - STRPTR(out);
     197         136 :         cur += arrout[i].len;
     198             :     }
     199             : 
     200          44 :     PG_FREE_IF_COPY(in, 0);
     201          44 :     PG_RETURN_POINTER(out);
     202             : }
     203             : 
     204             : Datum
     205           4 : tsvector_length(PG_FUNCTION_ARGS)
     206             : {
     207           4 :     TSVector    in = PG_GETARG_TSVECTOR(0);
     208           4 :     int32       ret = in->size;
     209             : 
     210           4 :     PG_FREE_IF_COPY(in, 0);
     211           4 :     PG_RETURN_INT32(ret);
     212             : }
     213             : 
     214             : Datum
     215           8 : tsvector_setweight(PG_FUNCTION_ARGS)
     216             : {
     217           8 :     TSVector    in = PG_GETARG_TSVECTOR(0);
     218           8 :     char        cw = PG_GETARG_CHAR(1);
     219             :     TSVector    out;
     220             :     int         i,
     221             :                 j;
     222             :     WordEntry  *entry;
     223             :     WordEntryPos *p;
     224           8 :     int         w = 0;
     225             : 
     226           8 :     switch (cw)
     227             :     {
     228           0 :         case 'A':
     229             :         case 'a':
     230           0 :             w = 3;
     231           0 :             break;
     232           0 :         case 'B':
     233             :         case 'b':
     234           0 :             w = 2;
     235           0 :             break;
     236           8 :         case 'C':
     237             :         case 'c':
     238           8 :             w = 1;
     239           8 :             break;
     240           0 :         case 'D':
     241             :         case 'd':
     242           0 :             w = 0;
     243           0 :             break;
     244           0 :         default:
     245             :             /* internal error */
     246           0 :             elog(ERROR, "unrecognized weight: %d", cw);
     247             :     }
     248             : 
     249           8 :     out = (TSVector) palloc(VARSIZE(in));
     250           8 :     memcpy(out, in, VARSIZE(in));
     251           8 :     entry = ARRPTR(out);
     252           8 :     i = out->size;
     253          40 :     while (i--)
     254             :     {
     255          32 :         if ((j = POSDATALEN(out, entry)) != 0)
     256             :         {
     257          32 :             p = POSDATAPTR(out, entry);
     258         112 :             while (j--)
     259             :             {
     260          80 :                 WEP_SETWEIGHT(*p, w);
     261          80 :                 p++;
     262             :             }
     263             :         }
     264          32 :         entry++;
     265             :     }
     266             : 
     267           8 :     PG_FREE_IF_COPY(in, 0);
     268           8 :     PG_RETURN_POINTER(out);
     269             : }
     270             : 
     271             : /*
     272             :  * setweight(tsin tsvector, char_weight "char", lexemes "text"[])
     273             :  *
     274             :  * Assign weight w to elements of tsin that are listed in lexemes.
     275             :  */
     276             : Datum
     277          20 : tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
     278             : {
     279          20 :     TSVector    tsin = PG_GETARG_TSVECTOR(0);
     280          20 :     char        char_weight = PG_GETARG_CHAR(1);
     281          20 :     ArrayType  *lexemes = PG_GETARG_ARRAYTYPE_P(2);
     282             : 
     283             :     TSVector    tsout;
     284             :     int         i,
     285             :                 j,
     286             :                 nlexemes,
     287             :                 weight;
     288             :     WordEntry  *entry;
     289             :     Datum      *dlexemes;
     290             :     bool       *nulls;
     291             : 
     292          20 :     switch (char_weight)
     293             :     {
     294           0 :         case 'A':
     295             :         case 'a':
     296           0 :             weight = 3;
     297           0 :             break;
     298           0 :         case 'B':
     299             :         case 'b':
     300           0 :             weight = 2;
     301           0 :             break;
     302          20 :         case 'C':
     303             :         case 'c':
     304          20 :             weight = 1;
     305          20 :             break;
     306           0 :         case 'D':
     307             :         case 'd':
     308           0 :             weight = 0;
     309           0 :             break;
     310           0 :         default:
     311             :             /* internal error */
     312           0 :             elog(ERROR, "unrecognized weight: %c", char_weight);
     313             :     }
     314             : 
     315          20 :     tsout = (TSVector) palloc(VARSIZE(tsin));
     316          20 :     memcpy(tsout, tsin, VARSIZE(tsin));
     317          20 :     entry = ARRPTR(tsout);
     318             : 
     319          20 :     deconstruct_array(lexemes, TEXTOID, -1, false, TYPALIGN_INT,
     320             :                       &dlexemes, &nulls, &nlexemes);
     321             : 
     322             :     /*
     323             :      * Assuming that lexemes array is significantly shorter than tsvector we
     324             :      * can iterate through lexemes performing binary search of each lexeme
     325             :      * from lexemes in tsvector.
     326             :      */
     327          52 :     for (i = 0; i < nlexemes; i++)
     328             :     {
     329             :         char       *lex;
     330             :         int         lex_len,
     331             :                     lex_pos;
     332             : 
     333          36 :         if (nulls[i])
     334           4 :             ereport(ERROR,
     335             :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
     336             :                      errmsg("lexeme array may not contain nulls")));
     337             : 
     338          32 :         lex = VARDATA(dlexemes[i]);
     339          32 :         lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
     340          32 :         lex_pos = tsvector_bsearch(tsout, lex, lex_len);
     341             : 
     342          32 :         if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0)
     343             :         {
     344          16 :             WordEntryPos *p = POSDATAPTR(tsout, entry + lex_pos);
     345             : 
     346          52 :             while (j--)
     347             :             {
     348          36 :                 WEP_SETWEIGHT(*p, weight);
     349          36 :                 p++;
     350             :             }
     351             :         }
     352             :     }
     353             : 
     354          16 :     PG_FREE_IF_COPY(tsin, 0);
     355          16 :     PG_FREE_IF_COPY(lexemes, 2);
     356             : 
     357          16 :     PG_RETURN_POINTER(tsout);
     358             : }
     359             : 
     360             : #define compareEntry(pa, a, pb, b) \
     361             :     tsCompareString((pa) + (a)->pos, (a)->len,    \
     362             :                     (pb) + (b)->pos, (b)->len,    \
     363             :                     false)
     364             : 
     365             : /*
     366             :  * Add positions from src to dest after offsetting them by maxpos.
     367             :  * Return the number added (might be less than expected due to overflow)
     368             :  */
     369             : static int32
     370           8 : add_pos(TSVector src, WordEntry *srcptr,
     371             :         TSVector dest, WordEntry *destptr,
     372             :         int32 maxpos)
     373             : {
     374           8 :     uint16     *clen = &_POSVECPTR(dest, destptr)->npos;
     375             :     int         i;
     376           8 :     uint16      slen = POSDATALEN(src, srcptr),
     377             :                 startlen;
     378           8 :     WordEntryPos *spos = POSDATAPTR(src, srcptr),
     379           8 :                *dpos = POSDATAPTR(dest, destptr);
     380             : 
     381           8 :     if (!destptr->haspos)
     382           0 :         *clen = 0;
     383             : 
     384           8 :     startlen = *clen;
     385           8 :     for (i = 0;
     386          16 :          i < slen && *clen < MAXNUMPOS &&
     387           8 :          (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1);
     388           8 :          i++)
     389             :     {
     390           8 :         WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
     391           8 :         WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
     392           8 :         (*clen)++;
     393             :     }
     394             : 
     395           8 :     if (*clen != startlen)
     396           8 :         destptr->haspos = 1;
     397           8 :     return *clen - startlen;
     398             : }
     399             : 
     400             : /*
     401             :  * Perform binary search of given lexeme in TSVector.
     402             :  * Returns lexeme position in TSVector's entry array or -1 if lexeme wasn't
     403             :  * found.
     404             :  */
     405             : static int
     406         132 : tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len)
     407             : {
     408         132 :     WordEntry  *arrin = ARRPTR(tsv);
     409         132 :     int         StopLow = 0,
     410         132 :                 StopHigh = tsv->size,
     411             :                 StopMiddle,
     412             :                 cmp;
     413             : 
     414         336 :     while (StopLow < StopHigh)
     415             :     {
     416         304 :         StopMiddle = (StopLow + StopHigh) / 2;
     417             : 
     418         608 :         cmp = tsCompareString(lexeme, lexeme_len,
     419         304 :                               STRPTR(tsv) + arrin[StopMiddle].pos,
     420         304 :                               arrin[StopMiddle].len,
     421             :                               false);
     422             : 
     423         304 :         if (cmp < 0)
     424         128 :             StopHigh = StopMiddle;
     425         176 :         else if (cmp > 0)
     426          76 :             StopLow = StopMiddle + 1;
     427             :         else                    /* found it */
     428         100 :             return StopMiddle;
     429             :     }
     430             : 
     431          32 :     return -1;
     432             : }
     433             : 
     434             : /*
     435             :  * qsort comparator functions
     436             :  */
     437             : 
     438             : static int
     439          44 : compare_int(const void *va, const void *vb)
     440             : {
     441          44 :     int         a = *((const int *) va);
     442          44 :     int         b = *((const int *) vb);
     443             : 
     444          44 :     if (a == b)
     445           8 :         return 0;
     446          36 :     return (a > b) ? 1 : -1;
     447             : }
     448             : 
     449             : static int
     450          68 : compare_text_lexemes(const void *va, const void *vb)
     451             : {
     452          68 :     Datum       a = *((const Datum *) va);
     453          68 :     Datum       b = *((const Datum *) vb);
     454          68 :     char       *alex = VARDATA_ANY(a);
     455          68 :     int         alex_len = VARSIZE_ANY_EXHDR(a);
     456          68 :     char       *blex = VARDATA_ANY(b);
     457          68 :     int         blex_len = VARSIZE_ANY_EXHDR(b);
     458             : 
     459          68 :     return tsCompareString(alex, alex_len, blex, blex_len, false);
     460             : }
     461             : 
     462             : /*
     463             :  * Internal routine to delete lexemes from TSVector by array of offsets.
     464             :  *
     465             :  * int *indices_to_delete -- array of lexeme offsets to delete (modified here!)
     466             :  * int indices_count -- size of that array
     467             :  *
     468             :  * Returns new TSVector without given lexemes along with their positions
     469             :  * and weights.
     470             :  */
     471             : static TSVector
     472          40 : tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
     473             :                            int indices_count)
     474             : {
     475             :     TSVector    tsout;
     476          40 :     WordEntry  *arrin = ARRPTR(tsv),
     477             :                *arrout;
     478          40 :     char       *data = STRPTR(tsv),
     479             :                *dataout;
     480             :     int         i,              /* index in arrin */
     481             :                 j,              /* index in arrout */
     482             :                 k,              /* index in indices_to_delete */
     483             :                 curoff;         /* index in dataout area */
     484             : 
     485             :     /*
     486             :      * Sort the filter array to simplify membership checks below.  Also, get
     487             :      * rid of any duplicate entries, so that we can assume that indices_count
     488             :      * is exactly equal to the number of lexemes that will be removed.
     489             :      */
     490          40 :     if (indices_count > 1)
     491             :     {
     492          16 :         qsort(indices_to_delete, indices_count, sizeof(int), compare_int);
     493          16 :         indices_count = qunique(indices_to_delete, indices_count, sizeof(int),
     494             :                                 compare_int);
     495             :     }
     496             : 
     497             :     /*
     498             :      * Here we overestimate tsout size, since we don't know how much space is
     499             :      * used by the deleted lexeme(s).  We will set exact size below.
     500             :      */
     501          40 :     tsout = (TSVector) palloc0(VARSIZE(tsv));
     502             : 
     503             :     /* This count must be correct because STRPTR(tsout) relies on it. */
     504          40 :     tsout->size = tsv->size - indices_count;
     505             : 
     506             :     /*
     507             :      * Copy tsv to tsout, skipping lexemes listed in indices_to_delete.
     508             :      */
     509          40 :     arrout = ARRPTR(tsout);
     510          40 :     dataout = STRPTR(tsout);
     511          40 :     curoff = 0;
     512         240 :     for (i = j = k = 0; i < tsv->size; i++)
     513             :     {
     514             :         /*
     515             :          * If current i is present in indices_to_delete, skip this lexeme.
     516             :          * Since indices_to_delete is already sorted, we only need to check
     517             :          * the current (k'th) entry.
     518             :          */
     519         200 :         if (k < indices_count && i == indices_to_delete[k])
     520             :         {
     521          56 :             k++;
     522          56 :             continue;
     523             :         }
     524             : 
     525             :         /* Copy lexeme and its positions and weights */
     526         144 :         memcpy(dataout + curoff, data + arrin[i].pos, arrin[i].len);
     527         144 :         arrout[j].haspos = arrin[i].haspos;
     528         144 :         arrout[j].len = arrin[i].len;
     529         144 :         arrout[j].pos = curoff;
     530         144 :         curoff += arrin[i].len;
     531         144 :         if (arrin[i].haspos)
     532             :         {
     533         104 :             int         len = POSDATALEN(tsv, arrin + i) * sizeof(WordEntryPos)
     534             :             + sizeof(uint16);
     535             : 
     536         104 :             curoff = SHORTALIGN(curoff);
     537         208 :             memcpy(dataout + curoff,
     538         104 :                    STRPTR(tsv) + SHORTALIGN(arrin[i].pos + arrin[i].len),
     539             :                    len);
     540         104 :             curoff += len;
     541             :         }
     542             : 
     543         144 :         j++;
     544             :     }
     545             : 
     546             :     /*
     547             :      * k should now be exactly equal to indices_count. If it isn't then the
     548             :      * caller provided us with indices outside of [0, tsv->size) range and
     549             :      * estimation of tsout's size is wrong.
     550             :      */
     551             :     Assert(k == indices_count);
     552             : 
     553          40 :     SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, curoff));
     554          40 :     return tsout;
     555             : }
     556             : 
     557             : /*
     558             :  * Delete given lexeme from tsvector.
     559             :  * Implementation of user-level ts_delete(tsvector, text).
     560             :  */
     561             : Datum
     562          24 : tsvector_delete_str(PG_FUNCTION_ARGS)
     563             : {
     564          24 :     TSVector    tsin = PG_GETARG_TSVECTOR(0),
     565             :                 tsout;
     566          24 :     text       *tlexeme = PG_GETARG_TEXT_PP(1);
     567          24 :     char       *lexeme = VARDATA_ANY(tlexeme);
     568          24 :     int         lexeme_len = VARSIZE_ANY_EXHDR(tlexeme),
     569             :                 skip_index;
     570             : 
     571          24 :     if ((skip_index = tsvector_bsearch(tsin, lexeme, lexeme_len)) == -1)
     572           8 :         PG_RETURN_POINTER(tsin);
     573             : 
     574          16 :     tsout = tsvector_delete_by_indices(tsin, &skip_index, 1);
     575             : 
     576          16 :     PG_FREE_IF_COPY(tsin, 0);
     577          16 :     PG_FREE_IF_COPY(tlexeme, 1);
     578          16 :     PG_RETURN_POINTER(tsout);
     579             : }
     580             : 
     581             : /*
     582             :  * Delete given array of lexemes from tsvector.
     583             :  * Implementation of user-level ts_delete(tsvector, text[]).
     584             :  */
     585             : Datum
     586          28 : tsvector_delete_arr(PG_FUNCTION_ARGS)
     587             : {
     588          28 :     TSVector    tsin = PG_GETARG_TSVECTOR(0),
     589             :                 tsout;
     590          28 :     ArrayType  *lexemes = PG_GETARG_ARRAYTYPE_P(1);
     591             :     int         i,
     592             :                 nlex,
     593             :                 skip_count,
     594             :                *skip_indices;
     595             :     Datum      *dlexemes;
     596             :     bool       *nulls;
     597             : 
     598          28 :     deconstruct_array(lexemes, TEXTOID, -1, false, TYPALIGN_INT,
     599             :                       &dlexemes, &nulls, &nlex);
     600             : 
     601             :     /*
     602             :      * In typical use case array of lexemes to delete is relatively small. So
     603             :      * here we optimize things for that scenario: iterate through lexarr
     604             :      * performing binary search of each lexeme from lexarr in tsvector.
     605             :      */
     606          28 :     skip_indices = palloc0(nlex * sizeof(int));
     607         104 :     for (i = skip_count = 0; i < nlex; i++)
     608             :     {
     609             :         char       *lex;
     610             :         int         lex_len,
     611             :                     lex_pos;
     612             : 
     613          80 :         if (nulls[i])
     614           4 :             ereport(ERROR,
     615             :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
     616             :                      errmsg("lexeme array may not contain nulls")));
     617             : 
     618          76 :         lex = VARDATA(dlexemes[i]);
     619          76 :         lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
     620          76 :         lex_pos = tsvector_bsearch(tsin, lex, lex_len);
     621             : 
     622          76 :         if (lex_pos >= 0)
     623          52 :             skip_indices[skip_count++] = lex_pos;
     624             :     }
     625             : 
     626          24 :     tsout = tsvector_delete_by_indices(tsin, skip_indices, skip_count);
     627             : 
     628          24 :     pfree(skip_indices);
     629          24 :     PG_FREE_IF_COPY(tsin, 0);
     630          24 :     PG_FREE_IF_COPY(lexemes, 1);
     631             : 
     632          24 :     PG_RETURN_POINTER(tsout);
     633             : }
     634             : 
     635             : /*
     636             :  * Expand tsvector as table with following columns:
     637             :  *     lexeme: lexeme text
     638             :  *     positions: integer array of lexeme positions
     639             :  *     weights: char array of weights corresponding to positions
     640             :  */
     641             : Datum
     642         120 : tsvector_unnest(PG_FUNCTION_ARGS)
     643             : {
     644             :     FuncCallContext *funcctx;
     645             :     TSVector    tsin;
     646             : 
     647         120 :     if (SRF_IS_FIRSTCALL())
     648             :     {
     649             :         MemoryContext oldcontext;
     650             :         TupleDesc   tupdesc;
     651             : 
     652          20 :         funcctx = SRF_FIRSTCALL_INIT();
     653          20 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
     654             : 
     655          20 :         tupdesc = CreateTemplateTupleDesc(3);
     656          20 :         TupleDescInitEntry(tupdesc, (AttrNumber) 1, "lexeme",
     657             :                            TEXTOID, -1, 0);
     658          20 :         TupleDescInitEntry(tupdesc, (AttrNumber) 2, "positions",
     659             :                            INT2ARRAYOID, -1, 0);
     660          20 :         TupleDescInitEntry(tupdesc, (AttrNumber) 3, "weights",
     661             :                            TEXTARRAYOID, -1, 0);
     662          20 :         funcctx->tuple_desc = BlessTupleDesc(tupdesc);
     663             : 
     664          20 :         funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);
     665             : 
     666          20 :         MemoryContextSwitchTo(oldcontext);
     667             :     }
     668             : 
     669         120 :     funcctx = SRF_PERCALL_SETUP();
     670         120 :     tsin = (TSVector) funcctx->user_fctx;
     671             : 
     672         120 :     if (funcctx->call_cntr < tsin->size)
     673             :     {
     674         100 :         WordEntry  *arrin = ARRPTR(tsin);
     675         100 :         char       *data = STRPTR(tsin);
     676             :         HeapTuple   tuple;
     677             :         int         j,
     678         100 :                     i = funcctx->call_cntr;
     679         100 :         bool        nulls[] = {false, false, false};
     680             :         Datum       values[3];
     681             : 
     682         100 :         values[0] = PointerGetDatum(cstring_to_text_with_len(data + arrin[i].pos, arrin[i].len));
     683             : 
     684         100 :         if (arrin[i].haspos)
     685             :         {
     686             :             WordEntryPosVector *posv;
     687             :             Datum      *positions;
     688             :             Datum      *weights;
     689             :             char        weight;
     690             : 
     691             :             /*
     692             :              * Internally tsvector stores position and weight in the same
     693             :              * uint16 (2 bits for weight, 14 for position). Here we extract
     694             :              * that in two separate arrays.
     695             :              */
     696          60 :             posv = _POSVECPTR(tsin, arrin + i);
     697          60 :             positions = palloc(posv->npos * sizeof(Datum));
     698          60 :             weights = palloc(posv->npos * sizeof(Datum));
     699         168 :             for (j = 0; j < posv->npos; j++)
     700             :             {
     701         108 :                 positions[j] = Int16GetDatum(WEP_GETPOS(posv->pos[j]));
     702         108 :                 weight = 'D' - WEP_GETWEIGHT(posv->pos[j]);
     703         108 :                 weights[j] = PointerGetDatum(cstring_to_text_with_len(&weight,
     704             :                                                                       1));
     705             :             }
     706             : 
     707          60 :             values[1] = PointerGetDatum(construct_array(positions, posv->npos,
     708             :                                                         INT2OID, 2, true, TYPALIGN_SHORT));
     709          60 :             values[2] = PointerGetDatum(construct_array(weights, posv->npos,
     710             :                                                         TEXTOID, -1, false, TYPALIGN_INT));
     711             :         }
     712             :         else
     713             :         {
     714          40 :             nulls[1] = nulls[2] = true;
     715             :         }
     716             : 
     717         100 :         tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
     718         100 :         SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
     719             :     }
     720             :     else
     721             :     {
     722          20 :         SRF_RETURN_DONE(funcctx);
     723             :     }
     724             : }
     725             : 
     726             : /*
     727             :  * Convert tsvector to array of lexemes.
     728             :  */
     729             : Datum
     730           8 : tsvector_to_array(PG_FUNCTION_ARGS)
     731             : {
     732           8 :     TSVector    tsin = PG_GETARG_TSVECTOR(0);
     733           8 :     WordEntry  *arrin = ARRPTR(tsin);
     734             :     Datum      *elements;
     735             :     int         i;
     736             :     ArrayType  *array;
     737             : 
     738           8 :     elements = palloc(tsin->size * sizeof(Datum));
     739             : 
     740          48 :     for (i = 0; i < tsin->size; i++)
     741             :     {
     742          40 :         elements[i] = PointerGetDatum(cstring_to_text_with_len(STRPTR(tsin) + arrin[i].pos,
     743             :                                                                arrin[i].len));
     744             :     }
     745             : 
     746           8 :     array = construct_array(elements, tsin->size, TEXTOID, -1, false, TYPALIGN_INT);
     747             : 
     748           8 :     pfree(elements);
     749           8 :     PG_FREE_IF_COPY(tsin, 0);
     750           8 :     PG_RETURN_POINTER(array);
     751             : }
     752             : 
     753             : /*
     754             :  * Build tsvector from array of lexemes.
     755             :  */
     756             : Datum
     757          12 : array_to_tsvector(PG_FUNCTION_ARGS)
     758             : {
     759          12 :     ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
     760             :     TSVector    tsout;
     761             :     Datum      *dlexemes;
     762             :     WordEntry  *arrout;
     763             :     bool       *nulls;
     764             :     int         nitems,
     765             :                 i,
     766             :                 tslen,
     767          12 :                 datalen = 0;
     768             :     char       *cur;
     769             : 
     770          12 :     deconstruct_array(v, TEXTOID, -1, false, TYPALIGN_INT, &dlexemes, &nulls, &nitems);
     771             : 
     772             :     /* Reject nulls (maybe we should just ignore them, instead?) */
     773          64 :     for (i = 0; i < nitems; i++)
     774             :     {
     775          56 :         if (nulls[i])
     776           4 :             ereport(ERROR,
     777             :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
     778             :                      errmsg("lexeme array may not contain nulls")));
     779             :     }
     780             : 
     781             :     /* Sort and de-dup, because this is required for a valid tsvector. */
     782           8 :     if (nitems > 1)
     783             :     {
     784           8 :         qsort(dlexemes, nitems, sizeof(Datum), compare_text_lexemes);
     785           8 :         nitems = qunique(dlexemes, nitems, sizeof(Datum),
     786             :                          compare_text_lexemes);
     787             :     }
     788             : 
     789             :     /* Calculate space needed for surviving lexemes. */
     790          40 :     for (i = 0; i < nitems; i++)
     791          32 :         datalen += VARSIZE(dlexemes[i]) - VARHDRSZ;
     792           8 :     tslen = CALCDATASIZE(nitems, datalen);
     793             : 
     794             :     /* Allocate and fill tsvector. */
     795           8 :     tsout = (TSVector) palloc0(tslen);
     796           8 :     SET_VARSIZE(tsout, tslen);
     797           8 :     tsout->size = nitems;
     798             : 
     799           8 :     arrout = ARRPTR(tsout);
     800           8 :     cur = STRPTR(tsout);
     801          40 :     for (i = 0; i < nitems; i++)
     802             :     {
     803          32 :         char       *lex = VARDATA(dlexemes[i]);
     804          32 :         int         lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
     805             : 
     806          32 :         memcpy(cur, lex, lex_len);
     807          32 :         arrout[i].haspos = 0;
     808          32 :         arrout[i].len = lex_len;
     809          32 :         arrout[i].pos = cur - STRPTR(tsout);
     810          32 :         cur += lex_len;
     811             :     }
     812             : 
     813           8 :     PG_FREE_IF_COPY(v, 0);
     814           8 :     PG_RETURN_POINTER(tsout);
     815             : }
     816             : 
     817             : /*
     818             :  * ts_filter(): keep only lexemes with given weights in tsvector.
     819             :  */
     820             : Datum
     821          12 : tsvector_filter(PG_FUNCTION_ARGS)
     822             : {
     823          12 :     TSVector    tsin = PG_GETARG_TSVECTOR(0),
     824             :                 tsout;
     825          12 :     ArrayType  *weights = PG_GETARG_ARRAYTYPE_P(1);
     826          12 :     WordEntry  *arrin = ARRPTR(tsin),
     827             :                *arrout;
     828          12 :     char       *datain = STRPTR(tsin),
     829             :                *dataout;
     830             :     Datum      *dweights;
     831             :     bool       *nulls;
     832             :     int         nweights;
     833             :     int         i,
     834             :                 j;
     835          12 :     int         cur_pos = 0;
     836          12 :     char        mask = 0;
     837             : 
     838          12 :     deconstruct_array(weights, CHAROID, 1, true, TYPALIGN_CHAR,
     839             :                       &dweights, &nulls, &nweights);
     840             : 
     841          28 :     for (i = 0; i < nweights; i++)
     842             :     {
     843             :         char        char_weight;
     844             : 
     845          20 :         if (nulls[i])
     846           4 :             ereport(ERROR,
     847             :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
     848             :                      errmsg("weight array may not contain nulls")));
     849             : 
     850          16 :         char_weight = DatumGetChar(dweights[i]);
     851          16 :         switch (char_weight)
     852             :         {
     853          12 :             case 'A':
     854             :             case 'a':
     855          12 :                 mask = mask | 8;
     856          12 :                 break;
     857           4 :             case 'B':
     858             :             case 'b':
     859           4 :                 mask = mask | 4;
     860           4 :                 break;
     861           0 :             case 'C':
     862             :             case 'c':
     863           0 :                 mask = mask | 2;
     864           0 :                 break;
     865           0 :             case 'D':
     866             :             case 'd':
     867           0 :                 mask = mask | 1;
     868           0 :                 break;
     869           0 :             default:
     870           0 :                 ereport(ERROR,
     871             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     872             :                          errmsg("unrecognized weight: \"%c\"", char_weight)));
     873             :         }
     874             :     }
     875             : 
     876           8 :     tsout = (TSVector) palloc0(VARSIZE(tsin));
     877           8 :     tsout->size = tsin->size;
     878           8 :     arrout = ARRPTR(tsout);
     879           8 :     dataout = STRPTR(tsout);
     880             : 
     881          72 :     for (i = j = 0; i < tsin->size; i++)
     882             :     {
     883             :         WordEntryPosVector *posvin,
     884             :                    *posvout;
     885          64 :         int         npos = 0;
     886             :         int         k;
     887             : 
     888          64 :         if (!arrin[i].haspos)
     889          20 :             continue;
     890             : 
     891          44 :         posvin = _POSVECPTR(tsin, arrin + i);
     892          44 :         posvout = (WordEntryPosVector *)
     893          44 :             (dataout + SHORTALIGN(cur_pos + arrin[i].len));
     894             : 
     895          88 :         for (k = 0; k < posvin->npos; k++)
     896             :         {
     897          44 :             if (mask & (1 << WEP_GETWEIGHT(posvin->pos[k])))
     898          20 :                 posvout->pos[npos++] = posvin->pos[k];
     899             :         }
     900             : 
     901             :         /* if no satisfactory positions found, skip lexeme */
     902          44 :         if (!npos)
     903          24 :             continue;
     904             : 
     905          20 :         arrout[j].haspos = true;
     906          20 :         arrout[j].len = arrin[i].len;
     907          20 :         arrout[j].pos = cur_pos;
     908             : 
     909          20 :         memcpy(dataout + cur_pos, datain + arrin[i].pos, arrin[i].len);
     910          20 :         posvout->npos = npos;
     911          20 :         cur_pos += SHORTALIGN(arrin[i].len);
     912          20 :         cur_pos += POSDATALEN(tsout, arrout + j) * sizeof(WordEntryPos) +
     913             :             sizeof(uint16);
     914          20 :         j++;
     915             :     }
     916             : 
     917           8 :     tsout->size = j;
     918           8 :     if (dataout != STRPTR(tsout))
     919           8 :         memmove(STRPTR(tsout), dataout, cur_pos);
     920             : 
     921           8 :     SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, cur_pos));
     922             : 
     923           8 :     PG_FREE_IF_COPY(tsin, 0);
     924           8 :     PG_RETURN_POINTER(tsout);
     925             : }
     926             : 
     927             : Datum
     928           8 : tsvector_concat(PG_FUNCTION_ARGS)
     929             : {
     930           8 :     TSVector    in1 = PG_GETARG_TSVECTOR(0);
     931           8 :     TSVector    in2 = PG_GETARG_TSVECTOR(1);
     932             :     TSVector    out;
     933             :     WordEntry  *ptr;
     934             :     WordEntry  *ptr1,
     935             :                *ptr2;
     936             :     WordEntryPos *p;
     937           8 :     int         maxpos = 0,
     938             :                 i,
     939             :                 j,
     940             :                 i1,
     941             :                 i2,
     942             :                 dataoff,
     943             :                 output_bytes,
     944             :                 output_size;
     945             :     char       *data,
     946             :                *data1,
     947             :                *data2;
     948             : 
     949             :     /* Get max position in in1; we'll need this to offset in2's positions */
     950           8 :     ptr = ARRPTR(in1);
     951           8 :     i = in1->size;
     952          20 :     while (i--)
     953             :     {
     954          12 :         if ((j = POSDATALEN(in1, ptr)) != 0)
     955             :         {
     956          12 :             p = POSDATAPTR(in1, ptr);
     957          24 :             while (j--)
     958             :             {
     959          12 :                 if (WEP_GETPOS(*p) > maxpos)
     960           8 :                     maxpos = WEP_GETPOS(*p);
     961          12 :                 p++;
     962             :             }
     963             :         }
     964          12 :         ptr++;
     965             :     }
     966             : 
     967           8 :     ptr1 = ARRPTR(in1);
     968           8 :     ptr2 = ARRPTR(in2);
     969           8 :     data1 = STRPTR(in1);
     970           8 :     data2 = STRPTR(in2);
     971           8 :     i1 = in1->size;
     972           8 :     i2 = in2->size;
     973             : 
     974             :     /*
     975             :      * Conservative estimate of space needed.  We might need all the data in
     976             :      * both inputs, and conceivably add a pad byte before position data for
     977             :      * each item where there was none before.
     978             :      */
     979           8 :     output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;
     980             : 
     981           8 :     out = (TSVector) palloc0(output_bytes);
     982           8 :     SET_VARSIZE(out, output_bytes);
     983             : 
     984             :     /*
     985             :      * We must make out->size valid so that STRPTR(out) is sensible.  We'll
     986             :      * collapse out any unused space at the end.
     987             :      */
     988           8 :     out->size = in1->size + in2->size;
     989             : 
     990           8 :     ptr = ARRPTR(out);
     991           8 :     data = STRPTR(out);
     992           8 :     dataoff = 0;
     993          20 :     while (i1 && i2)
     994             :     {
     995          12 :         int         cmp = compareEntry(data1, ptr1, data2, ptr2);
     996             : 
     997          12 :         if (cmp < 0)
     998             :         {                       /* in1 first */
     999           4 :             ptr->haspos = ptr1->haspos;
    1000           4 :             ptr->len = ptr1->len;
    1001           4 :             memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
    1002           4 :             ptr->pos = dataoff;
    1003           4 :             dataoff += ptr1->len;
    1004           4 :             if (ptr->haspos)
    1005             :             {
    1006           4 :                 dataoff = SHORTALIGN(dataoff);
    1007           4 :                 memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
    1008           4 :                 dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
    1009             :             }
    1010             : 
    1011           4 :             ptr++;
    1012           4 :             ptr1++;
    1013           4 :             i1--;
    1014             :         }
    1015           8 :         else if (cmp > 0)
    1016             :         {                       /* in2 first */
    1017           4 :             ptr->haspos = ptr2->haspos;
    1018           4 :             ptr->len = ptr2->len;
    1019           4 :             memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
    1020           4 :             ptr->pos = dataoff;
    1021           4 :             dataoff += ptr2->len;
    1022           4 :             if (ptr->haspos)
    1023             :             {
    1024           0 :                 int         addlen = add_pos(in2, ptr2, out, ptr, maxpos);
    1025             : 
    1026           0 :                 if (addlen == 0)
    1027           0 :                     ptr->haspos = 0;
    1028             :                 else
    1029             :                 {
    1030           0 :                     dataoff = SHORTALIGN(dataoff);
    1031           0 :                     dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
    1032             :                 }
    1033             :             }
    1034             : 
    1035           4 :             ptr++;
    1036           4 :             ptr2++;
    1037           4 :             i2--;
    1038             :         }
    1039             :         else
    1040             :         {
    1041           4 :             ptr->haspos = ptr1->haspos | ptr2->haspos;
    1042           4 :             ptr->len = ptr1->len;
    1043           4 :             memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
    1044           4 :             ptr->pos = dataoff;
    1045           4 :             dataoff += ptr1->len;
    1046           4 :             if (ptr->haspos)
    1047             :             {
    1048           4 :                 if (ptr1->haspos)
    1049             :                 {
    1050           4 :                     dataoff = SHORTALIGN(dataoff);
    1051           4 :                     memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
    1052           4 :                     dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
    1053           4 :                     if (ptr2->haspos)
    1054           4 :                         dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
    1055             :                 }
    1056             :                 else            /* must have ptr2->haspos */
    1057             :                 {
    1058           0 :                     int         addlen = add_pos(in2, ptr2, out, ptr, maxpos);
    1059             : 
    1060           0 :                     if (addlen == 0)
    1061           0 :                         ptr->haspos = 0;
    1062             :                     else
    1063             :                     {
    1064           0 :                         dataoff = SHORTALIGN(dataoff);
    1065           0 :                         dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
    1066             :                     }
    1067             :                 }
    1068             :             }
    1069             : 
    1070           4 :             ptr++;
    1071           4 :             ptr1++;
    1072           4 :             ptr2++;
    1073           4 :             i1--;
    1074           4 :             i2--;
    1075             :         }
    1076             :     }
    1077             : 
    1078          12 :     while (i1)
    1079             :     {
    1080           4 :         ptr->haspos = ptr1->haspos;
    1081           4 :         ptr->len = ptr1->len;
    1082           4 :         memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
    1083           4 :         ptr->pos = dataoff;
    1084           4 :         dataoff += ptr1->len;
    1085           4 :         if (ptr->haspos)
    1086             :         {
    1087           4 :             dataoff = SHORTALIGN(dataoff);
    1088           4 :             memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
    1089           4 :             dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
    1090             :         }
    1091             : 
    1092           4 :         ptr++;
    1093           4 :         ptr1++;
    1094           4 :         i1--;
    1095             :     }
    1096             : 
    1097          12 :     while (i2)
    1098             :     {
    1099           4 :         ptr->haspos = ptr2->haspos;
    1100           4 :         ptr->len = ptr2->len;
    1101           4 :         memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
    1102           4 :         ptr->pos = dataoff;
    1103           4 :         dataoff += ptr2->len;
    1104           4 :         if (ptr->haspos)
    1105             :         {
    1106           4 :             int         addlen = add_pos(in2, ptr2, out, ptr, maxpos);
    1107             : 
    1108           4 :             if (addlen == 0)
    1109           0 :                 ptr->haspos = 0;
    1110             :             else
    1111             :             {
    1112           4 :                 dataoff = SHORTALIGN(dataoff);
    1113           4 :                 dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
    1114             :             }
    1115             :         }
    1116             : 
    1117           4 :         ptr++;
    1118           4 :         ptr2++;
    1119           4 :         i2--;
    1120             :     }
    1121             : 
    1122             :     /*
    1123             :      * Instead of checking each offset individually, we check for overflow of
    1124             :      * pos fields once at the end.
    1125             :      */
    1126           8 :     if (dataoff > MAXSTRPOS)
    1127           0 :         ereport(ERROR,
    1128             :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
    1129             :                  errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS)));
    1130             : 
    1131             :     /*
    1132             :      * Adjust sizes (asserting that we didn't overrun the original estimates)
    1133             :      * and collapse out any unused array entries.
    1134             :      */
    1135           8 :     output_size = ptr - ARRPTR(out);
    1136             :     Assert(output_size <= out->size);
    1137           8 :     out->size = output_size;
    1138           8 :     if (data != STRPTR(out))
    1139           4 :         memmove(STRPTR(out), data, dataoff);
    1140           8 :     output_bytes = CALCDATASIZE(out->size, dataoff);
    1141             :     Assert(output_bytes <= VARSIZE(out));
    1142           8 :     SET_VARSIZE(out, output_bytes);
    1143             : 
    1144           8 :     PG_FREE_IF_COPY(in1, 0);
    1145           8 :     PG_FREE_IF_COPY(in2, 1);
    1146           8 :     PG_RETURN_POINTER(out);
    1147             : }
    1148             : 
    1149             : /*
    1150             :  * Compare two strings by tsvector rules.
    1151             :  *
    1152             :  * if prefix = true then it returns zero value iff b has prefix a
    1153             :  */
    1154             : int32
    1155     3973950 : tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
    1156             : {
    1157             :     int         cmp;
    1158             : 
    1159     3973950 :     if (lena == 0)
    1160             :     {
    1161           0 :         if (prefix)
    1162           0 :             cmp = 0;            /* empty string is prefix of anything */
    1163             :         else
    1164           0 :             cmp = (lenb > 0) ? -1 : 0;
    1165             :     }
    1166     3973950 :     else if (lenb == 0)
    1167             :     {
    1168           0 :         cmp = (lena > 0) ? 1 : 0;
    1169             :     }
    1170             :     else
    1171             :     {
    1172     3973950 :         cmp = memcmp(a, b, Min(lena, lenb));
    1173             : 
    1174     3973950 :         if (prefix)
    1175             :         {
    1176       10940 :             if (cmp == 0 && lena > lenb)
    1177           0 :                 cmp = 1;        /* a is longer, so not a prefix of b */
    1178             :         }
    1179     3963010 :         else if (cmp == 0 && lena != lenb)
    1180             :         {
    1181       21468 :             cmp = (lena < lenb) ? -1 : 1;
    1182             :         }
    1183             :     }
    1184             : 
    1185     3973950 :     return cmp;
    1186             : }
    1187             : 
    1188             : /*
    1189             :  * Check weight info or/and fill 'data' with the required positions
    1190             :  */
    1191             : static bool
    1192       39812 : checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
    1193             :                ExecPhraseData *data)
    1194             : {
    1195       39812 :     bool        result = false;
    1196             : 
    1197       39812 :     if (entry->haspos && (val->weight || data))
    1198        1644 :     {
    1199             :         WordEntryPosVector *posvec;
    1200             : 
    1201             :         /*
    1202             :          * We can't use the _POSVECPTR macro here because the pointer to the
    1203             :          * tsvector's lexeme storage is already contained in chkval->values.
    1204             :          */
    1205        1644 :         posvec = (WordEntryPosVector *)
    1206        1644 :             (chkval->values + SHORTALIGN(entry->pos + entry->len));
    1207             : 
    1208        1644 :         if (val->weight && data)
    1209          32 :         {
    1210          32 :             WordEntryPos *posvec_iter = posvec->pos;
    1211             :             WordEntryPos *dptr;
    1212             : 
    1213             :             /*
    1214             :              * Filter position information by weights
    1215             :              */
    1216          32 :             dptr = data->pos = palloc(sizeof(WordEntryPos) * posvec->npos);
    1217          32 :             data->allocated = true;
    1218             : 
    1219             :             /* Is there a position with a matching weight? */
    1220          64 :             while (posvec_iter < posvec->pos + posvec->npos)
    1221             :             {
    1222             :                 /* If true, append this position to the data->pos */
    1223          32 :                 if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
    1224             :                 {
    1225          16 :                     *dptr = WEP_GETPOS(*posvec_iter);
    1226          16 :                     dptr++;
    1227             :                 }
    1228             : 
    1229          32 :                 posvec_iter++;
    1230             :             }
    1231             : 
    1232          32 :             data->npos = dptr - data->pos;
    1233             : 
    1234          32 :             if (data->npos > 0)
    1235          16 :                 result = true;
    1236             :         }
    1237        1612 :         else if (val->weight)
    1238             :         {
    1239          96 :             WordEntryPos *posvec_iter = posvec->pos;
    1240             : 
    1241             :             /* Is there a position with a matching weight? */
    1242         148 :             while (posvec_iter < posvec->pos + posvec->npos)
    1243             :             {
    1244         128 :                 if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
    1245             :                 {
    1246          76 :                     result = true;
    1247          76 :                     break;      /* no need to go further */
    1248             :                 }
    1249             : 
    1250          52 :                 posvec_iter++;
    1251             :             }
    1252             :         }
    1253             :         else                    /* data != NULL */
    1254             :         {
    1255        1516 :             data->npos = posvec->npos;
    1256        1516 :             data->pos = posvec->pos;
    1257        1516 :             data->allocated = false;
    1258        1516 :             result = true;
    1259             :         }
    1260             :     }
    1261             :     else
    1262             :     {
    1263       38168 :         result = true;
    1264             :     }
    1265             : 
    1266       39812 :     return result;
    1267             : }
    1268             : 
    1269             : /*
    1270             :  * Is operand 'val' (exact or prefix) in the array?  Match positions, if any, go to *data when non-NULL.
    1271             :  */
    1272             : static bool
    1273      158988 : checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
    1274             : {
    1275      158988 :     CHKVAL     *chkval = (CHKVAL *) checkval;
    1276      158988 :     WordEntry  *StopLow = chkval->arrb;
    1277      158988 :     WordEntry  *StopHigh = chkval->arre;
    1278      158988 :     WordEntry  *StopMiddle = StopHigh;
    1279      158988 :     bool        res = false;
    1280             : 
    1281             :     /* Binary search; loop invariant: StopLow <= val < StopHigh */
    1282      998740 :     while (StopLow < StopHigh)
    1283             :     {
    1284             :         int         difference;
    1285             : 
    1286      869572 :         StopMiddle = StopLow + (StopHigh - StopLow) / 2;
    1287     2608716 :         difference = tsCompareString(chkval->operand + val->distance,
    1288      869572 :                                      val->length,
    1289      869572 :                                      chkval->values + StopMiddle->pos,
    1290      869572 :                                      StopMiddle->len,
    1291             :                                      false);
    1292             : 
    1293      869572 :         if (difference == 0)
    1294             :         {
    1295             :             /* Exact match: check weight info & fill 'data' with positions */
    1296       29820 :             res = checkclass_str(chkval, StopMiddle, val, data);
    1297       29820 :             break;
    1298             :         }
    1299      839752 :         else if (difference > 0)
    1300      464164 :             StopLow = StopMiddle + 1;
    1301             :         else
    1302      375588 :             StopHigh = StopMiddle;
    1303             :     }
    1304             : 
    1305      158988 :     if ((!res || data) && val->prefix)
    1306             :     {
    1307       10984 :         WordEntryPos *allpos = NULL;
    1308       10984 :         int         npos = 0,
    1309       10984 :                     totalpos = 0;
    1310             : 
    1311             :         /*
    1312             :          * Either the exact search failed, so we must scan further to find a
    1313             :          * prefix match, or the caller needs position info from every match.
    1314             :          */
    1315       10984 :         if (StopLow >= StopHigh)
    1316       10976 :             StopMiddle = StopHigh;
    1317             : 
    1318       31564 :         while ((!res || data) && StopMiddle < chkval->arre &&
    1319       31764 :                tsCompareString(chkval->operand + val->distance,
    1320       10588 :                                val->length,
    1321       10588 :                                chkval->values + StopMiddle->pos,
    1322       10588 :                                StopMiddle->len,
    1323             :                                true) == 0)
    1324             :         {
    1325        9992 :             if (data)
    1326             :             {
    1327             :                 /*
    1328             :                  * Merge this entry's positions into allpos.  NOTE(review): if checkclass_str palloc'd data->pos, it is not pfree'd after the copy.
    1329             :                  */
    1330          44 :                 res = checkclass_str(chkval, StopMiddle, val, data);
    1331             : 
    1332          44 :                 if (res)
    1333             :                 {
    1334          52 :                     while (npos + data->npos >= totalpos)
    1335             :                     {
    1336          24 :                         if (totalpos == 0)
    1337             :                         {
    1338          24 :                             totalpos = 256;
    1339          24 :                             allpos = palloc(sizeof(WordEntryPos) * totalpos);
    1340             :                         }
    1341             :                         else
    1342             :                         {
    1343           0 :                             totalpos *= 2;
    1344           0 :                             allpos = repalloc(allpos, sizeof(WordEntryPos) * totalpos);
    1345             :                         }
    1346             :                     }
    1347             : 
    1348          28 :                     memcpy(allpos + npos, data->pos, sizeof(WordEntryPos) * data->npos);
    1349          28 :                     npos += data->npos;
    1350             :                 }
    1351             :                 else
    1352             :                 {
    1353             :                     /* at loop exit, res must be true if any earlier entry matched */
    1354          16 :                     res = (npos > 0);
    1355             :                 }
    1356             :             }
    1357             :             else
    1358             :             {
    1359        9948 :                 res = checkclass_str(chkval, StopMiddle, val, NULL);
    1360             :             }
    1361             : 
    1362        9992 :             StopMiddle++;
    1363             :         }
    1364             : 
    1365       10984 :         if (res && data)
    1366             :         {
    1367             :             /* Hand back the merged positions, sorted and de-duplicated */
    1368          24 :             data->pos = allpos;
    1369          24 :             qsort(data->pos, npos, sizeof(WordEntryPos), compareWordEntryPos);
    1370          24 :             data->npos = qunique(data->pos, npos, sizeof(WordEntryPos),
    1371             :                                  compareWordEntryPos);
    1372          24 :             data->allocated = true;
    1373             :         }
    1374             :     }
    1375             : 
    1376      158988 :     return res;
    1377             : }
    1378             : 
    1379             : /*
    1380             :  * Compute output position list for a tsquery operator in phrase mode.
    1381             :  *
    1382             :  * Merge the position lists in Ldata and Rdata as specified by "emit",
    1383             :  * storing the result list into *data.  The input position lists must be
    1384             :  * sorted and unique, and the output will be as well.
    1385             :  *
    1386             :  * data: pointer to initially-all-zeroes output struct, or NULL
    1387             :  * Ldata, Rdata: input position lists
    1388             :  * emit: bitmask of TSPO_XXX flags
    1389             :  * Loffset: offset to be added to Ldata positions before comparing/outputting
    1390             :  * Roffset: offset to be added to Rdata positions before comparing/outputting
    1391             :  * max_npos: maximum possible number of entries needed in output position array
    1392             :  *
    1393             :  * Loffset and Roffset should not be negative, else we risk trying to output
    1394             :  * negative positions, which won't fit into WordEntryPos.
    1395             :  *
    1396             :  * The result is boolean (TS_YES or TS_NO), but for the caller's convenience
    1397             :  * we return it as TSTernaryValue.
    1398             :  *
    1399             :  * Returns TS_YES if any positions were emitted to *data; or if data is NULL,
    1400             :  * returns TS_YES if any positions would have been emitted.
    1401             :  */
    1402             : #define TSPO_L_ONLY     0x01    /* emit positions appearing only in L */
    1403             : #define TSPO_R_ONLY     0x02    /* emit positions appearing only in R */
    1404             : #define TSPO_BOTH       0x04    /* emit positions appearing in both L&R */
    1405             : 
    1406             : static TSTernaryValue
    1407       18474 : TS_phrase_output(ExecPhraseData *data,
    1408             :                  ExecPhraseData *Ldata,
    1409             :                  ExecPhraseData *Rdata,
    1410             :                  int emit,
    1411             :                  int Loffset,
    1412             :                  int Roffset,
    1413             :                  int max_npos)
    1414             : {
    1415             :     int         Lindex,
    1416             :                 Rindex;
    1417             : 
    1418             :     /* Loop until both inputs are exhausted */
    1419       18474 :     Lindex = Rindex = 0;
    1420       19126 :     while (Lindex < Ldata->npos || Rindex < Rdata->npos)
    1421             :     {
    1422             :         int         Lpos,
    1423             :                     Rpos;
    1424        1536 :         int         output_pos = 0;
    1425             : 
    1426             :         /*
    1427             :          * Fetch current values to compare.  WEP_GETPOS() is needed because
    1428             :          * ExecPhraseData->pos can point to a tsvector's WordEntryPosVector.
    1429             :          */
    1430        1536 :         if (Lindex < Ldata->npos)
    1431        1108 :             Lpos = WEP_GETPOS(Ldata->pos[Lindex]) + Loffset;
    1432             :         else
    1433             :         {
    1434             :             /* L array exhausted, so we're done if R_ONLY isn't set */
    1435         428 :             if (!(emit & TSPO_R_ONLY))
    1436         104 :                 break;
    1437         324 :             Lpos = INT_MAX;
    1438             :         }
    1439        1432 :         if (Rindex < Rdata->npos)
    1440        1260 :             Rpos = WEP_GETPOS(Rdata->pos[Rindex]) + Roffset;
    1441             :         else
    1442             :         {
    1443             :             /* R array exhausted, so we're done if L_ONLY isn't set */
    1444         172 :             if (!(emit & TSPO_L_ONLY))
    1445         116 :                 break;
    1446          56 :             Rpos = INT_MAX;
    1447             :         }
    1448             : 
    1449             :         /* Merge-join the two input lists; output_pos stays 0 when nothing is emitted */
    1450        1316 :         if (Lpos < Rpos)
    1451             :         {
    1452             :             /* Lpos is not matched in Rdata, should we output it? */
    1453         320 :             if (emit & TSPO_L_ONLY)
    1454          76 :                 output_pos = Lpos;
    1455         320 :             Lindex++;
    1456             :         }
    1457         996 :         else if (Lpos == Rpos)
    1458             :         {
    1459             :             /* Lpos and Rpos match ... should we output it? */
    1460         496 :             if (emit & TSPO_BOTH)
    1461         432 :                 output_pos = Rpos;
    1462         496 :             Lindex++;
    1463         496 :             Rindex++;
    1464             :         }
    1465             :         else                    /* Lpos > Rpos */
    1466             :         {
    1467             :             /* Rpos is not matched in Ldata, should we output it? */
    1468         500 :             if (emit & TSPO_R_ONLY)
    1469         352 :                 output_pos = Rpos;
    1470         500 :             Rindex++;
    1471             :         }
    1472             : 
    1473        1316 :         if (output_pos > 0)
    1474             :         {
    1475         860 :             if (data)
    1476             :             {
    1477             :                 /* Store position, allocating max_npos entries on first use */
    1478         196 :                 if (data->pos == NULL)
    1479             :                 {
    1480         164 :                     data->pos = (WordEntryPos *)
    1481         164 :                         palloc(max_npos * sizeof(WordEntryPos));
    1482         164 :                     data->allocated = true;
    1483             :                 }
    1484         196 :                 data->pos[data->npos++] = output_pos;
    1485             :             }
    1486             :             else
    1487             :             {
    1488             :                 /*
    1489             :                  * Exact positions not needed, so return TS_YES as soon as we
    1490             :                  * know there is at least one.
    1491             :                  */
    1492         664 :                 return TS_YES;
    1493             :             }
    1494             :         }
    1495             :     }
    1496             : 
    1497       17810 :     if (data && data->npos > 0)
    1498             :     {
    1499             :         /* Let's assert we didn't overrun the array */
    1500             :         Assert(data->npos <= max_npos);
    1501         164 :         return TS_YES;
    1502             :     }
    1503       17646 :     return TS_NO;
    1504             : }
    1505             : 
    1506             : /*
    1507             :  * Execute tsquery at or below an OP_PHRASE operator.
    1508             :  *
    1509             :  * This handles tsquery execution at recursion levels where we need to care
    1510             :  * about match locations.
    1511             :  *
    1512             :  * In addition to the same arguments used for TS_execute, the caller may pass
    1513             :  * a preinitialized-to-zeroes ExecPhraseData struct, to be filled with lexeme
    1514             :  * match position info on success.  data == NULL if no position data need be
    1515             :  * returned.  (In practice, outside callers pass NULL, and only the internal
    1516             :  * recursion cases pass a data pointer.)
    1517             :  * Note: the function assumes data != NULL for operators other than OP_PHRASE.
    1518             :  * This is OK because an outside call always starts from an OP_PHRASE node.
    1519             :  *
    1520             :  * The detailed semantics of the match data, given that the function returned
    1521             :  * TS_YES (successful match), are:
    1522             :  *
    1523             :  * npos > 0, negate = false:
    1524             :  *   query is matched at specified position(s) (and only those positions)
    1525             :  * npos > 0, negate = true:
    1526             :  *   query is matched at all positions *except* specified position(s)
    1527             :  * npos = 0, negate = true:
    1528             :  *   query is matched at all positions
    1529             :  * npos = 0, negate = false:
    1530             :  *   disallowed (this should result in TS_NO or TS_MAYBE, as appropriate)
    1531             :  *
    1532             :  * Successful matches also return a "width" value which is the match width in
    1533             :  * lexemes, less one.  Hence, "width" is zero for simple one-lexeme matches,
    1534             :  * and is the sum of the phrase operator distances for phrase matches.  Note
    1535             :  * that when width > 0, the listed positions represent the ends of matches not
    1536             :  * the starts.  (This unintuitive rule is needed to avoid possibly generating
    1537             :  * negative positions, which wouldn't fit into the WordEntryPos arrays.)
    1538             :  *
    1539             :  * If the TSExecuteCallback function reports that an operand is present
    1540             :  * but fails to provide position(s) for it, we will return TS_MAYBE when
    1541             :  * it is possible but not certain that the query is matched.
    1542             :  *
    1543             :  * When the function returns TS_NO or TS_MAYBE, it must return npos = 0,
    1544             :  * negate = false (which is the state initialized by the caller); but the
    1545             :  * "width" output in such cases is undefined.
    1546             :  */
    1547             : static TSTernaryValue
    1548      434138 : TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
    1549             :                   TSExecuteCallback chkcond,
    1550             :                   ExecPhraseData *data)
    1551             : {
    1552             :     ExecPhraseData Ldata,
    1553             :                 Rdata;
    1554             :     TSTernaryValue lmatch,
    1555             :                 rmatch;
    1556             :     int         Loffset,
    1557             :                 Roffset,
    1558             :                 maxwidth;
    1559             : 
    1560             :     /* since this function recurses, it could be driven to stack overflow */
    1561      434138 :     check_stack_depth();
    1562             : 
    1563      434138 :     if (curitem->type == QI_VAL)
    1564             :     {
    1565      212714 :         if (!chkcond(arg, (QueryOperand *) curitem, data))
    1566      178792 :             return TS_NO;
    1567       33922 :         if (data->npos > 0 || data->negate)
    1568        1948 :             return TS_YES;
    1569             :         /* Callback reported a match but gave no positions: must return TS_MAYBE */
    1570       31974 :         return TS_MAYBE;
    1571             :     }
    1572             : 
    1573      221424 :     switch (curitem->qoperator.oper)
    1574             :     {
    1575       75068 :         case OP_NOT:
    1576             : 
    1577             :             /*
    1578             :              * We need not touch data->width, since a NOT operation does not
    1579             :              * change the match width.
    1580             :              */
    1581       75068 :             if (!(flags & TS_EXEC_CALC_NOT))
    1582             :             {
    1583             :                 /* without CALC_NOT, report NOT as "match everywhere" */
    1584             :                 Assert(data->npos == 0 && !data->negate);
    1585        1170 :                 data->negate = true;
    1586        1170 :                 return TS_YES;
    1587             :             }
    1588       73898 :             switch (TS_phrase_execute(curitem + 1, arg, flags, chkcond, data))
    1589             :             {
    1590       65818 :                 case TS_NO:
    1591             :                     /* change "match nowhere" to "match everywhere" */
    1592             :                     Assert(data->npos == 0 && !data->negate);
    1593       65818 :                     data->negate = true;
    1594       65818 :                     return TS_YES;
    1595         260 :                 case TS_YES:
    1596         260 :                     if (data->npos > 0)
    1597             :                     {
    1598             :                         /* we have some positions, invert negate flag */
    1599         256 :                         data->negate = !data->negate;
    1600         256 :                         return TS_YES;
    1601             :                     }
    1602           4 :                     else if (data->negate)
    1603             :                     {
    1604             :                         /* change "match everywhere" to "match nowhere" */
    1605           4 :                         data->negate = false;
    1606           4 :                         return TS_NO;
    1607             :                     }
    1608             :                     /* TS_YES with npos = 0 and negate = false is disallowed */
    1609             :                     Assert(false);
    1610           0 :                     break;
    1611        7820 :                 case TS_MAYBE:
    1612             :                     /* match positions are, and remain, uncertain */
    1613        7820 :                     return TS_MAYBE;
    1614             :             }
    1615           0 :             break;
    1616             : 
    1617      146252 :         case OP_PHRASE:
    1618             :         case OP_AND:
    1619      146252 :             memset(&Ldata, 0, sizeof(Ldata));
    1620      146252 :             memset(&Rdata, 0, sizeof(Rdata));
    1621             : 
    1622      146252 :             lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
    1623             :                                        arg, flags, chkcond, &Ldata);
    1624      146252 :             if (lmatch == TS_NO)
    1625       78544 :                 return TS_NO;
    1626             : 
    1627       67708 :             rmatch = TS_phrase_execute(curitem + 1,
    1628             :                                        arg, flags, chkcond, &Rdata);
    1629       67708 :             if (rmatch == TS_NO)
    1630       34454 :                 return TS_NO;
    1631             : 
    1632             :             /*
    1633             :              * If either operand has no position information, then we can't
    1634             :              * return reliable position data, only a MAYBE result.
    1635             :              */
    1636       33254 :             if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
    1637       14876 :                 return TS_MAYBE;
    1638             : 
    1639       18378 :             if (curitem->qoperator.oper == OP_PHRASE)
    1640             :             {
    1641             :                 /*
    1642             :                  * Shift L positions by the operator distance plus R's width so
    1643             :                  * they compare against R's positions; also set overall width.
    1644             :                  */
    1645       18374 :                 Loffset = curitem->qoperator.distance + Rdata.width;
    1646       18374 :                 Roffset = 0;
    1647       18374 :                 if (data)
    1648         252 :                     data->width = curitem->qoperator.distance +
    1649          84 :                         Ldata.width + Rdata.width;
    1650             :             }
    1651             :             else
    1652             :             {
    1653             :                 /*
    1654             :                  * For OP_AND, set output width and alignment like OP_OR (see
    1655             :                  * comment below)
    1656             :                  */
    1657           4 :                 maxwidth = Max(Ldata.width, Rdata.width);
    1658           4 :                 Loffset = maxwidth - Ldata.width;
    1659           4 :                 Roffset = maxwidth - Rdata.width;
    1660           4 :                 if (data)
    1661           4 :                     data->width = maxwidth;
    1662             :             }
    1663             : 
    1664       18378 :             if (Ldata.negate && Rdata.negate)
    1665             :             {
    1666             :                 /* !L & !R: treat as !(L | R) */
    1667       17470 :                 (void) TS_phrase_output(data, &Ldata, &Rdata,
    1668             :                                         TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
    1669             :                                         Loffset, Roffset,
    1670       17470 :                                         Ldata.npos + Rdata.npos);
    1671       17470 :                 if (data)
    1672           0 :                     data->negate = true;
    1673       17470 :                 return TS_YES;
    1674             :             }
    1675         908 :             else if (Ldata.negate)
    1676             :             {
    1677             :                 /* !L & R */
    1678         300 :                 return TS_phrase_output(data, &Ldata, &Rdata,
    1679             :                                         TSPO_R_ONLY,
    1680             :                                         Loffset, Roffset,
    1681             :                                         Rdata.npos);
    1682             :             }
    1683         608 :             else if (Rdata.negate)
    1684             :             {
    1685             :                 /* L & !R */
    1686           4 :                 return TS_phrase_output(data, &Ldata, &Rdata,
    1687             :                                         TSPO_L_ONLY,
    1688             :                                         Loffset, Roffset,
    1689             :                                         Ldata.npos);
    1690             :             }
    1691             :             else
    1692             :             {
    1693             :                 /* straight AND */
    1694         604 :                 return TS_phrase_output(data, &Ldata, &Rdata,
    1695             :                                         TSPO_BOTH,
    1696             :                                         Loffset, Roffset,
    1697         604 :                                         Min(Ldata.npos, Rdata.npos));
    1698             :             }
    1699             : 
    1700         104 :         case OP_OR:
    1701         104 :             memset(&Ldata, 0, sizeof(Ldata));
    1702         104 :             memset(&Rdata, 0, sizeof(Rdata));
    1703             : 
    1704         104 :             lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
    1705             :                                        arg, flags, chkcond, &Ldata);
    1706         104 :             rmatch = TS_phrase_execute(curitem + 1,
    1707             :                                        arg, flags, chkcond, &Rdata);
    1708             : 
    1709         104 :             if (lmatch == TS_NO && rmatch == TS_NO)
    1710           8 :                 return TS_NO;
    1711             : 
    1712             :             /*
    1713             :              * If either operand has no position information, then we can't
    1714             :              * return reliable position data, only a MAYBE result.
    1715             :              */
    1716          96 :             if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
    1717           0 :                 return TS_MAYBE;
    1718             : 
    1719             :             /*
    1720             :              * Cope with undefined output width from failed submatch.  (This
    1721             :              * takes less code than trying to ensure that all failure returns
    1722             :              * set data->width to zero.)
    1723             :              */
    1724          96 :             if (lmatch == TS_NO)
    1725          12 :                 Ldata.width = 0;
    1726          96 :             if (rmatch == TS_NO)
    1727          56 :                 Rdata.width = 0;
    1728             : 
    1729             :             /*
    1730             :              * For OP_AND and OP_OR, report the width of the wider of the two
    1731             :              * inputs, and align the narrower input's positions to the right
    1732             :              * end of that width.  This rule deals at least somewhat
    1733             :              * reasonably with cases like "x <-> (y | z <-> q)".
    1734             :              */
    1735          96 :             maxwidth = Max(Ldata.width, Rdata.width);
    1736          96 :             Loffset = maxwidth - Ldata.width;
    1737          96 :             Roffset = maxwidth - Rdata.width;
    1738          96 :             data->width = maxwidth;
    1739             : 
    1740          96 :             if (Ldata.negate && Rdata.negate)
    1741             :             {
    1742             :                 /* !L | !R: treat as !(L & R) */
    1743           4 :                 (void) TS_phrase_output(data, &Ldata, &Rdata,
    1744             :                                         TSPO_BOTH,
    1745             :                                         Loffset, Roffset,
    1746           4 :                                         Min(Ldata.npos, Rdata.npos));
    1747           4 :                 data->negate = true;
    1748           4 :                 return TS_YES;
    1749             :             }
    1750          92 :             else if (Ldata.negate)
    1751             :             {
    1752             :                 /* !L | R: treat as !(L & !R) */
    1753          20 :                 (void) TS_phrase_output(data, &Ldata, &Rdata,
    1754             :                                         TSPO_L_ONLY,
    1755             :                                         Loffset, Roffset,
    1756             :                                         Ldata.npos);
    1757          20 :                 data->negate = true;
    1758          20 :                 return TS_YES;
    1759             :             }
    1760          72 :             else if (Rdata.negate)
    1761             :             {
    1762             :                 /* L | !R: treat as !(!L & R) */
    1763           4 :                 (void) TS_phrase_output(data, &Ldata, &Rdata,
    1764             :                                         TSPO_R_ONLY,
    1765             :                                         Loffset, Roffset,
    1766             :                                         Rdata.npos);
    1767           4 :                 data->negate = true;
    1768           4 :                 return TS_YES;
    1769             :             }
    1770             :             else
    1771             :             {
    1772             :                 /* straight OR */
    1773          68 :                 return TS_phrase_output(data, &Ldata, &Rdata,
    1774             :                                         TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
    1775             :                                         Loffset, Roffset,
    1776          68 :                                         Ldata.npos + Rdata.npos);
    1777             :             }
    1778             : 
    1779           0 :         default:
    1780           0 :             elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
    1781             :     }
    1782             : 
    1783             :     /* not reachable, but keep compiler quiet */
    1784           0 :     return TS_NO;
    1785             : }
    1786             : 
    1787             : 
    1788             : /*
    1789             :  * Evaluate tsquery boolean expression; returns true unless the result is TS_NO.
    1790             :  *
    1791             :  * curitem: current tsquery item (initially, the first one)
    1792             :  * arg: opaque value to pass through to callback function
    1793             :  * flags: bitmask of flag bits shown in ts_utils.h
    1794             :  * chkcond: callback function to check whether a primitive value is present
    1795             :  */
    1796             : bool
    1797      283814 : TS_execute(QueryItem *curitem, void *arg, uint32 flags,
    1798             :            TSExecuteCallback chkcond)
    1799             : {
    1800             :     /*
    1801             :      * If we get TS_MAYBE from the recursion, return true.  We could only see
    1802             :      * that result if the caller passed TS_EXEC_PHRASE_NO_POS, so there's no
    1803             :      * need to check the flag again here.
    1804             :      */
    1805      283814 :     return TS_execute_recurse(curitem, arg, flags, chkcond) != TS_NO;
    1806             : }
    1807             : 
    1808             : /*
    1809             :  * TS_execute recursion for operators above any phrase operator.  Here we do
    1810             :  * not need to worry about lexeme positions.  As soon as we hit an OP_PHRASE
    1811             :  * operator, we pass it off to TS_phrase_execute which does worry.
    1812             :  */
    1813             : static TSTernaryValue
    1814      536904 : TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags,
    1815             :                    TSExecuteCallback chkcond)
    1816             : {
    1817             :     TSTernaryValue lmatch;
    1818             : 
    1819             :     /* since this function recurses, it could be driven to stack overflow */
    1820      536904 :     check_stack_depth();
    1821             : 
    1822      536904 :     if (curitem->type == QI_VAL)
    1823      188592 :         return chkcond(arg, (QueryOperand *) curitem,
    1824      188592 :                        NULL /* don't need position info */ ) ? TS_YES : TS_NO;
    1825             : 
    1826      348312 :     switch (curitem->qoperator.oper)
    1827             :     {
    1828       80208 :         case OP_NOT:
    1829       80208 :             if (!(flags & TS_EXEC_CALC_NOT))
    1830         936 :                 return TS_YES;
    1831       79272 :             switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
    1832             :             {
    1833       78548 :                 case TS_NO:
    1834       78548 :                     return TS_YES;
    1835          52 :                 case TS_YES:
    1836          52 :                     return TS_NO;
    1837         672 :                 case TS_MAYBE:
    1838         672 :                     return TS_MAYBE;
    1839             :             }
    1840           0 :             break;
    1841             : 
    1842       53916 :         case OP_AND:
    1843       53916 :             lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
    1844             :                                         flags, chkcond);
    1845       53916 :             if (lmatch == TS_NO)
    1846       43898 :                 return TS_NO;
    1847       10018 :             switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
    1848             :             {
    1849        5866 :                 case TS_NO:
    1850        5866 :                     return TS_NO;
    1851        4152 :                 case TS_YES:
    1852        4152 :                     return lmatch;
    1853           0 :                 case TS_MAYBE:
    1854           0 :                     return TS_MAYBE;
    1855             :             }
    1856           0 :             break;
    1857             : 
    1858       68116 :         case OP_OR:
    1859       68116 :             lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
    1860             :                                         flags, chkcond);
    1861       68116 :             if (lmatch == TS_YES)
    1862       26348 :                 return TS_YES;
    1863       41768 :             switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
    1864             :             {
    1865       34560 :                 case TS_NO:
    1866       34560 :                     return lmatch;
    1867        7208 :                 case TS_YES:
    1868        7208 :                     return TS_YES;
    1869           0 :                 case TS_MAYBE:
    1870           0 :                     return TS_MAYBE;
    1871             :             }
    1872           0 :             break;
    1873             : 
    1874      146072 :         case OP_PHRASE:
    1875             : 
    1876             :             /*
    1877             :              * If we get a MAYBE result, and the caller doesn't want that,
    1878             :              * convert it to NO.  It would be more consistent, perhaps, to
    1879             :              * return the result of TS_phrase_execute() verbatim and then
    1880             :              * convert MAYBE results at the top of the recursion.  But
    1881             :              * converting at the topmost phrase operator gives results that
    1882             :              * are bug-compatible with the old implementation, so do it like
    1883             :              * this for now.
    1884             :              */
    1885      146072 :             switch (TS_phrase_execute(curitem, arg, flags, chkcond, NULL))
    1886             :             {
    1887      113142 :                 case TS_NO:
    1888      113142 :                     return TS_NO;
    1889       18058 :                 case TS_YES:
    1890       18058 :                     return TS_YES;
    1891       14872 :                 case TS_MAYBE:
    1892       14872 :                     return (flags & TS_EXEC_PHRASE_NO_POS) ? TS_MAYBE : TS_NO;
    1893             :             }
    1894           0 :             break;
    1895             : 
    1896           0 :         default:
    1897           0 :             elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
    1898             :     }
    1899             : 
    1900             :     /* not reachable, but keep compiler quiet */
    1901           0 :     return TS_NO;
    1902             : }
    1903             : 
    1904             : /*
    1905             :  * Detect whether a tsquery boolean expression requires any positive matches
    1906             :  * to values shown in the tsquery.
    1907             :  *
    1908             :  * This is needed to know whether a GIN index search requires full index scan.
    1909             :  * For example, 'x & !y' requires a match of x, so it's sufficient to scan
    1910             :  * entries for x; but 'x | !y' could match rows containing neither x nor y.
    1911             :  */
    1912             : bool
    1913         524 : tsquery_requires_match(QueryItem *curitem)
    1914             : {
    1915             :     /* since this function recurses, it could be driven to stack overflow */
    1916         524 :     check_stack_depth();
    1917             : 
    1918         524 :     if (curitem->type == QI_VAL)
    1919         248 :         return true;
    1920             : 
    1921         276 :     switch (curitem->qoperator.oper)
    1922             :     {
    1923          96 :         case OP_NOT:
    1924             : 
    1925             :             /*
    1926             :              * Assume there are no required matches underneath a NOT.  For
    1927             :              * some cases with nested NOTs, we could prove there's a required
    1928             :              * match, but it seems unlikely to be worth the trouble.
    1929             :              */
    1930          96 :             return false;
    1931             : 
    1932         136 :         case OP_PHRASE:
    1933             : 
    1934             :             /*
    1935             :              * Treat OP_PHRASE as OP_AND here
    1936             :              */
    1937             :         case OP_AND:
    1938             :             /* If either side requires a match, we're good */
    1939         136 :             if (tsquery_requires_match(curitem + curitem->qoperator.left))
    1940         104 :                 return true;
    1941             :             else
    1942          32 :                 return tsquery_requires_match(curitem + 1);
    1943             : 
    1944          44 :         case OP_OR:
    1945             :             /* Both sides must require a match */
    1946          44 :             if (tsquery_requires_match(curitem + curitem->qoperator.left))
    1947          44 :                 return tsquery_requires_match(curitem + 1);
    1948             :             else
    1949           0 :                 return false;
    1950             : 
    1951           0 :         default:
    1952           0 :             elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
    1953             :     }
    1954             : 
    1955             :     /* not reachable, but keep compiler quiet */
    1956             :     return false;
    1957             : }
    1958             : 
    1959             : /*
    1960             :  * boolean operations
    1961             :  */
    1962             : Datum
    1963          40 : ts_match_qv(PG_FUNCTION_ARGS)
    1964             : {
    1965          40 :     PG_RETURN_DATUM(DirectFunctionCall2(ts_match_vq,
    1966             :                                         PG_GETARG_DATUM(1),
    1967             :                                         PG_GETARG_DATUM(0)));
    1968             : }
    1969             : 
    1970             : Datum
    1971      116336 : ts_match_vq(PG_FUNCTION_ARGS)
    1972             : {
    1973      116336 :     TSVector    val = PG_GETARG_TSVECTOR(0);
    1974      116336 :     TSQuery     query = PG_GETARG_TSQUERY(1);
    1975             :     CHKVAL      chkval;
    1976             :     bool        result;
    1977             : 
    1978             :     /* empty query matches nothing */
    1979      116336 :     if (!query->size)
    1980             :     {
    1981           0 :         PG_FREE_IF_COPY(val, 0);
    1982           0 :         PG_FREE_IF_COPY(query, 1);
    1983           0 :         PG_RETURN_BOOL(false);
    1984             :     }
    1985             : 
    1986      116336 :     chkval.arrb = ARRPTR(val);
    1987      116336 :     chkval.arre = chkval.arrb + val->size;
    1988      116336 :     chkval.values = STRPTR(val);
    1989      116336 :     chkval.operand = GETOPERAND(query);
    1990      116336 :     result = TS_execute(GETQUERY(query),
    1991             :                         &chkval,
    1992             :                         TS_EXEC_CALC_NOT,
    1993             :                         checkcondition_str);
    1994             : 
    1995      116336 :     PG_FREE_IF_COPY(val, 0);
    1996      116336 :     PG_FREE_IF_COPY(query, 1);
    1997      116336 :     PG_RETURN_BOOL(result);
    1998             : }
    1999             : 
    2000             : Datum
    2001           0 : ts_match_tt(PG_FUNCTION_ARGS)
    2002             : {
    2003             :     TSVector    vector;
    2004             :     TSQuery     query;
    2005             :     bool        res;
    2006             : 
    2007           0 :     vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
    2008             :                                                   PG_GETARG_DATUM(0)));
    2009           0 :     query = DatumGetTSQuery(DirectFunctionCall1(plainto_tsquery,
    2010             :                                                 PG_GETARG_DATUM(1)));
    2011             : 
    2012           0 :     res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
    2013             :                                            TSVectorGetDatum(vector),
    2014             :                                            TSQueryGetDatum(query)));
    2015             : 
    2016           0 :     pfree(vector);
    2017           0 :     pfree(query);
    2018             : 
    2019           0 :     PG_RETURN_BOOL(res);
    2020             : }
    2021             : 
    2022             : Datum
    2023           0 : ts_match_tq(PG_FUNCTION_ARGS)
    2024             : {
    2025             :     TSVector    vector;
    2026           0 :     TSQuery     query = PG_GETARG_TSQUERY(1);
    2027             :     bool        res;
    2028             : 
    2029           0 :     vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
    2030             :                                                   PG_GETARG_DATUM(0)));
    2031             : 
    2032           0 :     res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
    2033             :                                            TSVectorGetDatum(vector),
    2034             :                                            TSQueryGetDatum(query)));
    2035             : 
    2036           0 :     pfree(vector);
    2037           0 :     PG_FREE_IF_COPY(query, 1);
    2038             : 
    2039           0 :     PG_RETURN_BOOL(res);
    2040             : }
    2041             : 
    2042             : /*
    2043             :  * ts_stat statistic function support
    2044             :  */
    2045             : 
    2046             : 
    2047             : /*
    2048             :  * Returns the number of positions in value 'wptr' within tsvector 'txt',
    2049             :  * that have a weight equal to one of the weights in 'weight' bitmask.
    2050             :  */
    2051             : static int
    2052        5452 : check_weight(TSVector txt, WordEntry *wptr, int8 weight)
    2053             : {
    2054        5452 :     int         len = POSDATALEN(txt, wptr);
    2055        5452 :     int         num = 0;
    2056        5452 :     WordEntryPos *ptr = POSDATAPTR(txt, wptr);
    2057             : 
    2058       11100 :     while (len--)
    2059             :     {
    2060        5648 :         if (weight & (1 << WEP_GETWEIGHT(*ptr)))
    2061           8 :             num++;
    2062        5648 :         ptr++;
    2063             :     }
    2064        5452 :     return num;
    2065             : }
    2066             : 
    2067             : #define compareStatWord(a,e,t)                          \
    2068             :     tsCompareString((a)->lexeme, (a)->lenlexeme,      \
    2069             :                     STRPTR(t) + (e)->pos, (e)->len,       \
    2070             :                     false)
    2071             : 
    2072             : static void
    2073      230416 : insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 off)
    2074             : {
    2075      230416 :     WordEntry  *we = ARRPTR(txt) + off;
    2076      230416 :     StatEntry  *node = stat->root,
    2077      230416 :                *pnode = NULL;
    2078             :     int         n,
    2079      230416 :                 res = 0;
    2080      230416 :     uint32      depth = 1;
    2081             : 
    2082      230416 :     if (stat->weight == 0)
    2083      115208 :         n = (we->haspos) ? POSDATALEN(txt, we) : 1;
    2084             :     else
    2085      115208 :         n = (we->haspos) ? check_weight(txt, we, stat->weight) : 0;
    2086             : 
    2087      230416 :     if (n == 0)
    2088      115204 :         return;                 /* nothing to insert */
    2089             : 
    2090     1163640 :     while (node)
    2091             :     {
    2092     1159064 :         res = compareStatWord(node, we, txt);
    2093             : 
    2094     1159064 :         if (res == 0)
    2095             :         {
    2096      110636 :             break;
    2097             :         }
    2098             :         else
    2099             :         {
    2100     1048428 :             pnode = node;
    2101     1048428 :             node = (res < 0) ? node->left : node->right;
    2102             :         }
    2103     1048428 :         depth++;
    2104             :     }
    2105             : 
    2106      115212 :     if (depth > stat->maxdepth)
    2107          84 :         stat->maxdepth = depth;
    2108             : 
    2109      115212 :     if (node == NULL)
    2110             :     {
    2111        4576 :         node = MemoryContextAlloc(persistentContext, STATENTRYHDRSZ + we->len);
    2112        4576 :         node->left = node->right = NULL;
    2113        4576 :         node->ndoc = 1;
    2114        4576 :         node->nentry = n;
    2115        4576 :         node->lenlexeme = we->len;
    2116        4576 :         memcpy(node->lexeme, STRPTR(txt) + we->pos, node->lenlexeme);
    2117             : 
    2118        4576 :         if (pnode == NULL)
    2119             :         {
    2120           8 :             stat->root = node;
    2121             :         }
    2122             :         else
    2123             :         {
    2124        4568 :             if (res < 0)
    2125        2248 :                 pnode->left = node;
    2126             :             else
    2127        2320 :                 pnode->right = node;
    2128             :         }
    2129             : 
    2130             :     }
    2131             :     else
    2132             :     {
    2133      110636 :         node->ndoc++;
    2134      110636 :         node->nentry += n;
    2135             :     }
    2136             : }
    2137             : 
    2138             : static void
    2139      330256 : chooseNextStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt,
    2140             :                     uint32 low, uint32 high, uint32 offset)
    2141             : {
    2142             :     uint32      pos;
    2143      330256 :     uint32      middle = (low + high) >> 1;
    2144             : 
    2145      330256 :     pos = (low + middle) >> 1;
    2146      330256 :     if (low != middle && pos >= offset && pos - offset < txt->size)
    2147      113552 :         insertStatEntry(persistentContext, stat, txt, pos - offset);
    2148      330256 :     pos = (high + middle + 1) >> 1;
    2149      330256 :     if (middle + 1 != high && pos >= offset && pos - offset < txt->size)
    2150      112856 :         insertStatEntry(persistentContext, stat, txt, pos - offset);
    2151             : 
    2152      330256 :     if (low != middle)
    2153      165128 :         chooseNextStatEntry(persistentContext, stat, txt, low, middle, offset);
    2154      330256 :     if (high != middle + 1)
    2155      161120 :         chooseNextStatEntry(persistentContext, stat, txt, middle + 1, high, offset);
    2156      330256 : }
    2157             : 
    2158             : /*
    2159             :  * This is written like a custom aggregate function, because the
    2160             :  * original plan was to do just that. Unfortunately, an aggregate function
    2161             :  * can't return a set, so that plan was abandoned. If that limitation is
    2162             :  * lifted in the future, ts_stat could be a real aggregate function so that
    2163             :  * you could use it like this:
    2164             :  *
    2165             :  *   SELECT ts_stat(vector_column) FROM vector_table;
    2166             :  *
    2167             :  *  where vector_column is a tsvector-type column in vector_table.
    2168             :  */
    2169             : 
    2170             : static TSVectorStat *
    2171        4072 : ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
    2172             : {
    2173        4072 :     TSVector    txt = DatumGetTSVector(data);
    2174             :     uint32      i,
    2175        4072 :                 nbit = 0,
    2176             :                 offset;
    2177             : 
    2178        4072 :     if (stat == NULL)
    2179             :     {                           /* Init in first */
    2180           0 :         stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
    2181           0 :         stat->maxdepth = 1;
    2182             :     }
    2183             : 
    2184             :     /* simple check of correctness */
    2185        4072 :     if (txt == NULL || txt->size == 0)
    2186             :     {
    2187          64 :         if (txt && txt != (TSVector) DatumGetPointer(data))
    2188          64 :             pfree(txt);
    2189          64 :         return stat;
    2190             :     }
    2191             : 
    2192        4008 :     i = txt->size - 1;
    2193       28480 :     for (; i > 0; i >>= 1)
    2194       24472 :         nbit++;
    2195             : 
    2196        4008 :     nbit = 1 << nbit;
    2197        4008 :     offset = (nbit - txt->size) / 2;
    2198             : 
    2199        4008 :     insertStatEntry(persistentContext, stat, txt, (nbit >> 1) - offset);
    2200        4008 :     chooseNextStatEntry(persistentContext, stat, txt, 0, nbit, offset);
    2201             : 
    2202        4008 :     return stat;
    2203             : }
    2204             : 
    2205             : static void
    2206           8 : ts_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx,
    2207             :                    TSVectorStat *stat)
    2208             : {
    2209             :     TupleDesc   tupdesc;
    2210             :     MemoryContext oldcontext;
    2211             :     StatEntry  *node;
    2212             : 
    2213           8 :     funcctx->user_fctx = (void *) stat;
    2214             : 
    2215           8 :     oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
    2216             : 
    2217           8 :     stat->stack = palloc0(sizeof(StatEntry *) * (stat->maxdepth + 1));
    2218           8 :     stat->stackpos = 0;
    2219             : 
    2220           8 :     node = stat->root;
    2221             :     /* find leftmost value */
    2222           8 :     if (node == NULL)
    2223           0 :         stat->stack[stat->stackpos] = NULL;
    2224             :     else
    2225             :         for (;;)
    2226             :         {
    2227          24 :             stat->stack[stat->stackpos] = node;
    2228          32 :             if (node->left)
    2229             :             {
    2230          24 :                 stat->stackpos++;
    2231          24 :                 node = node->left;
    2232             :             }
    2233             :             else
    2234           8 :                 break;
    2235             :         }
    2236             :     Assert(stat->stackpos <= stat->maxdepth);
    2237             : 
    2238           8 :     tupdesc = CreateTemplateTupleDesc(3);
    2239           8 :     TupleDescInitEntry(tupdesc, (AttrNumber) 1, "word",
    2240             :                        TEXTOID, -1, 0);
    2241           8 :     TupleDescInitEntry(tupdesc, (AttrNumber) 2, "ndoc",
    2242             :                        INT4OID, -1, 0);
    2243           8 :     TupleDescInitEntry(tupdesc, (AttrNumber) 3, "nentry",
    2244             :                        INT4OID, -1, 0);
    2245           8 :     funcctx->tuple_desc = BlessTupleDesc(tupdesc);
    2246           8 :     funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
    2247             : 
    2248           8 :     MemoryContextSwitchTo(oldcontext);
    2249           8 : }
    2250             : 
    2251             : static StatEntry *
    2252        9152 : walkStatEntryTree(TSVectorStat *stat)
    2253             : {
    2254        9152 :     StatEntry  *node = stat->stack[stat->stackpos];
    2255             : 
    2256        9152 :     if (node == NULL)
    2257           0 :         return NULL;
    2258             : 
    2259        9152 :     if (node->ndoc != 0)
    2260             :     {
    2261             :         /* return entry itself: we already was at left sublink */
    2262        2256 :         return node;
    2263             :     }
    2264        6896 :     else if (node->right && node->right != stat->stack[stat->stackpos + 1])
    2265             :     {
    2266             :         /* go on right sublink */
    2267        2320 :         stat->stackpos++;
    2268        2320 :         node = node->right;
    2269             : 
    2270             :         /* find most-left value */
    2271             :         for (;;)
    2272             :         {
    2273        2224 :             stat->stack[stat->stackpos] = node;
    2274        4544 :             if (node->left)
    2275             :             {
    2276        2224 :                 stat->stackpos++;
    2277        2224 :                 node = node->left;
    2278             :             }
    2279             :             else
    2280        2320 :                 break;
    2281             :         }
    2282        2320 :         Assert(stat->stackpos <= stat->maxdepth);
    2283             :     }
    2284             :     else
    2285             :     {
    2286             :         /* we already return all left subtree, itself and  right subtree */
    2287        4576 :         if (stat->stackpos == 0)
    2288           8 :             return NULL;
    2289             : 
    2290        4568 :         stat->stackpos--;
    2291        4568 :         return walkStatEntryTree(stat);
    2292             :     }
    2293             : 
    2294        2320 :     return node;
    2295             : }
    2296             : 
    2297             : static Datum
    2298        4584 : ts_process_call(FuncCallContext *funcctx)
    2299             : {
    2300             :     TSVectorStat *st;
    2301             :     StatEntry  *entry;
    2302             : 
    2303        4584 :     st = (TSVectorStat *) funcctx->user_fctx;
    2304             : 
    2305        4584 :     entry = walkStatEntryTree(st);
    2306             : 
    2307        4584 :     if (entry != NULL)
    2308             :     {
    2309             :         Datum       result;
    2310             :         char       *values[3];
    2311             :         char        ndoc[16];
    2312             :         char        nentry[16];
    2313             :         HeapTuple   tuple;
    2314             : 
    2315        4576 :         values[0] = palloc(entry->lenlexeme + 1);
    2316        4576 :         memcpy(values[0], entry->lexeme, entry->lenlexeme);
    2317        4576 :         (values[0])[entry->lenlexeme] = '\0';
    2318        4576 :         sprintf(ndoc, "%d", entry->ndoc);
    2319        4576 :         values[1] = ndoc;
    2320        4576 :         sprintf(nentry, "%d", entry->nentry);
    2321        4576 :         values[2] = nentry;
    2322             : 
    2323        4576 :         tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
    2324        4576 :         result = HeapTupleGetDatum(tuple);
    2325             : 
    2326        4576 :         pfree(values[0]);
    2327             : 
    2328             :         /* mark entry as already visited */
    2329        4576 :         entry->ndoc = 0;
    2330             : 
    2331        4576 :         return result;
    2332             :     }
    2333             : 
    2334           8 :     return (Datum) 0;
    2335             : }
    2336             : 
    2337             : static TSVectorStat *
    2338           8 : ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
    2339             : {
    2340           8 :     char       *query = text_to_cstring(txt);
    2341             :     TSVectorStat *stat;
    2342             :     bool        isnull;
    2343             :     Portal      portal;
    2344             :     SPIPlanPtr  plan;
    2345             : 
    2346           8 :     if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
    2347             :         /* internal error */
    2348           0 :         elog(ERROR, "SPI_prepare(\"%s\") failed", query);
    2349             : 
    2350           8 :     if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
    2351             :         /* internal error */
    2352           0 :         elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
    2353             : 
    2354           8 :     SPI_cursor_fetch(portal, true, 100);
    2355             : 
    2356           8 :     if (SPI_tuptable == NULL ||
    2357           8 :         SPI_tuptable->tupdesc->natts != 1 ||
    2358           8 :         !IsBinaryCoercible(SPI_gettypeid(SPI_tuptable->tupdesc, 1),
    2359             :                            TSVECTOROID))
    2360           0 :         ereport(ERROR,
    2361             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    2362             :                  errmsg("ts_stat query must return one tsvector column")));
    2363             : 
    2364           8 :     stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
    2365           8 :     stat->maxdepth = 1;
    2366             : 
    2367           8 :     if (ws)
    2368             :     {
    2369             :         char       *buf;
    2370             : 
    2371           4 :         buf = VARDATA_ANY(ws);
    2372          12 :         while (buf - VARDATA_ANY(ws) < VARSIZE_ANY_EXHDR(ws))
    2373             :         {
    2374           8 :             if (pg_mblen(buf) == 1)
    2375             :             {
    2376           8 :                 switch (*buf)
    2377             :                 {
    2378           4 :                     case 'A':
    2379             :                     case 'a':
    2380           4 :                         stat->weight |= 1 << 3;
    2381           4 :                         break;
    2382           4 :                     case 'B':
    2383             :                     case 'b':
    2384           4 :                         stat->weight |= 1 << 2;
    2385           4 :                         break;
    2386           0 :                     case 'C':
    2387             :                     case 'c':
    2388           0 :                         stat->weight |= 1 << 1;
    2389           0 :                         break;
    2390           0 :                     case 'D':
    2391             :                     case 'd':
    2392           0 :                         stat->weight |= 1;
    2393           0 :                         break;
    2394           0 :                     default:
    2395           0 :                         stat->weight |= 0;
    2396             :                 }
    2397           0 :             }
    2398           8 :             buf += pg_mblen(buf);
    2399             :         }
    2400             :     }
    2401             : 
    2402          56 :     while (SPI_processed > 0)
    2403             :     {
    2404             :         uint64      i;
    2405             : 
    2406        4120 :         for (i = 0; i < SPI_processed; i++)
    2407             :         {
    2408        4072 :             Datum       data = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
    2409             : 
    2410        4072 :             if (!isnull)
    2411        4072 :                 stat = ts_accum(persistentContext, stat, data);
    2412             :         }
    2413             : 
    2414          48 :         SPI_freetuptable(SPI_tuptable);
    2415          48 :         SPI_cursor_fetch(portal, true, 100);
    2416             :     }
    2417             : 
    2418           8 :     SPI_freetuptable(SPI_tuptable);
    2419           8 :     SPI_cursor_close(portal);
    2420           8 :     SPI_freeplan(plan);
    2421           8 :     pfree(query);
    2422             : 
    2423           8 :     return stat;
    2424             : }
    2425             : 
    2426             : Datum    /* set-returning function (SRF_* protocol); one-argument variant */
    2427        4576 : ts_stat1(PG_FUNCTION_ARGS)
    2428             : {
    2429             :     FuncCallContext *funcctx;
    2430             :     Datum       result;
    2431             : 
    2432        4576 :     if (SRF_IS_FIRSTCALL())     /* first call: gather all statistics up front */
    2433             :     {
    2434             :         TSVectorStat *stat;
    2435           4 :         text       *txt = PG_GETARG_TEXT_PP(0);     /* argument 0, handed to ts_stat_sql() below */
    2436             : 
    2437           4 :         funcctx = SRF_FIRSTCALL_INIT();
    2438           4 :         SPI_connect();
    2439           4 :         stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, NULL);     /* NULL third argument: no weight string given (ts_stat2 passes one) */
    2440           4 :         PG_FREE_IF_COPY(txt, 0);
    2441           4 :         ts_setup_firstcall(fcinfo, funcctx, stat);     /* presumably stores stat in funcctx for ts_process_call() -- defined outside this view, confirm */
    2442           4 :         SPI_finish();
    2443             :     }
    2444             : 
    2445        4576 :     funcctx = SRF_PERCALL_SETUP();
    2446        4576 :     if ((result = ts_process_call(funcctx)) != (Datum) 0)     /* a zero Datum signals that no rows remain */
    2447        4572 :         SRF_RETURN_NEXT(funcctx, result);
    2448           4 :     SRF_RETURN_DONE(funcctx);
    2449             : }
    2450             : 
    2451             : Datum    /* set-returning function (SRF_* protocol); two-argument variant */
    2452           8 : ts_stat2(PG_FUNCTION_ARGS)
    2453             : {
    2454             :     FuncCallContext *funcctx;
    2455             :     Datum       result;
    2456             : 
    2457           8 :     if (SRF_IS_FIRSTCALL())     /* first call: gather all statistics up front */
    2458             :     {
    2459             :         TSVectorStat *stat;
    2460           4 :         text       *txt = PG_GETARG_TEXT_PP(0);     /* argument 0, handed to ts_stat_sql() below */
    2461           4 :         text       *ws = PG_GETARG_TEXT_PP(1);      /* argument 1; presumably the weight letters ts_stat_sql() turns into stat->weight bits -- confirm */
    2462             : 
    2463           4 :         funcctx = SRF_FIRSTCALL_INIT();
    2464           4 :         SPI_connect();
    2465           4 :         stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, ws);
    2466           4 :         PG_FREE_IF_COPY(txt, 0);
    2467           4 :         PG_FREE_IF_COPY(ws, 1);
    2468           4 :         ts_setup_firstcall(fcinfo, funcctx, stat);     /* presumably stores stat in funcctx for ts_process_call() -- defined outside this view, confirm */
    2469           4 :         SPI_finish();
    2470             :     }
    2471             : 
    2472           8 :     funcctx = SRF_PERCALL_SETUP();
    2473           8 :     if ((result = ts_process_call(funcctx)) != (Datum) 0)     /* a zero Datum signals that no rows remain */
    2474           4 :         SRF_RETURN_NEXT(funcctx, result);
    2475           4 :     SRF_RETURN_DONE(funcctx);
    2476             : }
    2477             : 
    2478             : 
    2479             : /*
    2480             :  * Triggers for automatic update of a tsvector column from text column(s).
    2481             :  *
    2482             :  * Trigger arguments are either
    2483             :  *      name of tsvector col, name of tsconfig to use, name(s) of text col(s)   [tsvector_update_trigger_byid]
    2484             :  *      name of tsvector col, name of regconfig col, name(s) of text col(s)     [tsvector_update_trigger_bycolumn]
    2485             :  * i.e., the tsconfig can be specified either by name or indirectly as the
    2486             :  * contents of a regconfig field in the row.  If the name is used, it must
    2487             :  * be explicitly schema-qualified.
    2488             :  */
    2489             : Datum
    2490          12 : tsvector_update_trigger_byid(PG_FUNCTION_ARGS)
    2491             : {
    2492          12 :     return tsvector_update_trigger(fcinfo, false);     /* false: second trigger argument is a configuration name, not a column */
    2493             : }
    2494             : 
    2495             : Datum    /* trigger entry point: configuration is read from a regconfig column of the row */
    2496           0 : tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS)
    2497             : {
    2498           0 :     return tsvector_update_trigger(fcinfo, true);     /* true: second trigger argument names a regconfig column */
    2499             : }
    2500             : 
    2501             : static Datum    /* shared body of both trigger entry points; config_column selects how tgargs[1] is interpreted */
    2502          12 : tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
    2503             : {
    2504             :     TriggerData *trigdata;
    2505             :     Trigger    *trigger;
    2506             :     Relation    rel;
    2507          12 :     HeapTuple   rettuple = NULL;
    2508             :     int         tsvector_attr_num,
    2509             :                 i;
    2510             :     ParsedText  prs;
    2511             :     Datum       datum;
    2512             :     bool        isnull;
    2513             :     text       *txt;
    2514             :     Oid         cfgId;
    2515             :     bool        update_needed;
    2516             : 
    2517             :     /* Check call context: must be a BEFORE ROW trigger for INSERT or UPDATE */
    2518          12 :     if (!CALLED_AS_TRIGGER(fcinfo)) /* internal error */
    2519           0 :         elog(ERROR, "tsvector_update_trigger: not fired by trigger manager");
    2520             : 
    2521          12 :     trigdata = (TriggerData *) fcinfo->context;
    2522          12 :     if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
    2523           0 :         elog(ERROR, "tsvector_update_trigger: must be fired for row");
    2524          12 :     if (!TRIGGER_FIRED_BEFORE(trigdata->tg_event))
    2525           0 :         elog(ERROR, "tsvector_update_trigger: must be fired BEFORE event");
    2526             : 
    2527          12 :     if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
    2528             :     {
    2529           8 :         rettuple = trigdata->tg_trigtuple;
    2530           8 :         update_needed = true;     /* INSERT: the tsvector is always computed */
    2531             :     }
    2532           4 :     else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
    2533             :     {
    2534           4 :         rettuple = trigdata->tg_newtuple;
    2535           4 :         update_needed = false;  /* computed below */
    2536             :     }
    2537             :     else
    2538           0 :         elog(ERROR, "tsvector_update_trigger: must be fired for INSERT or UPDATE");
    2539             : 
    2540          12 :     trigger = trigdata->tg_trigger;
    2541          12 :     rel = trigdata->tg_relation;
    2542             : 
    2543          12 :     if (trigger->tgnargs < 3)     /* need tsvector column, configuration, and at least one text column */
    2544           0 :         elog(ERROR, "tsvector_update_trigger: arguments must be tsvector_field, ts_config, text_field1, ...)");
    2545             : 
    2546             :     /* Find the target tsvector column */
    2547          12 :     tsvector_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
    2548          12 :     if (tsvector_attr_num == SPI_ERROR_NOATTRIBUTE)
    2549           0 :         ereport(ERROR,
    2550             :                 (errcode(ERRCODE_UNDEFINED_COLUMN),
    2551             :                  errmsg("tsvector column \"%s\" does not exist",
    2552             :                         trigger->tgargs[0])));
    2553             :     /* This will effectively reject system columns, so no separate test: */
    2554          12 :     if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, tsvector_attr_num),
    2555             :                            TSVECTOROID))
    2556           0 :         ereport(ERROR,
    2557             :                 (errcode(ERRCODE_DATATYPE_MISMATCH),
    2558             :                  errmsg("column \"%s\" is not of tsvector type",
    2559             :                         trigger->tgargs[0])));
    2560             : 
    2561             :     /* Find the configuration to use */
    2562          12 :     if (config_column)     /* tgargs[1] names a regconfig column whose value in this row is the configuration */
    2563             :     {
    2564             :         int         config_attr_num;
    2565             : 
    2566           0 :         config_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[1]);
    2567           0 :         if (config_attr_num == SPI_ERROR_NOATTRIBUTE)
    2568           0 :             ereport(ERROR,
    2569             :                     (errcode(ERRCODE_UNDEFINED_COLUMN),
    2570             :                      errmsg("configuration column \"%s\" does not exist",
    2571             :                             trigger->tgargs[1])));
    2572           0 :         if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, config_attr_num),
    2573             :                                REGCONFIGOID))
    2574           0 :             ereport(ERROR,
    2575             :                     (errcode(ERRCODE_DATATYPE_MISMATCH),
    2576             :                      errmsg("column \"%s\" is not of regconfig type",
    2577             :                             trigger->tgargs[1])));
    2578             : 
    2579           0 :         datum = SPI_getbinval(rettuple, rel->rd_att, config_attr_num, &isnull);
    2580           0 :         if (isnull)
    2581           0 :             ereport(ERROR,
    2582             :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
    2583             :                      errmsg("configuration column \"%s\" must not be null",
    2584             :                             trigger->tgargs[1])));
    2585           0 :         cfgId = DatumGetObjectId(datum);
    2586             :     }
    2587             :     else     /* tgargs[1] is the configuration name itself */
    2588             :     {
    2589             :         List       *names;
    2590             : 
    2591          12 :         names = stringToQualifiedNameList(trigger->tgargs[1]);
    2592             :         /* require a schema so that results are not search path dependent */
    2593          12 :         if (list_length(names) < 2)
    2594           0 :             ereport(ERROR,
    2595             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    2596             :                      errmsg("text search configuration name \"%s\" must be schema-qualified",
    2597             :                             trigger->tgargs[1])));
    2598          12 :         cfgId = get_ts_config_oid(names, false);
    2599             :     }
    2600             : 
    2601             :     /* initialize parse state */
    2602          12 :     prs.lenwords = 32;
    2603          12 :     prs.curwords = 0;
    2604          12 :     prs.pos = 0;
    2605          12 :     prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
    2606             : 
    2607             :     /* find all words in indexable column(s) */
    2608          24 :     for (i = 2; i < trigger->tgnargs; i++)     /* tgargs[2..] are the source text column names */
    2609             :     {
    2610             :         int         numattr;
    2611             : 
    2612          12 :         numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
    2613          12 :         if (numattr == SPI_ERROR_NOATTRIBUTE)
    2614           0 :             ereport(ERROR,
    2615             :                     (errcode(ERRCODE_UNDEFINED_COLUMN),
    2616             :                      errmsg("column \"%s\" does not exist",
    2617             :                             trigger->tgargs[i])));
    2618          12 :         if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, numattr), TEXTOID))
    2619           0 :             ereport(ERROR,
    2620             :                     (errcode(ERRCODE_DATATYPE_MISMATCH),
    2621             :                      errmsg("column \"%s\" is not of a character type",
    2622             :                             trigger->tgargs[i])));
    2623             : 
    2624          12 :         if (bms_is_member(numattr - FirstLowInvalidHeapAttributeNumber, trigdata->tg_updatedcols))
    2625           4 :             update_needed = true;     /* a source text column is among the updated columns */
    2626             : 
    2627          12 :         datum = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
    2628          12 :         if (isnull)
    2629           4 :             continue;     /* a NULL column contributes no words */
    2630             : 
    2631           8 :         txt = DatumGetTextPP(datum);
    2632             : 
    2633           8 :         parsetext(cfgId, &prs, VARDATA_ANY(txt), VARSIZE_ANY_EXHDR(txt));
    2634             : 
    2635           8 :         if (txt != (text *) DatumGetPointer(datum))     /* release txt only when it is not the datum's own pointer */
    2636           0 :             pfree(txt);
    2637             :     }
    2638             : 
    2639          12 :     if (update_needed)
    2640             :     {
    2641             :         /* make tsvector value */
    2642          12 :         datum = TSVectorGetDatum(make_tsvector(&prs));
    2643          12 :         isnull = false;
    2644             : 
    2645             :         /* and insert it into tuple */
    2646          12 :         rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
    2647             :                                              1, &tsvector_attr_num,
    2648             :                                              &datum, &isnull);
    2649             : 
    2650          12 :         pfree(DatumGetPointer(datum));     /* presumably safe because the new tuple holds its own copy of the value -- confirm */
    2651             :     }
    2652             : 
    2653          12 :     return PointerGetDatum(rettuple);     /* the trigger's input tuple, unmodified, when no update was needed */
    2654             : }

Generated by: LCOV version 1.13